dplyr/0000755000176200001440000000000014200434032011374 5ustar liggesusersdplyr/NAMESPACE0000644000176200001440000002765614177162131012646 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("$<-",grouped_df) S3method("[",fun_list) S3method("[",grouped_df) S3method("[",rowwise_df) S3method("[<-",grouped_df) S3method("[<-",rowwise_df) S3method("[[<-",grouped_df) S3method("names<-",grouped_df) S3method("names<-",rowwise_df) S3method(add_count,data.frame) S3method(add_count,default) S3method(anti_join,data.frame) S3method(arrange,data.frame) S3method(arrange_,data.frame) S3method(arrange_,tbl_df) S3method(as.data.frame,grouped_df) S3method(as.tbl,data.frame) S3method(as.tbl,tbl) S3method(as_tibble,grouped_df) S3method(as_tibble,rowwise_df) S3method(auto_copy,data.frame) S3method(cbind,grouped_df) S3method(collapse,data.frame) S3method(collect,data.frame) S3method(common_by,"NULL") S3method(common_by,character) S3method(common_by,default) S3method(common_by,list) S3method(compute,data.frame) S3method(copy_to,DBIConnection) S3method(copy_to,src_local) S3method(count,data.frame) S3method(default_missing,data.frame) S3method(default_missing,default) S3method(distinct,data.frame) S3method(distinct_,data.frame) S3method(distinct_,grouped_df) S3method(distinct_,tbl_df) S3method(do,"NULL") S3method(do,data.frame) S3method(do,grouped_df) S3method(do,rowwise_df) S3method(do_,"NULL") S3method(do_,data.frame) S3method(do_,grouped_df) S3method(do_,rowwise_df) S3method(dplyr_col_modify,data.frame) S3method(dplyr_col_modify,grouped_df) S3method(dplyr_col_modify,rowwise_df) S3method(dplyr_reconstruct,data.frame) S3method(dplyr_reconstruct,grouped_df) S3method(dplyr_reconstruct,rowwise_df) S3method(dplyr_row_slice,data.frame) S3method(dplyr_row_slice,grouped_df) S3method(dplyr_row_slice,rowwise_df) S3method(filter,data.frame) S3method(filter,ts) S3method(filter_,data.frame) S3method(filter_,tbl_df) S3method(filter_bullets,"dplyr:::filter_incompatible_size") 
S3method(filter_bullets,"dplyr:::filter_incompatible_type") S3method(filter_bullets,default) S3method(format,src_local) S3method(full_join,data.frame) S3method(group_by,data.frame) S3method(group_by_,data.frame) S3method(group_by_,rowwise_df) S3method(group_by_drop_default,default) S3method(group_by_drop_default,grouped_df) S3method(group_data,data.frame) S3method(group_data,grouped_df) S3method(group_data,rowwise_df) S3method(group_data,tbl_df) S3method(group_indices,data.frame) S3method(group_indices_,data.frame) S3method(group_indices_,grouped_df) S3method(group_indices_,rowwise_df) S3method(group_keys,data.frame) S3method(group_map,data.frame) S3method(group_modify,data.frame) S3method(group_modify,grouped_df) S3method(group_nest,data.frame) S3method(group_nest,grouped_df) S3method(group_size,data.frame) S3method(group_split,data.frame) S3method(group_split,grouped_df) S3method(group_split,rowwise_df) S3method(group_trim,data.frame) S3method(group_trim,grouped_df) S3method(group_vars,data.frame) S3method(groups,data.frame) S3method(inner_join,data.frame) S3method(intersect,data.frame) S3method(left_join,data.frame) S3method(mutate,data.frame) S3method(mutate_,data.frame) S3method(mutate_,tbl_df) S3method(mutate_bullets,"dplyr:::error_incompatible_combine") S3method(mutate_bullets,"dplyr:::mutate_constant_recycle_error") S3method(mutate_bullets,"dplyr:::mutate_incompatible_size") S3method(mutate_bullets,"dplyr:::mutate_mixed_null") S3method(mutate_bullets,"dplyr:::mutate_not_vector") S3method(mutate_bullets,default) S3method(n_groups,data.frame) S3method(nest_by,data.frame) S3method(nest_by,grouped_df) S3method(nest_join,data.frame) S3method(print,all_vars) S3method(print,any_vars) S3method(print,dplyr_sel_vars) S3method(print,fun_list) S3method(print,location) S3method(print,src) S3method(pull,data.frame) S3method(rbind,grouped_df) S3method(recode,character) S3method(recode,factor) S3method(recode,numeric) S3method(relocate,data.frame) 
S3method(rename,data.frame) S3method(rename_,data.frame) S3method(rename_,grouped_df) S3method(rename_with,data.frame) S3method(right_join,data.frame) S3method(rows_delete,data.frame) S3method(rows_insert,data.frame) S3method(rows_patch,data.frame) S3method(rows_update,data.frame) S3method(rows_upsert,data.frame) S3method(rowwise,data.frame) S3method(rowwise,grouped_df) S3method(same_src,data.frame) S3method(sample_frac,data.frame) S3method(sample_frac,default) S3method(sample_n,data.frame) S3method(sample_n,default) S3method(select,data.frame) S3method(select,list) S3method(select_,data.frame) S3method(select_,grouped_df) S3method(semi_join,data.frame) S3method(setdiff,data.frame) S3method(setequal,data.frame) S3method(slice,data.frame) S3method(slice_,data.frame) S3method(slice_,tbl_df) S3method(slice_head,data.frame) S3method(slice_max,data.frame) S3method(slice_min,data.frame) S3method(slice_sample,data.frame) S3method(slice_tail,data.frame) S3method(src_tbls,src_local) S3method(summarise,data.frame) S3method(summarise,grouped_df) S3method(summarise,rowwise_df) S3method(summarise_,data.frame) S3method(summarise_,tbl_df) S3method(summarise_bullets,"dplyr:::error_incompatible_combine") S3method(summarise_bullets,"dplyr:::summarise_incompatible_size") S3method(summarise_bullets,"dplyr:::summarise_mixed_null") S3method(summarise_bullets,"dplyr:::summarise_unsupported_type") S3method(summarise_bullets,default) S3method(tally,data.frame) S3method(tbl,DBIConnection) S3method(tbl,src_local) S3method(tbl_ptype,default) S3method(tbl_sum,grouped_df) S3method(tbl_sum,rowwise_df) S3method(tbl_vars,data.frame) S3method(transmute,data.frame) S3method(transmute_,data.frame) S3method(ungroup,data.frame) S3method(ungroup,grouped_df) S3method(ungroup,rowwise_df) S3method(union,data.frame) S3method(union_all,data.frame) S3method(union_all,default) export("%>%") export(.data) export(across) export(add_count) export(add_count_) export(add_row) export(add_rownames) export(add_tally) 
export(add_tally_) export(all_equal) export(all_of) export(all_vars) export(anti_join) export(any_of) export(any_vars) export(arrange) export(arrange_) export(arrange_all) export(arrange_at) export(arrange_if) export(as.tbl) export(as_data_frame) export(as_label) export(as_tibble) export(auto_copy) export(bench_tbls) export(between) export(bind_cols) export(bind_rows) export(c_across) export(case_when) export(changes) export(check_dbplyr) export(coalesce) export(collapse) export(collect) export(combine) export(common_by) export(compare_tbls) export(compare_tbls2) export(compute) export(contains) export(copy_to) export(count) export(count_) export(cumall) export(cumany) export(cume_dist) export(cummean) export(cur_column) export(cur_data) export(cur_data_all) export(cur_group) export(cur_group_id) export(cur_group_rows) export(current_vars) export(data_frame) export(data_frame_) export(db_analyze) export(db_begin) export(db_commit) export(db_create_index) export(db_create_indexes) export(db_create_table) export(db_data_type) export(db_desc) export(db_drop_table) export(db_explain) export(db_has_table) export(db_insert_into) export(db_list_tables) export(db_query_fields) export(db_query_rows) export(db_rollback) export(db_save_query) export(db_write_table) export(dense_rank) export(desc) export(dim_desc) export(distinct) export(distinct_) export(distinct_all) export(distinct_at) export(distinct_if) export(distinct_prepare) export(do) export(do_) export(dplyr_col_modify) export(dplyr_reconstruct) export(dplyr_row_slice) export(ends_with) export(enexpr) export(enexprs) export(enquo) export(enquos) export(ensym) export(ensyms) export(eval_tbls) export(eval_tbls2) export(everything) export(explain) export(expr) export(failwith) export(filter) export(filter_) export(filter_all) export(filter_at) export(filter_if) export(first) export(frame_data) export(full_join) export(funs) export(funs_) export(glimpse) export(group_by) export(group_by_) export(group_by_all) 
export(group_by_at) export(group_by_drop_default) export(group_by_if) export(group_by_prepare) export(group_cols) export(group_data) export(group_indices) export(group_indices_) export(group_keys) export(group_map) export(group_modify) export(group_nest) export(group_rows) export(group_size) export(group_split) export(group_trim) export(group_vars) export(group_walk) export(grouped_df) export(groups) export(id) export(ident) export(if_all) export(if_any) export(if_else) export(inner_join) export(intersect) export(is.grouped_df) export(is.src) export(is.tbl) export(is_grouped_df) export(lag) export(last) export(last_col) export(lead) export(left_join) export(location) export(lst) export(lst_) export(make_tbl) export(matches) export(min_rank) export(mutate) export(mutate_) export(mutate_all) export(mutate_at) export(mutate_each) export(mutate_each_) export(mutate_if) export(n) export(n_distinct) export(n_groups) export(na_if) export(near) export(nest_by) export(nest_join) export(new_grouped_df) export(new_rowwise_df) export(nth) export(ntile) export(num_range) export(one_of) export(order_by) export(percent_rank) export(progress_estimated) export(pull) export(quo) export(quo_name) export(quos) export(recode) export(recode_factor) export(relocate) export(rename) export(rename_) export(rename_all) export(rename_at) export(rename_if) export(rename_vars) export(rename_vars_) export(rename_with) export(right_join) export(row_number) export(rows_delete) export(rows_insert) export(rows_patch) export(rows_update) export(rows_upsert) export(rowwise) export(same_src) export(sample_frac) export(sample_n) export(select) export(select_) export(select_all) export(select_at) export(select_if) export(select_var) export(select_vars) export(select_vars_) export(semi_join) export(setdiff) export(setequal) export(show_query) export(slice) export(slice_) export(slice_head) export(slice_max) export(slice_min) export(slice_sample) export(slice_tail) export(sql) export(sql_escape_ident) 
export(sql_escape_string) export(sql_join) export(sql_select) export(sql_semi_join) export(sql_set_op) export(sql_subquery) export(sql_translate_env) export(src) export(src_df) export(src_local) export(src_mysql) export(src_postgres) export(src_sqlite) export(src_tbls) export(starts_with) export(summarise) export(summarise_) export(summarise_all) export(summarise_at) export(summarise_each) export(summarise_each_) export(summarise_if) export(summarize) export(summarize_) export(summarize_all) export(summarize_at) export(summarize_each) export(summarize_each_) export(summarize_if) export(sym) export(syms) export(tally) export(tally_) export(tbl) export(tbl_df) export(tbl_nongroup_vars) export(tbl_ptype) export(tbl_sum) export(tbl_vars) export(tibble) export(top_frac) export(top_n) export(transmute) export(transmute_) export(transmute_all) export(transmute_at) export(transmute_if) export(tribble) export(type_sum) export(ungroup) export(union) export(union_all) export(validate_grouped_df) export(validate_rowwise_df) export(vars) export(with_groups) export(with_order) export(wrap_dbplyr_obj) import(rlang) import(vctrs, except = data_frame) importFrom(R6,R6Class) importFrom(generics,intersect) importFrom(generics,setdiff) importFrom(generics,setequal) importFrom(generics,union) importFrom(glue,glue) importFrom(glue,glue_collapse) importFrom(glue,glue_data) importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") importFrom(methods,is) importFrom(pillar,format_glimpse) importFrom(pillar,glimpse) importFrom(stats,lag) importFrom(stats,setNames) importFrom(stats,update) importFrom(tibble,add_row) importFrom(tibble,as_data_frame) importFrom(tibble,as_tibble) importFrom(tibble,data_frame) importFrom(tibble,data_frame_) importFrom(tibble,frame_data) importFrom(tibble,is_tibble) importFrom(tibble,lst) importFrom(tibble,lst_) importFrom(tibble,new_tibble) importFrom(tibble,tbl_sum) importFrom(tibble,tibble) importFrom(tibble,tribble) importFrom(tibble,type_sum) 
importFrom(tibble,view) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) importFrom(tidyselect,contains) importFrom(tidyselect,ends_with) importFrom(tidyselect,everything) importFrom(tidyselect,last_col) importFrom(tidyselect,matches) importFrom(tidyselect,num_range) importFrom(tidyselect,one_of) importFrom(tidyselect,starts_with) importFrom(utils,head) importFrom(utils,tail) useDynLib(dplyr, .registration = TRUE) dplyr/LICENSE0000644000176200001440000000005214121112104012371 0ustar liggesusersYEAR: 2013-2019 COPYRIGHT HOLDER: RStudio dplyr/README.md0000644000176200001440000001433414176716423012703 0ustar liggesusers # dplyr [![CRAN status](https://www.r-pkg.org/badges/version/dplyr)](https://cran.r-project.org/package=dplyr) [![R build status](https://github.com/tidyverse/dplyr/workflows/R-CMD-check/badge.svg)](https://github.com/tidyverse/dplyr/actions?workflow=R-CMD-check) [![Codecov test coverage](https://codecov.io/gh/tidyverse/dplyr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dplyr?branch=main) ## Overview dplyr is a grammar of data manipulation, providing a consistent set of verbs that help you solve the most common data manipulation challenges: - `mutate()` adds new variables that are functions of existing variables - `select()` picks variables based on their names. - `filter()` picks cases based on their values. - `summarise()` reduces multiple values down to a single summary. - `arrange()` changes the ordering of the rows. These all combine naturally with `group_by()` which allows you to perform any operation “by group”. You can learn more about them in `vignette("dplyr")`. As well as these single-table verbs, dplyr also provides a variety of two-table verbs, which you can learn about in `vignette("two-table")`. If you are new to dplyr, the best place to start is the [data transformation chapter](https://r4ds.had.co.nz/transform.html) in R for data science. 
## Backends In addition to data frames/tibbles, dplyr makes working with other computational backends accessible and efficient. Below is a list of alternative backends: - [dtplyr](https://dtplyr.tidyverse.org/): for large, in-memory datasets. Translates your dplyr code to high performance [data.table](https://rdatatable.gitlab.io/data.table/) code. - [dbplyr](https://dbplyr.tidyverse.org/): for data stored in a relational database. Translates your dplyr code to SQL. - [sparklyr](https://spark.rstudio.com): for very large datasets stored in [Apache Spark](https://spark.apache.org). ## Installation ``` r # The easiest way to get dplyr is to install the whole tidyverse: install.packages("tidyverse") # Alternatively, install just dplyr: install.packages("dplyr") ``` ### Development version To get a bug fix or to use a feature from the development version, you can install the development version of dplyr from GitHub. ``` r # install.packages("devtools") devtools::install_github("tidyverse/dplyr") ``` ## Cheat Sheet ## Usage ``` r library(dplyr) starwars %>% filter(species == "Droid") #> # A tibble: 6 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 C-3PO 167 75 gold yellow 112 none masculi… #> 2 R2-D2 96 32 white, blue red 33 none masculi… #> 3 R5-D4 97 32 white, red red NA none masculi… #> 4 IG-88 200 140 none metal red 15 none masculi… #> 5 R4-P17 96 NA none silver, red red, blue NA none feminine #> # … with 1 more row, and 5 more variables: homeworld , species , #> # films , vehicles , starships starwars %>% select(name, ends_with("color")) #> # A tibble: 87 × 4 #> name hair_color skin_color eye_color #> #> 1 Luke Skywalker blond fair blue #> 2 C-3PO gold yellow #> 3 R2-D2 white, blue red #> 4 Darth Vader none white yellow #> 5 Leia Organa brown light brown #> # … with 82 more rows starwars %>% mutate(name, bmi = mass / ((height / 100) ^ 2)) %>% select(name:mass, bmi) #> # A tibble: 87 × 4 #> name height mass bmi #> #> 1 Luke 
Skywalker 172 77 26.0 #> 2 C-3PO 167 75 26.9 #> 3 R2-D2 96 32 34.7 #> 4 Darth Vader 202 136 33.3 #> 5 Leia Organa 150 49 21.8 #> # … with 82 more rows starwars %>% arrange(desc(mass)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 Jabba De… 175 1358 green-tan… orange 600 herm… mascu… #> 2 Grievous 216 159 none brown, wh… green, y… NA male mascu… #> 3 IG-88 200 140 none metal red 15 none mascu… #> 4 Darth Va… 202 136 none white yellow 41.9 male mascu… #> 5 Tarfful 234 136 brown brown blue NA male mascu… #> # … with 82 more rows, and 5 more variables: homeworld , species , #> # films , vehicles , starships starwars %>% group_by(species) %>% summarise( n = n(), mass = mean(mass, na.rm = TRUE) ) %>% filter( n > 1, mass > 50 ) #> # A tibble: 8 × 3 #> species n mass #> #> 1 Droid 6 69.8 #> 2 Gungan 3 74 #> 3 Human 35 82.8 #> 4 Kaminoan 2 88 #> 5 Mirialan 2 53.1 #> # … with 3 more rows ``` ## Getting help If you encounter a clear bug, please file an issue with a minimal reproducible example on [GitHub](https://github.com/tidyverse/dplyr/issues). For questions and other discussion, please use [community.rstudio.com](https://community.rstudio.com/) or the [manipulatr mailing list](https://groups.google.com/d/forum/manipulatr). ------------------------------------------------------------------------ Please note that this project is released with a [Contributor Code of Conduct](https://dplyr.tidyverse.org/CODE_OF_CONDUCT). By participating in this project you agree to abide by its terms. 
dplyr/data/0000755000176200001440000000000014151641776012331 5ustar liggesusersdplyr/data/starwars.rda0000644000176200001440000000663314121112104014646 0ustar liggesusersBZh91AY&SYe@$2[W@FH}fWs$GA`jTF 4D2jd2 0ɦѠ@C@4SzP& hm54=@UMjb 2biFFh ɦ h i h4Q)S'iM 4  &#LFC0FC& C10&#F2d$HM Sjf&&j)zzi444@4h4 G75hTsG'}šZYϧä˕$v5^rWb ̒@?SLbII 8Iv$^UHB$ ,$ H}rgn`h8 .Ǫb C .H5T$ܲMC-e T\ܛrE.of&|I!ҐO<55wa[nVҟ+L}j7hYfӗֻp0qn r}:Bu m$v@܆ZYM]WViFtd,c^D!('4&}UYNP+-Tz{,MN2)H\,#g;>Z,H[Z^ك|.s2$[V*R3ifrѷUaF;Zg”Ȥ'vpq9 BN$!갊I0 B@H ΉAa H;Ԁr PX,DEEAV((*AAB,"1XAd&ߧ"\EˡY+50 (yB>&'0URzZGZ|Ҥ+J=.rqpz7O9LUvNY[ jdx60ET`=9TFdžY`8fM'CغHQR‹U)נȨYfHe¥)ͮoUƻt-5& 0$6"@/HK3@)Dʍ]Cێ h8aX+4P9fh0u[뫙4rSg"^eD$@:3lE$"T?a+CB`z>w9wK`YϞLaMF Seȍ 09T# A IIU J |B6% 5&0P&0@&:vdЄס7ebG૳V& 6h=SJtw9u)<'$b5Bc*?C1@h&/ZR!W(c1PZ"Q)KϞuN$~d"");V戥] '8+eZ"c&/ӱWͥ&Id27@L3YPjNt7F4ۨR.ъ4 +uhF h!}H+|K`s8TZ2Ѵ@)!v)@ h@r 58$yNCŒqIDaG'A.B{$X)$AaD   @:ʱhdOLI#g?~OaSB3bL$]Eid|66?>z OBbm94b\vNZ`{X)t%wU8$&mnq+W :Gzm"*kps/NђͦBaDU UY\3m坷6צAx%*`A9f.d3*;I5-=܉Qfg$MJP#I4N  chٌ=C7(aIBRi"W6:q&%1H1:/~|&iyBHㅲn 0 Yv "JۈWP˸g*xƲbm bYE,@8kBK{\8cpr+ȑx> ]äjl "P  gYh/<^& ʮзL8 p\]je-MSʠEU!  1@MQ 9.E2w-11ët85FuH{Ъ "k=o" ʂX$eAPRX $L}0<@16g_ޠQr;sMp`T`bS#mTڢS,hwW洑vHEH;ƶ~*~X|͑d2Uqt^ʛxodz-$!Y=dR 3PN t{޶dถ1|EDQ` @euNSt)=t|q_?Rek 29C[ªaa~$ 䖇RZ ]SˠFJj%Lpn!I$Lw%1sU#A>ab5+gEo;fЕ ෵]A"@R.-?[kѮC9|ՙƵQG]9D@>tI?;b@"<Ӑ`2^Acrr,&y7id#Ed`30D&^.nJQĞZHy*e koޅUA[(>?dI/#aAiĐ]B@gk(dplyr/data/band_instruments2.rda0000644000176200001440000000032214121112104016426 0ustar liggesusersBZh91AY&SYH,s2hP@ޠ@U?M=#b1iƚ 2h 414`R' 4 " WEU2؋%aX+}KIm~cu̥P A#8L}6yE*1˘(kK7!F)7UրIz!/) (RȲ }MV ث:ʻO=‘M2(4&, oH)&Z X5? Z"(H8[Ԁdplyr/data/storms.rda0000644000176200001440000013312614151641776014356 0ustar liggesusersBZh91AY&SYrHsEDH1 A$EDLd`=}{'z)}l\6n@uqg㜏-km'= ]^δn۶S[e 픻j-*W@5*] =@( R/`AP Pޭ仡üسOL|^S$ (Nx5<4. @;9(JHE}<"=RA@J@J}N i=H$KszO  Z@pbt@{0= 8+ |cqNsdoQ! 
"hЈbOzdjm=)*4PGM4oR< 4i F4h45J6=@ԁ?R~&4b12 2zO 1 hh =RA= ѵ H{SLh5)!F*za?T?E6Qↀh i F ѡOOQM5&f2dhb44?ٿ&ч]r:\9ۛ[Ts mI+dm 96*66N*Z@`xnV.ak*A(j.aRE躢AZE囕抢VXZa兖A yHQ;??1?MQTJ_Rm@>!>EUЧP~TT_(ORI,`WB'ؑO(> I'/Q_ᔧ*EGOHT9Bؐ]"?HgwV ,L /D@ďLR0C,15J3>XO=na!PU)hBR"{f B*-Pn3(Y\ LINKtgz.So%;YāܙDzM=c금Nz$ O[FhI_/qu L:\փ9U<|LW`%ZIJ\ NkCLL<%x[2DDm^^HΫ Hsu ) Ki4T)b#]%H򞖲ÍU5y5y ͱK GhJPDuzOV,i?? ;<^kYtec<Lph׮tPBPRvrT)YGׅ_7:%۷:;u5s6u3cNm[Se^8wNNbuxWk*[ײa_u&L1Q(F>mfN(U Ӄ2xVȶ;ju\]n[OVU֎ɴu-XY:'X\\f.iCNbmMD<1]c6[U;[ljm^PlE̪esH*mCaJWZX6uNئ#Sh4U+_v6b6j,գ`QTZ`[)fک&-l+lCdllmiMMS [PTڛ!m*i#*aMuNe bm"jCe"cюw9l[Fj**mF6l#e2[[% PSbچl6SeFmQMFVe+iF`G\z7:[+`3Kh[ R+`֊-mjڍ3TvXWm.[yiϣgRBx!&X T6ONu^N'n^9[6N;\#iW}G|4SD5@`e$W5^2ڧNKuغư:6/I<\6?+sǧfzA7P'I `nK"J|z#Zʾh{I?|$$+=AwY;Ry;K<}w=\7|W uBo jEB?Owr?;z[W$~(<TOIbzsp'[ ApZ'~kM_vQ6׿_ p^Xyc=95fxs,IHuY$iP L$ a6uhwԮ#\Ԏ̆;r^4DݵSj-^:&GP<38ɚ.Rmv̖mKbsR:öG4*KSIU"]:uBc}G2%+O,ZbNK#cSdvԹ﨎N]a<^Mbג&(AQ܈QAɱy<{Wy󝧔GZxz)Hig@с?&x l52/k^US~\/4WU<[=z9Fxnlhۖƹr\˩2,wvߜ fhV}/Vٕv͕ydyd{VJOQ SѡEWI#z35XXlX!Hb7*+sœ5{jކz}ǰ?JGo0ؔ`#ڹծXzryd;Zl?jye*x=s<ke^;͙Sc#ӗ}Glօ9 Y 3H XWkgttzk@NxI**X5[ 8,7zJUm6"Dwz2]{986!Cw:ˌڮڹrfFM ?7@΋ll=Z>SNi㍆Dy+b9LT{ ٕ;bZڏ=/>^\FCS H6Cd=6ح٦N|r]o km_KZ>հsS? 
kVL1kkn^|sMfd+7O^_^'mxc>|[%m[k^P6-m^̝I*@QAa $@Mh-X_ܭ c>=,,!G¸l'|9׳Kc)ګѩ!yduX;6h4QF=x捙G7~ߟ~IxgڣӗFl\#hŢM[j(vIΕ9RaO_O^SS-'Ua㷫Ċ͍Tց:WTHPP}SuJh^cjWpNlX^Mn4bx[i6u-}f);&'|̆IUXؐH7+6x>բ]Wy\ʽ:fz|q=xw;DEKhe}^[m2_թ-e+ZjtJAɮj6sLFnө1PKJwOnf$9egNjv0زNx`E1>m{5m#d4hHIkWV`H9Bs'Nd``Tjg3#g+ :Y9x)1ƾnx7nj'QW+6*py&F^F *-'7lFC/N^yie6$X5} hd}SWپ9>wѩs UԾĂ-E$QZK܈dN"2 hHp]NGU|]Kߔ/=Z!j,lAu[۩^>G哾ڋӤ2jig\ᴳ#a2v֮cydZ;=Z'0+Эxk_^4HXaDY`P wԎz|2KϖU߮I6m}U+[MF0Tb5|M6)|_v _^W|2ڗ`ut9ёשٔ *<5p;Вe"0x!C'Sψ[߃6|uenfNmlںĠH_} $HWC9%Ywד[MvxmR?~ǣʼn^OST;`xrs+W$uO ]^Bٶ0ö*;km啲znI Bi&N߀N˨&}᳾J􏗚}M{_5{ku\νwty+=bԟ_}5ѡzu/<`ړenh ;R>V:}t^cSd&ا OU玴qՕysm')&žm^?FDUFVTkFVZ4h,h*LZJQcR[lFj-h [6[X6ţmhZ1[Eclj5bEljQ#T[cTThFM&MXѣX6V(XQZ1md4[IEhƳ\#lEQbhՍ+F-TQlUmEQVmETlh Xֳ,ZMFQ6mF4hشbMbŶMcQ-b#TXIEQʴTX%UDhبՋc1[ЕcXh Q*s̫eŢj-En[Qck\cRFnUVܭj-nX6mIb櫚*FkDXŻ4Q (,2lTQ%X,mQF6LLlAccQIhk5% -͊ Εskʍ9r03gvʹ6,m1[6m\Fۦh\͍nb5PcRE]cˎ[b\bѺ[FsS6ۑ-wurܮZZK\6ܹm`G-5FwFܴ%FʹrtŢXͮlEFXh6wq[j-mmmQsj ssQW7)75W5Z3QsN5sFh16.sc[sE-cr4mr9rƣ[rѢѫrlbJ*ձb6Z-s\b-G Sbu֍[QFZ܋kr,EJ7*ʹFNDѬDc\ы\ۛErۛ!mZەF6XnXr泺ܹMѢE&;+whŷ6#W#V]"k`Z;ٖb(-IF(*#lccE֊*6رb6+F1j4j6梣d9smW6b-sZƴh XesWw\Q&kFc4DZ.mwj4QAX1)6.rԉbS6#lb0`Ei#kMɊ4[ tL*-QŤj,SMœ 4PPQF.ZV6湈ṵr6-dJ1l[hk6**#5ERhUʣZb16J6Nmr\c Z6Ecc2]A,nh܃F1X#h0hH,mlc$mMd1h&nEd`F,#i*F5h,i6M66(  lV2m[ƍE nhڍ36lٴlm ~4*~fB@h1!2"i*:.=g^kkfX[>]5\̥77AyMC#{u~/rM E5XblEF2/jTyD U2LW\ ]׽6N[I$܌ 6i%Qٻɦfn6K=\h1vjY@p(qا4`~B+ɵ+(vyWQ[':MJn/*:!N3mOmvW~sj>H|kSvτ~t(!(R?TShJVF!Z :)'ZJt3)Ľjc}r$ܩ@7ͤj }vy3φ..AS]CDQ&T#ȰXl fFĠqUjTh?RKFP0u089+LP~2.W9Gx0:w^_NVon Ͼu=+^X`cWrE[If%"̤I崱^}|mMu:]^u]w~q8[Z/%OF 4֐l&jM=eEН  \Khh Ts+\*Es.")21H"\TEuOErU25҈#CpCP]uBBԭ @W$$̉MJSE t=7$+u<$*S#Ar!Hgopr]\­f5 %=H13\\y]fyyC$,"IpO*"OTQ-B,K(C$P#B%zzU I"+fy4+ՇF=0IgS5*$A,T O=RQWO4J45u3uȣBG5L2C"7>[rf5E6N"vٝ_0N:es4ҧXast]'^ӜɪۅlN) K}gI;Zm^=-shi*'b`tȮ\or9ny.Ȟ\rprW{9*hu^l[sUԝe iBB:? 
1v/q ֔ % ڳ+vQNzG"lG̭X)ig ѝ"TM(YbGFDE$ONT^(`$M=!6ʺMvb,B 6$tsScJ/KRd^dBKDCED!5cSҦטKO&dzdnLdLϮWsޗIWUM490b[?R~,&#X" }H؁r|AD&5fa"ď>05Ƭk%XKf ҅Yzno M9brl OH $=aʒ),Yll^zcחDi*ŘZ߅61 s 2ss7+\L% 4@H" t_vJKvQOZb:GT[p/ɭ#4U<*!'ʫŒs$ k'@|ݼ]v793Z]0c~`L 7!w nfQ-ҔИdV|qESC{|8_e&y<$rU8(> le-0Zh`+5FKZ,,&FMa145i4aXjV-5{e'!q1-&F,ȵd4j1,5UU4aԙhZhZc(JjaZ-+ Hłѕ40ԙ`IRVV$łj,_V>3z9ߑ]`l<[6;Zլ!SP̋Hj8%xpt.Z8DkfES 8YT-KsG> ~ R2#| ć?Ih Y3ʁQ9Z*"NroHxI'HϮAշbv2'Z~u{>WIߤ.mSFUujTjj͘)(4XV-Z5\h_smF&ks[ׇ9ZܪX֣kAQsrµs^%y3Ċmm|{ļ &i*!AvDpme3yAq6.5˞HugJ'nσ*qiw6MjVVrmz_6ztW=g ͱcdFԻm^\v\eqE8"5$bo.É4/"&^O;iE3-.|Ϛݼm˵f񵷊">>r*o>||%טru=BhԵAhљZaE [2Tw*xtd&*MI|#R+2l TO)KCK66m)Ga:QTGI䢺Dv;T<0[#`\.]yȝK.EwJU-A܆.q')p&I"iGbyj+5!d!$cIzhٛbdz>Ny1IQk;dtΫu*~E]4h+%M00H3. ӻ3̥) =U/ t*qW Fe* `65*:X){@Lu}RK ;2km}f^;/ݽ1Pݦ7qL/NUp}aX^pfN(¢a pIUI3L*o~)a{k1Ewwscj̶i?OBlWfyS_7eE~1%WUEeE)ڋ]''QBFxW: ӑLD'YP]JM'Up8+QsT}2}<Kj'؟%:8eX%OZ$w(಴zqN!R`*Qwb~5 n&;66:ػͶǗuoGƽ^ݽ*'=W+\KǾ0/bVcsb"?bʝn^, 'R"4NOq}jԵ_"!p\^{6GrxSWS9;ujv}EҝRugB'SW)/j}O*<<|^KJnpѸsʞ =^;y;2DQ_SԿFU_ЊIJ$hYHH!t^߮ݪ'IȭTOTWdGԁ'I g>~ܣY%a}, AU~jDԽ>I>SRj#OS@_` "}USc1)\CE"T% &)vFU?DQ .%uipQZivt^݉}I_"^z(WzD|-_:r{?[tT N\|znnW.NwwJ].)6BmV] Kyx$;JyiIʑ'#je.bl _m ~2ֈWD|9MWE>~8lqV 9n(BG5f9 Ir=mG )x~OD"JvzW=bEH~ tIU_{C>%#}4xy}Ax!WK@_g9m8̓CUm.a.h.j4l+$2C]uE Q liJ }6JT(# G`a {T.aH$Ņ(Ĥatc9B1K"5.tjboZJJl[|1z3xU ^#1VOsɭI[eRS|ݮ*a_] , Ewuov7ܣ;)rmW U{y_CybA6-=郟gv7C7w Ӣӵ vҥ{VQop*bE*[`kj( ga_KPxHRl>(¿3p 2ƐT7y ra؜z_C愨hyP wn~h|?2P/I'=U-A@/ZWŽ>ʓاi"/?QjUju9ʗP9@ꎧEut)>JA_-UJyU?T| /5ӖʣEK?0'QOJE/T~7 oydߏI(NU]锑{Ȼ }^/YL WJ/rURO~:_+KA'Wnix_IO}qKʪ}0RGi}?UMқ6ڧ!1XBDr 'REqRr''jP˶}wN];u]t9^J87%Y;|}~IT@>'%?"'j9mK.\ z@Tz_YwJzQ%<Sr }rJX(H|%O/QS|WJ@5UJPt+TtOG(+clrCu}zP1.;ҝ7)>u .m!RTPQE""oW{^◬UׯJ1~ሧqi }/{B!_/~T =뫖J+"O[wT~j><~xa~;X}ֲaW/R'bx"JxQ^!ih$i9U]E8#$TO~ˆ5rǫr-Q7&釬C/UmGn\KuR(E;k^_"^"Wi!%{ˆ^n]N1`cFSz!]΃c2%jB\ګJ]YF Ch􋛗7!y5BEbװOhã^,ux'8p9iRc~n(^Na88b<%ӹ+-\κE;^ʺGLc/P}u]j{,#aP$5 rחawuٱ^T0t膍Yn]dzH̦kjt5CRTnLmՖAfCN. 
رcZŚaV\ ae58euSemacgm)[=mKΞKl6cJA Sv5]lFENŝ",JNYV'vaM#0fYMLf-NpԤ$Ztqic[Nd fvgi2j3A8MFh;FSݙ*YHB͜g2QJsi6&j&BnV31fJEJ[dXQpݷ g V-rldxgmlg8%mэꍥU]BSTHb"Cm6&]6M\EG5[a-4 Uuۅ\Ħ3;vmm:R:RhL1tWZ 6 S.Ì$6b"&{$Nζd5 84ZsjU-2SgPN76]lA&--Q1J-#䑣4:79 rjuwIYTLēɫLkIɅU]:vR"=iE2i"n'}0bbK(00|)}4lR{%+1LjPMmJ`Պ{ȫҫxbLnlz݅&Q&/%!dy)+<A겧Q]mb$V$]XΒ}smkɠb2OoH)!PPRmA_^Wxs^,o .s^ Tչ mMېYPbÇ.L%Xē5#$gb| ;N a*%`,pxS<RY*|% RJ.c60C7ڼ}﹯hƪ1*kkmF6EhձZaM6Yf iMC¤D4i42ii c(LB%2SH a "d"ҘPk402̒f0J)"4"$̈́H i) !jBI04bƔI3DXaF0%IaDI4DQIEHJ%")JD$d4ҙ)LLf2 Q H1fК̘1Lf3 #MMe F fR,FfѤ"Ii651i46LIFJ`i5͒KI*L&b&`D6K($ Qd2h"0hC6dIFH$MEFJFLJ6($2kS60cb#"b4QIcITb&6 ,SHLbь4i@XԘؒ!Pm&hhX"Ťbƈ&ѓTE5kES(64lآU*1Ib&2Ech6$hc%5)3F&l6Eb)$(R#ƶb&)2I6ME%k"6UتI5fTl'I'zQW{|wۮr6, EaL Z0Bux#FF'm`J T}<wT?/dI䨉QD맸tS^zo)O_a|OF⯋~xO!o_T=:'yȟ[zǓyzHaXEzm^|蚾^<]H#=zMH({_n*Eۻ71H?f{1*jBt01A"[Ief2C|k=)}rN~{Փ/Bzs[]UΏ8y]_?GUk㶯G6ּ% e"Zi)pJw["޺E9iTˈ'ؓbB|(:R|o\>#eoC[{m_!v(JG^]zk<%z 2ɜ 0~~{Ҁ DtZJjJkhVmh" ۺ5]u]ۻI\4PJJ(*@/cT1 $$D(KO:u8G]5Ufж4mV:RSS֓*BB4TiFFHF=M4=@j4zGh i=FF"!ҙ?SD<i 4y)" &"! hD@BLP&&jdih4 h@h44@44h 4ښdzA$ PM=M@zڀSM A@hC@hzAjSFq%$='} KG0u_ոMO1]>'Iڧ>wcтM\=/M߰~}]vKop̰G>u™i+ֿ﯆+G/o_}ߊͷAc7oxN۞Y^x>]s|}^{m9icuUg}L뿩*|öy>%U:s~mt\/Fc|^㜢bJvf3Ӻ;eyKLu;Pgkϑ0 PY`,Ub(@Y TT`YI "*)",$,EU((0U1"Ed #I YTQAdEX, "(őb)Db` )E) *E*,E R )dXD`)"1!PFEPXXBP+!R*RAAEda "R %dR DX,$Y R**ȰQB)RPU ,HUR,V"AB,E1E H,F#XXE#djDAD`*X,Y"AB E #  6ʀUd(XEARA@)X,bŒ,P "( -T,T*U`EʒQYB+$P-!14,¼hsJ! 
)PQba''$ XJVXE I* , F26lY\J BCLdE b`b@[VE"*"dVJ+`"@ 1%@ЊE!PHDd T+$Y%HUI+Y 堰1TɈr HmQh) R(TXBAAH%"ԀX$Y"°+ X"*,(mDT@@+H%%I+) QT Rk Y` @ lXA AkVX"+"%EE$hT+%IYREU ,@,X ,PD*`J2*Y*6ŀaPj,VH6T 0UR1!c%`BQH0()(, T"B,"*Vb X))+ U+!XVJʕam$IR)ZV(,+(FY QJ`*ĀZAZY!PZTYȪe`XPYPHU B`" H)T Db0XEQF*b )$Q`*X"EUXA`i* (dYX-aU$AXX,"QTB#YVP%HBH *IP%aQ @ V@*%JDh\H(SiL+$:ETE* +RRf(T555$Ԭ!YDd+"UAT-Ɍ"IS) 0P*"VA)*V *  dX "-(Yc"֫bEZRTX(Vk)RQ+*H*VAaiBVTBm$,"ŃlV( <amBڵ+P+ Y%-)*TXKlPRXPQ[EPX -'3AP"Ă@PQAb"1AkF,E-ւEkX*4aZ5 (l""d`ňIB @+(,E _=E!z>Ł,ҔcFalX)Dc1PQf%[U`&f Ab T) +ҍAkUbҫ`V RVJ* 5,(BZX 2JmTTDE"Em,V2`UUV1DQ1DVTEUTV"(\!z[8sLaYZYRJJȵ -KjV"TZŨ@0` $" YJŕ-jV,Q-)YV$ꕌLVԁXI0$ 1XTGk%`- XmiE12dRc[h bV*,(ĎfLE P%QV1!RTDʔ 2,G0UTR*EHq2pL"&:l3E4 ]h + vUb:@Y'g,g=2b(*dY U19Jv:ҿ6j8ł2gt$>#nrkUN4XۙBmQU)X*.!X*'m?P;g;gO6Mh0JR(C'[khL f53J9 TmBXƜzuї.2e hVq:f!('9W* }-_guCjDg\OD-M̨.08S| CO 11`@$sKns8lp\@ 9F`w} 0)N&4ڸm5K8Q;[r19{|3TFj *)TFJbă%gs (٨2)72ޫV +vrMi2DPFE 9lbV(wl-ONjO ;tCRvⳚr̔ʹ"KYFjKܡN7d1R%d;e_w 1@ԆTPMu1l=ҲtghT feZr`(S_ aYF) O,qR) {S|*(OD gPY- *dMfqP-OO]_,5B3[XQ<3R u, ٪˯k}E(za^ j_k {$֕0\Iw@|[=ye=Oa{J['n橋Ju12_ry(,Y۞|uN;RX,5*/QbzC=ԬxMDmDQUڱEbIOd(w08-SW9|-DghL 7vL24JVP S{͆5Q!ctS۳;>ҩ}ݵOUL"e;ETN[UT[ W%eV,otQEQ֫Q˔j 2J0X V,ʱyjV .$j$eTH A10]²M$ ˺jKmZQt !m(Tǎi+#X$fڐ)idJ5ȉզ5[1XDTB`l"Xфʈa0VjTZ4 @3b֊{f_^] e ;|S涪ږvar.6އ1,-j+l mV>)r-)k01ȡlP%kqHlFmmE2A 7nhxQGYs=M73Zp9Wc)U-eP5. 
y2C488u+zp6 ~13mea(TY+ חk-&%-rIi$ CZn轱eN^P-*i^RDQvEAf!c2CKڴ/3J X(§Tsa7 XU#dRUPUFp`oc*e #Ôxo ̨SҐi3]:f( &Wn ׌JT獕tXJqe"4h[Mqリ`zZm(,E 1FKQ4+%jiP|z`ِ bK`s(aeQgv]MuSyL6ZbWR&(]m*9BfDPQJԨ M2*`f#pn#%y;LD\n LPXB1E&jCS!{a mqԱ^378>zǤQ3e1Ak:^хK]*>m7 >hT8w,)UﻮkxTJ H8 u`%XAxD*0f9U<&"Emt8v龿%8)8cq"ΚZRaOJUaZwJi='VcY ^LbO)+;|3U8eE*j,Lj(mZ%o&dmi{ˊDX#yy/w9US]A.&󰡅hX߮  U\צ+\:M*g 1X qF"N[ET-Rf1W좊5n2a£1FQ'x}(κsvh8RJ~q}B.x e*Mƅq57)]clvS^RVCP [NxEm[Vͣ M0R0 2"WuJ,G7%pTT+,Q )S XE`3EQx]v1[Ղgs> q]\k[% Q"rBT&"mH`@!mj Wͬ9nPR֕ F'Uc ah[E26$J53w,7\3t.2/_(jqz%r>sVm,t ft]h(OOGb";1\@ {¦-uh,PG#=Ԩ/YUfҩZCXUTkVj3]t 3p7w4mS|;LfG9>-mFhf)ZFM48B3y ip@j >+3q[ ڹD@kuX[O bfV⢜n4(գ\ adm-G* xgxlJ,REci01Dqxؑ6:*& QȉUE "pOUX7LDKB \?xA'͑"왇/sȺD89UPM1<+--5lk7rE ~MMr58ؘ 9:i ˘q*0TM<"m>HXT G)!D3aJxؠ3Yn&:u-*QyUɻWRc!e)Pl r"̳9︳i)hBh¼^Qm(Q-\mЧTcXL),28Pf X&7Lh]VytUX90)bT}h_ޤ&1"7FD 0*pҘF 2<^,'YС^]\G3eɮfeuCYYw CV-CΨR(+ڝ~\;25S U喌r0cZSv̈́u{ @٬l(%90##gujmQgߵ.6m#N D.S4tMc9|Ĩod}UKvSU`N5bF#B`h[/Gn3xɮ0ֈ1Gs){K17(bW ԲQѝ҉{xοk+xؒvǶ٘)Փ`eS-`s3yi].'O Vbԯ"6G"%W'EeƲbQbۀdUTb#c퉋 (,6ԾmU`EWк;2Ydki .e@%%2Ü;DnBYvYV/X3jeL]3MQcyd:--Zٗ& AD$2赠|K0xr=ʄsU0-즹9}LP :(cm3rs*$?صUBBMbn71q<ޞfUv=νyM, )iJ+Q6'rcrE%5yGe$y"54cqXq4O &Q}1'kB r CRpGOcr΢}Zybb3!="ˀ]|eէnCd2";Mk>ç +Y4r9z{b>$IZ甹8"H)JiUj q5#=WV:g72HT,j0$@Ϥ-^ZYAf2R "ͦx͵$h|Y"Gb~7_jd&E# ۤAt#aRѵ`.>:q ԭ!M.<+RO MmF)d&W}Em(M .f-em{:7sZ(%G]1ïHiTf\AKx !3F;2XȠ/[$Z&@U *S.Tz( }4(t4:@" v ڻm/oNuPqvAej&QȂ;R9R<"'%cq HCq01 {w Gu!-ySQ5f7't2n᡿Gj^EȤ΁tf3`۽#3 6|l9 `O{N&- COLh]dEƔ=/xxlwܚB07km:BF"PC@_i{߱g*>HfCcz[fbPLj)*/a\Qakd+TW0 *O%`cMVZҌj!+FܒQ5vf%a+ $fQz~@rR320"8:RӈGPAu!1 B{uv.|zw.gh3[\7[?)JW{<DT 'ҵ?߳ctCz01/iqy;W oS<~?gϰ9yYU9}O-j2-5f߀;ȿ~?m":tQx|nam᧕DDwq׿M]zsy7ۓj8\LًÌOOG<3 9NqGߥ^\NZoצkvᏠ`Z[zoJWj*'>'kB#jg|N2x#6f0翲r('GqyVϖ,H)}|2ᖄcůVm]oaA˗0G卷[wg폆7]\B :=><|lY[4nb%= ^Ær:_^=6Ӓ!4/^y[&;z|>wSd_ǚ3;,9e?gW fDp0>=DL~moT8j*eg8? 7n qzq:{龃IWuφ7M8&}*ΝYMgVnMHؓ3&Y Wm[Ʒ0Л^To˞@DaϘdFIGVh'S=<ǞevĉQ]E |N7? &-i 5Ppf7R.V9T0Mt+᦯uw]D-;>I5f.nj3;g,% m^r:yNAj˪Z/7WC܊b m;Uη죮2n{6[x˸|0yOEfegf]fM[!Fk 2ZeE; WRu÷A\ɸͮ!#jkꩫF- .j'ުdj"s 0®ͳww? 
_{g/1ͣ\.3!(_l\>sW̉J;BluX@ҪU%ժV)<}/]=VuϚd6ߎ6년7<3<=7װpX߇瞷gi*Ng L1fokW`##b8 СEԯ TWej[fk=/% ħpdgU-Yֲ`e647G3T> 6^mnEhNgt0}O-1xvȊL[\!͔9ۖ*AűzQ݋fTs㾵Nһx~Ҝ~A JUa.3%8CFjc~0YPNMK#ő46MU'xbiUHR&b esYFr*īكYcft4"[Ub[YFjpk5eKgJ\[tz(SNeb O96r_Q !i>-Zs#)=əbצW7Sߏ[sEp)])zRtףSw!q"mdC4 \.t1ڻ% F0hOvʋ*75%hgem۵\ zSDוֹ(((W,4,7ae yAݫ؅5(nnݝޙچ R2C LK[U]J# ԟG‚9Sr Y{%-R|\E:qUa˶n!i<ߺx=zϝq՜f+`.2L0vdU6q.\ȕ읍 UhCV.f,[7QhNlmj(o^FnGTUN.ØũJPκt7%t $.T&^hӝi/b!ewM%GL).vHzfLRɄN/`:Hu&¶źa<H뽖E3W3Q!LPYw*; U CiEt[ʡ&fc\j6l2Y04m[{phqP$еm-EB`]#U]j< 7gv,u*fM61 8q ڵYf\R.N̡plծd񶮉UG5x5z/@ɂc*[KuZ0RΊnP)s^^{]68CsdJ3$G+yKڬBJfr.:׵^u+iZ&SysbF H*o:T)&Bc|^"~k[[U 1#4;BPÇ$ L̯Q-N2Hbi6lRf{ffո1St$k+0 %Ž-Z qLWZ6h=iAa\굘Ch oŝA'̝ > }+}ޕV˩8F4Mnb8O0 [1 $:r-6WQ1m;赯ܪ`83q" 5 x[eL8|c3D [R뤞me,2'7QuZexi*5{;OTRڕS|EV&2IDÔ@])8uF,jAg71&o u™roɯ}Vϼ0 [y#b&y-֋{H"%Sl íUsr^rPC6+)y}i?=>BF'r aNLEFij %D=vmcmt`RFT>t;}F܃ $gaGXa`UdB![ W5ar$(b4$#+9<ґ[TXg,}s'Aܛ6U.8Wvαxf.L se#<99vUvj*m ڑRʫ[k䆆-OatAm ̬EbZpsuzns{k>Y 0CBaTAPzjiV\Fٝ,Fm*j9|M5EgJj\_;ޭy̫uJ QN [W-E IZ&V8+ՎWcDZJn:rUR%V#=򚽝#ӳ5[B@@A#mjd]ɁW"q&i-ݒ/1f ër1plgH(8$OK -*vյ鵎ܷ+*iJPKү-rJWnq[p|)S>Js")iJ,4jǬw(Z okƩf/-?4P p>d*Xvy3.C9BMᠬF|=ffup`VQh2 =ǀ 9d0Jf i[܅o/OLbu^B (40[ o" O4 h(R>ᓠ^(PD(V0) Z;9 {j=2E[U)旯AB49(B0 aFU&QB zWY/8@jDY"vE{hOy64E&8]Q6nzxo_Xra7t*G?7B_7c=V(68=PhX^1^+$PL_1 еO"]COAomE"vDSmRQ#sA_B;:%jє&"`I2Xo ؽ&ԧ@ͪ6JgT5 peDHL᠍1 ηtezWYo , 2UIOk~cP,>syh^w%x7l:uzk>P[V$<]KPbPM4|ξ/]|n6"iB+b BC`,B2#|s@'h|YVN܁˂{8sڼ.;#b11_y86W17* u?Š%AE:i}^݌t{}ٷϟөDE~,(y`TH$R(Jd ("% -!@U(Q@Q +F)=y[|Y38qiŶT JtU?^>}Q7s`27R»Nau-)1#EkRI9`)jdvSo%@mG谝},G-:mq9.kD 1COGraQr"Tzٹ]'֎6! b: Qm`E8q SڔS^"N"wrsJ@@Pe%ua m~X]aՂ{ׁ3)\BW@&l sH uDJY`mAAd()xT$vLH|$<ۭ;ad}1NuaХ ՕI^-\ؗA6 I JbB[&VLFFWZiAϛ!!l ?BHODJY$=RDSD8L.C8DPֆM 2(@5a!ឬ t'T@*H&iׄ+Nꄨ5; ɖCSȴX;u%*J&Sw ŇI"$ZVUUFIƾΓ4mk#_lac ]##2I)S%˖fBBf,JV lMbMl0kb-:)fmyc1n|~d~˝J@ R8e+a<)!Ѻ{I=v.JxSPb uL֕Nb#bj-Uƫexc>uHNJ\k\x~WK{Tz(3T;jW؊Mg?J/vmm&#Or 9|r ҢCre ;04! I!PP1wK9  (:Y,VTJvJ$6D AC@<3MZH(B€   H=3X@@6FI82IPV(HG$ PG"D ,P J@0P! 
dpSEE[8`0*`0 4J#TJ@W:H#%(R RA, E0)E)A@A U" ҀI aPXA*I@D0*i2r@(Ṳ(&T"c!%A0:.p!Fdplyr/data/band_instruments.rda0000644000176200001440000000030214121112104016342 0ustar liggesusersBZh91AY&SY>s2HP@ޠ@ S="=P QM3SAdA)sdb-T}P߻"I~pnTQ 'K đ^l&enXe`feUG/|*HCGN!koQlF^Mp+=x/H =dplyr/man/0000755000176200001440000000000014176714177012176 5ustar liggesusersdplyr/man/glimpse.Rd0000644000176200001440000000176314151641776014131 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reexport-tibble.r \name{glimpse} \alias{glimpse} \title{Get a glimpse of your data} \value{ x original x is (invisibly) returned, allowing \code{glimpse()} to be used within a data pipeline. } \description{ \code{glimpse()} is like a transposed version of \code{print()}: columns run down the page, and data runs across. This makes it possible to see every column in a data frame. It's a little like \code{\link[=str]{str()}} applied to a data frame but it tries to show you as much data as possible. (And it always shows the underlying data, even when applied to a remote data source.) \code{glimpse()} is provided by the pillar package, and re-exported by dplyr. See \code{\link[pillar:glimpse]{pillar::glimpse()}} for more details. } \examples{ glimpse(mtcars) # Note that original x is (invisibly) returned, allowing `glimpse()` to be # used within a pipeline. mtcars \%>\% glimpse() \%>\% select(1:3) glimpse(starwars) } dplyr/man/tbl_vars.Rd0000644000176200001440000000116614121112104014251 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbl.r \name{tbl_vars} \alias{tbl_vars} \alias{tbl_nongroup_vars} \title{List variables provided by a tbl.} \usage{ tbl_vars(x) tbl_nongroup_vars(x) } \arguments{ \item{x}{A tbl object} } \description{ \code{tbl_vars()} returns all variables while \code{tbl_nongroup_vars()} returns only non-grouping variables. 
The \code{groups} attribute of the object returned by \code{tbl_vars()} is a character vector of the grouping columns. } \seealso{ \code{\link[=group_vars]{group_vars()}} for a function that returns grouping variables. } \keyword{internal} dplyr/man/count.Rd0000644000176200001440000000611614121112104013565 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/count-tally.R \name{count} \alias{count} \alias{tally} \alias{add_count} \alias{add_tally} \title{Count observations by group} \usage{ count(x, ..., wt = NULL, sort = FALSE, name = NULL) tally(x, wt = NULL, sort = FALSE, name = NULL) add_count(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) add_tally(x, wt = NULL, sort = FALSE, name = NULL) } \arguments{ \item{x}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr).} \item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> Variables to group by.} \item{wt}{<\code{\link[=dplyr_data_masking]{data-masking}}> Frequency weights. Can be \code{NULL} or a variable: \itemize{ \item If \code{NULL} (the default), counts the number of rows in each group. \item If a variable, computes \code{sum(wt)} for each group. }} \item{sort}{If \code{TRUE}, will show the largest groups at the top.} \item{name}{The name of the new column in the output. If omitted, it will default to \code{n}. If there's already a column called \code{n}, it will error, and require you to specify the name.} \item{.drop}{For \code{count()}: if \code{FALSE} will include counts for empty groups (i.e. for levels of factors that don't exist in the data). Deprecated in \code{add_count()} since it didn't actually affect the output.} } \value{ An object of the same type as \code{.data}. \code{count()} and \code{add_count()} group transiently, so the output has the same groups as the input. 
} \description{ \code{count()} lets you quickly count the unique values of one or more variables: \code{df \%>\% count(a, b)} is roughly equivalent to \code{df \%>\% group_by(a, b) \%>\% summarise(n = n())}. \code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent to \code{df \%>\% summarise(n = n())}. Supply \code{wt} to perform weighted counts, switching the summary from \code{n = n()} to \code{n = sum(wt)}. \code{add_count()} and \code{add_tally()} are equivalents to \code{count()} and \code{tally()} but use \code{mutate()} instead of \code{summarise()} so that they add a new column with group-wise counts. } \examples{ # count() is a convenient way to get a sense of the distribution of # values in a dataset starwars \%>\% count(species) starwars \%>\% count(species, sort = TRUE) starwars \%>\% count(sex, gender, sort = TRUE) starwars \%>\% count(birth_decade = round(birth_year, -1)) # use the `wt` argument to perform a weighted count. This is useful # when the data has already been aggregated once df <- tribble( ~name, ~gender, ~runs, "Max", "male", 10, "Sandra", "female", 1, "Susan", "female", 4 ) # counts rows: df \%>\% count(gender) # counts runs: df \%>\% count(gender, wt = runs) # tally() is a lower-level function that assumes you've done the grouping starwars \%>\% tally() starwars \%>\% group_by(species) \%>\% tally() # both count() and tally() have add_ variants that work like # mutate() instead of summarise df \%>\% add_count(gender, wt = runs) df \%>\% add_tally(wt = runs) } dplyr/man/summarise_each.Rd0000644000176200001440000000157214151641776015454 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-lazyeval.R \name{summarise_each} \alias{summarise_each} \alias{summarise_each_} \alias{mutate_each} \alias{mutate_each_} \alias{summarize_each} \alias{summarize_each_} \title{Summarise and mutate multiple columns.} \usage{ summarise_each(tbl, funs, ...) 
summarise_each_(tbl, funs, vars) mutate_each(tbl, funs, ...) mutate_each_(tbl, funs, vars) summarize_each(tbl, funs, ...) summarize_each_(tbl, funs, vars) } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{mutate_each()} and \code{summarise_each()} are deprecated in favour of the new \code{\link[=across]{across()}} function that works within \code{summarise()} and \code{mutate()}. } \keyword{internal} dplyr/man/do.Rd0000644000176200001440000000373714174551644013075 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-do.r \name{do} \alias{do} \title{Do anything} \usage{ do(.data, ...) } \arguments{ \item{.data}{a tbl} \item{...}{Expressions to apply to each group. If named, results will be stored in a new column. If unnamed, must return a data frame. You can use \code{.} to refer to the current group. You can not mix named and unnamed arguments.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{do()} is superseded as of dplyr 1.0.0, because its syntax never really felt like it belonged with the rest of dplyr. It's replaced by a combination of \code{\link[=summarise]{summarise()}} (which can now produce multiple rows and multiple columns), \code{\link[=nest_by]{nest_by()}} (which creates a \link{rowwise} tibble of nested data), and \code{\link[=across]{across()}} (which allows you to access the data for the "current" group). } \examples{ # do() with unnamed arguments becomes summarise() # . 
becomes across() by_cyl <- mtcars \%>\% group_by(cyl) by_cyl \%>\% do(head(., 2)) # -> by_cyl \%>\% summarise(head(across(), 2)) by_cyl \%>\% slice_head(n = 2) # Can refer to variables directly by_cyl \%>\% do(mean = mean(.$vs)) # -> by_cyl \%>\% summarise(mean = mean(vs)) # do() with named arguments becomes nest_by() + mutate() & list() models <- by_cyl \%>\% do(mod = lm(mpg ~ disp, data = .)) # -> models <- mtcars \%>\% nest_by(cyl) \%>\% mutate(mod = list(lm(mpg ~ disp, data = data))) models \%>\% summarise(rsq = summary(mod)$r.squared) # use broom to turn models into data models \%>\% do(data.frame( var = names(coef(.$mod)), coef(summary(.$mod))) ) \dontshow{if (requireNamespace("broom", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # -> models \%>\% summarise(broom::tidy(mod)) \dontshow{\}) # examplesIf} } \keyword{internal} dplyr/man/bind.Rd0000644000176200001440000000554314151641776013405 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind.r \name{bind} \alias{bind} \alias{bind_rows} \alias{bind_cols} \title{Efficiently bind multiple data frames by row and column} \usage{ bind_rows(..., .id = NULL) bind_cols( ..., .name_repair = c("unique", "universal", "check_unique", "minimal") ) } \arguments{ \item{...}{Data frames to combine. Each argument can either be a data frame, a list that could be a data frame, or a list of data frames. When row-binding, columns are matched by name, and any missing columns will be filled with NA. When column-binding, rows are matched by position, so all data frames must have the same number of rows. To match by value, not position, see \link{mutate-joins}.} \item{.id}{Data frame identifier. When \code{.id} is supplied, a new column of identifiers is created to link each row to its original data frame. The labels are taken from the named arguments to \code{bind_rows()}. 
When a list of data frames is supplied, the labels are taken from the names of the list. If no names are found a numeric sequence is used instead.} \item{.name_repair}{One of \code{"unique"}, \code{"universal"}, or \code{"check_unique"}. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for the meaning of these options.} } \value{ \code{bind_rows()} and \code{bind_cols()} return the same type as the first input, either a data frame, \code{tbl_df}, or \code{grouped_df}. } \description{ This is an efficient implementation of the common pattern of \code{do.call(rbind, dfs)} or \code{do.call(cbind, dfs)} for binding many data frames into one. } \details{ The output of \code{bind_rows()} will contain a column if that column appears in any of the inputs. } \examples{ one <- starwars[1:4, ] two <- starwars[9:12, ] # You can supply data frames as arguments: bind_rows(one, two) # The contents of lists are spliced automatically: bind_rows(list(one, two)) bind_rows(split(starwars, starwars$homeworld)) bind_rows(list(one, two), list(two, one)) # In addition to data frames, you can supply vectors. 
In the rows # direction, the vectors represent rows and should have inner # names: bind_rows( c(a = 1, b = 2), c(a = 3, b = 4) ) # You can mix vectors and data frames: bind_rows( c(a = 1, b = 2), tibble(a = 3:4, b = 5:6), c(a = 7, b = 8) ) # When you supply a column name with the `.id` argument, a new # column is created to link each row to its original data frame bind_rows(list(one, two), .id = "id") bind_rows(list(a = one, b = two), .id = "id") bind_rows("group 1" = one, "group 2" = two, .id = "groups") # Columns don't need to match when row-binding bind_rows(tibble(x = 1:3), tibble(y = 1:4)) # Row sizes must be compatible when column-binding try(bind_cols(tibble(x = 1:3), tibble(y = 1:2))) # Even with 0 columns try(bind_cols(tibble(x = 1:3), tibble())) bind_cols(one, two) bind_cols(list(one, two)) } dplyr/man/top_n.Rd0000644000176200001440000000360614151641776013606 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/top-n.R \name{top_n} \alias{top_n} \alias{top_frac} \title{Select top (or bottom) n rows (by value)} \usage{ top_n(x, n, wt) top_frac(x, n, wt) } \arguments{ \item{x}{A data frame.} \item{n}{Number of rows to return for \code{top_n()}, fraction of rows to return for \code{top_frac()}. If \code{n} is positive, selects the top rows. If negative, selects the bottom rows. If \code{x} is grouped, this is the number (or fraction) of rows per group. Will include more rows if there are ties.} \item{wt}{(Optional). The variable to use for ordering. If not specified, defaults to the last variable in the tbl.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{top_n()} has been superseded in favour of \code{\link[=slice_min]{slice_min()}}/\code{\link[=slice_max]{slice_max()}}. 
While it will not be deprecated in the near future, retirement means that we will only perform critical bug fixes, so we recommend moving to the newer alternatives. \code{top_n()} was superseded because the name was fundamentally confusing as it returned what you might reasonably consider to be the \emph{bottom} rows. Additionally, the \code{wt} variable had a confusing name, and strange default (the last column in the data frame). Unfortunately we could not see an easy way to fix the existing \code{top_n()} function without breaking existing code, so we created a new alternative. } \examples{ df <- data.frame(x = c(6, 4, 1, 10, 3, 1, 1)) df \%>\% top_n(2) # highest values df \%>\% top_n(-2) # lowest values # now use df \%>\% slice_max(x, n = 2) df \%>\% slice_min(x, n = 2) # top_frac() -> prop argument of slice_min()/slice_max() df \%>\% top_frac(.5) # -> df \%>\% slice_max(x, prop = 0.5) } \keyword{internal} dplyr/man/arrange_all.Rd0000644000176200001440000000431114151641776014730 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-arrange.R \name{arrange_all} \alias{arrange_all} \alias{arrange_at} \alias{arrange_if} \title{Arrange rows by a selection of variables} \usage{ arrange_all(.tbl, .funs = list(), ..., .by_group = FALSE) arrange_at(.tbl, .vars, .funs = list(), ..., .by_group = FALSE) arrange_if(.tbl, .predicate, .funs = list(), ..., .by_group = FALSE) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. 
Applies to grouped data frames only.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These \link{scoped} variants of \code{\link[=arrange]{arrange()}} sort a data frame by a selection of variables. Like \code{\link[=arrange]{arrange()}}, you can modify the variables before ordering with the \code{.funs} argument. } \section{Grouping variables}{ The grouping variables that are part of the selection participate in the sorting of the data frame. 
} \examples{ df <- as_tibble(mtcars) arrange_all(df) # -> arrange(df, across()) arrange_all(df, desc) # -> arrange(df, across(everything(), desc)) } \keyword{internal} dplyr/man/group_trim.Rd0000644000176200001440000000215314151641776014652 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group_trim.R \name{group_trim} \alias{group_trim} \title{Trim grouping structure} \usage{ group_trim(.tbl, .drop = group_by_drop_default(.tbl)) } \arguments{ \item{.tbl}{A \link[=grouped_df]{grouped data frame}} \item{.drop}{See \code{\link[=group_by]{group_by()}}} } \value{ A \link[=grouped_df]{grouped data frame} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Drop unused levels of all factors that are used as grouping variables, then recalculates the grouping structure. \code{group_trim()} is particularly useful after a \code{\link[=filter]{filter()}} that is intended to select a subset of groups. } \examples{ iris \%>\% group_by(Species) \%>\% filter(Species == "setosa", .preserve = TRUE) \%>\% group_trim() } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_map}()}, \code{\link{group_nest}()}, \code{\link{group_split}()} } \concept{grouping functions} dplyr/man/summarise.Rd0000644000176200001440000001311114151641776014464 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarise.R \name{summarise} \alias{summarise} \alias{summarize} \title{Summarise each group to fewer rows} \usage{ summarise(.data, ..., .groups = NULL) summarize(.data, ..., .groups = NULL) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). 
See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> Name-value pairs of summary functions. The name will be the name of the variable in the result. The value can be: \itemize{ \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. \item A vector of length \code{n}, e.g. \code{quantile()}. \item A data frame, to add multiple columns from a single expression. }} \item{.groups}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Grouping structure of the result. \itemize{ \item "drop_last": dropping the last level of grouping. This was the only supported option before version 1.0.0. \item "drop": All levels of grouping are dropped. \item "keep": Same grouping structure as \code{.data}. \item "rowwise": Each row is its own group. } When \code{.groups} is not specified, it is chosen based on the number of rows of the results: \itemize{ \item If all the results have 1 row, you get "drop_last". \item If the number of rows varies, you get "keep". } In addition, a message informs you of that choice, unless the result is ungrouped, the option "dplyr.summarise.inform" is set to \code{FALSE}, or when \code{summarise()} is called from a function in a package.} } \value{ An object \emph{usually} of the same type as \code{.data}. \itemize{ \item The rows come from the underlying \code{\link[=group_keys]{group_keys()}}. \item The columns are a combination of the grouping keys and the summary expressions that you provide. \item The grouping structure is controlled by the \verb{.groups=} argument, the output may be another \link{grouped_df}, a \link{tibble} or a \link{rowwise} data frame. \item Data frame attributes are \strong{not} preserved, because \code{summarise()} fundamentally creates a new data frame. } } \description{ \code{summarise()} creates a new data frame. 
It will have one (or more) rows for each combination of grouping variables; if there are no grouping variables, the output will have a single row summarising all observations in the input. It will contain one column for each grouping variable and one column for each of the summary statistics that you have specified. \code{summarise()} and \code{summarize()} are synonyms. } \section{Useful functions}{ \itemize{ \item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}} \item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}} \item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=quantile]{quantile()}} \item Position: \code{\link[=first]{first()}}, \code{\link[=last]{last()}}, \code{\link[=nth]{nth()}}, \item Count: \code{\link[=n]{n()}}, \code{\link[=n_distinct]{n_distinct()}} \item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}} } } \section{Backend variations}{ The data frame backend supports creating a variable and using it in the same summary. This means that previously created summary variables can be further transformed or combined within the summary, as in \code{\link[=mutate]{mutate()}}. However, it also means that summary variables with the same names as previous variables overwrite them, making those variables unavailable to later summary variables. This behaviour may not be supported in other backends. To avoid unexpected results, consider using new names for your summary variables, especially when creating multiple summaries. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}. 
} \examples{ # A summary applied to ungrouped tbl returns a single row mtcars \%>\% summarise(mean = mean(disp), n = n()) # Usually, you'll want to group first mtcars \%>\% group_by(cyl) \%>\% summarise(mean = mean(disp), n = n()) # dplyr 1.0.0 allows to summarise to more than one value: mtcars \%>\% group_by(cyl) \%>\% summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) # You use a data frame to create multiple columns so you can wrap # this up into a function: my_quantile <- function(x, probs) { tibble(x = quantile(x, probs), probs = probs) } mtcars \%>\% group_by(cyl) \%>\% summarise(my_quantile(disp, c(0.25, 0.75))) # Each summary call removes one grouping level (since that group # is now just a single row) mtcars \%>\% group_by(cyl, vs) \%>\% summarise(cyl_n = n()) \%>\% group_vars() # BEWARE: reusing variables may lead to unexpected results mtcars \%>\% group_by(cyl) \%>\% summarise(disp = mean(disp), sd = sd(disp)) # Refer to column names stored as strings with the `.data` pronoun: var <- "mass" summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE)) # Learn more in ?dplyr_data_masking } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()} } \concept{single table verbs} dplyr/man/group_nest.Rd0000644000176200001440000000416614151641776014656 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group_nest.R \name{group_nest} \alias{group_nest} \title{Nest a tibble using a grouping specification} \usage{ group_nest(.tbl, ..., .key = "data", keep = FALSE) } \arguments{ \item{.tbl}{A tbl} \item{...}{Grouping specification, forwarded to \code{\link[=group_by]{group_by()}}} \item{.key}{the name of the list column} \item{keep}{Should the grouping columns be kept in the list column.} } \value{ A tbl with one row per unique combination of the grouping variables. 
The first columns are the grouping variables, followed by a list column of tibbles with matching rows of the remaining columns. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Nest a tibble using a grouping specification } \section{Grouped data frames}{ The primary use case for \code{\link[=group_nest]{group_nest()}} is with already grouped data frames, typically a result of \code{\link[=group_by]{group_by()}}. In this case \code{\link[=group_nest]{group_nest()}} only uses the first argument, the grouped tibble, and warns when \code{...} is used. } \section{Ungrouped data frames}{ When used on ungrouped data frames, \code{\link[=group_nest]{group_nest()}} forwards the \code{...} to \code{\link[=group_by]{group_by()}} before nesting, therefore the \code{...} are subject to the data mask. } \examples{ #----- use case 1: a grouped data frame iris \%>\% group_by(Species) \%>\% group_nest() # this can be useful if the grouped data has been altered before nesting iris \%>\% group_by(Species) \%>\% filter(Sepal.Length > mean(Sepal.Length)) \%>\% group_nest() #----- use case 2: using group_nest() on a ungrouped data frame with # a grouping specification that uses the data mask starwars \%>\% group_nest(species, homeworld) } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_map}()}, \code{\link{group_split}()}, \code{\link{group_trim}()} } \concept{grouping functions} \keyword{internal} dplyr/man/starwars.Rd0000644000176200001440000000231414121112104014277 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data-starwars.R \docType{data} \name{starwars} \alias{starwars} \title{Starwars characters} \format{ A tibble with 87 rows and 14 variables: \describe{ \item{name}{Name of the character} \item{height}{Height (cm)} \item{mass}{Weight (kg)} 
\item{hair_color,skin_color,eye_color}{Hair, skin, and eye colors} \item{birth_year}{Year born (BBY = Before Battle of Yavin)} \item{sex}{The biological sex of the character, namely male, female, hermaphroditic, or none (as in the case for Droids).} \item{gender}{The gender role or gender identity of the character as determined by their personality or the way they were programmed (as in the case for Droids).} \item{homeworld}{Name of homeworld} \item{species}{Name of species} \item{films}{List of films the character appeared in} \item{vehicles}{List of vehicles the character has piloted} \item{starships}{List of starships the character has piloted} } } \usage{ starwars } \description{ The original data, from SWAPI, the Star Wars API, \url{https://swapi.dev/}, has been revised to reflect additional research into gender and sex determinations of characters. } \examples{ starwars } \keyword{datasets} dplyr/man/select.Rd0000644000176200001440000002171214176717554013751 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/select.R \name{select} \alias{select} \title{Subset columns using their names and types} \usage{ select(.data, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> One or more unquoted expressions separated by commas. Variable names can be used as if they were positions in the data frame, so expressions like \code{x:y} can be used to select a range of variables.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item Output columns are a subset of input columns, potentially with a different order. Columns will be renamed if \code{new_name = old_name} form is used. \item Data frame attributes are preserved. 
\item Groups are maintained; you can't select off grouping variables. } } \description{ Select (and optionally rename) variables in a data frame, using a concise mini-language that makes it easy to refer to variables based on their name (e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the right). You can also use predicate functions like \link{is.numeric} to select variables based on their properties. \subsection{Overview of selection features}{ Tidyverse selections implement a dialect of R where operators make it easy to select variables: \itemize{ \item \code{:} for selecting a range of consecutive variables. \item \code{!} for taking the complement of a set of variables. \item \code{&} and \code{|} for selecting the intersection or the union of two sets of variables. \item \code{c()} for combining selections. } In addition, you can use \strong{selection helpers}. Some helpers select specific columns: \itemize{ \item \code{\link[tidyselect:everything]{everything()}}: Matches all variables. \item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset. } These helpers select variables by matching patterns in their names: \itemize{ \item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix. \item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix. \item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string. \item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression. \item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03. } These helpers select variables from a character vector: \itemize{ \item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All names must be present, otherwise an out-of-bounds error is thrown. 
\item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown for names that don't exist. } This helper selects variables with a function: \itemize{ \item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those for which the function returns \code{TRUE}. } } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. } \section{Examples}{ Here we show the usage for the basic selection operators. See the specific help pages to learn about helpers like \code{\link[=starts_with]{starts_with()}}. The selection language can be used in functions like \code{dplyr::select()} or \code{tidyr::pivot_longer()}. Let's first attach the tidyverse:\if{html}{\out{
}}\preformatted{library(tidyverse) # For better printing iris <- as_tibble(iris) }\if{html}{\out{
}} Select variables by name:\if{html}{\out{
}}\preformatted{starwars \%>\% select(height) #> # A tibble: 87 x 1 #> height #> #> 1 172 #> 2 167 #> 3 96 #> 4 202 #> # ... with 83 more rows iris \%>\% pivot_longer(Sepal.Length) #> # A tibble: 150 x 6 #> Sepal.Width Petal.Length Petal.Width Species name value #> #> 1 3.5 1.4 0.2 setosa Sepal.Length 5.1 #> 2 3 1.4 0.2 setosa Sepal.Length 4.9 #> 3 3.2 1.3 0.2 setosa Sepal.Length 4.7 #> 4 3.1 1.5 0.2 setosa Sepal.Length 4.6 #> # ... with 146 more rows }\if{html}{\out{
}} Select multiple variables by separating them with commas. Note how the order of columns is determined by the order of inputs:\if{html}{\out{
}}\preformatted{starwars \%>\% select(homeworld, height, mass) #> # A tibble: 87 x 3 #> homeworld height mass #> #> 1 Tatooine 172 77 #> 2 Tatooine 167 75 #> 3 Naboo 96 32 #> 4 Tatooine 202 136 #> # ... with 83 more rows }\if{html}{\out{
}} Functions like \code{tidyr::pivot_longer()} don't take variables with dots. In this case use \code{c()} to select multiple variables:\if{html}{\out{
}}\preformatted{iris \%>\% pivot_longer(c(Sepal.Length, Petal.Length)) #> # A tibble: 300 x 5 #> Sepal.Width Petal.Width Species name value #> #> 1 3.5 0.2 setosa Sepal.Length 5.1 #> 2 3.5 0.2 setosa Petal.Length 1.4 #> 3 3 0.2 setosa Sepal.Length 4.9 #> 4 3 0.2 setosa Petal.Length 1.4 #> # ... with 296 more rows }\if{html}{\out{
}} \subsection{Operators:}{ The \code{:} operator selects a range of consecutive variables:\if{html}{\out{
}}\preformatted{starwars \%>\% select(name:mass) #> # A tibble: 87 x 3 #> name height mass #> #> 1 Luke Skywalker 172 77 #> 2 C-3PO 167 75 #> 3 R2-D2 96 32 #> 4 Darth Vader 202 136 #> # ... with 83 more rows }\if{html}{\out{
}} The \code{!} operator negates a selection:\if{html}{\out{
}}\preformatted{starwars \%>\% select(!(name:mass)) #> # A tibble: 87 x 11 #> hair_color skin_color eye_color birth_year sex gender homeworld species #> #> 1 blond fair blue 19 male masculine Tatooine Human #> 2 gold yellow 112 none masculine Tatooine Droid #> 3 white, blue red 33 none masculine Naboo Droid #> 4 none white yellow 41.9 male masculine Tatooine Human #> # ... with 83 more rows, and 3 more variables: films , vehicles , #> # starships iris \%>\% select(!c(Sepal.Length, Petal.Length)) #> # A tibble: 150 x 3 #> Sepal.Width Petal.Width Species #> #> 1 3.5 0.2 setosa #> 2 3 0.2 setosa #> 3 3.2 0.2 setosa #> 4 3.1 0.2 setosa #> # ... with 146 more rows iris \%>\% select(!ends_with("Width")) #> # A tibble: 150 x 3 #> Sepal.Length Petal.Length Species #> #> 1 5.1 1.4 setosa #> 2 4.9 1.4 setosa #> 3 4.7 1.3 setosa #> 4 4.6 1.5 setosa #> # ... with 146 more rows }\if{html}{\out{
}} \code{&} and \code{|} take the intersection or the union of two selections:\if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & ends_with("Width")) #> # A tibble: 150 x 1 #> Petal.Width #> #> 1 0.2 #> 2 0.2 #> 3 0.2 #> 4 0.2 #> # ... with 146 more rows iris \%>\% select(starts_with("Petal") | ends_with("Width")) #> # A tibble: 150 x 3 #> Petal.Length Petal.Width Sepal.Width #> #> 1 1.4 0.2 3.5 #> 2 1.4 0.2 3 #> 3 1.3 0.2 3.2 #> 4 1.5 0.2 3.1 #> # ... with 146 more rows }\if{html}{\out{
}} To take the difference between two selections, combine the \code{&} and \code{!} operators:\if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & !ends_with("Width")) #> # A tibble: 150 x 1 #> Petal.Length #> #> 1 1.4 #> 2 1.4 #> 3 1.3 #> 4 1.5 #> # ... with 146 more rows }\if{html}{\out{
}} } } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/tbl_df.Rd0000644000176200001440000000101714151641776013713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-tibble.R \name{tbl_df} \alias{tbl_df} \alias{as.tbl} \title{Coerce to a tibble} \usage{ tbl_df(data) as.tbl(x, ...) } \arguments{ \item{data, x}{Object to coerce} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{\link[tibble:as_tibble]{tibble::as_tibble()}} instead. } \keyword{internal} dplyr/man/bench_compare.Rd0000644000176200001440000000354114151641776015252 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-bench-compare.r \name{bench_compare} \alias{bench_compare} \alias{bench_tbls} \alias{compare_tbls} \alias{compare_tbls2} \alias{eval_tbls} \alias{eval_tbls2} \title{Evaluate, compare, benchmark operations of a set of srcs.} \usage{ bench_tbls(tbls, op, ..., times = 10) compare_tbls(tbls, op, ref = NULL, compare = equal_data_frame, ...) compare_tbls2(tbls_x, tbls_y, op, ref = NULL, compare = equal_data_frame, ...) eval_tbls(tbls, op) eval_tbls2(tbls_x, tbls_y, op) } \arguments{ \item{tbls, tbls_x, tbls_y}{A list of \code{\link[=tbl]{tbl()}}s.} \item{op}{A function with a single argument, called often with each element of \code{tbls}.} \item{\dots}{For \code{compare_tbls()}: additional parameters passed on the \code{compare()} function For \code{bench_tbls()}: additional benchmarks to run.} \item{times}{For benchmarking, the number of times each operation is repeated.} \item{ref}{For checking, a data frame to test results against. 
If not supplied, defaults to the results from the first \code{src}.} \item{compare}{A function used to compare the results. Defaults to \code{equal_data_frame} which ignores the order of rows and columns.} } \value{ \code{eval_tbls()}: a list of data frames. \code{compare_tbls()}: an invisible \code{TRUE} on success, otherwise an error is thrown. \code{bench_tbls()}: an object of class \code{\link[microbenchmark:microbenchmark]{microbenchmark::microbenchmark()}} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions are deprecated because we now believe that you're better of performing the comparisons directly, yourself, in order to generate more informative test failures. } \keyword{internal} dplyr/man/nth.Rd0000644000176200001440000000306214121112104013223 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nth-value.R \name{nth} \alias{nth} \alias{first} \alias{last} \title{Extract the first, last or nth value from a vector} \usage{ nth(x, n, order_by = NULL, default = default_missing(x)) first(x, order_by = NULL, default = default_missing(x)) last(x, order_by = NULL, default = default_missing(x)) } \arguments{ \item{x}{A vector} \item{n}{For \code{nth()}, a single integer specifying the position. Negative integers index from the end (i.e. \code{-1L} will return the last value in the vector). If a double is supplied, it will be silently truncated.} \item{order_by}{An optional vector used to determine the order} \item{default}{A default value to use if the position does not exist in the input. This is guessed by default for base vectors, where a missing value of the appropriate type is returned, and for lists, where a \code{NULL} is return. For more complicated objects, you'll need to supply this value. Make sure it is the same type as \code{x}.} } \value{ A single value. 
\code{[[} is used to do the subsetting. } \description{ These are straightforward wrappers around \code{\link{[[}}. The main advantage is that you can provide an optional secondary vector that defines the ordering, and provide a default value to use when the input is shorter than expected. } \examples{ x <- 1:10 y <- 10:1 first(x) last(y) nth(x, 1) nth(x, 5) nth(x, -2) nth(x, 11) last(x) # Second argument provides optional ordering last(x, y) # These functions always return a single value first(integer()) } dplyr/man/same_src.Rd0000644000176200001440000000062314121112104014226 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/src.r \name{same_src} \alias{same_src} \title{Figure out if two sources are the same (or two tbl have the same source)} \usage{ same_src(x, y) } \arguments{ \item{x, y}{src or tbls to test} } \value{ a logical flag } \description{ Figure out if two sources are the same (or two tbl have the same source) } \keyword{internal} dplyr/man/lead-lag.Rd0000644000176200001440000000267714121112104014113 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lead-lag.R \name{lead-lag} \alias{lead-lag} \alias{lag} \alias{lead} \title{Compute lagged or leading values} \usage{ lag(x, n = 1L, default = NA, order_by = NULL, ...) lead(x, n = 1L, default = NA, order_by = NULL, ...) } \arguments{ \item{x}{Vector of values} \item{n}{Positive integer of length 1, giving the number of positions to lead or lag by} \item{default}{Value used for non-existent rows. Defaults to \code{NA}.} \item{order_by}{Override the default ordering to use another vector or column} \item{...}{Needed for compatibility with lag generic.} } \description{ Find the "previous" (\code{lag()}) or "next" (\code{lead()}) values in a vector. Useful for comparing values behind of or ahead of the current values. 
} \examples{ lag(1:5) lead(1:5) x <- 1:5 tibble(behind = lag(x), x, ahead = lead(x)) # If you want to look more rows behind or ahead, use `n` lag(1:5, n = 1) lag(1:5, n = 2) lead(1:5, n = 1) lead(1:5, n = 2) # If you want to define a value for non-existing rows, use `default` lag(1:5) lag(1:5, default = 0) lead(1:5) lead(1:5, default = 6) # If data are not already ordered, use `order_by` scrambled <- slice_sample(tibble(year = 2000:2005, value = (0:5) ^ 2), prop = 1) wrong <- mutate(scrambled, previous_year_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, previous_year_value = lag(value, order_by = year)) arrange(right, year) } dplyr/man/storms.Rd0000644000176200001440000000322114151641776014007 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data-storms.R \docType{data} \name{storms} \alias{storms} \title{Storm tracks data} \format{ A tibble with 11,859 observations and 13 variables: \describe{ \item{name}{Storm Name} \item{year,month,day}{Date of report} \item{hour}{Hour of report (in UTC)} \item{lat,long}{Location of storm center} \item{status}{Storm classification (Tropical Depression, Tropical Storm, or Hurricane)} \item{category}{Saffir-Simpson storm category (estimated from wind speed. -1 = Tropical Depression, 0 = Tropical Storm)} \item{wind}{storm's maximum sustained wind speed (in knots)} \item{pressure}{Air pressure at the storm's center (in millibars)} \item{tropicalstorm_force_diameter}{Diameter (in nautical miles) of the area experiencing tropical storm strength winds (34 knots or above)} \item{hurricane_force_diameter}{Diameter (in nautical miles) of the area experiencing hurricane strength winds (64 knots or above)} } } \usage{ storms } \description{ This data is a subset of the NOAA Atlantic hurricane database best track data, \url{https://www.nhc.noaa.gov/data/#hurdat}. 
The data includes the positions and attributes of storms from 1975-2020, measured every six hours during the lifetime of a storm. } \examples{ # show a plot of the storm paths if (requireNamespace("ggplot2", quietly = TRUE)) { library(ggplot2) ggplot(storms) + aes(x=long, y=lat, color=paste(year, name)) + geom_path() + guides(color='none') + facet_wrap(~year) } storms } \seealso{ The script to create the storms data set: \url{https://github.com/tidyverse/dplyr/blob/main/data-raw/storms.R} } \keyword{datasets} dplyr/man/auto_copy.Rd0000644000176200001440000000125314121112104014434 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/copy-to.r \name{auto_copy} \alias{auto_copy} \title{Copy tables to same source, if necessary} \usage{ auto_copy(x, y, copy = FALSE, ...) } \arguments{ \item{x, y}{\code{y} will be copied to \code{x}, if necessary.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{...}{Other arguments passed on to methods.} } \description{ Copy tables to same source, if necessary } dplyr/man/scoped.Rd0000644000176200001440000001215714151641776013745 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{scoped} \alias{scoped} \title{Operate on a selection of variables} \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. 
The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. The variants suffixed with \verb{_if}, \verb{_at} or \verb{_all} apply an expression (sometimes several) to all variables within a specified subset. This subset can contain all variables (\verb{_all} variants), a \code{\link[=vars]{vars()}} selection (\verb{_at} variants), or variables selected with a predicate (\verb{_if} variants). The verbs with scoped variants are: \itemize{ \item \code{\link[=mutate]{mutate()}}, \code{\link[=transmute]{transmute()}} and \code{\link[=summarise]{summarise()}}. See \code{\link[=summarise_all]{summarise_all()}}. \item \code{\link[=filter]{filter()}}. See \code{\link[=filter_all]{filter_all()}}. \item \code{\link[=group_by]{group_by()}}. See \code{\link[=group_by_all]{group_by_all()}}. \item \code{\link[=rename]{rename()}} and \code{\link[=select]{select()}}. See \code{\link[=select_all]{select_all()}}. \item \code{\link[=arrange]{arrange()}}. See \code{\link[=arrange_all]{arrange_all()}} } There are three kinds of scoped variants. They differ in the scope of the variable selection on which operations are applied: \itemize{ \item Verbs suffixed with \verb{_all()} apply an operation on all variables. 
\item Verbs suffixed with \verb{_at()} apply an operation on a subset of variables specified with the quoting function \code{\link[=vars]{vars()}}. This quoting function accepts \code{\link[tidyselect:vars_select]{tidyselect::vars_select()}} helpers like \code{\link[=starts_with]{starts_with()}}. Instead of a \code{\link[=vars]{vars()}} selection, you can also supply an \link[rlang:is_integerish]{integerish} vector of column positions or a character vector of column names. \item Verbs suffixed with \verb{_if()} apply an operation on the subset of variables for which a predicate function returns \code{TRUE}. Instead of a predicate function, you can also supply a logical vector. } } \section{Grouping variables}{ Most of these operations also apply on the grouping variables when they are part of the selection. This includes: \itemize{ \item \code{\link[=arrange_all]{arrange_all()}}, \code{\link[=arrange_at]{arrange_at()}}, and \code{\link[=arrange_if]{arrange_if()}} \item \code{\link[=distinct_all]{distinct_all()}}, \code{\link[=distinct_at]{distinct_at()}}, and \code{\link[=distinct_if]{distinct_if()}} \item \code{\link[=filter_all]{filter_all()}}, \code{\link[=filter_at]{filter_at()}}, and \code{\link[=filter_if]{filter_if()}} \item \code{\link[=group_by_all]{group_by_all()}}, \code{\link[=group_by_at]{group_by_at()}}, and \code{\link[=group_by_if]{group_by_if()}} \item \code{\link[=select_all]{select_all()}}, \code{\link[=select_at]{select_at()}}, and \code{\link[=select_if]{select_if()}} } This is not the case for summarising and mutating variants where operations are \emph{not} applied on grouping variables. The behaviour depends on whether the selection is \strong{implicit} (\code{all} and \code{if} selections) or \strong{explicit} (\code{at} selections). Grouping variables covered by explicit selections (with \code{\link[=summarise_at]{summarise_at()}}, \code{\link[=mutate_at]{mutate_at()}}, and \code{\link[=transmute_at]{transmute_at()}}) are always an error. 
For implicit selections, the grouping variables are always ignored. In this case, the level of verbosity depends on the kind of operation: \itemize{ \item Summarising operations (\code{\link[=summarise_all]{summarise_all()}} and \code{\link[=summarise_if]{summarise_if()}}) ignore grouping variables silently because it is obvious that operations are not applied on grouping variables. \item On the other hand it isn't as obvious in the case of mutating operations (\code{\link[=mutate_all]{mutate_all()}}, \code{\link[=mutate_if]{mutate_if()}}, \code{\link[=transmute_all]{transmute_all()}}, and \code{\link[=transmute_if]{transmute_if()}}). For this reason, they issue a message indicating which grouping variables are ignored. } } dplyr/man/src.Rd0000644000176200001440000000115614121112104013223 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/src.r \name{src} \alias{src} \alias{is.src} \title{Create a "src" object} \usage{ src(subclass, ...) is.src(x) } \arguments{ \item{subclass}{name of subclass. "src" is an abstract base class, so you must supply this value. \code{src_} is automatically prepended to the class name} \item{...}{fields used by object. These dots are evaluated with \link[rlang:list2]{explicit splicing}.} \item{x}{object to test for "src"-ness.} } \description{ \code{src()} is the standard constructor for srcs and \code{is.src()} tests. } \keyword{internal} dplyr/man/desc.Rd0000644000176200001440000000077714121112104013362 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/desc.r \name{desc} \alias{desc} \title{Descending order} \usage{ desc(x) } \arguments{ \item{x}{vector to transform} } \description{ Transform a vector into a format that will be sorted in descending order. This is useful within \code{\link[=arrange]{arrange()}}. 
} \examples{ desc(1:10) desc(factor(letters)) first_day <- seq(as.Date("1910/1/1"), as.Date("1920/1/1"), "years") desc(first_day) starwars \%>\% arrange(desc(mass)) } dplyr/man/select_vars.Rd0000644000176200001440000000151414151641776014775 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-tidyselect.R \name{select_vars} \alias{select_vars} \alias{rename_vars} \alias{select_var} \alias{current_vars} \title{Select variables} \usage{ select_vars(vars = chr(), ..., include = chr(), exclude = chr()) rename_vars(vars = chr(), ..., strict = TRUE) select_var(vars, var = -1) current_vars(...) } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions now live in the tidyselect package as \code{\link[tidyselect:vars_select]{tidyselect::vars_select()}}, \code{\link[tidyselect:vars_select]{tidyselect::vars_rename()}} and \code{\link[tidyselect:vars_pull]{tidyselect::vars_pull()}}. 
} \keyword{internal} dplyr/man/dplyr-package.Rd0000644000176200001440000000165614121112104015164 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr.r \docType{package} \name{dplyr-package} \alias{dplyr} \alias{dplyr-package} \title{dplyr: A Grammar of Data Manipulation} \description{ To learn more about dplyr, start with the vignettes: \code{browseVignettes(package = "dplyr")} } \seealso{ Useful links: \itemize{ \item \url{https://dplyr.tidyverse.org} \item \url{https://github.com/tidyverse/dplyr} \item Report bugs at \url{https://github.com/tidyverse/dplyr/issues} } } \author{ \strong{Maintainer}: Hadley Wickham \email{hadley@rstudio.com} (\href{https://orcid.org/0000-0003-4757-117X}{ORCID}) Authors: \itemize{ \item Romain François (\href{https://orcid.org/0000-0002-2444-4226}{ORCID}) \item Lionel Henry \item Kirill Müller (\href{https://orcid.org/0000-0002-1416-3412}{ORCID}) } Other contributors: \itemize{ \item RStudio [copyright holder, funder] } } \keyword{internal} dplyr/man/arrange.Rd0000644000176200001440000000555714154654644014116 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/arrange.R \name{arrange} \alias{arrange} \title{Arrange rows by column values} \usage{ arrange(.data, ..., .by_group = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> Variables, or functions of variables. Use \code{\link[=desc]{desc()}} to sort a variable in descending order.} \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to grouped data frames only.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item All rows appear in the output, but (usually) in a different place. \item Columns are not modified. 
\item Groups are not modified. \item Data frame attributes are preserved. } } \description{ \code{arrange()} orders the rows of a data frame by the values of selected columns. Unlike other dplyr verbs, \code{arrange()} largely ignores grouping; you need to explicitly mention grouping variables (or use \code{.by_group = TRUE}) in order to group by them, and functions of variables are evaluated once per data frame, not once per group. } \details{ \subsection{Locales}{ The sort order for character vectors will depend on the collating sequence of the locale in use: see \code{\link[=locales]{locales()}}. } \subsection{Missing values}{ Unlike base sorting with \code{sort()}, \code{NA} are: \itemize{ \item always sorted to the end for local data, even when wrapped with \code{desc()}. \item treated differently for remote data, depending on the backend. } } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("arrange")}. 
} \examples{ arrange(mtcars, cyl, disp) arrange(mtcars, desc(disp)) # grouped arrange ignores groups by_cyl <- mtcars \%>\% group_by(cyl) by_cyl \%>\% arrange(desc(wt)) # Unless you specifically ask: by_cyl \%>\% arrange(desc(wt), .by_group = TRUE) # use embracing when wrapping in a function; # see ?dplyr_data_masking for more details tidy_eval_arrange <- function(.data, var) { .data \%>\% arrange({{ var }}) } tidy_eval_arrange(mtcars, mpg) # use across() access select()-style semantics iris \%>\% arrange(across(starts_with("Sepal"))) iris \%>\% arrange(across(starts_with("Sepal"), desc)) } \seealso{ Other single table verbs: \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/nest_by.Rd0000644000176200001440000000676514151641776014143 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nest_by.R \name{nest_by} \alias{nest_by} \title{Nest by one or more variables} \usage{ nest_by(.data, ..., .key = "data", .keep = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{In \code{group_by()}, variables or computations to group by. Computations are always done on the ungrouped data frame. To perform computations on the grouped data, you need to use a separate \code{mutate()} step before the \code{group_by()}. Computations are not allowed in \code{nest_by()}. In \code{ungroup()}, variables to remove from the grouping.} \item{.key}{Name of the list column} \item{.keep}{Should the grouping columns be kept in the list column.} } \value{ A \link{rowwise} data frame. The output has the following properties: \itemize{ \item The rows come from the underlying \code{\link[=group_keys]{group_keys()}}. 
\item The columns are the grouping keys plus one list-column of data frames. \item Data frame attributes are \strong{not} preserved, because \code{nest_by()} fundamentally creates a new data frame. } A tbl with one row per unique combination of the grouping variables. The first columns are the grouping variables, followed by a list column of tibbles with matching rows of the remaining columns. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{nest_by()} is closely related to \code{\link[=group_by]{group_by()}}. However, instead of storing the group structure in the metadata, it is made explicit in the data, giving each group key a single row along with a list-column of data frames that contain all the other data. \code{nest_by()} returns a \link{rowwise} data frame, which makes operations on the grouped data particularly elegant. See \code{vignette("rowwise")} for more details. } \details{ Note that \code{df \%>\% nest_by(x, y)} is roughly equivalent to\preformatted{df \%>\% group_by(x, y) \%>\% summarise(data = list(cur_data())) \%>\% rowwise() } If you want to unnest a nested data frame, you can either use \code{tidyr::unnest()} or take advantage of \code{summarise()}s multi-row behaviour:\preformatted{nested \%>\% summarise(data) } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_by")}. 
} \examples{ # After nesting, you get one row per group iris \%>\% nest_by(Species) starwars \%>\% nest_by(species) # The output is grouped by row, which makes modelling particularly easy models <- mtcars \%>\% nest_by(cyl) \%>\% mutate(model = list(lm(mpg ~ wt, data = data))) models models \%>\% summarise(rsq = summary(model)$r.squared) \dontshow{if (requireNamespace("broom", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # This is particularly elegant with the broom functions models \%>\% summarise(broom::glance(model)) models \%>\% summarise(broom::tidy(model)) \dontshow{\}) # examplesIf} # Note that you can also summarise to unnest the data models \%>\% summarise(data) } \keyword{internal} dplyr/man/rowwise.Rd0000644000176200001440000000532514121112104014135 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowwise.r \name{rowwise} \alias{rowwise} \title{Group input by rows} \usage{ rowwise(data, ...) } \arguments{ \item{data}{Input data frame.} \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Variables to be preserved when calling \code{\link[=summarise]{summarise()}}. This is typically a set of variables whose combination uniquely identify each row. \strong{NB}: unlike \code{group_by()} you can not create new variables here but instead you can select multiple variables with (e.g.) \code{everything()}.} } \value{ A row-wise data frame with class \code{rowwise_df}. Note that a \code{rowwise_df} is implicitly grouped by row, but is not a \code{grouped_df}. } \description{ \code{rowwise()} allows you to compute on a data frame a row-at-a-time. This is most useful when a vectorised function doesn't exist. Most dplyr verbs preserve row-wise grouping. The exception is \code{\link[=summarise]{summarise()}}, which return a \link{grouped_df}. 
You can explicitly ungroup with \code{\link[=ungroup]{ungroup()}} or \code{\link[=as_tibble]{as_tibble()}}, or convert to a \link{grouped_df} with \code{\link[=group_by]{group_by()}}. } \section{List-columns}{ Because a rowwise has exactly one row per group it offers a small convenience for working with list-columns. Normally, \code{summarise()} and \code{mutate()} extract a groups worth of data with \code{[}. But when you index a list in this way, you get back another list. When you're working with a \code{rowwise} tibble, then dplyr will use \code{[[} instead of \code{[} to make your life a little easier. } \examples{ df <- tibble(x = runif(6), y = runif(6), z = runif(6)) # Compute the mean of x, y, z in each row df \%>\% rowwise() \%>\% mutate(m = mean(c(x, y, z))) # use c_across() to more easily select many variables df \%>\% rowwise() \%>\% mutate(m = mean(c_across(x:z))) # Compute the minimum of x and y in each row df \%>\% rowwise() \%>\% mutate(m = min(c(x, y, z))) # In this case you can use an existing vectorised function: df \%>\% mutate(m = pmin(x, y, z)) # Where these functions exist they'll be much faster than rowwise # so be on the lookout for them. # rowwise() is also useful when doing simulations params <- tribble( ~sim, ~n, ~mean, ~sd, 1, 1, 1, 1, 2, 2, 2, 4, 3, 3, -1, 2 ) # Here I supply variables to preserve after the summary params \%>\% rowwise(sim) \%>\% summarise(z = rnorm(n, mean, sd)) # If you want one row per simulation, put the results in a list() params \%>\% rowwise(sim) \%>\% summarise(z = list(rnorm(n, mean, sd))) } \seealso{ \code{\link[=nest_by]{nest_by()}} for a convenient way of creating rowwise data frames with nested data. 
} dplyr/man/dplyr_extending.Rd0000644000176200001440000001111514151641776015660 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/generics.R \name{dplyr_extending} \alias{dplyr_extending} \alias{dplyr_row_slice} \alias{dplyr_col_modify} \alias{dplyr_reconstruct} \title{Extending dplyr with new data frame subclasses} \usage{ dplyr_row_slice(data, i, ...) dplyr_col_modify(data, cols) dplyr_reconstruct(data, template) } \arguments{ \item{data}{A tibble. We use tibbles because they avoid some inconsistent subset-assignment use cases} \item{i}{A numeric or logical vector that indexes the rows of \code{.data}.} \item{cols}{A named list used modify columns. A \code{NULL} value should remove an existing column.} \item{template}{Template to use for restoring attributes} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} These three functions, along with \verb{names<-} and 1d numeric \code{[} (i.e. \code{x[loc]}) methods, provide a minimal interface for extending dplyr to work with new data frame subclasses. This means that for simple cases you should only need to provide a couple of methods, rather than a method for every dplyr verb. These functions are a stop-gap measure until we figure out how to solve the problem more generally, but it's likely that any code you write to implement them will find a home in what comes next. } \section{Basic advice}{ This section gives you basic advice if you want to extend dplyr to work with your custom data frame subclass, and you want the dplyr methods to behave in basically the same way. \itemize{ \item If you have data frame attributes that don't depend on the rows or columns (and should unconditionally be preserved), you don't need to do anything. 
\item If you have \strong{scalar} attributes that depend on \strong{rows}, implement a \code{dplyr_reconstruct()} method. Your method should recompute the attribute depending on rows now present. \item If you have \strong{scalar} attributes that depend on \strong{columns}, implement a \code{dplyr_reconstruct()} method and a 1d \code{[} method. For example, if your class requires that certain columns be present, your method should return a data.frame or tibble when those columns are removed. \item If your attributes are \strong{vectorised} over \strong{rows}, implement a \code{dplyr_row_slice()} method. This gives you access to \code{i} so you can modify the row attribute accordingly. You'll also need to think carefully about how to recompute the attribute in \code{dplyr_reconstruct()}, and you will need to carefully verify the behaviour of each verb, and provide additional methods as needed. \item If your attributes that are \strong{vectorised} over \strong{columns}, implement \code{dplyr_col_modify()}, 1d \code{[}, and \verb{names<-} methods. All of these methods know which columns are being modified, so you can update the column attribute according. You'll also need to think carefully about how to recompute the attribute in \code{dplyr_reconstruct()}, and you will need to carefully verify the behaviour of each verb, and provide additional methods as needed. } } \section{Current usage}{ \itemize{ \item \code{arrange()}, \code{filter()}, \code{slice()}, \code{semi_join()}, and \code{anti_join()} work by generating a vector of row indices, and then subsetting with \code{dplyr_row_slice()}. \item \code{mutate()} generates a list of new column value (using \code{NULL} to indicate when columns should be deleted), then passes that to \code{dplyr_col_modify()}. \code{transmute()} does the same then uses 1d \code{[} to select the columns. 
\item \code{summarise()} works similarly to \code{mutate()} but the data modified by \code{dplyr_col_modify()} comes from \code{group_data()}. \item \code{select()} uses 1d \code{[} to select columns, then \verb{names<-} to rename them. \code{rename()} just uses \verb{names<-}. \code{relocate()} just uses 1d \code{[}. \item \code{inner_join()}, \code{left_join()}, \code{right_join()}, and \code{full_join()} coerces \code{x} to a tibble, modify the rows, then uses \code{dplyr_reconstruct()} to convert back to the same type as \code{x}. \item \code{nest_join()} uses \code{dplyr_col_modify()} to cast the key variables to common type and add the nested-df that \code{y} becomes. \item \code{distinct()} does a \code{mutate()} if any expressions are present, then uses 1d \code{[} to select variables to keep, then \code{dplyr_row_slice()} to select distinct rows. } Note that \code{group_by()} and \code{ungroup()} don't use any these generics and you'll need to provide methods directly. } \keyword{internal} dplyr/man/if_else.Rd0000644000176200001440000000260414121112104014041 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/if_else.R \name{if_else} \alias{if_else} \title{Vectorised if} \usage{ if_else(condition, true, false, missing = NULL) } \arguments{ \item{condition}{Logical vector} \item{true, false}{Values to use for \code{TRUE} and \code{FALSE} values of \code{condition}. They must be either the same length as \code{condition}, or length 1. They must also be the same type: \code{if_else()} checks that they have the same type and same class. All other attributes are taken from \code{true}.} \item{missing}{If not \code{NULL}, will be used to replace missing values.} } \value{ Where \code{condition} is \code{TRUE}, the matching value from \code{true}, where it's \code{FALSE}, the matching value from \code{false}, otherwise \code{NA}. 
} \description{ Compared to the base \code{\link[=ifelse]{ifelse()}}, this function is more strict. It checks that \code{true} and \code{false} are the same type. This strictness makes the output type more predictable, and makes it somewhat faster. } \examples{ x <- c(-5:5, NA) if_else(x < 0, NA_integer_, x) if_else(x < 0, "negative", "positive", "missing") # Unlike ifelse, if_else preserves types x <- factor(sample(letters[1:5], 10, replace = TRUE)) ifelse(x \%in\% c("a", "b", "c"), x, factor(NA)) if_else(x \%in\% c("a", "b", "c"), x, factor(NA)) # Attributes are taken from the `true` vector, } dplyr/man/tbl.Rd0000644000176200001440000000063414121112104013215 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbl.r \name{tbl} \alias{tbl} \alias{is.tbl} \title{Create a table from a data source} \usage{ tbl(src, ...) is.tbl(x) } \arguments{ \item{src}{A data source} \item{...}{Other arguments passed on to the individual methods} \item{x}{Any object} } \description{ This is a generic method that dispatches based on the first argument. } dplyr/man/cumall.Rd0000644000176200001440000000264414121112104013714 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.R \name{cumall} \alias{cumall} \alias{cumany} \alias{cummean} \title{Cumulativate versions of any, all, and mean} \usage{ cumall(x) cumany(x) cummean(x) } \arguments{ \item{x}{For \code{cumall()} and \code{cumany()}, a logical vector; for \code{cummean()} an integer or numeric vector.} } \value{ A vector the same length as \code{x}. } \description{ dplyr provides \code{cumall()}, \code{cumany()}, and \code{cummean()} to complete R's set of cumulative functions. } \section{Cumulative logical functions}{ These are particularly useful in conjunction with \code{filter()}: \itemize{ \item \code{cumall(x)}: all cases until the first \code{FALSE}. \item \code{cumall(!x)}: all cases until the first \code{TRUE}. 
\item \code{cumany(x)}: all cases after the first \code{TRUE}. \item \code{cumany(!x)}: all cases after the first \code{FALSE}. } } \examples{ # `cummean()` returns a numeric/integer vector of the same length # as the input vector. x <- c(1, 3, 5, 2, 2) cummean(x) cumsum(x) / seq_along(x) # `cumall()` and `cumany()` return logicals cumall(x < 5) cumany(x == 3) # `cumall()` vs. `cumany()` df <- data.frame( date = as.Date("2020-01-01") + 0:6, balance = c(100, 50, 25, -25, -50, 30, 120) ) # all rows after first overdraft df \%>\% filter(cumany(balance < 0)) # all rows until first overdraft df \%>\% filter(cumall(!(balance < 0))) } dplyr/man/grouped_df.Rd0000644000176200001440000000156014121112104014551 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/grouped-df.r \name{grouped_df} \alias{grouped_df} \alias{is.grouped_df} \alias{is_grouped_df} \title{A grouped data frame.} \usage{ grouped_df(data, vars, drop = group_by_drop_default(data)) is.grouped_df(x) is_grouped_df(x) } \arguments{ \item{data}{a tbl or data frame.} \item{vars}{A character vector.} \item{drop}{When \code{.drop = TRUE}, empty groups are dropped.} } \description{ The easiest way to create a grouped data frame is to call the \code{group_by()} method on a data frame or tbl: this will take care of capturing the unevaluated expressions for you. These functions are designed for programmatic use. For data analysis purposes see \code{\link[=group_data]{group_data()}} for the accessor functions that retrieve various metadata from a grouped data frame. 
} \keyword{internal} dplyr/man/add_rownames.Rd0000644000176200001440000000114014151641776015121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-tibble.R \name{add_rownames} \alias{add_rownames} \title{Convert row names to an explicit variable.} \usage{ add_rownames(df, var = "rowname") } \arguments{ \item{df}{Input data frame with rownames.} \item{var}{Name of variable to use} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{\link[tibble:rownames]{tibble::rownames_to_column()}} instead. } \keyword{internal} dplyr/man/tidyeval-compat.Rd0000644000176200001440000000222414151641776015564 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils-tidy-eval.R \name{tidyeval-compat} \alias{tidyeval-compat} \alias{expr} \alias{enquo} \alias{enquos} \alias{sym} \alias{syms} \alias{as_label} \alias{quo} \alias{quos} \alias{quo_name} \alias{ensym} \alias{ensyms} \alias{enexpr} \alias{enexprs} \title{Other tidy eval tools} \description{ These tidy eval functions are no longer for normal usage, but are still exported from dplyr for backward compatibility. See \code{\link[=dplyr_data_masking]{?dplyr_data_masking}} and \code{vignette("programming")} for the latest recommendations. 
\itemize{ \item \link[rlang:expr]{expr()} \item \link[rlang:enquo]{enquo()} \item \link[rlang:enquo]{enquos()} \item \link[rlang:sym]{sym()} \item \link[rlang:sym]{syms()} \item \link[rlang:as_label]{as_label()} \item \link[rlang:defusing-advanced]{quo()} \item \link[rlang:defusing-advanced]{quos()} \item \link[rlang:quo_label]{quo_name()} \item \link[rlang:defusing-advanced]{ensym()} \item \link[rlang:defusing-advanced]{ensyms()} \item \link[rlang:defusing-advanced]{enexpr()} \item \link[rlang:defusing-advanced]{enexprs()} } } \keyword{internal} dplyr/man/vars.Rd0000644000176200001440000000217414121112104013410 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{vars} \alias{vars} \title{Select variables} \usage{ vars(...) } \arguments{ \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Variables to include/exclude in mutate/summarise. You can use same specifications as in \code{\link[=select]{select()}}. If missing, defaults to all non-grouping variables.} } \description{ \code{vars()} was only needed for the scoped verbs, which have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. This helper is intended to provide equivalent semantics to \code{\link[=select]{select()}}. It is used for instance in scoped summarising and mutating verbs (\code{\link[=mutate_at]{mutate_at()}} and \code{\link[=summarise_at]{summarise_at()}}). Note that verbs accepting a \code{vars()} specification also accept a numeric vector of positions or a character vector of column names. } \seealso{ \code{\link[=all_vars]{all_vars()}} and \code{\link[=any_vars]{any_vars()}} for other quoting functions that you can use with scoped verbs. 
} dplyr/man/distinct_all.Rd0000644000176200001440000000512614151641776015137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-distinct.R \name{distinct_all} \alias{distinct_all} \alias{distinct_at} \alias{distinct_if} \title{Select distinct rows by a selection of variables} \usage{ distinct_all(.tbl, .funs = list(), ..., .keep_all = FALSE) distinct_at(.tbl, .vars, .funs = list(), ..., .keep_all = FALSE) distinct_if(.tbl, .predicate, .funs = list(), ..., .keep_all = FALSE) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These \link{scoped} variants of \code{\link[=distinct]{distinct()}} extract distinct rows by a selection of variables. 
Like \code{distinct()}, you can modify the variables before ordering with the \code{.funs} argument. } \section{Grouping variables}{ The grouping variables that are part of the selection are taken into account to determine distinct rows. } \examples{ df <- tibble(x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2) distinct_all(df) # -> distinct(df, across()) distinct_at(df, vars(x,y)) # -> distinct(df, across(c(x, y))) distinct_if(df, is.numeric) # -> distinct(df, across(where(is.numeric))) # You can supply a function that will be applied before extracting the distinct values # The variables of the sorted tibble keep their original values. distinct_all(df, round) # -> distinct(df, across(everything(), round)) } \keyword{internal} dplyr/man/dplyr_data_masking.Rd0000644000176200001440000000600514121112104016266 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-params.R \name{dplyr_data_masking} \alias{dplyr_data_masking} \title{Argument type: data-masking} \description{ This page describes the \verb{} argument modifier which indicates that the argument uses tidy evaluation with \strong{data masking}. If you've never heard of tidy evaluation before, start with \code{vignette("programming")}. } \section{Key terms}{ The primary motivation for tidy evaluation in dplyr is that it provides \strong{data masking}, which blurs the distinction between two types of variables: \itemize{ \item \strong{env-variables} are "programming" variables and live in an environment. They are usually created with \verb{<-}. Env-variables can be any type of R object. \item \strong{data-variables} are "statistical" variables and live in a data frame. They usually come from data files (e.g. \code{.csv}, \code{.xls}), or are created by manipulating existing variables. Data-variables live inside data frames, so must be vectors. 
} } \section{General usage}{ Data masking allows you to refer to variables in the "current" data frame (usually supplied in the \code{.data} argument), without any other prefix. It's what allows you to type (e.g.) \code{filter(diamonds, x == 0 & y == 0 & z == 0)} instead of \code{diamonds[diamonds$x == 0 & diamonds$y == 0 & diamonds$z == 0, ]}. } \section{Indirection}{ The main challenge of data masking arises when you introduce some indirection, i.e. instead of directly typing the name of a variable you want to supply it in a function argument or character vector. There are two main cases: \itemize{ \item If you want the user to supply the variable (or function of variables) in a function argument, embrace the argument, e.g. \code{filter(df, {{ var }})}.\preformatted{dist_summary <- function(df, var) \{ df \%>\% summarise(n = n(), min = min(\{\{ var \}\}), max = max(\{\{ var \}\})) \} mtcars \%>\% dist_summary(mpg) mtcars \%>\% group_by(cyl) \%>\% dist_summary(mpg) } \item If you have the column name as a character vector, use the \code{.data} pronoun, e.g. \code{summarise(df, mean = mean(.data[[var]]))}.\preformatted{for (var in names(mtcars)) \{ mtcars \%>\% count(.data[[var]]) \%>\% print() \} lapply(names(mtcars), function(var) mtcars \%>\% count(.data[[var]])) } } } \section{Dot-dot-dot (...)}{ When this modifier is applied to \code{...}, there is one other useful technique which solves the problem of creating a new variable with a name supplied by the user. Use the interpolation syntax from the glue package: \code{"{var}" := expression}. (Note the use of \verb{:=} instead of \code{=} to enable this syntax).\preformatted{var_name <- "l100km" mtcars \%>\% mutate("\{var_name\}" := 235 / mpg) } Note that \code{...} automatically provides indirection, so you can use it as is (i.e. without embracing) inside a function:\preformatted{grouped_mean <- function(df, var, ...) \{ df \%>\% group_by(...) 
\%>\% summarise(mean = mean(\{\{ var \}\})) \} } } \keyword{internal} dplyr/man/mutate_all.Rd0000644000176200001440000001552614151641776014622 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-mutate.R \name{mutate_all} \alias{mutate_all} \alias{mutate_if} \alias{mutate_at} \alias{transmute_all} \alias{transmute_if} \alias{transmute_at} \title{Mutate multiple columns} \usage{ mutate_all(.tbl, .funs, ...) mutate_if(.tbl, .predicate, .funs, ...) mutate_at(.tbl, .vars, .funs, ..., .cols = NULL) transmute_all(.tbl, .funs, ...) transmute_if(.tbl, .predicate, .funs, ...) transmute_at(.tbl, .vars, .funs, ..., .cols = NULL) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.cols}{This argument has been renamed to \code{.vars} to fit dplyr's terminology and is deprecated.} } \value{ A data frame. By default, the newly created columns have the shortest names needed to uniquely identify the output. To force inclusion of a name, even when not needed, name the input (see examples for details). 
} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. The \link{scoped} variants of \code{\link[=mutate]{mutate()}} and \code{\link[=transmute]{transmute()}} make it easy to apply the same transformation to multiple variables. There are three variants: \itemize{ \item _all affects every variable \item _at affects variables selected with a character vector or vars() \item _if affects variables selected with a predicate function: } } \section{Grouping variables}{ If applied on a grouped tibble, these operations are \emph{not} applied to the grouping variables. The behaviour depends on whether the selection is \strong{implicit} (\code{all} and \code{if} selections) or \strong{explicit} (\code{at} selections). \itemize{ \item Grouping variables covered by explicit selections in \code{mutate_at()} and \code{transmute_at()} are always an error. Add \code{-group_cols()} to the \code{\link[=vars]{vars()}} selection to avoid this:\preformatted{data \%>\% mutate_at(vars(-group_cols(), ...), myoperation) } Or remove \code{group_vars()} from the character vector of column names:\preformatted{nms <- setdiff(nms, group_vars(data)) data \%>\% mutate_at(vars, myoperation) } \item Grouping variables covered by implicit selections are ignored by \code{mutate_all()}, \code{transmute_all()}, \code{mutate_if()}, and \code{transmute_if()}. } } \section{Naming}{ The names of the new columns are derived from the names of the input variables and the names of the functions. \itemize{ \item if there is only one unnamed function (i.e. 
if \code{.funs} is an unnamed list of length one), the names of the input variables are used to name the new columns; \item for \verb{_at} functions, if there is only one unnamed variable (i.e., if \code{.vars} is of the form \code{vars(a_single_column)}) and \code{.funs} has length greater than one, the names of the functions are used to name the new columns; \item otherwise, the new names are created by concatenating the names of the input variables and the names of the functions, separated with an underscore \code{"_"}. } The \code{.funs} argument can be a named or unnamed list. If a function is unnamed and the name cannot be derived automatically, a name of the form "fn#" is used. Similarly, \code{\link[=vars]{vars()}} accepts named and unnamed arguments. If a variable in \code{.vars} is named, a new column by that name will be created. Name collisions in the new columns are disambiguated using a unique suffix. } \section{Life cycle}{ The functions are maturing, because the naming scheme and the disambiguation algorithm are subject to change in dplyr 0.9.0. } \examples{ iris <- as_tibble(iris) # All variants can be passed functions and additional arguments, # purrr-style. The _at() variants directly support strings. 
Here # we'll scale the variables `height` and `mass`: scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm) starwars \%>\% mutate_at(c("height", "mass"), scale2) # -> starwars \%>\% mutate(across(c("height", "mass"), scale2)) # You can pass additional arguments to the function: starwars \%>\% mutate_at(c("height", "mass"), scale2, na.rm = TRUE) starwars \%>\% mutate_at(c("height", "mass"), ~scale2(., na.rm = TRUE)) # -> starwars \%>\% mutate(across(c("height", "mass"), ~ scale2(.x, na.rm = TRUE))) # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): iris \%>\% mutate_at(vars(matches("Sepal")), log) iris \%>\% mutate(across(matches("Sepal"), log)) # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. Here we divide all the numeric columns by 100: starwars \%>\% mutate_if(is.numeric, scale2, na.rm = TRUE) starwars \%>\% mutate(across(where(is.numeric), ~ scale2(.x, na.rm = TRUE))) # mutate_if() is particularly useful for transforming variables from # one type to another iris \%>\% mutate_if(is.factor, as.character) iris \%>\% mutate_if(is.double, as.integer) # -> iris \%>\% mutate(across(where(is.factor), as.character)) iris \%>\% mutate(across(where(is.double), as.integer)) # Multiple transformations ---------------------------------------- # If you want to apply multiple transformations, pass a list of # functions. When there are multiple functions, they create new # variables instead of modifying the variables in place: iris \%>\% mutate_if(is.numeric, list(scale2, log)) iris \%>\% mutate_if(is.numeric, list(~scale2(.), ~log(.))) iris \%>\% mutate_if(is.numeric, list(scale = scale2, log = log)) # -> iris \%>\% as_tibble() \%>\% mutate(across(where(is.numeric), list(scale = scale2, log = log))) # When there's only one function in the list, it modifies existing # variables in place. 
Give it a name to instead create new variables: iris \%>\% mutate_if(is.numeric, list(scale2)) iris \%>\% mutate_if(is.numeric, list(scale = scale2)) } \seealso{ \link[=scoped]{The other scoped verbs}, \code{\link[=vars]{vars()}} } \keyword{internal} dplyr/man/group_map.Rd0000644000176200001440000001043314151641776014454 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group_map.R \name{group_map} \alias{group_map} \alias{group_modify} \alias{group_walk} \title{Apply a function to each group} \usage{ group_map(.data, .f, ..., .keep = FALSE) group_modify(.data, .f, ..., .keep = FALSE) group_walk(.data, .f, ...) } \arguments{ \item{.data}{A grouped tibble} \item{.f}{A function or formula to apply to each group. If a \strong{function}, it is used as is. It should have at least 2 formal arguments. If a \strong{formula}, e.g. \code{~ head(.x)}, it is converted to a function. In the formula, you can use \itemize{ \item \code{.} or \code{.x} to refer to the subset of rows of \code{.tbl} for the given group \item \code{.y} to refer to the key, a one row tibble with one column per grouping variable that identifies the group }} \item{...}{Additional arguments passed on to \code{.f}} \item{.keep}{are the grouping variables kept in \code{.x}} } \value{ \itemize{ \item \code{group_modify()} returns a grouped tibble. In that case \code{.f} must return a data frame. \item \code{group_map()} returns a list of results from calling \code{.f} on each group. \item \code{group_walk()} calls \code{.f} for side effects and returns the input \code{.tbl}, invisibly. } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{group_map()}, \code{group_modify()} and \code{group_walk()} are purrr-style functions that can be used to iterate on grouped tibbles. 
} \details{ Use \code{group_modify()} when \code{summarize()} is too limited, in terms of what you need to do and return for each group. \code{group_modify()} is good for "data frame in, data frame out". If that is too limited, you need to use a \link[=group_nest]{nested} or \link[=group_split]{split} workflow. \code{group_modify()} is an evolution of \code{\link[=do]{do()}}, if you have used that before. Each conceptual group of the data frame is exposed to the function \code{.f} with two pieces of information: \itemize{ \item The subset of the data for the group, exposed as \code{.x}. \item The key, a tibble with exactly one row and columns for each grouping variable, exposed as \code{.y}. } For completeness, \code{group_modify()}, \code{group_map()} and \code{group_walk()} also work on ungrouped data frames, in that case the function is applied to the entire data frame (exposed as \code{.x}), and \code{.y} is a one row tibble with no column, consistently with \code{\link[=group_keys]{group_keys()}}. 
} \examples{ # return a list mtcars \%>\% group_by(cyl) \%>\% group_map(~ head(.x, 2L)) # return a tibble grouped by `cyl` with 2 rows per group # the grouping data is recalculated mtcars \%>\% group_by(cyl) \%>\% group_modify(~ head(.x, 2L)) \dontshow{if (requireNamespace("broom", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # a list of tibbles iris \%>\% group_by(Species) \%>\% group_map(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) # a restructured grouped tibble iris \%>\% group_by(Species) \%>\% group_modify(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) \dontshow{\}) # examplesIf} # a list of vectors iris \%>\% group_by(Species) \%>\% group_map(~ quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75))) # to use group_modify() the lambda must return a data frame iris \%>\% group_by(Species) \%>\% group_modify(~ { quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)) \%>\% tibble::enframe(name = "prob", value = "quantile") }) iris \%>\% group_by(Species) \%>\% group_modify(~ { .x \%>\% purrr::map_dfc(fivenum) \%>\% mutate(nms = c("min", "Q1", "median", "Q3", "max")) }) # group_walk() is for side effects dir.create(temp <- tempfile()) iris \%>\% group_by(Species) \%>\% group_walk(~ write.csv(.x, file = file.path(temp, paste0(.y$Species, ".csv")))) list.files(temp, pattern = "csv$") unlink(temp, recursive = TRUE) # group_modify() and ungrouped data frames mtcars \%>\% group_modify(~ head(.x, 2L)) } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_nest}()}, \code{\link{group_split}()}, \code{\link{group_trim}()} } \concept{grouping functions} dplyr/man/rename.Rd0000644000176200001440000000415314121112104013703 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rename.R \name{rename} \alias{rename} \alias{rename_with} \title{Rename columns} \usage{ rename(.data, ...) rename_with(.data, .fn, .cols = everything(), ...) 
} \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{For \code{rename()}: <\code{\link[=dplyr_tidy_select]{tidy-select}}> Use \code{new_name = old_name} to rename selected variables. For \code{rename_with()}: additional arguments passed onto \code{.fn}.} \item{.fn}{A function used to transform the selected \code{.cols}. Should return a character vector the same length as the input.} \item{.cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to rename; defaults to all columns.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item Column names are changed; column order is preserved. \item Data frame attributes are preserved. \item Groups are updated to reflect new names. } } \description{ \code{rename()} changes the names of individual variables using \code{new_name = old_name} syntax; \code{rename_with()} renames columns using a function. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}. 
} \examples{ iris <- as_tibble(iris) # so it prints a little nicer rename(iris, petal_length = Petal.Length) rename_with(iris, toupper) rename_with(iris, toupper, starts_with("Petal")) rename_with(iris, ~ tolower(gsub(".", "_", .x, fixed = TRUE))) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/sql.Rd0000644000176200001440000000070614121112104013233 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compat-dbplyr.R \name{sql} \alias{sql} \title{SQL escaping.} \usage{ sql(...) } \arguments{ \item{...}{Character vectors that will be combined into a single SQL expression.} } \description{ These functions are critical when writing functions that translate R functions to sql functions. Typically a conversion function should escape all its inputs and return an sql object. } dplyr/man/sample_n.Rd0000644000176200001440000000565214151641776014270 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sample.R \name{sample_n} \alias{sample_n} \alias{sample_frac} \title{Sample n rows from a table} \usage{ sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) sample_frac(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) } \arguments{ \item{tbl}{A data.frame.} \item{size}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> For \code{sample_n()}, the number of rows to select. For \code{sample_frac()}, the fraction of rows to select. If \code{tbl} is grouped, \code{size} applies to each group.} \item{replace}{Sample with or without replacement?} \item{weight}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. 
Weights are automatically standardised to sum to 1.} \item{.env}{DEPRECATED.} \item{...}{ignored} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{sample_n()} and \code{sample_frac()} have been superseded in favour of \code{\link[=slice_sample]{slice_sample()}}. While they will not be deprecated in the near future, retirement means that we will only perform critical bug fixes, so we recommend moving to the newer alternative. These functions were superseded because we realised it was more convenient to have two mutually exclusive arguments to one function, rather than two separate functions. This also made it easier to clean up a few other smaller design issues with \code{sample_n()}/\code{sample_frac}: \itemize{ \item The connection to \code{slice()} was not obvious. \item The name of the first argument, \code{tbl}, is inconsistent with other single table verbs which use \code{.data}. \item The \code{size} argument uses tidy evaluation, which is surprising and undocumented. \item It was easier to remove the deprecated \code{.env} argument. \item \code{...} was in a suboptimal position. } } \examples{ by_cyl <- mtcars \%>\% group_by(cyl) # sample_n() -> slice_sample() ---------------------------------------------- sample_n(mtcars, 10) sample_n(mtcars, 50, replace = TRUE) sample_n(mtcars, 10, weight = mpg) # Changes: # * explicitly name the `n` argument, # * the `weight` argument is now `weight_by`. 
slice_sample(mtcars, n = 10) slice_sample(mtcars, n = 50, replace = TRUE) slice_sample(mtcars, n = 10, weight_by = mpg) # Note that sample_n() would error if n was bigger than the group size # slice_sample() will just use the available rows for consistency with # the other slice helpers like slice_head() # sample_frac() -> slice_sample() ------------------------------------------- sample_frac(mtcars) sample_frac(mtcars, replace = TRUE) # Changes: # * use prop = 1 to randomly sample all rows slice_sample(mtcars, prop = 1) slice_sample(mtcars, prop = 1, replace = TRUE) } \keyword{internal} dplyr/man/mutate.Rd0000644000176200001440000001745414151641776013774 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mutate.R \name{mutate} \alias{mutate} \alias{mutate.data.frame} \alias{transmute} \title{Create, modify, and delete columns} \usage{ mutate(.data, ...) \method{mutate}{data.frame}( .data, ..., .keep = c("all", "used", "unused", "none"), .before = NULL, .after = NULL ) transmute(.data, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> Name-value pairs. The name gives the name of the column in the output. The value can be: \itemize{ \item A vector of length 1, which will be recycled to the correct length. \item A vector the same length as the current group (or the whole data frame if ungrouped). \item \code{NULL}, to remove the column. \item A data frame or tibble, to create multiple columns in the output. }} \item{.keep}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Control which columns from \code{.data} are retained in the output. Grouping columns and columns created by \code{...} are always kept. 
\itemize{ \item \code{"all"} retains all columns from \code{.data}. This is the default. \item \code{"used"} retains only the columns used in \code{...} to create new columns. This is useful for checking your work, as it displays inputs and outputs side-by-side. \item \code{"unused"} retains only the columns \emph{not} used in \code{...} to create new columns. This is useful if you generate new columns, but no longer need the columns used to generate them. \item \code{"none"} doesn't retain any extra columns from \code{.data}. Only the grouping variables and columns created by \code{...} are kept. }} \item{.before, .after}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns should appear (the default is to add to the right hand side). See \code{\link[=relocate]{relocate()}} for more details.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item For \code{mutate()}: \itemize{ \item Columns from \code{.data} will be preserved according to the \code{.keep} argument. \item Existing columns that are modified by \code{...} will always be returned in their original location. \item New columns created through \code{...} will be placed according to the \code{.before} and \code{.after} arguments. } \item For \code{transmute()}: \itemize{ \item Columns created or modified through \code{...} will be returned in the order specified by \code{...}. \item Unmodified grouping columns will be placed at the front. } \item The number of rows is not affected. \item Columns given the value \code{NULL} will be removed. \item Groups will be recomputed if a grouping variable is mutated. \item Data frame attributes are preserved. 
} } \description{ \code{mutate()} adds new variables and preserves existing ones; \code{transmute()} adds new variables and drops existing ones. New variables overwrite existing variables of the same name. Variables can be removed by setting their value to \code{NULL}. } \section{Useful mutate functions}{ \itemize{ \item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings \item \code{\link[=lead]{lead()}}, \code{\link[=lag]{lag()}} \item \code{\link[=dense_rank]{dense_rank()}}, \code{\link[=min_rank]{min_rank()}}, \code{\link[=percent_rank]{percent_rank()}}, \code{\link[=row_number]{row_number()}}, \code{\link[=cume_dist]{cume_dist()}}, \code{\link[=ntile]{ntile()}} \item \code{\link[=cumsum]{cumsum()}}, \code{\link[=cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[=cumany]{cumany()}}, \code{\link[=cumall]{cumall()}} \item \code{\link[=na_if]{na_if()}}, \code{\link[=coalesce]{coalesce()}} \item \code{\link[=if_else]{if_else()}}, \code{\link[=recode]{recode()}}, \code{\link[=case_when]{case_when()}} } } \section{Grouped tibbles}{ Because mutating expressions are computed within groups, they may yield different results on grouped tibbles. This will be the case as soon as an aggregating, lagging, or ranking function is involved. Compare this ungrouped mutate:\preformatted{starwars \%>\% select(name, mass, species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) } With the grouped equivalent:\preformatted{starwars \%>\% select(name, mass, species) \%>\% group_by(species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) } The former normalises \code{mass} by the global average whereas the latter normalises by the averages within species levels. } \section{Methods}{ These function are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. 
See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{mutate()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. \item \code{transmute()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("transmute")}. } } \examples{ # Newly created variables are available immediately starwars \%>\% select(name, mass) \%>\% mutate( mass2 = mass * 2, mass2_squared = mass2 * mass2 ) # As well as adding new variables, you can use mutate() to # remove variables and modify existing variables. starwars \%>\% select(name, height, mass, homeworld) \%>\% mutate( mass = NULL, height = height * 0.0328084 # convert to feet ) # Use across() with mutate() to apply a transformation # to multiple columns in a tibble. starwars \%>\% select(name, homeworld, species) \%>\% mutate(across(!name, as.factor)) # see more in ?across # Window functions are useful for grouped mutates: starwars \%>\% select(name, mass, homeworld) \%>\% group_by(homeworld) \%>\% mutate(rank = min_rank(desc(mass))) # see `vignette("window-functions")` for more details # By default, new columns are placed on the far right. # Experimental: you can override with `.before` or `.after` df <- tibble(x = 1, y = 2) df \%>\% mutate(z = x + y) df \%>\% mutate(z = x + y, .before = 1) df \%>\% mutate(z = x + y, .after = x) # By default, mutate() keeps all columns from the input data. # Experimental: You can override with `.keep` df <- tibble(x = 1, y = 2, a = "a", b = "b") df \%>\% mutate(z = x + y, .keep = "all") # the default df \%>\% mutate(z = x + y, .keep = "used") df \%>\% mutate(z = x + y, .keep = "unused") df \%>\% mutate(z = x + y, .keep = "none") # same as transmute() # Grouping ---------------------------------------- # The mutate operation may yield different results on grouped # tibbles because the expressions are computed within groups. 
# The following normalises `mass` by the global average: starwars \%>\% select(name, mass, species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) # Whereas this normalises `mass` by the averages within species # levels: starwars \%>\% select(name, mass, species) \%>\% group_by(species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) # Indirection ---------------------------------------- # Refer to column names stored as strings with the `.data` pronoun: vars <- c("mass", "height") mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]]) # Learn more in ?dplyr_data_masking } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/na_if.Rd0000644000176200001440000000232214121112104013504 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/na_if.R \name{na_if} \alias{na_if} \title{Convert values to NA} \usage{ na_if(x, y) } \arguments{ \item{x}{Vector to modify} \item{y}{Value to replace with NA} } \value{ A modified version of \code{x} that replaces any values that are equal to \code{y} with NA. } \description{ This is a translation of the SQL command \code{NULLIF}. It is useful if you want to convert an annoying value to \code{NA}. } \examples{ na_if(1:5, 5:1) x <- c(1, -1, 0, 10) 100 / x 100 / na_if(x, 0) y <- c("abc", "def", "", "ghi") na_if(y, "") # na_if() is particularly useful inside mutate(), # and is meant for use with vectors rather than entire data frames starwars \%>\% select(name, eye_color) \%>\% mutate(eye_color = na_if(eye_color, "unknown")) # na_if() can also be used with mutate() and across() # to mutate multiple columns starwars \%>\% mutate(across(where(is.character), ~na_if(., "unknown"))) } \seealso{ \code{\link[=coalesce]{coalesce()}} to replace missing values with a specified value. 
\code{\link[tidyr:replace_na]{tidyr::replace_na()}} to replace \code{NA} with a value. \code{\link[=recode]{recode()}} to more generally replace values. } dplyr/man/check_dbplyr.Rd0000644000176200001440000000157714144435746015115 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compat-dbplyr.R \name{check_dbplyr} \alias{check_dbplyr} \alias{wrap_dbplyr_obj} \title{dbplyr compatibility functions} \usage{ check_dbplyr() wrap_dbplyr_obj(obj_name) } \description{ In dplyr 0.7.0, a number of database and SQL functions moved from dplyr to dbplyr. The generic functions stayed in dplyr (since there is no easy way to conditionally import a generic from different packages), but many other SQL and database helper functions moved. If you have written a backend, these functions generate the code you need to work with both dplyr 0.5.0 and dplyr 0.7.0. } \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} wrap_dbplyr_obj("build_sql") wrap_dbplyr_obj("base_agg") \dontshow{\}) # examplesIf} } \keyword{internal} dplyr/man/dim_desc.Rd0000644000176200001440000000064514121112104014205 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils-format.r \name{dim_desc} \alias{dim_desc} \title{Describing dimensions} \usage{ dim_desc(x) } \arguments{ \item{x}{Object to show dimensions for.} } \description{ Prints the dimensions of an array-like object in a user-friendly manner, substituting \code{NA} with ?? (for SQL queries). } \examples{ dim_desc(mtcars) } \keyword{internal} dplyr/man/nest_join.Rd0000644000176200001440000000633614121112104014431 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join.r \name{nest_join} \alias{nest_join} \alias{nest_join.data.frame} \title{Nest join} \usage{ nest_join(x, y, by = NULL, copy = FALSE, keep = FALSE, name = NULL, ...)
\method{nest_join}{data.frame}(x, y, by = NULL, copy = FALSE, keep = FALSE, name = NULL, ...) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join by different variables on \code{x} and \code{y}, use a named vector. For example, \code{by = c("a" = "b")} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a vector with length > 1. For example, \code{by = c("a", "b")} will match \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. Use a named vector to match different variables in \code{x} and \code{y}. For example, \code{by = c("a" = "b", "c" = "d")} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, use \code{by = character()}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{keep}{Should the join keys from both \code{x} and \code{y} be preserved in the output?} \item{name}{The name of the list column nesting joins create. If \code{NULL} the name of \code{y} is used.} \item{...}{Other parameters passed onto methods.} } \description{ \code{nest_join()} returns all rows and columns in \code{x} with a new nested-df column that contains all matches from \code{y}. When there is no match, the list column is a 0-row tibble. 
} \details{ In some sense, a \code{nest_join()} is the most fundamental join since you can recreate the other joins from it: \itemize{ \item \code{inner_join()} is a \code{nest_join()} plus \code{\link[tidyr:nest]{tidyr::unnest()}} \item \code{left_join()} is a \code{nest_join()} plus \code{unnest(.drop = FALSE)}. \item \code{semi_join()} is a \code{nest_join()} plus a \code{filter()} where you check that every element of data has at least one row, \item \code{anti_join()} is a \code{nest_join()} plus a \code{filter()} where you check every element has zero rows. } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_join")}. } \examples{ band_members \%>\% nest_join(band_instruments) } \seealso{ Other joins: \code{\link{filter-joins}}, \code{\link{mutate-joins}} } \concept{joins} dplyr/man/pull.Rd0000644000176200001440000000407614176476366013445 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pull.R \name{pull} \alias{pull} \title{Extract a single column} \usage{ pull(.data, var = -1, name = NULL, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{var}{A variable specified as: \itemize{ \item a literal variable name \item a positive integer, giving the position counting from the left \item a negative integer, giving the position counting from the right. } The default returns the last column (on the assumption that's the column you've created most recently).
This argument is taken by expression and supports \link[rlang:topic-inject]{quasiquotation} (you can unquote column names and column locations).} \item{name}{An optional parameter that specifies the column to be used as names for a named vector. Specified in a similar manner as \code{var}.} \item{...}{For use by methods.} } \value{ A vector the same size as \code{.data}. } \description{ \code{pull()} is similar to \code{$}. It's mostly useful because it looks a little nicer in pipes, it also works with remote data frames, and it can optionally name the output. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}. } \examples{ mtcars \%>\% pull(-1) mtcars \%>\% pull(1) mtcars \%>\% pull(cyl) \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # Also works for remote sources df <- dbplyr::memdb_frame(x = 1:10, y = 10:1, .name = "pull-ex") df \%>\% mutate(z = x * y) \%>\% pull() \dontshow{\}) # examplesIf} # Pull a named vector starwars \%>\% pull(height, name) } dplyr/man/filter-joins.Rd0000644000176200001440000000751614121112104015047 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join.r \name{filter-joins} \alias{filter-joins} \alias{semi_join} \alias{semi_join.data.frame} \alias{anti_join} \alias{anti_join.data.frame} \title{Filtering joins} \usage{ semi_join(x, y, by = NULL, copy = FALSE, ...) \method{semi_join}{data.frame}(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never")) anti_join(x, y, by = NULL, copy = FALSE, ...) 
\method{anti_join}{data.frame}(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never")) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join by different variables on \code{x} and \code{y}, use a named vector. For example, \code{by = c("a" = "b")} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a vector with length > 1. For example, \code{by = c("a", "b")} will match \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. Use a named vector to match different variables in \code{x} and \code{y}. For example, \code{by = c("a" = "b", "c" = "d")} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, use \code{by = character()}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{...}{Other parameters passed onto methods.} \item{na_matches}{Should \code{NA} and \code{NaN} values match one another? The default, \code{"na"}, treats two \code{NA} or \code{NaN} values as equal, like \code{\%in\%}, \code{\link[=match]{match()}}, \code{\link[=merge]{merge()}}. Use \code{"never"} to always treat two \code{NA} or \code{NaN} values as different, like joins for database sources, similarly to \code{merge(incomparables = FALSE)}.} } \value{ An object of the same type as \code{x}. 
The output has the following properties: \itemize{ \item Rows are a subset of the input, but appear in the same order. \item Columns are not modified. \item Data frame attributes are preserved. \item Groups are taken from \code{x}. The number of groups may be reduced. } } \description{ Filtering joins filter rows from \code{x} based on the presence or absence of matches in \code{y}: \itemize{ \item \code{semi_join()} returns all rows from \code{x} with a match in \code{y}. \item \code{anti_join()} returns all rows from \code{x} with\strong{out} a match in \code{y}. } } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{semi_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("semi_join")}. \item \code{anti_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("anti_join")}.
} } \examples{ # "Filtering" joins keep cases from the LHS band_members \%>\% semi_join(band_instruments) band_members \%>\% anti_join(band_instruments) # To suppress the message about joining variables, supply `by` band_members \%>\% semi_join(band_instruments, by = "name") # This is good practice in production code } \seealso{ Other joins: \code{\link{mutate-joins}}, \code{\link{nest_join}()} } \concept{joins} dplyr/man/group_by_all.Rd0000644000176200001440000000606014151641776015142 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-group-by.R \name{group_by_all} \alias{group_by_all} \alias{group_by_at} \alias{group_by_if} \title{Group by a selection of variables} \usage{ group_by_all( .tbl, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_at( .tbl, .vars, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_if( .tbl, .predicate, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.add}{See \code{\link[=group_by]{group_by()}}} \item{.drop}{Drop groups formed by factor levels that don't appear in the data? The default is \code{TRUE} except when \code{.data} has been previously grouped with \code{.drop = FALSE}. See \code{\link[=group_by_drop_default]{group_by_drop_default()}} for details.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. 
This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These \link{scoped} variants of \code{\link[=group_by]{group_by()}} group a data frame by a selection of variables. Like \code{\link[=group_by]{group_by()}}, they have optional \link{mutate} semantics. } \section{Grouping variables}{ Existing grouping variables are maintained, even if not included in the selection. } \examples{ # Group a data frame by all variables: group_by_all(mtcars) # -> mtcars \%>\% group_by(across()) # Group by variables selected with a predicate: group_by_if(iris, is.factor) # -> iris \%>\% group_by(across(where(is.factor))) # Group by variables selected by name: group_by_at(mtcars, vars(vs, am)) # -> mtcars \%>\% group_by(across(c(vs, am))) # Like group_by(), the scoped variants have optional mutate # semantics. 
This provides a shortcut for group_by() + mutate(): d <- tibble(x=c(1,1,2,2), y=c(1,2,1,2)) group_by_all(d, as.factor) # -> d \%>\% group_by(across(everything(), as.factor)) group_by_if(iris, is.factor, as.character) # -> iris \%>\% group_by(across(where(is.factor), as.character)) } \keyword{internal} dplyr/man/recode.Rd0000644000176200001440000001330014151641776013710 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R \name{recode} \alias{recode} \alias{recode_factor} \title{Recode values} \usage{ recode(.x, ..., .default = NULL, .missing = NULL) recode_factor(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE) } \arguments{ \item{.x}{A vector to modify} \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Replacements. For character and factor \code{.x}, these should be named and replacement is based only on their name. For numeric \code{.x}, these can be named or not. If not named, the replacement is done based on position i.e. \code{.x} represents positions to look for in replacements. See examples. When named, the argument names should be the current values to be replaced, and the argument values should be the new (replacement) values. All replacements must be the same type, and must have either length one or the same length as \code{.x}.} \item{.default}{If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in \code{.x}, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with \code{NA}. \code{.default} must be either length 1 or the same length as \code{.x}.} \item{.missing}{If supplied, any missing values in \code{.x} will be replaced by this value.
Must be either length 1 or the same length as \code{.x}.} \item{.ordered}{If \code{TRUE}, \code{recode_factor()} creates an ordered factor.} } \value{ A vector the same length as \code{.x}, and the same type as the first of \code{...}, \code{.default}, or \code{.missing}. \code{recode_factor()} returns a factor whose levels are in the same order as in \code{...}. The levels in \code{.default} and \code{.missing} come last. } \description{ This is a vectorised version of \code{\link[=switch]{switch()}}: you can replace numeric values based on their position or their name, and character or factor values only by their name. This is an S3 generic: dplyr provides methods for numeric, character, and factors. For logical vectors, use \code{\link[=if_else]{if_else()}}. For more complicated criteria, use \code{\link[=case_when]{case_when()}}. You can use \code{recode()} directly with factors; it will preserve the existing order of levels while changing the values. Alternatively, you can use \code{recode_factor()}, which will change the order of levels to match the order of replacements. See the \href{https://forcats.tidyverse.org/}{forcats} package for more tools for working with factors and their levels. \code{recode()} is questioning because the arguments are in the wrong order. We have \code{new <- old}, \code{mutate(df, new = old)}, and \code{rename(df, new = old)} but \code{recode(x, old = new)}. We don't yet know how to fix this problem, but it's likely to involve creating a new function then retiring or deprecating \code{recode()}. } \examples{ # For character values, recode values with named arguments only. Unmatched # values are unchanged. char_vec <- sample(c("a", "b", "c"), 10, replace = TRUE) recode(char_vec, a = "Apple") recode(char_vec, a = "Apple", b = "Banana") # Use .default as replacement for unmatched values. Note that NA and # replacement values need to be of the same type. 
For more information, see # https://adv-r.hadley.nz/vectors-chap.html#missing-values recode(char_vec, a = "Apple", b = "Banana", .default = NA_character_) # Throws an error as NA is logical, not character. try(recode(char_vec, a = "Apple", b = "Banana", .default = NA)) # Use a named character vector for unquote splicing with !!! level_key <- c(a = "apple", b = "banana", c = "carrot") recode(char_vec, !!!level_key) # For numeric values, named arguments can also be used num_vec <- c(1:4, NA) recode(num_vec, `2` = 20L, `4` = 40L) # Or if you don't name the arguments, recode() matches by position. # (Only works for numeric vector) recode(num_vec, "a", "b", "c", "d") # .x (position given) looks in (...), then grabs (... value at position) # so if nothing at position (here 5), it uses .default or NA. recode(c(1,5,3), "a", "b", "c", "d", .default = "nothing") # Note that if the replacements are not compatible with .x, # unmatched values are replaced by NA and a warning is issued. recode(num_vec, `2` = "b", `4` = "d") # use .default to change the replacement value recode(num_vec, "a", "b", "c", .default = "other") # use .missing to replace missing values in .x recode(num_vec, "a", "b", "c", .default = "other", .missing = "missing") # For factor values, use only named replacements # and supply default with levels() factor_vec <- factor(c("a", "b", "c")) recode(factor_vec, a = "Apple", .default = levels(factor_vec)) # Use recode_factor() to create factors with levels ordered as they # appear in the recode call. The levels in .default and .missing # come last. recode_factor(num_vec, `1` = "z", `2` = "y", `3` = "x") recode_factor(num_vec, `1` = "z", `2` = "y", `3` = "x", .default = "D") recode_factor(num_vec, `1` = "z", `2` = "y", `3` = "x", .default = "D", .missing = "M") # When the input vector is a compatible vector (character vector or # factor), it is reused as default. 
recode_factor(letters[1:3], b = "z", c = "y") recode_factor(factor(letters[1:3]), b = "z", c = "y") # Use a named character vector to recode factors with unquote splicing. level_key <- c(a = "apple", b = "banana", c = "carrot") recode_factor(char_vec, !!!level_key) } \seealso{ \code{\link[=na_if]{na_if()}} to replace specified values with a \code{NA}. \code{\link[=coalesce]{coalesce()}} to replace missing values with a specified value. \code{\link[tidyr:replace_na]{tidyr::replace_na()}} to replace \code{NA} with a value. } dplyr/man/mutate-joins.Rd0000644000176200001440000001563114121112104015056 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join.r \name{mutate-joins} \alias{mutate-joins} \alias{join} \alias{join.data.frame} \alias{inner_join} \alias{inner_join.data.frame} \alias{left_join} \alias{left_join.data.frame} \alias{right_join} \alias{right_join.data.frame} \alias{full_join} \alias{full_join.data.frame} \title{Mutating joins} \usage{ inner_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE ) \method{inner_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never") ) left_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE ) \method{left_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never") ) right_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE ) \method{right_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never") ) full_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE ) \method{full_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE, na_matches = c("na", "never") ) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. 
a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join by different variables on \code{x} and \code{y}, use a named vector. For example, \code{by = c("a" = "b")} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a vector with length > 1. For example, \code{by = c("a", "b")} will match \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. Use a named vector to match different variables in \code{x} and \code{y}. For example, \code{by = c("a" = "b", "c" = "d")} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, use \code{by = character()}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} \item{...}{Other parameters passed onto methods.} \item{keep}{Should the join keys from both \code{x} and \code{y} be preserved in the output?} \item{na_matches}{Should \code{NA} and \code{NaN} values match one another? The default, \code{"na"}, treats two \code{NA} or \code{NaN} values as equal, like \code{\%in\%}, \code{\link[=match]{match()}}, \code{\link[=merge]{merge()}}. 
Use \code{"never"} to always treat two \code{NA} or \code{NaN} values as different, like joins for database sources, similarly to \code{merge(incomparables = FALSE)}.} } \value{ An object of the same type as \code{x}. The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item For \code{inner_join()}, a subset of \code{x} rows. For \code{left_join()}, all \code{x} rows. For \code{right_join()}, a subset of \code{x} rows, followed by unmatched \code{y} rows. For \code{full_join()}, all \code{x} rows, followed by unmatched \code{y} rows. \item For all joins, rows will be duplicated if one or more rows in \code{x} matches multiple rows in \code{y}. \item Output columns include all \code{x} columns and all \code{y} columns. If columns in \code{x} and \code{y} have the same name (and aren't included in \code{by}), \code{suffix}es are added to disambiguate. \item Output columns included in \code{by} are coerced to common type across \code{x} and \code{y}. \item Groups are taken from \code{x}. } } \description{ The mutating joins add columns from \code{y} to \code{x}, matching rows based on the keys: \itemize{ \item \code{inner_join()}: includes all rows in \code{x} and \code{y}. \item \code{left_join()}: includes all rows in \code{x}. \item \code{right_join()}: includes all rows in \code{y}. \item \code{full_join()}: includes all rows in \code{x} or \code{y}. } If a row in \code{x} matches multiple rows in \code{y}, all the rows in \code{y} will be returned once for each matching row in \code{x}. } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. 
\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. } } \examples{ band_members \%>\% inner_join(band_instruments) band_members \%>\% left_join(band_instruments) band_members \%>\% right_join(band_instruments) band_members \%>\% full_join(band_instruments) # To suppress the message about joining variables, supply `by` band_members \%>\% inner_join(band_instruments, by = "name") # This is good practice in production code # Use a named `by` if the join variables have different names band_members \%>\% full_join(band_instruments2, by = c("name" = "artist")) # By default, the join keys from `x` and `y` are coalesced in the output; use # `keep = TRUE` to keep the join keys from both `x` and `y` band_members \%>\% full_join(band_instruments2, by = c("name" = "artist"), keep = TRUE) # If a row in `x` matches multiple rows in `y`, all the rows in `y` will be # returned once for each matching row in `x` df1 <- tibble(x = 1:3) df2 <- tibble(x = c(1, 1, 2), y = c("first", "second", "third")) df1 \%>\% left_join(df2) # By default, NAs match other NAs so that there are two # rows in the output of this join: df1 <- data.frame(x = c(1, NA), y = 2) df2 <- data.frame(x = c(1, NA), z = 3) left_join(df1, df2) # You can optionally request that NAs don't match, giving a # a result that more closely resembles SQL joins left_join(df1, df2, na_matches = "never") } \seealso{ Other joins: \code{\link{filter-joins}}, \code{\link{nest_join}()} } \concept{joins} dplyr/man/src_tbls.Rd0000644000176200001440000000076614121112104014255 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/src.r \name{src_tbls} \alias{src_tbls} \title{List all tbls provided by a source.} \usage{ src_tbls(x, ...) 
} \arguments{ \item{x}{a data src.} \item{...}{other arguments passed on to the individual methods.} } \description{ This is a generic method which individual src's will provide methods for. Most methods will not be documented because it's usually pretty obvious what possible results will be. } \keyword{internal} dplyr/man/filter_all.Rd0000644000176200001440000000625614151641776014610 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-filter.R \name{filter_all} \alias{filter_all} \alias{filter_if} \alias{filter_at} \title{Filter within a selection of variables} \usage{ filter_all(.tbl, .vars_predicate, .preserve = FALSE) filter_if(.tbl, .predicate, .vars_predicate, .preserve = FALSE) filter_at(.tbl, .vars, .vars_predicate, .preserve = FALSE) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.vars_predicate}{A quoted predicate expression as returned by \code{\link[=all_vars]{all_vars()}} or \code{\link[=any_vars]{any_vars()}}. Can also be a function or purrr-like formula. In this case, the intersection of the results is taken by default and there's currently no way to request the union.} \item{.preserve}{when \code{FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise it is kept as is.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. 
This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These \link{scoped} filtering verbs apply a predicate expression to a selection of variables. The predicate expression should be quoted with \code{\link[=all_vars]{all_vars()}} or \code{\link[=any_vars]{any_vars()}} and should mention the pronoun \code{.} to refer to variables. } \section{Grouping variables}{ The grouping variables that are part of the selection are taken into account to determine filtered rows. } \examples{ # While filter() accepts expressions with specific variables, the # scoped filter verbs take an expression with the pronoun `.` and # replicate it over all variables. This expression should be quoted # with all_vars() or any_vars(): all_vars(is.na(.)) any_vars(is.na(.)) # You can take the intersection of the replicated expressions: filter_all(mtcars, all_vars(. > 150)) # -> filter(mtcars, if_all(everything(), ~ .x > 150)) # Or the union: filter_all(mtcars, any_vars(. > 150)) # -> filter(mtcars, if_any(everything(), ~ . > 150)) # You can vary the selection of columns on which to apply the # predicate. filter_at() takes a vars() specification: filter_at(mtcars, vars(starts_with("d")), any_vars((. 
\%\% 2) == 0)) # -> filter(mtcars, if_any(starts_with("d"), ~ (.x \%\% 2) == 0)) # And filter_if() selects variables with a predicate function: filter_if(mtcars, ~ all(floor(.) == .), all_vars(. != 0)) # -> is_int <- function(x) all(floor(x) == x) filter(mtcars, if_all(where(is_int), ~ .x != 0)) } \keyword{internal} dplyr/man/dplyr_tidy_select.Rd0000644000176200001440000000563314121112104016162 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-params.R \name{dplyr_tidy_select} \alias{dplyr_tidy_select} \title{Argument type: tidy-select} \description{ This page the describes the \verb{} argument modifier which indicates the argument supports \strong{tidy selections}. Tidy selection provides a concise dialect of R for selecting variables based on their names or properties. Tidy selection is a variant of tidy evaluation. This means that inside functions, tidy-select arguments require special attention, as described in the Indirection section. If you've never heard of tidy evaluation before, start with \code{vignette("programming")}. } \section{Overview of selection features}{ Tidyverse selections implement a dialect of R where operators make it easy to select variables: \itemize{ \item \code{:} for selecting a range of consecutive variables. \item \code{!} for taking the complement of a set of variables. \item \code{&} and \code{|} for selecting the intersection or the union of two sets of variables. \item \code{c()} for combining selections. } In addition, you can use \strong{selection helpers}. Some helpers select specific columns: \itemize{ \item \code{\link[tidyselect:everything]{everything()}}: Matches all variables. \item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset. } These helpers select variables by matching patterns in their names: \itemize{ \item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix. 
\item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix. \item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string. \item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression. \item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03. } These helpers select variables from a character vector: \itemize{ \item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All names must be present, otherwise an out-of-bounds error is thrown. \item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown for names that don't exist. } This helper selects variables with a function: \itemize{ \item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those for which the function returns \code{TRUE}. } } \section{Indirection}{ There are two main cases: \itemize{ \item If you have a character vector of column names, use \code{all_of()} or \code{any_of()}, depending on whether or not you want unknown variable names to cause an error, e.g \code{select(df, all_of(vars))}, \code{select(df, !any_of(vars))}. \item If you you want the user to supply a tidyselect specification in a function argument, embrace the function argument, e.g \code{select(df, {{ vars }})}. } } \keyword{internal} dplyr/man/select_all.Rd0000644000176200001440000000574214176714175014602 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-select.R \name{select_all} \alias{select_all} \alias{rename_all} \alias{select_if} \alias{rename_if} \alias{select_at} \alias{rename_at} \title{Select and rename a selection of variables} \usage{ select_all(.tbl, .funs = list(), ...) rename_all(.tbl, .funs = list(), ...) select_if(.tbl, .predicate, .funs = list(), ...) rename_if(.tbl, .predicate, .funs = list(), ...) select_at(.tbl, .vars, .funs = list(), ...) 
rename_at(.tbl, .vars, .funs = list(), ...) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a purrr style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{rename_if()}, \code{rename_at()}, and \code{rename_all()} have been superseded by \code{rename_with()}. The matching select statements have been superseded by the combination of a \code{select()} + \code{rename_with()}. Any predicate functions passed as arguments to \code{select()} or \code{rename_with()} must be wrapped in \code{\link[=where]{where()}}. These functions were superseded because \code{mutate_if()} and friends were superseded by \code{across()}. \code{select_if()} and \code{rename_if()} already use tidy selection so they can't be replaced by \code{across()} and instead we need a new function. 
} \examples{ mtcars <- as_tibble(mtcars) # for nicer printing mtcars \%>\% rename_all(toupper) # -> mtcars \%>\% rename_with(toupper) # NB: the transformation comes first in rename_with is_whole <- function(x) all(floor(x) == x) mtcars \%>\% rename_if(is_whole, toupper) # -> mtcars \%>\% rename_with(toupper, where(is_whole)) mtcars \%>\% rename_at(vars(mpg:hp), toupper) # -> mtcars \%>\% rename_with(toupper, mpg:hp) # You now must select() and then rename mtcars \%>\% select_all(toupper) # -> mtcars \%>\% rename_with(toupper) # Selection drops unselected variables: mtcars \%>\% select_if(is_whole, toupper) # -> mtcars \%>\% select(where(is_whole)) \%>\% rename_with(toupper) mtcars \%>\% select_at(vars(-contains("ar"), starts_with("c")), toupper) # -> mtcars \%>\% select(!contains("ar") | starts_with("c")) \%>\% rename_with(toupper) } \keyword{internal} dplyr/man/funs.Rd0000644000176200001440000000323514151641776013440 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-funs.R \name{funs} \alias{funs} \title{Create a list of function calls} \usage{ funs(..., .args = list()) } \arguments{ \item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> A list of functions specified by: \itemize{ \item Their name, \code{"mean"} \item The function itself, \code{mean} \item A call to the function with \code{.} as a dummy argument, \code{mean(., na.rm = TRUE)} } The following notations are \strong{not} supported, see examples: \itemize{ \item An anonymous function, \code{function(x) mean(x, na.rm = TRUE)} \item An anonymous function in \pkg{purrr} notation, \code{~mean(., na.rm = TRUE)} }} \item{.args, args}{A named list of additional arguments to be added to all function calls. 
As \code{funs()} is being deprecated, use other methods to supply arguments: \code{...} argument in \link[=summarise_at]{scoped verbs} or make your own functions with \code{\link[purrr:partial]{purrr::partial()}}.
} \description{ This is a generic function which gives more details about an object than \code{\link[=print]{print()}}, and is more focused on human readable output than \code{\link[=str]{str()}}. } \section{Databases}{ Explaining a \code{tbl_sql} will run the SQL \code{EXPLAIN} command which will describe the query plan. This requires a little bit of knowledge about how \code{EXPLAIN} works for your database, but is very useful for diagnosing performance problems. } \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ lahman_s <- dbplyr::lahman_sqlite() batting <- tbl(lahman_s, "Batting") batting \%>\% show_query() batting \%>\% explain() # The batting database has indices on all ID variables: # SQLite automatically picks the most restrictive index batting \%>\% filter(lgID == "NL" & yearID == 2000L) \%>\% explain() # OR's will use multiple indexes batting \%>\% filter(lgID == "NL" | yearID == 2000) \%>\% explain() # Joins will use indexes in both tables teams <- tbl(lahman_s, "Teams") batting \%>\% left_join(teams, c("yearID", "teamID")) \%>\% explain() } \dontshow{\}) # examplesIf} } dplyr/man/src_dbi.Rd0000644000176200001440000000472614174552625014077 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-dbi.R \name{src_dbi} \alias{src_dbi} \alias{src_mysql} \alias{src_postgres} \alias{src_sqlite} \title{Source for database backends} \usage{ src_mysql( dbname, host = NULL, port = 0L, username = "root", password = "", ... ) src_postgres( dbname = NULL, host = NULL, port = NULL, user = NULL, password = NULL, ... 
) src_sqlite(path, create = FALSE) } \arguments{ \item{dbname}{Database name} \item{host, port}{Host name and port number of database} \item{...}{for the src, other arguments passed on to the underlying database connector, \code{\link[DBI:dbConnect]{DBI::dbConnect()}}. For the tbl, included for compatibility with the generic, but otherwise ignored.} \item{user, username, password}{User name and password. Generally, you should avoid saving username and password in your scripts as it is easy to accidentally expose valuable credentials. Instead, retrieve them from environment variables, or use database specific credential scores. For example, with MySQL you can set up \code{my.cnf} as described in \code{\link[RMySQL:MySQLDriver-class]{RMySQL::MySQL()}}.} \item{path}{Path to SQLite database. You can use the special path ":memory:" to create a temporary in memory database.} \item{create}{if \code{FALSE}, \code{path} must already exist. If \code{TRUE}, will create a new SQLite3 database at \code{path} if \code{path} does not exist and connect to the existing database if \code{path} does exist.} } \value{ An S3 object with class \code{src_dbi}, \code{src_sql}, \code{src}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions have been deprecated; instead please use \code{\link[=tbl]{tbl()}} directly on an \code{DBIConnection}. See \url{https://dbplyr.tidyverse.org/} for more details. 
} \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") copy_to(con, mtcars) # To retrieve a single table from a source, use `tbl()` mtcars <- con \%>\% tbl("mtcars") mtcars # You can also use pass raw SQL if you want a more sophisticated query con \%>\% tbl(sql("SELECT * FROM mtcars WHERE cyl == 8")) \dontshow{\}) # examplesIf} } \keyword{internal} dplyr/man/src_local.Rd0000644000176200001440000000137514151641776014431 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-src-local.r \name{src_local} \alias{src_local} \alias{src_df} \title{A local source} \usage{ src_local(tbl, pkg = NULL, env = NULL) src_df(pkg = NULL, env = NULL) } \arguments{ \item{tbl}{name of the function used to generate \code{tbl} objects} \item{pkg, env}{Either the name of a package or an environment object in which to look for objects.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This function was deprecated since it existed to support a style of testing dplyr backends that turned out not to be useful. } \keyword{internal} dplyr/man/setops.Rd0000644000176200001440000000231614151641776014001 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sets.r \name{setops} \alias{setops} \alias{union_all} \title{Set operations} \usage{ union_all(x, y, ...) } \arguments{ \item{x, y}{objects to perform set function on (ignoring order)} \item{...}{These dots are for future extensions and must be empty.} } \description{ These functions override the set functions provided in base to make them generic so that efficient versions for data frames and other tables can be provided. 
The default methods call the base versions. Beware that \code{intersect()}, \code{union()} and \code{setdiff()} remove duplicates. } \examples{ mtcars$model <- rownames(mtcars) first <- mtcars[1:20, ] second <- mtcars[10:32, ] intersect(first, second) union(first, second) setdiff(first, second) setdiff(second, first) union_all(first, second) setequal(mtcars, mtcars[32:1, ]) # Handling of duplicates: a <- data.frame(column = c(1:10, 10)) b <- data.frame(column = c(1:5, 5)) # intersection is 1 to 5, duplicates removed (5) intersect(a, b) # union is 1 to 10, duplicates removed (5 and 10) union(a, b) # set difference, duplicates removed (10) setdiff(a, b) # union all does not remove duplicates union_all(a, b) } dplyr/man/with_order.Rd0000644000176200001440000000073514121112104014604 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order-by.R \name{with_order} \alias{with_order} \title{Run a function with one order, translating result back to original order} \usage{ with_order(order_by, fun, x, ...) } \arguments{ \item{order_by}{vector to order by} \item{fun}{window function} \item{x, ...}{arguments to \code{f}} } \description{ This is used to power the ordering parameters of dplyr's window functions } \keyword{internal} dplyr/man/near.Rd0000644000176200001440000000101514121112104013353 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/near.R \name{near} \alias{near} \title{Compare two numeric vectors} \usage{ near(x, y, tol = .Machine$double.eps^0.5) } \arguments{ \item{x, y}{Numeric vectors to compare} \item{tol}{Tolerance of comparison.} } \description{ This is a safe way of comparing if two vectors of floating point numbers are (pairwise) equal. 
This is safer than using \code{==}, because it has a built in tolerance } \examples{ sqrt(2) ^ 2 == 2 near(sqrt(2) ^ 2, 2) } dplyr/man/group_by_prepare.Rd0000644000176200001440000000156714151641776016037 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distinct.R, R/group-by.r \name{distinct_prepare} \alias{distinct_prepare} \alias{group_by_prepare} \title{Same basic philosophy as group_by_prepare(): lazy_dots comes in, list of data and vars (character vector) comes out.} \usage{ distinct_prepare( .data, vars, group_vars = character(), .keep_all = FALSE, caller_env = caller_env(2), error_call = caller_env() ) group_by_prepare( .data, ..., caller_env = caller_env(2), .add = FALSE, .dots = deprecated(), add = deprecated(), error_call = caller_env() ) } \value{ A list \item{data}{Modified tbl} \item{groups}{Modified groups} } \description{ \verb{*_prepare()} performs standard manipulation that is needed prior to actual data processing. They are only be needed by packages that implement dplyr backends. } \keyword{internal} dplyr/man/figures/0000755000176200001440000000000014121112104013606 5ustar liggesusersdplyr/man/figures/lifecycle-defunct.svg0000644000176200001440000000170414121112104017716 0ustar liggesuserslifecyclelifecycledefunctdefunct dplyr/man/figures/lifecycle-maturing.svg0000644000176200001440000000170614121112104020116 0ustar liggesuserslifecyclelifecyclematuringmaturing dplyr/man/figures/logo.png0000644000176200001440000013047714121112104015270 0ustar liggesusersPNG  IHDRޫhgAMA a cHRMz&u0`:pQ<bKGD pHYs.#.#x?vtIME %6m:IDATxwu qrL A1%R {-Qea}zAmIk[,۲)" D:zg{gzf@ 7/\g:T=Usιsc|!RJUq"|RI)7Ӈz mo{)%B]_Rv!f_w/clo`<(xa)^/x+_E!D}{;}*A;}1+pH)_UUqbY1fZ{266i;}ZQ'.{7^v; EQR~DJN[GE_B㪪d2#<ӧxuAp=rPU]/ !&O !VUqMfF^/).'l0aZ]RߑRU{\.G0UUk11!GǤ\.Ƿ^˨| {fq7R3Rj{iavv.hnn.fY((׾^X| -oy EQueWWA<gllIRTnwQDG}t2lXT([Rd2%T ===lZ YBP1@2ea=zdPRۀߖR 8Oi,,,pEuTVikk5>N ! 
(?u].L"i(u}Rʶj3cp8\s솎jc_k||l;͸QU,tH)5)+ L2>>8d!t: Ng9B楔hv9c͸lۈ RMCiBfaddp8%Z!kBg ! $5M[ֶO*.64l6X#RߒR>O,..rE`qF( Z,Br-:7wqـff9[R~E)eWh4ett ǹjA^o,Y!DAx9>Z\6-i@Ҕ]R)&&&#Hy\.p>.rIJyo.pw)%RJ;L,###Be; ! AV$Q)EI_ƍenLnTZόsx".WUU:::8" 带iکqlvE1o@^Zd~BbX!M;}:5ncSz;!ħRJ.'6 =z_Jn 5OA:fjjQbNʆx^!![2eeTV)]q= 4y.^Ҷǹ6\.װr-> |TQK)hd22U/ qErz\޽q~1,KTl BfYV.AeE^[J2gǙX#jx^nnvzzzPl6g?Y}نe#!>\ƵW7HWW)(`Eu 5v+ロ]vaX fUUygg>e _B|LJ !Dd=3ۺvo$\6 X53D"033nn6nF~pcR( \.@=66V1:sss,//S-BmX.'q[ef;O399k* 2k޵d2 o]bA1|\/ѣG?1z;pO~S*”qŚe~xX,Fss3RJq:* wݻwhii! Y^^.hE!n:@ @{{{ၘNfrrrCx<ΩSaxxueB|xTQ>Sv7|MӄH)?KJYoiiD`0H?RJ|2ம.1zޙ$Ϝ9C2$ 288Ossr\ݍn(p'o*QgNx,LX'>y7_xր ϟ_o\*3$RJ?H~_լ{e~r9E)xW_}Y^^&Df7N333SSP((B[[455({4311L$^#Npsss5"8* !ėٽ{7t~xˎw'ceEew"W{VHR,03@ww7RJZ[[xH6;Nֆl#v> !xRQ^xΝ;el6[drݴp5𖷼׋, 4UUKY2E{{UE 8СC8p]N$?^zKxUUk~t]'͢iZM)pJl<;}je) ܒ?%(R2;;i!ltvvrIfZ8qg-..F ڮ.Vk!.5b] K (-) ! ZJ) Isvv'x}WPa[%k$LuT3d2YH&TKΙrɹrŖB'0d_B/?2ضz o~!.j) .NUU W{Gil6mmm%rrr91ꫯ$Xq+ /O>,$aXX,߿V(GYNpGoooaZpMxYB&IZa#EAvެ(ʧOl nl${ۏU#*[դ)wWWvGxG!Pg}~;3{] F3,.uvvr…Plv_Q(B<_;NUVVTŬf+L~u #aRw)O'F7d|횦Co ƶU *v^v*ISFnJa馛 u`㘤|dL"s~f9y$=gΜ|*9> (6yrL/MÅi5;fBr,;=|Ƕz 7]Ū\c$h4ZZN<_~<1y*1$.\ô:ccc;v^ziG(0Ôp$HnJR~J ܏![|gݣG&7Zˀʹi pCx:d~###okt:[;s#=r]w  2?&3+f<(r9?#ν:d~###euơCX,O=?,٧,uttp8֭ExW_}I(+]Ngjד]&''7|L٢IˬQx dsOڶz,**2G0S$[ZZŋ5<`0Ue %#_0`{yqܹruWi9]W\e~ۍfzP>S(ꡤ09sDL%o\ր ̇y⧦x'x܌d~^tw7$å)FN Mv3yt]GӴ½joqٳ-V:T+mjtN:ҐII6EvuNP1efeѲ,kV k͇1j Z: f&az|zs w#N'€18R`A2?@,=93CV/4) *Th+F)Ƥ 4$ ]gA2MZ:+$ qoڜˆ:8!$ɒ5.ZUU-\&OԋZ:xVÏD`\Ȗhii)K`%m,4>݊ӌ@ kz&VmGQ*vr_.wEқ0'K828lE0z0v/3yb#o:jl ZKUY P%e쎧Mҝt4R<瑌Ak1FGh\MR9p0LA%PRE-K%r$s ~YLQ+K^v*歈sD~l x3226p-,lDW-"c&3ҝKr8/Ʀm7Z08I2gɯ ߗur5ez+pP/,ë/6&]䉺װ(Yh:;;70|{b;L`s?- u)(f \ <5+rO:fjjjC2lf&tuPR%R׉OMnԉ77a7qvuD˕\"hdQj@{&] TkBS$E_%[&B)xbfWd~攃D=u!%B %RQ\ҭM5#!WYsd7#L1KހakS gmzzzJŒ|tܮ͗K n'اT>0/-I^[kBYz&OTiB!J|~C$[Iy]D𽧋Z&VnF x|ȱud Qʡ~ki#H\+Y.F?PQ'UJŸ3ƥfo7 ө+Ee?4_,|6­}5 ^ /0 KKΤ+@i^N?N#$'t*ʊ_A| Fӆ="]Nͫ.90| 9BvڗjGUԲ7Ġ?޶MQ,\e 'ND8I1eP-x6?^ᝃ ^+$s4V9]\юj*hv{' UnIu!pV2~R` vBi;?ς&qNK /s8u xY0.@AljdjQq:D+7KS׹g>ĵ1nˬ`< cVr!.kUS 
x(ĹNy&MۍK2?)R[EAs4Hwex@&rn;z& jA|}GgH^ A Rl]q]/ *@9LCZ$ xÊp8p:D̵U9s9quJ[q{+@FJ&s.f|'T|7%%n~»o+aYd'R"e~0HJ&Jl>bý$z4yЬUĆv"~瑿 &CLMͤ`9oep-8HŦ*dİޢl6.'Vcs%XmB:rB Nm,$\f3|>s8KZjŋos}x:ɈĹo^RoBA4HuuO޻7u;ЅyMѻ xȯ? ywy<.ŎVle?Cs!iS"/\ VӉf+$ZyCkKS+q]̻I )uG\t vrZV`)cS5~teB!@?m(6YtG n{k'A ne QtBmH̯*ƶpiqVhXhgaL ʺc \Ȳ^XU8n+TGvl25=MprY\kEf,j9>dT f}u H L+E0 25kxnd=mՎP4+L{>gZVVe6U4z+64?OCa,l!!(L^(jLtMRN'NnF,wH %1Oz_HmwAk <ߍ>v^rv[!Uj !䷎_[E|뇾? +LaYmh-7ଡ଼OsG> Q%va lv\NVP!Z`sE" vB2JLjԡ ޠ9+TE뮂=p`0u{UVK9 = H ió5AO;(6H'~?ĿUĸyp^ߍANrXkQlWma=eҖJ,=B\[+@!ń8HSn7** o̴͠+:xb a&_k[aYR+pYɖ[4C? C}jWS۾p*|BMn<5!n?jEfע;@_zo} x<ĀaJVzQP U`P:I(f@Cc ]gpCSw;q{8I"_:fvldTymSj|@dB~fr:W~A،%O_nTk ÈDSyX9?g_2 v5fAÈ'Ļ VsZjȾn~W_7o=Pnl5t0aٍ5/@Xz;[B@:#Rc%I Sס+Fo S'lnA/SdexTF.r7fbI@<8n`Y>vR-tn'_ ?9L!p /?[v zXժ-,&"ϾFkPh4K?WUVLP `J¼tEE~Z|e hnkyZ*=0wd 7A2=^/$2SGl\MVM`= r5~:* 䛺;B:#l`T憎2Ϙrͤi.oۈ=D:̌7oMpfycY/|eT6ִF  do8wQ͈__[xL=LC?{ְd~Qi.tq% N|(؊ḧ́vVt v9G JuPjC `aK:ijtX^p8 1ژ3gaw:$ۨQUQ^jƫ@(|u[;Gvݺ*%1cVxiq ksCc,Jv+ )m ͧVFz 9] 7^HY85^@pGKG_SrSxK:=)GloA ~硳p/Q6G$cE~e\UhA;aW] ^:]x2^TJСClX.ڸAD;~yJHU[hr̰ I[m}0l౗6ˆ&eWF!ZUaiMע| s/#`r^3!(ۼg_ Cml&]=^lrWwvvR٬_Wg-!8v y( j) bk95,/XV'OKR $̏z?u !A`Jy(J#}+rVZDg_ 5\xMϺxyEq WGP˾"PY*5<5Tir%+p_!um(Ţi) / 5!@97ap:":6I0>XXs=c7EVC+@ԉ9!Gt-AB$1xE?Oݧӣ1Px+x9T>km*n4,z]:BiD"EYv6飑Z$m]:撆0ᓧ葫'OXe{]F'9a7\R E>|1j7C#60ϸf{xQsu PX'lusTF1ǶeDpRWYǟoh8JFէg%%ɣ3Nzp#댯k({_+,yIbp_2; ^W4~g(QNγ|ʼnk6$N#(*O95BCeĂ!zx[Ů6;Cf3*LIiH ;z# ,cw|AΘFsB2SȥoCnV cɄ '5w,l&4 d0Zd0<{*6@҆ЅX(0;o:7oqMk~g.1JEWϲ^].#駙w3[dWZ ,:Ʉ$6)V4ަ4Ë4nz,<4)kgx]0޿ߍga!s- qb*n[nuzl7J)06 hω9҉Pd'V!C>xFSahi|[ a_|*yW]\\DJ(t,]]|E޺VS )rhW[JŢ8VR sbGRժ{ ͞+͆OI>yJ$ަC z?6+((v<ӿ@JGw/KoՁ# ޟ"xz*{e߅x$g8 taܙΏ9֎@>wܠ@v"~`sK{8 mϾFQǙ-ĹfY0[h;=("9I<h n :glRR]XĎ+Kn;烒Ϟ|kB%c CXZ~' ɮS?4`Vy=[  vuHڑg,gfĹbkkKoԏ oB5++:وaUb7Z QI#^119'DEbSʮ&xy`7 eP@Ϩ$J VNWelvmM=u1e ɱB-+[”=Ac}v|̚yac92 b?`n*F ׾q f'7^5eπRxjM Xw73 T¸)]TIڪiY"{\ 0 aq[E;_B+ T%vjͅn[oIҿk sDk\X$|}\Ң$Vk~α[Ο3l!n$-N /B~T 03 hpV}+ۣ)(C0}_>BJ*w@Vvk;Gc3C @em_ b꤄1(ԉ5fZM.LÕҏ]XYH$A:BCPǘr &$ƈ\,sL%i蘆]7∜Fp.X4zĥ`@[V4%^K[㋒oB~P~?FFxmem\ Ǽ ?ƃ-{5v#]t  GyEq3ǵGM 
M;H.#ɐPUhmzFʍWHv܀ErBgQƹ 84iFYb)4,6 = ,%[.cKz Ck 1,eX™ lřO1wgYC9#dL&zSb|+VtTNm,0&i΢+Hk^]ڭju#Hnd \cwaJCf_-t2NLn ܍hZ@xo~U.#.@ C3F[j+P|?8yulA +#45u"Pxx&GY2gwvv֯nRH#hsS.x"L3:>J$juPVűhQ,\p5d ײ|5̷! n}ƣk 4Qa|(6hdн0 ϿB#DF%_a2c,Vagb1dуA %lz۬EgڛdG3mQI"ˌg16fZd`pC#H7+l,MmMB*߅x5s/{4ݎ\^C^o3ZT!L4[= 85+8SLahpVi*W )д$KKY>z{{4o]0 7iq5+]\l~Z&)#H`2yj4,NMlŖ"z] >oka[FmDlYzRdߵmE+tߏ5Nf2fgfXX W]>? X+ބï(fm]8#˩/u.3KU%tٱ1juhZ\pox˺gAvv{xoB{1o&$ֹAc=ltFP.(Io 0p-Y֕ƧYro.t[ěeW3Qd",,~%^ˡ2`*8GK̺ Ȝ\p'N>䅎ohqX7}V-ǟ/x2Z3bzAx}Txjh0: 6KMc+0~tΫ*ÌOL0IrC_`IY0^sq -*pae ,tt\M"_/46`E buxgwl0t( h4ZAqj!yC#+ 9%+7> wµ7-λ4Z*_5f[^Ee-o#,݊fu9J_| s8{{`%$S(  Į_G1 u' :jh,n᣷wlBSzQ+Rjl85I&''ƬBK` 5BEo;웏Խ @X%H {D佬]] FRA5%o|/X)7zX5e~Lino+t1_؇M3%ǷvfZsgɍ^`.UUp8̗4j$c-q 9!HTZ6T_Pg9ta?rqdjPi$af#_[E 1;ުg2?we6ʢ9Ţ́~:*sR᮱aABJI}lxyMMM L(s5CP9dY,bQ[K cV^`\0]] *sE)0O+x-ǛCx)xj~2*AXgm~c} _*S!KgJ\!9Uh4>'Au ]BT*ӧˆe/KnazS 1M D>v(300/ĹU*F1Kdv1C}4;)տ[T.XW!A,4$1`ƒFnSl|k:Po=M cQ!=}0ܞ2 *H`άM@* R خ'7Ћ" Mփ)K4_ XsxrhzHY4#έ[sB m$V3t$ oh#_du]5 Y0QFr61b& (QF5'8jKlGgDIg@(?8?,򆛡9Xr p0ÖWgi,:Ƴ>#lPr=yWaI$>VB<cbrdL`nns3pu!QʹZڬSa*gGϐˮ/K I"Z9f]"֢RWczO7OC؄ZԨ^h3-&k9]bIJxxWAJ :|韐5Fe7\X1 \ ؎qgRQ[>ap)@lڪ_4 OLc/FhZpuMc!dlzLJWsqx.aopp׻&F&-9̈́=p8^c;)[#0i ),ab2+dXEqkE #b[5C 8bc%L #䋿 ]!fEF0ZD g0<9ɼh>X{l&)XZT +kڭL\5Ձ*Q&wÍd_Ik$ p^O8Tg+Ӑ}puѮ&ty!O"Rʿ~hw}VŘlØó2#[bx]Z9:8d%i,ݏHDZ3L*Fkޝd: mhĹFʎMӸp3335^<,KQՀ+%ytM6&7l} TE$]{: h3q禥yzn≠m=R6`\s@Fs- -ux+-4FiiRكaJ߭d5X> >0\]qHSq/aBٻ N8D2_p#VZ|y&X(btt&_l$)q\߿F?l+tCBt:ѱu+LG$e6Mmz4&VW- dzR< 8df 2I}b#kkSr,bĹXǪO^@~ᙯ ~i}5lKaMG4EƦ 7@{o H&a<\8 HLN" k_[e!D )4P ,< Nȏo5b^l6\.'VE'xmFNWdz5 7vѨPPNor&X7MBCIe#SEH"Ӑ\$w͈#ZJQ`cub">!LF!<9^OgcE$k wE7\N3d~ZosY4ˋ<OrQo0 DpEVU\.fXN@J 8knwaB ,E_O⫓$eN\Xqzn w% 3^!%2BeqjYX٢*RXbab!%2?: dv2&pCx nx7q CՌcļ$i4Lw&/M;G0;ݪ;8Ho_.fB +Us Ý}m FQk?2}i._7?jȡ$vQuL&ןL9rzau~s(b9¢OGr aS =>*:V)fq .1C,`KXD UȤD +] ,/!?`xY lHrYYk`x.,@oGz_O1>7Onf!Y 0zR}*FbYk2}2^UBŧWIΟ?(fz%,.x<ݒ% 33=I<Ը$[;܏=c翂=D6 :U뼖u>Y @ ]86-+)pQbU$>4f[4uBX]B W6d@'> /j)1fs*dg^X7;1V淴d^w tvĹemWb#.|ZY=rzzފuj+]1u 'F&&LOOV x_pD pXDjzH;JLYǫ*B gh[ 
_5.#3:oug9C\to&xbf5Zs(d1LZABnD;CloՀECJ p#-/Zc  _S_5GڜLvUߔ)7VnB&&&2ʡp=w"{O\g;3]ZZ*>FwO7>5+/)^]:^6D!ziHt;E"uF|9B£MW9QmL9\ Y7P 6V5YàFƀ|HŸhe ^;8@uc$ B[Z!R6"rhO\uxU ƛ٫%v Mj5^899Y < ~ GPq{<^\.'R'"35;jfz)ND{?.$pclw Ħjy ,\_ Y|9:cst$jq1̇T8fllb# |75SLI}dgrAHe~&]tOOOls1;;[L2idcJQvfLfrLj`[psErvFYjɤ嬥$eC>c/M7x%†-tߣIymtn/+u Xmd?=m~y#Ǯ>Y~A:m A淼d$&F!"3)VCᅧޣXvl663vK:UW_I7A(17a/@,M( Wzy!?~gϑ^Os]/monnnFwŽhb࿀s)kut%jui~r\Mae!Y\\B6-^?)ߒ׿!q9Ud01]Yor./-bn'^oae~#ZZy^ܵE?j2ܰVD"z?R;#ߦ)n*.yn )TuH$S'\.W^݉n͉o[vxUGH$Wd~}0F,}fLW u6M%!%IRDqJYҶr>l.xÊ<؍cDt >=sÕM5xEO'툡ҭzU.:jqqtW 뻶xV9b AG#S.kr ZX"3vGWm`9Fy%Mnc\暡%z9gЁ"^"Lb ͂7P7'^sbC4 t]2=y)FH43Ä7">,EG]H)tM ~x.UF#ݴ_ I.1|iSÍ^o]";}ʽma z\3@[ UՑ@]"$I'gc׻\-WXnD f9'ط9p!ȩcy-U/ N ,{x?Ϗo KgggQ]Ug&w?d}0X_9Bh$$sYܮ4ue~90 e~dSN\~zVVucZiz`4B5K:a+M ^ HJ3L_[0Mt0X@%ރFY~m &ѷywEgρZۍґ ] >-۴4빐Erض|9I|r UH&3FƔ VW=,6q:]x9 r0ϕ5vҸ V^nbdƇ|Q0嬥;{0)4t4,R=`~=û}ӔlA9р{B<>4GN]R zCERdfHtnNV5f#:q\_M[jHCU9AEZ"K63S,fCP,E#pLCe~&ur#26ー>?dWgÃKt%+$S8fCNèc^ pv7n1G_F Ƚ''/aѵ-4^5MʢpfwV[]7rL<еû _QZ4=~妔dC[nN@zbSh`aq)-22 ^7 R:3V B;w`:m̯U9C8W \TL-yxiѠ D`r=H 1r/PpJN0B/H7EԞ+kR 4F:)[Xk a~7h׎l $mqhqS7 f\*k̯[ĵSfE׃f՛{z"8EqP,E2iɌEHD D] 2M *݁2?#H` }3Oq󸲉-`LZk,;(/?7D_]7zg4c>\.Wr:w/Umrk_&"Rټ/D߂X+GUR:h̯҈\Ͳre_8Wsci+&xup yըɀ7%󫺂VDO#9S׸3|_x=1-7މ8_?ɼ3UxK,b =UlfJƳ]B4I+ x44Z.|Y¾&u7AK-2A}9_[[e~Sn8jj6f|;D|{aK̅mt9:%Hs׉mC>Dfw)?+Rx[|$㭔oiҪMy=OgY^8ݽ52Fs Ae~'NfV=1L9`l*-q\ia(]mSך:w=4)B'Rk+B~:TH+kT} ~-fOӛ7Nrh[dUj`!Nc#UUW~Nn:)쇉@[=nߢ%8;1tA7d,e~}}}/[Uz Zu;q<{{"86jR ɒJg[<[2XYzrr“q xo?i[Cgp |Uҳ#|{baնVF`jY^*cjFqgoC.d-Y',oңs/.o{뫾UBfQ+8פ?VM%n/g+KI,e&N5NX 0٩j SFw-EA Φ|9d~G)5^XUBir o8>Gpf \](f';#|k!G`fCJUo֌-$v~-Y'Ud{893k27o=`1 vufpv_r;\4-"Y-r}nE+X|7C-r;hUsԀ/#Uрc/^Ml}t&78|W6&-.n-YVzH nny759C9mႱ~Yxb`HD,aၫ]4,cK[?s`bc,ELAE7?kn_oo/o4\.W+zzz5mWγ|sŹR޶ٸlM ~P9 iY;Uщh~Z67]Ў+E70̵n#(.3vrk]('SiN[A$]>l/%kJ99jd_a*1^-\YuFGF@j߈Tp8Y&Zu}d~Xe~^[6 RznIkCBP~̊њvVF2g1t vlԗc\h+ >g\!Ƈ+R: d%(fIgs-QZN[K81XΩƻ Vein!HOC~Pd~_ss3455\OAkk+@M{x b-2?\bUqnN EHVn٘]q{kؤd'9tnц ؃e`7 DGmj dgwtA,.W9^$Wg/9=W]<ɽZh$dg3!߰E Bt?_LOlP2n }->R;ᥑVF=dt[8`O&Oܱ[䕃$5'b~ 5fp$ OEu 
u7dax+.lD~kal/vhM(Ri<3AD,-r)2|_>n$CYNEqW|m5Q}gfIrkH"9s`^7Qly__^724x̯w]h&[Wrqnjh ds 4{Ɋ[dJ˗[I n^M6ޗ-vi 0mu{Vzkhkͼ6ބqŸjgJ[Ur+gNyo%W|p3 blȀ;ԚEjw3gM8[= /%_[ De&w/ruH%lMtG)n*֎!p8WI:rn&izϚE.BrCf~q#욤ϖ^>X'Q܃Bc2`)-ɥ AЛɡ //]f2a$|DoMthgrp@4t=]ػg$H&Fb3Mݰ8yz,@غzp:\/Оg}mIIJWAǷU_o*gWQ'M;2q~&)4[ JԱAZ׍@UDw%WB6j!Jԯh/P uҬiL FQTrxj] t t6$[`c+=RaOwU+Ĺ&ɒRmn"-\uDW[ k*4-s'Cy<n3 ,uonne׮V=~rUބ oN zfjbrZ:2$yn.dݺeH+00(2q:X !BkZV6455si>(iӡ)%Cs\ÝK:VK.DWC-tq7}e-ٲq\WR;_[n%+(uzW5}}C_Bu2\ jqѬ롱Dewxc o\\)4Ҳ{G-`Bo,OS0^S71jx 2h4ʉ'XZ^Cռjt:Q2z܋V̯S)s#Hd#.iOHPu `kHpw#|?g=d`MkB2.}jOkf(\)A!ޅmp6ωIW)sj'-rhVu]eZ.de*c]c͟E@YsM_OO\D }T4^Uvq]RHz#B-e\,F1Ʊ\8WQ$B+ IgB] ,Hg~j|mCx홴Ows"B+<2GrW+oR!sAu^ƫ]Z>WϪ |Ml{9c]`|zikD9f=@##e\;JZvb?YNY&s߹ӆHZDLPpeXb^dm̯feBC/v빶%nl[Rg^J/ZYY߿Hi{`Si{M7 FfsVPKD/"\9"h3M8j} O6ްmtk%ιBLg۾i~mE7B,Kf% rU7:^F6XՈm/g6Ip(d {]~B4iQG`z=8Sfjhnnֹo+;3е]qwv ;P &3ZU\)vX 5o)*o[_ \ݜh-lzl1ٶ{Bz̅?1f59}}20bv&r`J{i⬁t=lPÐarh"T] :&htQ=4"XV<3]'ʓ*٥6djkho;vuF q/# BW\(6BY^ǢM R~.J+b;\n5}U,dƹ&SQN{m,b7rZn>@HINQtwp=™̸&OO4F\czkOGJP{ { ke9Q@ pdnH)Ke~>7\ ]N%tzDFE|q\,sƹfp8]o m!Jc>j*$oZW/*֮$*A6FϷ^sk F AoZdu_efjr{mTWqaas?͜v;٭"eCI }Bz8{6?ͯF_^+y!p:<Hͅ%-vK,/[WKs ø-1^X?rVf6/s7J[)7lYvP,hԽWŀ D+{ct&ִH4"䜢lsÏC.Fm2(>KpfGn/=2$lg&V&Tg6H}8sQRm=~۲Fdz<5űnȔƙ* hmmɣsEW/ufqe$;&QR)fm|3ʏnnt9:,*c:D֠$2<2G{bKFA S '\=|:p/}˸I6 YBfyO'=v2io&XPPBSc?NیoZ4ۘODo&ZcHg %2kd~6^8-5  n˵JILulґ].{ hן\$)ήn//mlнW, ~i:Grs}A[3-Lٛ*6DVtGymvLfFL+-[ j2KDߍ.o t8| d~Q؊@z\Fu%P˅)(@ `+aU7Z .sKaF_I+ӹfCOo~=m &p]XAkb|QIN*0˭|k\1,B3kQqnqHjWfH95IN +I,\Bs~8y :H.m!* -PHs;I#-r = ^` vKVYb@ؓ9IA+atO4F1 hjyl}ۛ}6PhJe!DMRK;r? f_)5`(oc+$Ш{Z[},1}' *,rrkEd~g-!;n#.{ol>\Cxp~AZZZ 8:\*&jl6^Y}n@ӂB (d~ 6+]8hkon'p kMoh2UnGRͫ#A!TbփVmB`A?BPdxضz4r/`EJPUI&'' / N'\Ð\EQp9]xdd a*ڭ,W-2\~ 9!@+zKHF=wN˚znb|-)h,E BwŢLNq^y)knnßinup0) mEN#f 3~i0y rirRp3Ep7߶U{*V{Jx^<~%FFFػw/ۘf"DA!M围Ǚν9sdW@Gȉ5 [GT͂~Ke/{‰IǑ$|  ^°HRIuYx<^q% M87)VDk;zf8wqxr"0{hKqm/R{#? K,4/'3}0ߙum;ONu5o$ʽp&ݼt1e~&Xz{{b]]]nl^Ոdb $bo<Ē~#U,%-#F}\aJrPl^rEzdr: @"! 
W fpe8WvrKT-R߲bDS;N?}QZRKn@I$Lpfr&grf93$Ƅ!Nv q, 1}UZ[TګJRrݪ[{]C|͊K?ye0Z[[hBɞ(7uJaax78t\O4CSxf ߝ<2XUVۛ077a~8t[(>F\tU %F!7RžZ(ldT B̈́I^\sKh"X߹sFf*Y'ޅ参PWU-)vJNk3KI9 'ɰRO8Ԧq5˙!Άep8 2YDeNQa X˞Q3W.eEEHwOӍ|7,g[KͅCGGs&&&H$qN'BzeQRRQ+h T1c!fAT39ױV b/FB͜chb41oA hN n]07th.7p>HCo,6sؑV+ʎ NYr gmx8 qCW>;|Wm)V r'$ja ֶ{jjp/_a($011A2\xazXN(^F+Q!FbF7)U_ GP㏢>B./MMy 0?G/.WdŘ vlK~eKRe6$GSpi"Wr㵤PHL-D]?MxdiiBbttCՅ$J0Ȇ$ v4`m4l6[*nV05Q5$݄#D#̪v2Pl9N*3#?fƝ; 8ea~~si'!MUa [nkan+c~oۧwDg+ޓdARy=$ {2h OENkDBSY---LMMiMMMH$项\.lrJwE!0~$"Y;R*+"ϊȋ{?RJ bEךedd#vw4֊` Hr>ucIfC`K@ai76Q߂|cW\vS\57_h[PAh}Icwt ¾&Vh\Q EVBuUlۀ4` f ntsjOZJo@֊:111A,piݜ:u1R&T]VJD^O* O-K)!CJ݊ 0<<_5j:$ěȽѵ꺒jj_``$:_nWWWW¡Rc~c~S#%BJȤgF%GIqKƑ]BS@EA!R@/p\@d}.Yi0Xsj̸K<v{nhT*E[['O$D]] .yV;<*"CJ)xcǎYyAq:7aBOoo~JBڽLB5_}4ob 30I1]AhXK# )]Zt9P?KwJpZjr ̺iݳ\(Uzyv,ߏ0u昚P贈B.#HgΜ9Cgg'xB8 Q'nt: ĉ _=zԪu?VJͼ+~0\ z{{D"/4%CIpm F?g knc~### 1p[0LL W)Gpˮӟ@m# q=sF`W^_Ѐ>"R9\6QCQEsՋQ.GJDNka~e}_܌C4errd2墡..6]a"C!yH|\N]]]2:: i GooXi3DkJ0 /jEI|~[)\Il+/%' p}-r]IS4PRx(ϵo}>d2|>s9 ;v N/hSɔ <Yǭ.8[aJ) (pL)_6ڙ/ns[NZ ϫ(yq a| ӷw݅)3888. 
wx`~<[1F4`/)nm sG 3ڒV} n0Oo,ֶ *1#)aMxq4ɟ=ua~qx-)cݜ>}`0H,#q!Ν;G}}=8NN>M"(FZ; << (" ՚Q= *X)&Xu"NVϾDkqgPIC$b6J臌]&Ymn1g} Yh%xչMSPN(OnPIFJ\5EdAcQ|TΝϜ2|Kws5iSwec~֔Ϸ(6<Ս7 LF\R( 455ӳ$R" iTR)$@U-Y,2+q F#C{J9Us$9S(Y6/K2=Y3_[\nW]]]4/i~ 0 =  3)nN'ykH`J28L212p35bWU viTVOjշ< y'G%_[[Hh4磫 0}LC_wЬa^f!槡;,N=UV3;0\ 9g|Wдp7|P?wⳊ,6$R2>-+PZ[Ԗ(555}>:wvvvAK G!|5,z׻ebJ?ޛ_W9aEe\B^LB3(}Νh6Otnv60# iIJ?nNry,@*k[g]g+mm|8m3?S槔jz{{"Hixq+:b"<\,]-]cQɥz'&V6'N7͎p,0;63 L%TϽř-kͭ-]Y)ň&d8~E?-*-4uzaC| z/Ü:]vttYlc~*\n3ަ#4k~?c4RlT¤bP1hMj^Yn+pk`lK~waΦp8`;wNv388H{{;H^,`p"5  57`K7{p\!3"m#bJ) 2sHg蟍0͐a)w̙<\iWt.eR|> >;w200Ç$Ir6/*߂`aee|Jq-ܵ.vad-l1+O&9sp40?g {3ǃٰT6tú1[~7]N x!>xCoafʇL)N +*'{[Ը4~ϝ*)&":tP(D2d޽꫅qx`s 0V5\K[j^'NX$fǀ-vt\q߃0艒´jKN|1w¸7%s~j3d?wt%]Cfd9+0W|_yAg$N`zO,{hwK9inH$B</\a!|V1FK"+JIT*ŷr+Ֆ^V[}ylkb F̞f'|ȗ?zM90Wl"̳FoǨB/DM?wh}u WIZOmƢ?_YUFcgysZ~DvEuu5N;vpCB+y?~ǯ XdW+l"&ǔR&:ig>OT omCv{&7'cV Lܼ_KsגlG{7^Xpy_B ǾSnv"ž} D&RLysַ֥]wlocMlAVg>Gi-8¡dhRNcMƕaݽ${-2\UN@2_h|t-mTU#Mp8d2%2ه ~jn {tZݥvLsCt2+(d82kJ1}-llV3?z[XeQRORi|sP® 謎epG^M7Sptmm-]HYDaۀEZ:4;8sK[*Cyyw*d=U],C 4 ܲc+{K_ݭtPZE)qV+U+֝FmFowZi\M?tpf3y?Z.33GoL)~6^߾}u`~'")tz˧ʾ66Kw_ ]LluQmY-;92krR83aHgm\ l ?wws;t<}#\5](0hG^T[E[gՆai-Ji"WG;'9{Ϊ) I4NgxU[,s5 *J(Dq%U!#y.[Zz0,] Pz?&xp wp׮a=sdcu|Afa>w% ol"rBDuͶe1׵[J)/`aآ%] ^t%쯋Gz8X)3kܽi ){m˪kRWJ:a\3CV_l{i͕mCA~MXU^>|ID&_ m.҉'xG m6"l0 `WS,wt m`4ޓӵ6WE/abolO;Y[)u]2$b*3]<;,@Y -)~@~߶ᖯm^J_c)"+e y8PE` (z">sj&2WR}nmѣG DM)aw--[]ihqcf_ [( |tE?AZ xT-ȭMTrkHp޼s".H?7'"?RJ}GӴvkۀ7IVia7rl's۵t p *,<'".م_Qdt?i0\!"-`np *ѣ+[#&{kaV1G/ɽCu+PW/á2qU^>'"_m ɚ:v@[EC (81@"+K UVuc,zҶ_cc/beآ@ M3'D߀lֶoNUJ) [ +~>#[DTB)|sb{)ǯ-VP؏99ݭ<ҥKx"lO<ǫnW+"_l'|K.]˴"m[XE/2%bV;2܌|_D(2ד :ǭ@W[q#?CD^evz:ŋimmtl6l~[X{Ȱ< |f@)JR۫u:wߍpH63ZnJOZ_:g_q[ehۀS?~j"VJ݅?~3kc9< ੧ڰ{ۺz6XEFR Llrsy(kE;N<zR#o*Dd s4#JD">{O}[ԶT4 JzND&r-pWCS3%tEXtdate:create2020-05-12T13:37:54-05:004%tEXtdate:modify2020-05-12T13:37:54-05:00EtLIENDB`dplyr/man/figures/lifecycle-archived.svg0000644000176200001440000000170714121112104020056 0ustar liggesusers lifecyclelifecyclearchivedarchived 
dplyr/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172614121112104021343 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated dplyr/man/figures/lifecycle-questioning.svg0000644000176200001440000000171414121112104020634 0ustar liggesuserslifecyclelifecyclequestioningquestioning dplyr/man/figures/lifecycle-superseded.svg0000644000176200001440000000171314121112104020431 0ustar liggesusers lifecyclelifecyclesupersededsuperseded dplyr/man/figures/lifecycle-stable.svg0000644000176200001440000000167414121112104017546 0ustar liggesuserslifecyclelifecyclestablestable dplyr/man/figures/lifecycle-experimental.svg0000644000176200001440000000171614121112104020766 0ustar liggesuserslifecyclelifecycleexperimentalexperimental dplyr/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171214121112104020365 0ustar liggesuserslifecyclelifecycledeprecateddeprecated dplyr/man/figures/lifecycle-retired.svg0000644000176200001440000000170514121112104017725 0ustar liggesusers lifecyclelifecycleretiredretired dplyr/man/reexports.Rd0000644000176200001440000000401214174551644014511 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reexport-tibble.r, R/select-helpers.R, % R/sets.r, R/utils.r \docType{import} \name{reexports} \alias{reexports} \alias{data_frame} \alias{data_frame_} \alias{as_data_frame} \alias{lst} \alias{lst_} \alias{add_row} \alias{type_sum} \alias{frame_data} \alias{tribble} \alias{tibble} \alias{as_tibble} \alias{view} \alias{tbl_sum} \alias{contains} \alias{select_helpers} \alias{ends_with} \alias{everything} \alias{matches} \alias{num_range} \alias{one_of} \alias{starts_with} \alias{last_col} \alias{any_of} \alias{all_of} \alias{intersect} \alias{union} \alias{setdiff} \alias{setequal} \alias{\%>\%} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. 
\describe{ \item{generics}{\code{\link[generics:setops]{intersect}}, \code{\link[generics:setops]{setdiff}}, \code{\link[generics:setops]{setequal}}, \code{\link[generics:setops]{union}}} \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} \item{tibble}{\code{\link[tibble]{add_row}}, \code{\link[tibble:deprecated]{as_data_frame}}, \code{\link[tibble]{as_tibble}}, \code{\link[tibble:deprecated]{data_frame}}, \code{\link[tibble:deprecated]{data_frame_}}, \code{\link[tibble:deprecated]{frame_data}}, \code{\link[tibble]{lst}}, \code{\link[tibble:deprecated]{lst_}}, \code{\link[tibble:reexports]{tbl_sum}}, \code{\link[tibble]{tibble}}, \code{\link[tibble]{tribble}}, \code{\link[tibble:reexports]{type_sum}}, \code{\link[tibble]{view}}} \item{tidyselect}{\code{\link[tidyselect]{all_of}}, \code{\link[tidyselect:all_of]{any_of}}, \code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} }} dplyr/man/make_tbl.Rd0000644000176200001440000000117414121112104014212 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbl.r \name{make_tbl} \alias{make_tbl} \title{Create a "tbl" object} \usage{ make_tbl(subclass, ...) } \arguments{ \item{subclass}{name of subclass. "tbl" is an abstract base class, so you must supply this value. \code{tbl_} is automatically prepended to the class name} \item{...}{For \code{tbl()}, other fields used by class. For \code{as.tbl()}, other arguments passed to methods.} \item{object}{to test/coerce.} } \description{ \code{tbl()} is the standard constructor for tbls. \code{as.tbl()} coerces, and \code{is.tbl()} tests. 
} \keyword{internal} dplyr/man/group_by.Rd0000644000176200001440000000772314151641776014321 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-by.r \name{group_by} \alias{group_by} \alias{ungroup} \title{Group by one or more variables} \usage{ group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data)) ungroup(x, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{In \code{group_by()}, variables or computations to group by. Computations are always done on the ungrouped data frame. To perform computations on the grouped data, you need to use a separate \code{mutate()} step before the \code{group_by()}. Computations are not allowed in \code{nest_by()}. In \code{ungroup()}, variables to remove from the grouping.} \item{.add}{When \code{FALSE}, the default, \code{group_by()} will override existing groups. To add to the existing groups, use \code{.add = TRUE}. This argument was previously called \code{add}, but that prevented creating a new grouping variable called \code{add}, and conflicts with our naming conventions.} \item{.drop}{Drop groups formed by factor levels that don't appear in the data? The default is \code{TRUE} except when \code{.data} has been previously grouped with \code{.drop = FALSE}. See \code{\link[=group_by_drop_default]{group_by_drop_default()}} for details.} \item{x}{A \code{\link[=tbl]{tbl()}}} } \value{ A grouped data frame with class \code{\link{grouped_df}}, unless the combination of \code{...} and \code{add} yields a empty set of grouping columns, in which case a tibble will be returned. } \description{ Most data operations are done on groups defined by variables. \code{group_by()} takes an existing tbl and converts it into a grouped tbl where operations are performed "by group". \code{ungroup()} removes grouping. 
} \section{Methods}{ These function are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}. \item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}. } } \examples{ by_cyl <- mtcars \%>\% group_by(cyl) # grouping doesn't change how the data looks (apart from listing # how it's grouped): by_cyl # It changes how it acts with the other dplyr verbs: by_cyl \%>\% summarise( disp = mean(disp), hp = mean(hp) ) by_cyl \%>\% filter(disp == max(disp)) # Each call to summarise() removes a layer of grouping by_vs_am <- mtcars \%>\% group_by(vs, am) by_vs <- by_vs_am \%>\% summarise(n = n()) by_vs by_vs \%>\% summarise(n = sum(n)) # To removing grouping, use ungroup by_vs \%>\% ungroup() \%>\% summarise(n = sum(n)) # By default, group_by() overrides existing grouping by_cyl \%>\% group_by(vs, am) \%>\% group_vars() # Use add = TRUE to instead append by_cyl \%>\% group_by(vs, am, .add = TRUE) \%>\% group_vars() # You can group by expressions: this is a short-hand # for a mutate() followed by a group_by() mtcars \%>\% group_by(vsam = vs + am) # The implicit mutate() step is always performed on the # ungrouped data. Here we get 3 groups: mtcars \%>\% group_by(vs) \%>\% group_by(hp_cut = cut(hp, 3)) # If you want it to be performed by groups, # you have to use an explicit mutate() call. 
# Here we get 3 groups per value of vs mtcars \%>\% group_by(vs) \%>\% mutate(hp_cut = cut(hp, 3)) \%>\% group_by(hp_cut) # when factors are involved and .drop = FALSE, groups can be empty tbl <- tibble( x = 1:10, y = factor(rep(c("a", "c"), each = 5), levels = c("a", "b", "c")) ) tbl \%>\% group_by(y, .drop = FALSE) \%>\% group_rows() } \seealso{ Other grouping functions: \code{\link{group_map}()}, \code{\link{group_nest}()}, \code{\link{group_split}()}, \code{\link{group_trim}()} } \concept{grouping functions} dplyr/man/group_split.Rd0000644000176200001440000000645614151641776015044 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group_split.R \name{group_split} \alias{group_split} \title{Split data frame by groups} \usage{ group_split(.tbl, ..., .keep = TRUE) } \arguments{ \item{.tbl}{A tbl} \item{...}{Grouping specification, forwarded to \code{\link[=group_by]{group_by()}}} \item{.keep}{Should the grouping columns be kept} } \value{ \itemize{ \item \code{\link[=group_split]{group_split()}} returns a list of tibbles. Each tibble contains the rows of \code{.tbl} for the associated group and all the columns, including the grouping variables. \item \code{\link[=group_keys]{group_keys()}} returns a tibble with one row per group, and one column per grouping variable } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{\link[=group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but \itemize{ \item it uses the grouping structure from \code{\link[=group_by]{group_by()}} and therefore is subject to the data mask \item it does not name the elements of the list based on the grouping as this typically loses information and is confusing. 
} \code{\link[=group_keys]{group_keys()}} explains the grouping structure, by returning a data frame that has one row per group and one column per grouping variable. } \section{Grouped data frames}{ The primary use case for \code{\link[=group_split]{group_split()}} is with already grouped data frames, typically a result of \code{\link[=group_by]{group_by()}}. In this case \code{\link[=group_split]{group_split()}} only uses the first argument, the grouped tibble, and warns when \code{...} is used. Because some of these groups may be empty, it is best paired with \code{\link[=group_keys]{group_keys()}} which identifies the representatives of each grouping variable for the group. } \section{Ungrouped data frames}{ When used on ungrouped data frames, \code{\link[=group_split]{group_split()}} and \code{\link[=group_keys]{group_keys()}} forwards the \code{...} to \code{\link[=group_by]{group_by()}} before the split, therefore the \code{...} are subject to the data mask. Using these functions on an ungrouped data frame only makes sense if you need only one or the other, because otherwise the grouping algorithm is performed each time. 
} \section{Rowwise data frames}{ \code{\link[=group_split]{group_split()}} returns a list of one-row tibbles is returned, and the \code{...} are ignored and warned against } \examples{ # ----- use case 1 : on an already grouped tibble ir <- iris \%>\% group_by(Species) group_split(ir) group_keys(ir) # this can be useful if the grouped data has been altered before the split ir <- iris \%>\% group_by(Species) \%>\% filter(Sepal.Length > mean(Sepal.Length)) group_split(ir) group_keys(ir) # ----- use case 2: using a group_by() grouping specification # both group_split() and group_keys() have to perform the grouping # so it only makes sense to do this if you only need one or the other iris \%>\% group_split(Species) iris \%>\% group_keys(Species) } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_map}()}, \code{\link{group_nest}()}, \code{\link{group_trim}()} } \concept{grouping functions} dplyr/man/ident.Rd0000644000176200001440000000131614144435746013566 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compat-dbplyr.R \name{ident} \alias{ident} \title{Flag a character vector as SQL identifiers} \usage{ ident(...) } \arguments{ \item{...}{A character vector, or name-value pairs} } \description{ \code{ident()} takes unquoted strings and flags them as identifiers. \code{ident_q()} assumes its input has already been quoted, and ensures it does not get quoted again. This is currently used only for for \code{schema.table}. 
} \examples{ # Identifiers are escaped with " \dontshow{if (requireNamespace("dbplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} ident("x") \dontshow{\}) # examplesIf} } dplyr/man/across.Rd0000644000176200001440000001432614174721030013745 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/across.R \name{across} \alias{across} \alias{if_any} \alias{if_all} \title{Apply a function (or functions) across multiple columns} \usage{ across(.cols = everything(), .fns = NULL, ..., .names = NULL) if_any(.cols = everything(), .fns = NULL, ..., .names = NULL) if_all(.cols = everything(), .fns = NULL, ..., .names = NULL) } \arguments{ \item{.cols, cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to transform. Because \code{across()} is used within functions like \code{summarise()} and \code{mutate()}, you can't select or compute upon grouping variables.} \item{.fns}{Functions to apply to each of the selected columns. Possible values are: \itemize{ \item A function, e.g. \code{mean}. \item A purrr-style lambda, e.g. \code{~ mean(.x, na.rm = TRUE)} \item A list of functions/lambdas, e.g. \verb{list(mean = mean, n_miss = ~ sum(is.na(.x))} \item \code{NULL}: the default value, returns the selected columns in a data frame without applying a transformation. This is useful for when you want to use a function that takes a data frame. } Within these functions you can use \code{\link[=cur_column]{cur_column()}} and \code{\link[=cur_group]{cur_group()}} to access the current column and grouping keys respectively.} \item{...}{Additional arguments for the function calls in \code{.fns}. Using these \code{...} is strongly discouraged because of issues of timing of evaluation.} \item{.names}{A glue specification that describes how to name the output columns. 
This can use \code{{.col}} to stand for the selected column name, and \code{{.fn}} to stand for the name of the function being applied. The default (\code{NULL}) is equivalent to \code{"{.col}"} for the single function case and \code{"{.col}_{.fn}"} for the case where a list is used for \code{.fns}.} } \value{ \code{across()} returns a tibble with one column for each column in \code{.cols} and each function in \code{.fns}. \code{if_any()} and \code{if_all()} return a logical vector. } \description{ \code{across()} makes it easy to apply the same transformation to multiple columns, allowing you to use \code{\link[=select]{select()}} semantics inside in "data-masking" functions like \code{\link[=summarise]{summarise()}} and \code{\link[=mutate]{mutate()}}. See \code{vignette("colwise")} for more details. \code{if_any()} and \code{if_all()} apply the same predicate function to a selection of columns and combine the results into a single logical vector: \code{if_any()} is \code{TRUE} when the predicate is \code{TRUE} for \emph{any} of the selected columns, \code{if_all()} is \code{TRUE} when the predicate is \code{TRUE} for \emph{all} selected columns. \code{across()} supersedes the family of "scoped variants" like \code{summarise_at()}, \code{summarise_if()}, and \code{summarise_all()}. } \section{Timing of evaluation}{ R code in dplyr verbs is generally evaluated once per group. Inside \code{across()} however, code is evaluated once for each combination of columns and groups. If the evaluation timing is important, for example if you're generating random variables, think about when it should happen and place your code in consequence.\if{html}{\out{
}}\preformatted{gdf <- tibble(g = c(1, 1, 2, 3), v1 = 10:13, v2 = 20:23) \%>\% group_by(g) set.seed(1) # Outside: 1 normal variate n <- rnorm(1) gdf \%>\% mutate(across(v1:v2, ~ .x + n)) }\if{html}{\out{
}}\preformatted{## # A tibble: 4 × 3 ## # Groups: g [3] ## g v1 v2 ## ## 1 1 9.37 19.4 ## 2 1 10.4 20.4 ## 3 2 11.4 21.4 ## 4 3 12.4 22.4 }\if{html}{\out{
}}\preformatted{# Inside a verb: 3 normal variates (ngroup) gdf \%>\% mutate(n = rnorm(1), across(v1:v2, ~ .x + n)) }\if{html}{\out{
}}\preformatted{## # A tibble: 4 × 4 ## # Groups: g [3] ## g v1 v2 n ## ## 1 1 10.2 20.2 0.184 ## 2 1 11.2 21.2 0.184 ## 3 2 11.2 21.2 -0.836 ## 4 3 14.6 24.6 1.60 }\if{html}{\out{
}}\preformatted{# Inside `across()`: 6 normal variates (ncol * ngroup) gdf \%>\% mutate(across(v1:v2, ~ .x + rnorm(1))) }\if{html}{\out{
}}\preformatted{## # A tibble: 4 × 3 ## # Groups: g [3] ## g v1 v2 ## ## 1 1 10.3 20.7 ## 2 1 11.3 21.7 ## 3 2 11.2 22.6 ## 4 3 13.5 22.7 } } \examples{ # across() ----------------------------------------------------------------- # Different ways to select the same set of columns # See for details iris \%>\% as_tibble() \%>\% mutate(across(c(Sepal.Length, Sepal.Width), round)) iris \%>\% as_tibble() \%>\% mutate(across(c(1, 2), round)) iris \%>\% as_tibble() \%>\% mutate(across(1:Sepal.Width, round)) iris \%>\% as_tibble() \%>\% mutate(across(where(is.double) & !c(Petal.Length, Petal.Width), round)) # A purrr-style formula iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), ~ mean(.x, na.rm = TRUE))) # A named list of functions iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd))) # Use the .names argument to control the output names iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), mean, .names = "mean_{.col}")) iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd), .names = "{.col}.{.fn}")) # When the list is not named, .fn is replaced by the function's position iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}")) # across() returns a data frame, which can be used as input of another function df <- data.frame( x1 = c(1, 2, NA), x2 = c(4, NA, 6), y = c("a", "b", "c") ) df \%>\% mutate(x_complete = complete.cases(across(starts_with("x")))) df \%>\% filter(complete.cases(across(starts_with("x")))) # if_any() and if_all() ---------------------------------------------------- iris \%>\% filter(if_any(ends_with("Width"), ~ . > 4)) iris \%>\% filter(if_all(ends_with("Width"), ~ . 
> 2)) } \seealso{ \code{\link[=c_across]{c_across()}} for a function that returns a vector } dplyr/man/filter.Rd0000644000176200001440000001171314121112104013721 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/filter.R \name{filter} \alias{filter} \title{Subset rows using column values} \usage{ filter(.data, ..., .preserve = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> Expressions that return a logical value, and are defined in terms of the variables in \code{.data}. If multiple expressions are included, they are combined with the \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are kept.} \item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are a subset of the input, but appear in the same order. \item Columns are not modified. \item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}). \item Data frame attributes are preserved. } } \description{ The \code{filter()} function is used to subset a data frame, retaining all rows that satisfy your conditions. To be retained, the row must produce a value of \code{TRUE} for all conditions. Note that when a condition evaluates to \code{NA} the row will be dropped, unlike base subsetting with \code{[}. } \details{ The \code{filter()} function is used to subset the rows of \code{.data}, applying the expressions in \code{...} to the column values to determine which rows should be retained. 
It can be applied to both grouped and ungrouped data (see \code{\link[=group_by]{group_by()}} and \code{\link[=ungroup]{ungroup()}}). However, dplyr is not yet smart enough to optimise the filtering operation on grouped datasets that do not need grouped calculations. For this reason, filtering is often considerably faster on ungrouped data. } \section{Useful filter functions}{ There are many functions and operators that are useful when constructing the expressions used to filter the data: \itemize{ \item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc \item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}} \item \code{\link[=is.na]{is.na()}} \item \code{\link[=between]{between()}}, \code{\link[=near]{near()}} } } \section{Grouped tibbles}{ Because filtering expressions are computed within groups, they may yield different results on grouped tibbles. This will be the case as soon as an aggregating, lagging, or ranking function is involved. Compare this ungrouped filtering:\preformatted{starwars \%>\% filter(mass > mean(mass, na.rm = TRUE)) } With the grouped equivalent:\preformatted{starwars \%>\% group_by(gender) \%>\% filter(mass > mean(mass, na.rm = TRUE)) } In the ungrouped version, \code{filter()} compares the value of \code{mass} in each row to the global average (taken over the whole data set), keeping only the rows with \code{mass} greater than this global average. In contrast, the grouped version calculates the average mass separately for each \code{gender} group, and keeps rows with \code{mass} greater than the relevant within-gender average. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}. 
} \examples{ # Filtering by one criterion filter(starwars, species == "Human") filter(starwars, mass > 1000) # Filtering by multiple criteria within a single logical expression filter(starwars, hair_color == "none" & eye_color == "black") filter(starwars, hair_color == "none" | eye_color == "black") # When multiple expressions are used, they are combined using & filter(starwars, hair_color == "none", eye_color == "black") # The filtering operation may yield different results on grouped # tibbles because the expressions are computed within groups. # # The following filters rows where `mass` is greater than the # global average: starwars \%>\% filter(mass > mean(mass, na.rm = TRUE)) # Whereas this keeps rows with `mass` greater than the gender # average: starwars \%>\% group_by(gender) \%>\% filter(mass > mean(mass, na.rm = TRUE)) # To refer to column names that are stored as strings, use the `.data` pronoun: vars <- c("mass", "height") cond <- c(80, 150) starwars \%>\% filter( .data[[vars[[1]]]] > cond[[1]], .data[[vars[[2]]]] > cond[[2]] ) # Learn more in ?dplyr_data_masking } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/between.Rd0000644000176200001440000000120314121112104014056 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.R \name{between} \alias{between} \title{Do values in a numeric vector fall in specified range?} \usage{ between(x, left, right) } \arguments{ \item{x}{A numeric vector of values} \item{left, right}{Boundary values (must be scalars).} } \description{ This is a shortcut for \code{x >= left & x <= right}, implemented efficiently in C++ for local values, and translated to the appropriate SQL for remote tables. 
} \examples{ between(1:12, 7, 9) x <- rnorm(1e2) x[between(x, -1, 1)] ## Or on a tibble using filter filter(starwars, between(height, 100, 150)) } dplyr/man/relocate.Rd0000644000176200001440000000440314121112104014230 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/relocate.R \name{relocate} \alias{relocate} \title{Change column order} \usage{ relocate(.data, ..., .before = NULL, .after = NULL) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to move.} \item{.before, .after}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Destination of columns selected by \code{...}. Supplying neither will move columns to the left-hand side; specifying both is an error.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item The same columns appear in the output, but (usually) in a different place. \item Data frame attributes are preserved. \item Groups are not affected. } } \description{ Use \code{relocate()} to change column positions, using the same syntax as \code{select()} to make it easy to move blocks of columns at once. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("relocate")}. 
} \examples{ df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a") df \%>\% relocate(f) df \%>\% relocate(a, .after = c) df \%>\% relocate(f, .before = b) df \%>\% relocate(a, .after = last_col()) # relocated columns can change name df \%>\% relocate(ff = f) # Can also select variables based on their type df \%>\% relocate(where(is.character)) df \%>\% relocate(where(is.numeric), .after = last_col()) # Or with any other select helper df \%>\% relocate(any_of(c("a", "e", "i", "o", "u"))) # When .before or .after refers to multiple variables they will be # moved to be immediately before/after the selected variables. df2 <- tibble(a = 1, b = "a", c = 1, d = "a") df2 \%>\% relocate(where(is.numeric), .after = where(is.character)) df2 \%>\% relocate(where(is.numeric), .before = where(is.character)) } dplyr/man/group_cols.Rd0000644000176200001440000000172014121112104014605 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/select-helpers.R \name{group_cols} \alias{group_cols} \title{Select grouping variables} \usage{ group_cols(vars = NULL, data = NULL) } \arguments{ \item{vars}{Deprecated; please use data instead.} \item{data}{For advanced use only. The default \code{NULL} automatically finds the "current" data frames.} } \description{ This selection helpers matches grouping variables. It can be used in \code{\link[=select]{select()}} or \code{\link[=vars]{vars()}} selections. } \examples{ gdf <- iris \%>\% group_by(Species) gdf \%>\% select(group_cols()) # Remove the grouping variables from mutate selections: gdf \%>\% mutate_at(vars(-group_cols()), `/`, 100) # -> No longer necessary with across() gdf \%>\% mutate(across(everything(), ~ . / 100)) } \seealso{ \code{\link[=groups]{groups()}} and \code{\link[=group_vars]{group_vars()}} for retrieving the grouping variables outside selection contexts. 
} dplyr/man/compute.Rd0000644000176200001440000000412014144435746014133 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compute-collect.r \name{compute} \alias{compute} \alias{collect} \alias{collapse} \title{Force computation of a database query} \usage{ compute(x, ...) collect(x, ...) collapse(x, ...) } \arguments{ \item{x}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{Arguments passed on to methods} } \description{ \code{compute()} stores results in a remote temporary table. \code{collect()} retrieves data into a local tibble. \code{collapse()} is slightly different: it doesn't force computation, but instead forces generation of the SQL query. This is sometimes needed to work around bugs in dplyr's SQL generation. All functions preserve grouping and ordering. } \section{Methods}{ These functions are \strong{generics}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. 
Methods available in currently loaded packages: \itemize{ \item \code{compute()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("compute")} \item \code{collect()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collect")} \item \code{collapse()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collapse")} } } \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} mtcars2 <- dbplyr::src_memdb() \%>\% copy_to(mtcars, name = "mtcars2-cc", overwrite = TRUE) remote <- mtcars2 \%>\% filter(cyl == 8) \%>\% select(mpg:drat) # Compute query and save in remote table compute(remote) # Compute query bring back to this session collect(remote) # Creates a fresh query based on the generated SQL collapse(remote) \dontshow{\}) # examplesIf} } \seealso{ \code{\link[=copy_to]{copy_to()}}, the opposite of \code{collect()}: it takes a local data frame and uploads it to the remote source. } dplyr/man/case_when.Rd0000644000176200001440000001052414176714177014423 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/case_when.R \name{case_when} \alias{case_when} \title{A general vectorised if} \usage{ case_when(...) } \arguments{ \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A sequence of two-sided formulas. The left hand side (LHS) determines which values match this case. The right hand side (RHS) provides the replacement value. The LHS must evaluate to a logical vector. The RHS does not need to be logical, but all RHSs must evaluate to the same type of vector. Both LHS and RHS may have the same length of either 1 or \code{n}. The value of \code{n} must be consistent across all cases. The case of \code{n == 0} is treated as a variant of \code{n != 1}. 
\code{NULL} inputs are ignored.} } \value{ A vector of length 1 or \code{n}, matching the length of the logical input or output vectors, with the type (and attributes) of the first RHS. Inconsistent lengths or types will generate an error. } \description{ This function allows you to vectorise multiple \code{\link[=if_else]{if_else()}} statements. It is an R equivalent of the SQL \verb{CASE WHEN} statement. If no cases match, \code{NA} is returned. } \examples{ x <- 1:50 case_when( x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", TRUE ~ as.character(x) ) # Like an if statement, the arguments are evaluated in order, so you must # proceed from the most specific to the most general. This won't work: case_when( TRUE ~ as.character(x), x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", x \%\% 35 == 0 ~ "fizz buzz" ) # If none of the cases match, NA is used: case_when( x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", x \%\% 35 == 0 ~ "fizz buzz" ) # Note that NA values in the vector x do not get special treatment. If you want # to explicitly handle NA values you can use the `is.na` function: x[2:4] <- NA_real_ case_when( x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", is.na(x) ~ "nope", TRUE ~ as.character(x) ) # All RHS values need to be of the same type. Inconsistent types will throw an error. # This applies also to NA values used in RHS: NA is logical, use # typed values like NA_real_, NA_complex, NA_character_, NA_integer_ as appropriate. case_when( x \%\% 35 == 0 ~ NA_character_, x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", TRUE ~ as.character(x) ) case_when( x \%\% 35 == 0 ~ 35, x \%\% 5 == 0 ~ 5, x \%\% 7 == 0 ~ 7, TRUE ~ NA_real_ ) # case_when() evaluates all RHS expressions, and then constructs its # result by extracting the selected (via the LHS expressions) parts. 
# In particular NaNs are produced in this case: y <- seq(-2, 2, by = .5) case_when( y >= 0 ~ sqrt(y), TRUE ~ y ) # This throws an error as NA is logical not numeric try(case_when( x \%\% 35 == 0 ~ 35, x \%\% 5 == 0 ~ 5, x \%\% 7 == 0 ~ 7, TRUE ~ NA )) # case_when is particularly useful inside mutate when you want to # create a new variable that relies on a complex combination of existing # variables starwars \%>\% select(name:mass, gender, species) \%>\% mutate( type = case_when( height > 200 | mass > 200 ~ "large", species == "Droid" ~ "robot", TRUE ~ "other" ) ) # `case_when()` is not a tidy eval function. If you'd like to reuse # the same patterns, extract the `case_when()` call in a normal # function: case_character_type <- function(height, mass, species) { case_when( height > 200 | mass > 200 ~ "large", species == "Droid" ~ "robot", TRUE ~ "other" ) } case_character_type(150, 250, "Droid") case_character_type(150, 150, "Droid") # Such functions can be used inside `mutate()` as well: starwars \%>\% mutate(type = case_character_type(height, mass, species)) \%>\% pull(type) # `case_when()` ignores `NULL` inputs. This is useful when you'd # like to use a pattern only under certain conditions. 
Here we'll # take advantage of the fact that `if` returns `NULL` when there is # no `else` clause: case_character_type <- function(height, mass, species, robots = TRUE) { case_when( height > 200 | mass > 200 ~ "large", if (robots) species == "Droid" ~ "robot", TRUE ~ "other" ) } starwars \%>\% mutate(type = case_character_type(height, mass, species, robots = FALSE)) \%>\% pull(type) } dplyr/man/backend_dbplyr.Rd0000644000176200001440000000742114121112104015400 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dbplyr.R \name{backend_dbplyr} \alias{backend_dbplyr} \alias{db_desc} \alias{sql_translate_env} \alias{db_list_tables} \alias{db_has_table} \alias{db_data_type} \alias{db_save_query} \alias{db_begin} \alias{db_commit} \alias{db_rollback} \alias{db_write_table} \alias{db_create_table} \alias{db_insert_into} \alias{db_create_indexes} \alias{db_create_index} \alias{db_drop_table} \alias{db_analyze} \alias{db_explain} \alias{db_query_fields} \alias{db_query_rows} \alias{sql_select} \alias{sql_subquery} \alias{sql_join} \alias{sql_semi_join} \alias{sql_set_op} \alias{sql_escape_string} \alias{sql_escape_ident} \title{Database and SQL generics.} \usage{ db_desc(x) sql_translate_env(con) db_list_tables(con) db_has_table(con, table) db_data_type(con, fields) db_save_query(con, sql, name, temporary = TRUE, ...) db_begin(con, ...) db_commit(con, ...) db_rollback(con, ...) db_write_table(con, table, types, values, temporary = FALSE, ...) db_create_table(con, table, types, temporary = FALSE, ...) db_insert_into(con, table, values, ...) db_create_indexes(con, table, indexes = NULL, unique = FALSE, ...) db_create_index(con, table, columns, name = NULL, unique = FALSE, ...) db_drop_table(con, table, force = FALSE, ...) db_analyze(con, table, ...) db_explain(con, sql, ...) db_query_fields(con, sql, ...) db_query_rows(con, sql, ...) 
sql_select( con, select, from, where = NULL, group_by = NULL, having = NULL, order_by = NULL, limit = NULL, distinct = FALSE, ... ) sql_subquery(con, from, name = random_table_name(), ...) sql_join(con, x, y, vars, type = "inner", by = NULL, ...) sql_semi_join(con, x, y, anti = FALSE, by = NULL, ...) sql_set_op(con, x, y, method) sql_escape_string(con, x) sql_escape_ident(con, x) } \arguments{ \item{con}{A database connection.} \item{table}{A string, the table name.} \item{fields}{A list of fields, as in a data frame.} } \value{ Usually a logical value indicating success. Most failures should generate an error. However, \code{db_has_table()} should return \code{NA} if temporary tables cannot be listed with \code{\link[DBI:dbListTables]{DBI::dbListTables()}} (due to backend API limitations for example). As a result, your methods will rely on the backend to throw an error if a table exists when it shouldn't. } \description{ The \code{sql_} generics are used to build the different types of SQL queries. The default implementations in dbplyr generate ANSI 92 compliant SQL. The \code{db_} generics execute actions on the database. The default implementations in dbplyr typically just call the standard DBI S4 method. } \details{ A few backend methods do not call the standard DBI S4 methods including \itemize{ \item \code{db_data_type()}: Calls \code{\link[DBI:dbDataType]{DBI::dbDataType()}} for every field (e.g. data frame column) and returns a vector of corresponding SQL data types \item \code{db_save_query()}: Builds and executes a \verb{CREATE [TEMPORARY] TABLE ...} SQL command. \item \code{db_create_index()}: Builds and executes a \verb{CREATE INDEX ON
} SQL command. \item \code{db_drop_table()}: Builds and executes a \verb{DROP TABLE [IF EXISTS]
} SQL command. \item \code{db_analyze()}: Builds and executes an \verb{ANALYZE
} SQL command. } Currently, \code{\link[=copy_to]{copy_to()}} is the only user of \code{db_begin()}, \code{db_commit()}, \code{db_rollback()}, \code{db_write_table()}, \code{db_create_indexes()}, \code{db_drop_table()} and \code{db_analyze()}. If you find yourself overriding many of these functions it may suggest that you should just override \code{copy_to()} instead. \code{db_create_table()} and \code{db_insert_into()} have been deprecated in favour of \code{db_write_table()}. } \keyword{internal} dplyr/man/with_groups.Rd0000644000176200001440000000262214151641776015036 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/groups-with.R \name{with_groups} \alias{with_groups} \title{Perform an operation with temporary groups} \usage{ with_groups(.data, .groups, .f, ...) } \arguments{ \item{.data}{A data frame} \item{.groups}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> One or more variables to group by. Unlike \code{\link[=group_by]{group_by()}}, you can only group by existing variables, and you can use tidy-select syntax like \code{c(x, y, z)} to select multiple variables. Use \code{NULL} to temporarily \strong{un}group.} \item{.f}{Function to apply to regrouped data. Supports purrr-style \code{~} syntax} \item{...}{Additional arguments passed on to \code{.f}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This is an experimental new function that allows you to modify the grouping variables for a single operation. 
} \examples{ df <- tibble(g = c(1, 1, 2, 2, 3), x = runif(5)) df \%>\% with_groups(g, mutate, x_mean = mean(x)) df \%>\% with_groups(g, ~ mutate(.x, x1 = first(x))) df \%>\% group_by(g) \%>\% with_groups(NULL, mutate, x_mean = mean(x)) # NB: grouping can't be restored if you remove the grouping variables df \%>\% group_by(g) \%>\% with_groups(NULL, mutate, g = NULL) } dplyr/man/ranking.Rd0000644000176200001440000000367714121112104014077 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rank.R \name{ranking} \alias{ranking} \alias{row_number} \alias{ntile} \alias{min_rank} \alias{dense_rank} \alias{percent_rank} \alias{cume_dist} \title{Windowed rank functions.} \usage{ row_number(x) ntile(x = row_number(), n) min_rank(x) dense_rank(x) percent_rank(x) cume_dist(x) } \arguments{ \item{x}{a vector of values to rank. Missing values are left as is. If you want to treat them as the smallest or largest values, replace with Inf or -Inf before ranking.} \item{n}{number of groups to split up into.} } \description{ Six variations on ranking functions, mimicking the ranking functions described in SQL2003. They are currently implemented using the built in \code{rank} function, and are provided mainly as a convenience when converting between R and SQL. All ranking functions map smallest inputs to smallest outputs. Use \code{\link[=desc]{desc()}} to reverse the direction. } \details{ \itemize{ \item \code{row_number()}: equivalent to \code{rank(ties.method = "first")} \item \code{min_rank()}: equivalent to \code{rank(ties.method = "min")} \item \code{dense_rank()}: like \code{min_rank()}, but with no gaps between ranks \item \code{percent_rank()}: a number between 0 and 1 computed by rescaling \code{min_rank} to \verb{[0, 1]} \item \code{cume_dist()}: a cumulative distribution function. Proportion of all values less than or equal to the current rank. 
\item \code{ntile()}: a rough rank, which breaks the input vector into \code{n} buckets. The size of the buckets may differ by up to one, larger buckets have lower rank. } } \examples{ x <- c(5, 1, 3, 2, 2, NA) row_number(x) min_rank(x) dense_rank(x) percent_rank(x) cume_dist(x) ntile(x, 2) ntile(1:8, 3) # row_number can be used with single table verbs without specifying x # (for data frames and databases that support windowing) mutate(mtcars, row_number() == 1L) mtcars \%>\% filter(between(row_number(), 1, 10)) } dplyr/man/copy_to.Rd0000644000176200001440000000205014121112104014102 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/copy-to.r \name{copy_to} \alias{copy_to} \title{Copy a local data frame to a remote src} \usage{ copy_to(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...) } \arguments{ \item{dest}{remote data source} \item{df}{local data frame} \item{name}{name for new remote table.} \item{overwrite}{If \code{TRUE}, will overwrite an existing table with name \code{name}. If \code{FALSE}, will throw an error if \code{name} already exists.} \item{...}{other parameters passed to methods.} } \value{ a \code{tbl} object in the remote source } \description{ This function uploads a local data frame into a remote data source, creating the table definition as needed. Wherever possible, the new object will be temporary, limited to the current connection to the source. } \examples{ \dontrun{ iris2 <- dbplyr::src_memdb() \%>\% copy_to(iris, overwrite = TRUE) iris2 } } \seealso{ \code{\link[=collect]{collect()}} for the opposite action; downloading remote data into a local dbl. 
} dplyr/man/tbl_ptype.Rd0000644000176200001440000000071514121112104014446 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{tbl_ptype} \alias{tbl_ptype} \title{Return a prototype of a tbl} \usage{ tbl_ptype(.data) } \description{ Used in \verb{_if} functions to enable type-based selection even when the data is lazily generated. Should either return the complete tibble, or if that can not be computed quickly, a 0-row tibble where the columns are of the correct type. } \keyword{internal} dplyr/man/combine.Rd0000644000176200001440000000154214151641776014100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-combine.R \name{combine} \alias{combine} \title{Combine vectors} \usage{ combine(...) } \arguments{ \item{...}{Vectors to combine.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{combine()} is deprecated in favour of \code{\link[vctrs:vec_c]{vctrs::vec_c()}}. \code{combine()} attempted to automatically guess whether you wanted \code{\link[=c]{c()}} or \code{\link[=unlist]{unlist()}}, but could fail in surprising ways. We now believe it's better to be explicit. } \examples{ f1 <- factor("a") f2 <- factor("b") combine(f1, f2) # -> vctrs::vec_c(f1, f2) combine(list(f1, f2)) # -> vctrs::vec_c(!!!list(f1, f2)) } \keyword{internal} dplyr/man/all_equal.Rd0000644000176200001440000000324214151641776014420 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/all-equal.r \name{all_equal} \alias{all_equal} \title{Flexible equality comparison for data frames} \usage{ all_equal( target, current, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE, ... 
) } \arguments{ \item{target, current}{Two data frames to compare.} \item{ignore_col_order}{Should order of columns be ignored?} \item{ignore_row_order}{Should order of rows be ignored?} \item{convert}{Should similar classes be converted? Currently this will convert factor to character and integer to double.} \item{...}{Ignored. Needed for compatibility with \code{all.equal()}.} } \value{ \code{TRUE} if equal, otherwise a character vector describing the reasons why they're not equal. Use \code{\link[=isTRUE]{isTRUE()}} if using the result in an \code{if} expression. } \description{ \code{all_equal()} allows you to compare data frames, optionally ignoring row and column names. It is questioning as of dplyr 1.0.0, because it seems to solve a problem that no longer seems that important. } \examples{ scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))] # By default, ordering of rows and columns ignored all_equal(mtcars, scramble(mtcars)) # But those can be overridden if desired all_equal(mtcars, scramble(mtcars), ignore_col_order = FALSE) all_equal(mtcars, scramble(mtcars), ignore_row_order = FALSE) # By default all_equal is sensitive to variable differences df1 <- data.frame(x = "a", stringsAsFactors = FALSE) df2 <- data.frame(x = factor("a")) all_equal(df1, df2) # But you can request dplyr convert similar types all_equal(df1, df2, convert = TRUE) } \keyword{internal} dplyr/man/summarise_all.Rd0000644000176200001440000001333314151641776015320 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-mutate.R \name{summarise_all} \alias{summarise_all} \alias{summarise_if} \alias{summarise_at} \alias{summarize_all} \alias{summarize_if} \alias{summarize_at} \title{Summarise multiple columns} \usage{ summarise_all(.tbl, .funs, ...) summarise_if(.tbl, .predicate, .funs, ...) summarise_at(.tbl, .vars, .funs, ..., .cols = NULL) summarize_all(.tbl, .funs, ...) summarize_if(.tbl, .predicate, .funs, ...) 
summarize_at(.tbl, .vars, .funs, ..., .cols = NULL) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.cols}{This argument has been renamed to \code{.vars} to fit dplyr's terminology and is deprecated.} } \value{ A data frame. By default, the newly created columns have the shortest names needed to uniquely identify the output. To force inclusion of a name, even when not needed, name the input (see examples for details). } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. The \link{scoped} variants of \code{\link[=summarise]{summarise()}} make it easy to apply the same transformation to multiple variables. There are three variants. 
\itemize{ \item \code{summarise_all()} affects every variable \item \code{summarise_at()} affects variables selected with a character vector or vars() \item \code{summarise_if()} affects variables selected with a predicate function } } \section{Grouping variables}{ If applied on a grouped tibble, these operations are \emph{not} applied to the grouping variables. The behaviour depends on whether the selection is \strong{implicit} (\code{all} and \code{if} selections) or \strong{explicit} (\code{at} selections). \itemize{ \item Grouping variables covered by explicit selections in \code{summarise_at()} are always an error. Add \code{-group_cols()} to the \code{\link[=vars]{vars()}} selection to avoid this:\preformatted{data \%>\% summarise_at(vars(-group_cols(), ...), myoperation) } Or remove \code{group_vars()} from the character vector of column names:\preformatted{nms <- setdiff(nms, group_vars(data)) data \%>\% summarise_at(nms, myoperation) } \item Grouping variables covered by implicit selections are silently ignored by \code{summarise_all()} and \code{summarise_if()}. } } \section{Naming}{ The names of the new columns are derived from the names of the input variables and the names of the functions. \itemize{ \item if there is only one unnamed function (i.e. if \code{.funs} is an unnamed list of length one), the names of the input variables are used to name the new columns; \item for \verb{_at} functions, if there is only one unnamed variable (i.e., if \code{.vars} is of the form \code{vars(a_single_column)}) and \code{.funs} has length greater than one, the names of the functions are used to name the new columns; \item otherwise, the new names are created by concatenating the names of the input variables and the names of the functions, separated with an underscore \code{"_"}. } The \code{.funs} argument can be a named or unnamed list. If a function is unnamed and the name cannot be derived automatically, a name of the form "fn#" is used. 
Similarly, \code{\link[=vars]{vars()}} accepts named and unnamed arguments. If a variable in \code{.vars} is named, a new column by that name will be created. Name collisions in the new columns are disambiguated using a unique suffix. } \section{Life cycle}{ The functions are maturing, because the naming scheme and the disambiguation algorithm are subject to change in dplyr 0.9.0. } \examples{ # The _at() variants directly support strings: starwars \%>\% summarise_at(c("height", "mass"), mean, na.rm = TRUE) # -> starwars \%>\% summarise(across(c("height", "mass"), ~ mean(.x, na.rm = TRUE))) # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): starwars \%>\% summarise_at(vars(height:mass), mean, na.rm = TRUE) # -> starwars \%>\% summarise(across(height:mass, ~ mean(.x, na.rm = TRUE))) # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. Here we apply mean() to the numeric columns: starwars \%>\% summarise_if(is.numeric, mean, na.rm = TRUE) starwars \%>\% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) by_species <- iris \%>\% group_by(Species) # If you want to apply multiple transformations, pass a list of # functions. When there are multiple functions, they create new # variables instead of modifying the variables in place: by_species \%>\% summarise_all(list(min, max)) # -> by_species \%>\% summarise(across(everything(), list(min = min, max = max))) } \seealso{ \link[=scoped]{The other scoped verbs}, \code{\link[=vars]{vars()}} } \keyword{internal} dplyr/man/coalesce.Rd0000644000176200001440000000241614121112104014212 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/coalesce.R \name{coalesce} \alias{coalesce} \title{Find first non-missing element} \usage{ coalesce(...) } \arguments{ \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Vectors. 
Inputs should be recyclable (either be length 1 or same length as the longest vector) and coercible to a common type. If data frames, they are coalesced column by column.} } \value{ A vector the same length as the first \code{...} argument with missing values replaced by the first non-missing value. } \description{ Given a set of vectors, \code{coalesce()} finds the first non-missing value at each position. This is inspired by the SQL \code{COALESCE} function which does the same thing for \code{NULL}s. } \examples{ # Use a single value to replace all missing values x <- sample(c(1:5, NA, NA, NA)) coalesce(x, 0L) # Or match together a complete vector from missing pieces y <- c(1, 2, NA, NA, 5) z <- c(NA, NA, 3, 4, 5) coalesce(y, z) # Supply lists with dynamic dots vecs <- list( c(1, 2, NA, NA, 5), c(NA, NA, 3, 4, 5) ) coalesce(!!!vecs) } \seealso{ \code{\link[=na_if]{na_if()}} to replace specified values with a \code{NA}. \code{\link[tidyr:replace_na]{tidyr::replace_na()}} to replace \code{NA} with a value } dplyr/man/order_by.Rd0000644000176200001440000000210114121112104014230 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order-by.R \name{order_by} \alias{order_by} \title{A helper function for ordering window function output} \usage{ order_by(order_by, call) } \arguments{ \item{order_by}{a vector to order_by} \item{call}{a function call to a window function, where the first argument is the vector being operated on} } \description{ This function makes it possible to control the ordering of window functions in R that don't have a specific ordering parameter. When translated to SQL it will modify the order clause of the OVER function. } \details{ This function works by changing the \code{call} to instead call \code{\link[=with_order]{with_order()}} with the appropriate arguments. 
} \examples{ order_by(10:1, cumsum(1:10)) x <- 10:1 y <- 1:10 order_by(x, cumsum(y)) df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, running = cumsum(value)) arrange(wrong, year) right <- mutate(scrambled, running = order_by(year, cumsum(value))) arrange(right, year) } dplyr/man/progress_estimated.Rd0000644000176200001440000000312614151641776016367 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/progress.R \name{progress_estimated} \alias{progress_estimated} \title{Progress bar with estimated time.} \usage{ progress_estimated(n, min_time = 0) } \arguments{ \item{n}{Total number of items} \item{min_time}{Progress bar will wait until at least \code{min_time} seconds have elapsed before displaying any results.} } \value{ A ref class with methods \code{tick()}, \code{print()}, \code{pause()}, and \code{stop()}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This progress bar has been deprecated since providing progress bars is not the responsibility of dplyr. Instead, you might try the more powerful \href{https://github.com/r-lib/progress}{progress} package. This reference class represents a text progress bar displayed estimated time remaining. When finished, it displays the total duration. The automatic progress bar can be disabled by setting option \code{dplyr.show_progress} to \code{FALSE}. 
} \examples{ p <- progress_estimated(3) p$tick() p$tick() p$tick() p <- progress_estimated(3) for (i in 1:3) p$pause(0.1)$tick()$print() p <- progress_estimated(3) p$tick()$print()$ pause(1)$stop() # If min_time is set, progress bar not shown until that many # seconds have elapsed p <- progress_estimated(3, min_time = 3) for (i in 1:3) p$pause(0.1)$tick()$print() \dontrun{ p <- progress_estimated(10, min_time = 3) for (i in 1:10) p$pause(0.5)$tick()$print() } } \keyword{internal} dplyr/man/defunct.Rd0000644000176200001440000000103614151641776014112 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/defunct.r \name{defunct} \alias{defunct} \alias{id} \alias{failwith} \title{Defunct functions} \usage{ id(.variables, drop = FALSE) failwith(default = NULL, f, quiet = FALSE) } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} } \details{ Executing these functions will tell you which function replaces them. 
} \keyword{internal} dplyr/man/se-deprecated.Rd0000644000176200001440000000742614151641776015200 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-lazyeval.R \name{se-deprecated} \alias{se-deprecated} \alias{add_count_} \alias{add_tally_} \alias{arrange_} \alias{count_} \alias{distinct_} \alias{do_} \alias{filter_} \alias{funs_} \alias{group_by_} \alias{group_indices_} \alias{mutate_} \alias{tally_} \alias{transmute_} \alias{rename_} \alias{rename_vars_} \alias{select_} \alias{select_vars_} \alias{slice_} \alias{summarise_} \alias{summarize_} \title{Deprecated SE versions of main verbs.} \usage{ add_count_(x, vars, wt = NULL, sort = FALSE) add_tally_(x, wt, sort = FALSE) arrange_(.data, ..., .dots = list()) count_(x, vars, wt = NULL, sort = FALSE, .drop = group_by_drop_default(x)) distinct_(.data, ..., .dots, .keep_all = FALSE) do_(.data, ..., .dots = list()) filter_(.data, ..., .dots = list()) funs_(dots, args = list(), env = base_env()) group_by_(.data, ..., .dots = list(), add = FALSE) group_indices_(.data, ..., .dots = list()) mutate_(.data, ..., .dots = list()) tally_(x, wt, sort = FALSE) transmute_(.data, ..., .dots = list()) rename_(.data, ..., .dots = list()) rename_vars_(vars, args) select_(.data, ..., .dots = list()) select_vars_(vars, args, include = chr(), exclude = chr()) slice_(.data, ..., .dots = list()) summarise_(.data, ..., .dots = list()) summarize_(.data, ..., .dots = list()) } \arguments{ \item{x}{A \code{\link[=tbl]{tbl()}}} \item{vars}{Various meanings depending on the verb.} \item{wt}{<\code{\link[=dplyr_data_masking]{data-masking}}> Frequency weights. Can be \code{NULL} or a variable: \itemize{ \item If \code{NULL} (the default), counts the number of rows in each group. \item If a variable, computes \code{sum(wt)} for each group. 
}} \item{sort}{If \code{TRUE}, will show the largest groups at the top.} \item{.data}{A data frame.} \item{.drop}{Drop groups formed by factor levels that don't appear in the data? The default is \code{TRUE} except when \code{.data} has been previously grouped with \code{.drop = FALSE}. See \code{\link[=group_by_drop_default]{group_by_drop_default()}} for details.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} \item{dots, .dots, ...}{Pair/values of expressions coercible to lazy objects.} \item{args}{Various meanings depending on the verb.} \item{env}{The environment in which functions should be evaluated.} \item{add}{When \code{FALSE}, the default, \code{group_by()} will override existing groups. To add to the existing groups, use \code{.add = TRUE}. This argument was previously called \code{add}, but that prevented creating a new grouping variable called \code{add}, and conflicts with our naming conventions.} \item{include, exclude}{Character vector of column names to always include/exclude.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} dplyr used to offer twin versions of each verb suffixed with an underscore. These versions had standard evaluation (SE) semantics: rather than taking arguments by code, like NSE verbs, they took arguments by value. Their purpose was to make it possible to program with dplyr. However, dplyr now uses tidy evaluation semantics. NSE verbs still capture their arguments, but you can now unquote parts of these arguments. This offers full programmability with NSE verbs. Thus, the underscored versions are now superfluous. Unquoting triggers immediate evaluation of its operand and inlines the result within the captured expression. 
This result can be a value or an expression to be evaluated later with the rest of the argument. See \code{vignette("programming")} for more information. } \keyword{internal} dplyr/man/group_data.Rd0000644000176200001440000000356414121112104014566 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group_data.R \name{group_data} \alias{group_data} \alias{group_keys} \alias{group_rows} \alias{group_indices} \alias{group_vars} \alias{groups} \alias{group_size} \alias{n_groups} \title{Grouping metadata} \usage{ group_data(.data) group_keys(.tbl, ...) group_rows(.data) group_indices(.data, ...) group_vars(x) groups(x) group_size(x) n_groups(x) } \arguments{ \item{.data, .tbl, x}{A data frame or extension (like a tibble or grouped tibble).} \item{...}{Use of \code{...} is now deprecated; please use \code{group_by()} first instead.} } \description{ \itemize{ \item \code{group_data()} returns a data frame that defines the grouping structure. The columns give the values of the grouping variables. The last column, always called \code{.rows}, is a list of integer vectors that gives the location of the rows in each group. You can retrieve just the grouping data with \code{group_keys()}, and just the locations with \code{group_rows()}. \item \code{group_indices()} returns an integer vector the same length as \code{.data} that gives the group that each row belongs to (cf. \code{group_rows()} which returns the rows which each group contains). \code{group_indices()} with no argument is deprecated, superseded by \code{\link[=cur_group_id]{cur_group_id()}}. \item \code{group_vars()} gives names of grouping variables as character vector; \code{groups()} gives the names as a list of symbols. \item \code{group_size()} gives the size of each group, and \code{n_groups()} gives the total number of groups. } See \link{context} for equivalent functions that return values for the \emph{current} group. 
} \examples{ df <- tibble(x = c(1,1,2,2)) group_vars(df) group_rows(df) group_data(df) group_indices(df) gf <- group_by(df, x) group_vars(gf) group_rows(gf) group_data(gf) group_indices(gf) } \keyword{internal} dplyr/man/slice.Rd0000644000176200001440000001455714151641776013575 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{slice} \alias{slice} \alias{slice_head} \alias{slice_tail} \alias{slice_min} \alias{slice_max} \alias{slice_sample} \title{Subset rows using their positions} \usage{ slice(.data, ..., .preserve = FALSE) slice_head(.data, ..., n, prop) slice_tail(.data, ..., n, prop) slice_min(.data, order_by, ..., n, prop, with_ties = TRUE) slice_max(.data, order_by, ..., n, prop, with_ties = TRUE) slice_sample(.data, ..., n, prop, weight_by = NULL, replace = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{For \code{slice()}: <\code{\link[=dplyr_data_masking]{data-masking}}> Integer row values. Provide either positive values to keep, or negative values to drop. The values provided must be either all positive or all negative. Indices beyond the number of rows in the input are silently ignored. For \code{slice_helpers()}, these arguments are passed on to methods.} \item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is.} \item{n, prop}{Provide either \code{n}, the number of rows, or \code{prop}, the proportion of rows to select. If neither are supplied, \code{n = 1} will be used. If a negative value of \code{n} or \code{prop} is provided, the specified number or proportion of rows will be removed. 
If \code{n} is greater than the number of rows in the group (or \code{prop > 1}), the result will be silently truncated to the group size. If the \code{prop}ortion of a group size does not yield an integer number of rows, the absolute value of \code{prop*nrow(.data)} is rounded down.} \item{order_by}{Variable or function of variables to order by.} \item{with_ties}{Should ties be kept together? The default, \code{TRUE}, may return more rows than you request. Use \code{FALSE} to ignore ties, and return the first \code{n} rows.} \item{weight_by}{Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1.} \item{replace}{Should sampling be performed with (\code{TRUE}) or without (\code{FALSE}, the default) replacement.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Each row may appear 0, 1, or many times in the output. \item Columns are not modified. \item Groups are not modified. \item Data frame attributes are preserved. } } \description{ \code{slice()} lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows. It is accompanied by a number of helpers for common use cases: \itemize{ \item \code{slice_head()} and \code{slice_tail()} select the first or last rows. \item \code{slice_sample()} randomly selects rows. \item \code{slice_min()} and \code{slice_max()} select rows with highest or lowest values of a variable. } If \code{.data} is a \link{grouped_df}, the operation will be performed on each group, so that (e.g.) \code{slice_head(df, n = 5)} will select the first five rows in each group. } \details{ Slice does not work with relational databases because they have no intrinsic notion of row order. If you want to perform the equivalent operation, use \code{\link[=filter]{filter()}} and \code{\link[=row_number]{row_number()}}. 
} \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. \item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}. \item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}. \item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}. \item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}. \item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}. } } \examples{ mtcars \%>\% slice(1L) # Similar to tail(mtcars, 1): mtcars \%>\% slice(n()) mtcars \%>\% slice(5:n()) # Rows can be dropped with negative indices: slice(mtcars, -(1:4)) # First and last rows based on existing order mtcars \%>\% slice_head(n = 5) mtcars \%>\% slice_tail(n = 5) # Rows with minimum and maximum values of a variable mtcars \%>\% slice_min(mpg, n = 5) mtcars \%>\% slice_max(mpg, n = 5) # slice_min() and slice_max() may return more rows than requested # in the presence of ties. 
Use with_ties = FALSE to suppress mtcars \%>\% slice_min(cyl, n = 1) mtcars \%>\% slice_min(cyl, n = 1, with_ties = FALSE) # slice_sample() allows you to randomly select with or without replacement mtcars \%>\% slice_sample(n = 5) mtcars \%>\% slice_sample(n = 5, replace = TRUE) # you can optionally weight by a variable - this code weights by the # physical weight of the cars, so heavy cars are more likely to get # selected mtcars \%>\% slice_sample(weight_by = wt, n = 5) # Group wise operation ---------------------------------------- df <- tibble( group = rep(c("a", "b", "c"), c(1, 2, 4)), x = runif(7) ) # All slice helpers operate per group, silently truncating to the group # size, so the following code works without error df \%>\% group_by(group) \%>\% slice_head(n = 2) # When specifying the proportion of rows to include non-integer sizes # are rounded down, so group a gets 0 rows df \%>\% group_by(group) \%>\% slice_head(prop = 0.5) # Filter equivalents -------------------------------------------- # slice() expressions can often be written to use `filter()` and # `row_number()`, which can also be translated to SQL. For many databases, # you'll need to supply an explicit variable to use to compute the row number. filter(mtcars, row_number() == 1L) filter(mtcars, row_number() == n()) filter(mtcars, between(row_number(), 5, n())) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/c_across.Rd0000644000176200001440000000215514121112104014230 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/across.R \name{c_across} \alias{c_across} \title{Combine values from multiple columns} \usage{ c_across(cols = everything()) } \arguments{ \item{cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to transform. 
Because \code{across()} is used within functions like \code{summarise()} and \code{mutate()}, you can't select or compute upon grouping variables.} } \description{ \code{c_across()} is designed to work with \code{\link[=rowwise]{rowwise()}} to make it easy to perform row-wise aggregations. It has two differences from \code{c()}: \itemize{ \item It uses tidy select semantics so you can easily select multiple variables. See \code{vignette("rowwise")} for more details. \item It uses \code{\link[vctrs:vec_c]{vctrs::vec_c()}} in order to give safer outputs. } } \examples{ df <- tibble(id = 1:4, w = runif(4), x = runif(4), y = runif(4), z = runif(4)) df \%>\% rowwise() \%>\% mutate( sum = sum(c_across(w:z)), sd = sd(c_across(w:z)) ) } \seealso{ \code{\link[=across]{across()}} for a function that returns a tibble. } dplyr/man/rows.Rd0000644000176200001440000000771114151641776013462 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rows.R \name{rows} \alias{rows} \alias{rows_insert} \alias{rows_update} \alias{rows_patch} \alias{rows_upsert} \alias{rows_delete} \title{Manipulate individual rows} \usage{ rows_insert(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) rows_update(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) rows_patch(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) rows_upsert(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) rows_delete(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) } \arguments{ \item{x, y}{A pair of data frames or data frame extensions (e.g. a tibble). \code{y} must have the same columns of \code{x} or a subset.} \item{by}{An unnamed character vector giving the key columns. The key values must uniquely identify each row (i.e. each combination of key values occurs at most once), and the key columns must exist in both \code{x} and \code{y}. 
By default, we use the first column in \code{y}, since the first column is a reasonable place to put an identifier variable.} \item{...}{Other parameters passed onto methods.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{in_place}{Should \code{x} be modified in place? This argument is only relevant for mutable backends (e.g. databases, data.tables). When \code{TRUE}, a modified version of \code{x} is returned invisibly; when \code{FALSE}, a new object representing the resulting changes is returned.} } \value{ An object of the same type as \code{x}. The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item \code{rows_update()} preserves rows as is; \code{rows_insert()} and \code{rows_upsert()} return all existing rows and potentially new rows; \code{rows_delete()} returns a subset of the rows. \item Columns are not added, removed, or relocated, though the data may be updated. \item Groups are taken from \code{x}. \item Data frame attributes are taken from \code{x}. } If \code{in_place = TRUE}, the result will be returned invisibly. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} These functions provide a framework for modifying rows in a table using a second table of data. The two tables are matched \code{by} a set of key variables whose values must uniquely identify each row. The functions are inspired by SQL's \code{INSERT}, \code{UPDATE}, and \code{DELETE}, and can optionally modify \code{in_place} for selected backends. 
\itemize{ \item \code{rows_insert()} adds new rows (like \code{INSERT}); key values in \code{y} must not occur in \code{x}. \item \code{rows_update()} modifies existing rows (like \code{UPDATE}); key values in \code{y} must occur in \code{x}. \item \code{rows_patch()} works like \code{rows_update()} but only overwrites \code{NA} values. \item \code{rows_upsert()} inserts or updates depending on whether or not the key value in \code{y} already exists in \code{x}. \item \code{rows_delete()} deletes rows (like \code{DELETE}); key values in \code{y} must exist in \code{x}. } } \examples{ data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) data # Insert rows_insert(data, tibble(a = 4, b = "z")) try(rows_insert(data, tibble(a = 3, b = "z"))) # Update rows_update(data, tibble(a = 2:3, b = "z")) rows_update(data, tibble(b = "z", a = 2:3), by = "a") # Variants: patch and upsert rows_patch(data, tibble(a = 2:3, b = "z")) rows_upsert(data, tibble(a = 2:4, b = "z")) # Delete and truncate rows_delete(data, tibble(a = 2:3)) rows_delete(data, tibble(a = 2:3, b = "b")) try(rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b"))) } dplyr/man/context.Rd0000644000176200001440000000372714121112104014126 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/context.R \name{context} \alias{context} \alias{n} \alias{cur_data} \alias{cur_data_all} \alias{cur_group} \alias{cur_group_id} \alias{cur_group_rows} \alias{cur_column} \title{Context dependent expressions} \usage{ n() cur_data() cur_data_all() cur_group() cur_group_id() cur_group_rows() cur_column() } \description{ These functions return information about the "current" group or "current" variable, so only work inside specific contexts like \code{summarise()} and \code{mutate()} \itemize{ \item \code{n()} gives the current group size. \item \code{cur_data()} gives the current data for the current group (excluding grouping variables). 
\item \code{cur_data_all()} gives the current data for the current group (including grouping variables) \item \code{cur_group()} gives the group keys, a tibble with one row and one column for each grouping variable. \item \code{cur_group_id()} gives a unique numeric identifier for the current group. \item \code{cur_group_rows()} gives the row indices for the current group. \item \code{cur_column()} gives the name of the current column (in \code{\link[=across]{across()}} only). } See \code{\link[=group_data]{group_data()}} for equivalent functions that return values for all groups. } \section{data.table}{ If you're familiar with data.table: \itemize{ \item \code{cur_data()} <-> \code{.SD} \item \code{cur_group_id()} <-> \code{.GRP} \item \code{cur_group()} <-> \code{.BY} \item \code{cur_group_rows()} <-> \code{.I} } } \examples{ df <- tibble( g = sample(rep(letters[1:3], 1:3)), x = runif(6), y = runif(6) ) gf <- df \%>\% group_by(g) gf \%>\% summarise(n = n()) gf \%>\% mutate(id = cur_group_id()) gf \%>\% summarise(row = cur_group_rows()) gf \%>\% summarise(data = list(cur_group())) gf \%>\% summarise(data = list(cur_data())) gf \%>\% summarise(data = list(cur_data_all())) gf \%>\% mutate(across(everything(), ~ paste(cur_column(), round(.x, 2)))) } dplyr/man/rmd/0000755000176200001440000000000014121112104012724 5ustar liggesusersdplyr/man/rmd/overview.Rmd0000644000176200001440000000275014121112104015242 0ustar liggesusers Tidyverse selections implement a dialect of R where operators make it easy to select variables: - `:` for selecting a range of consecutive variables. - `!` for taking the complement of a set of variables. - `&` and `|` for selecting the intersection or the union of two sets of variables. - `c()` for combining selections. In addition, you can use __selection helpers__. Some helpers select specific columns: * [`everything()`][tidyselect::everything]: Matches all variables. 
* [`last_col()`][tidyselect::last_col]: Select last variable, possibly with an offset. These helpers select variables by matching patterns in their names: * [`starts_with()`][tidyselect::starts_with]: Starts with a prefix. * [`ends_with()`][tidyselect::ends_with()]: Ends with a suffix. * [`contains()`][tidyselect::contains()]: Contains a literal string. * [`matches()`][tidyselect::matches()]: Matches a regular expression. * [`num_range()`][tidyselect::num_range()]: Matches a numerical range like x01, x02, x03. These helpers select variables from a character vector: * [`all_of()`][tidyselect::all_of()]: Matches variable names in a character vector. All names must be present, otherwise an out-of-bounds error is thrown. * [`any_of()`][tidyselect::any_of()]: Same as `all_of()`, except that no error is thrown for names that don't exist. This helper selects variables with a function: * [`where()`][tidyselect::where()]: Applies a function to all variables and selects those for which the function returns `TRUE`. dplyr/man/rmd/setup.Rmd0000644000176200001440000000035514121112104014533 0ustar liggesusers ```{r, include = FALSE} options( tibble.print_min = 4, tibble.max_extra_cols = 8, digits = 2, crayon.enabled = FALSE, cli.unicode = FALSE ) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(tidyverse) ``` dplyr/man/distinct.Rd0000644000176200001440000000470614121112104014261 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distinct.R \name{distinct} \alias{distinct} \title{Subset distinct/unique rows} \usage{ distinct(.data, ..., .keep_all = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> Optional variables to use when determining uniqueness. 
If there are multiple rows for a given combination of inputs, only the first row will be preserved. If omitted, will use all variables.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are a subset of the input but appear in the same order. \item Columns are not modified if \code{...} is empty or \code{.keep_all} is \code{TRUE}. Otherwise, \code{distinct()} first calls \code{mutate()} to create new columns. \item Groups are not modified. \item Data frame attributes are preserved. } } \description{ Select only unique/distinct rows from a data frame. This is similar to \code{\link[=unique.data.frame]{unique.data.frame()}} but considerably faster. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}. 
} \examples{ df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) nrow(df) nrow(distinct(df)) nrow(distinct(df, x, y)) distinct(df, x) distinct(df, y) # You can choose to keep all other variables as well distinct(df, x, .keep_all = TRUE) distinct(df, y, .keep_all = TRUE) # You can also use distinct on computed variables distinct(df, diff = abs(x - y)) # use across() to access select()-style semantics distinct(starwars, across(contains("color"))) # Grouping ------------------------------------------------- # The same behaviour applies for grouped data frames, # except that the grouping variables are always included df <- tibble( g = c(1, 1, 2, 2), x = c(1, 1, 2, 1) ) \%>\% group_by(g) df \%>\% distinct(x) } dplyr/man/all_vars.Rd0000644000176200001440000000240214151641776014263 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{all_vars} \alias{all_vars} \alias{any_vars} \title{Apply predicate to all variables} \usage{ all_vars(expr) any_vars(expr) } \arguments{ \item{expr}{<\code{\link[=dplyr_data_masking]{data-masking}}> An expression that returns a logical vector, using \code{.} to refer to the "current" variable.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{all_vars()} and \code{any_vars()} were only needed for the scoped verbs, which have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These quoting functions signal to scoped filtering verbs (e.g. \code{\link[=filter_if]{filter_if()}} or \code{\link[=filter_all]{filter_all()}}) that a predicate expression should be applied to all relevant variables. 
The \code{all_vars()} variant takes the intersection of the predicate expressions with \code{&} while the \code{any_vars()} variant takes the union with \code{|}. } \seealso{ \code{\link[=vars]{vars()}} for other quoting functions that you can use with scoped verbs. } dplyr/man/new_grouped_df.Rd0000644000176200001440000000315214151641776015452 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/grouped-df.r, R/rowwise.r \name{new_grouped_df} \alias{new_grouped_df} \alias{validate_grouped_df} \alias{new_rowwise_df} \alias{validate_rowwise_df} \title{Low-level construction and validation for the grouped_df and rowwise_df classes} \usage{ new_grouped_df(x, groups, ..., class = character()) validate_grouped_df(x, check_bounds = FALSE) new_rowwise_df(data, group_data = NULL, ..., class = character()) validate_rowwise_df(x) } \arguments{ \item{x}{A data frame} \item{groups}{The grouped structure, \code{groups} should be a data frame. Its last column should be called \code{.rows} and be a list of 1 based integer vectors that all are between 1 and the number of rows of \code{.data}.} \item{...}{additional attributes} \item{class}{additional class, will be prepended to canonical classes.} \item{check_bounds}{whether to check all indices for out of bounds problems in \code{grouped_df} objects} } \description{ \code{new_grouped_df()} and \code{new_rowwise_df()} are constructors designed to be high-performance so only check types, not values. This means it is the caller's responsibility to create valid values, and hence this is for expert use only. \code{validate_grouped_df()} and \code{validate_rowwise_df()} validate the attributes of a \code{grouped_df} or a \code{rowwise_df}. 
} \examples{ # 5 bootstrap samples tbl <- new_grouped_df( tibble(x = rnorm(10)), groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) ) # mean of each bootstrap sample summarise(tbl, x = mean(x)) } \keyword{internal} dplyr/man/location.Rd0000644000176200001440000000141514151641776014273 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-location.R \name{location} \alias{location} \alias{changes} \title{Print the location in memory of a data frame} \usage{ location(df) changes(x, y) } \arguments{ \item{df}{a data frame} \item{x, y}{two data frames to compare} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This is useful for understanding how and when dplyr makes copies of data frames } \examples{ location(mtcars) # -> lobstr::ref(mtcars) mtcars2 <- mutate(mtcars, cyl2 = cyl * 2) # -> lobstr::ref(mtcars2) changes(mtcars, mtcars2) # -> lobstr::ref(mtcars, mtcars2) } \keyword{internal} dplyr/man/band_members.Rd0000644000176200001440000000166514121112104015067 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data-bands.R \docType{data} \name{band_members} \alias{band_members} \alias{band_instruments} \alias{band_instruments2} \title{Band membership} \format{ Each is a tibble with two variables and three observations } \usage{ band_members band_instruments band_instruments2 } \description{ These data sets describe band members of the Beatles and Rolling Stones. They are toy data sets that can be displayed in their entirety on a slide (e.g. to demonstrate a join). } \details{ \code{band_instruments} and \code{band_instruments2} contain the same data but use different column names for the first column of the data set. 
\code{band_instruments} uses \code{name}, which matches the name of the key column of \code{band_members}; \code{band_instruments2} uses \code{artist}, which does not. } \examples{ band_members band_instruments band_instruments2 } \keyword{datasets} dplyr/man/common_by.Rd0000644000176200001440000000042214121112104014411 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join-common-by.R \name{common_by} \alias{common_by} \title{Extract out common by variables} \usage{ common_by(by = NULL, x, y) } \description{ Extract out common by variables } \keyword{internal} dplyr/man/group_by_drop_default.Rd0000644000176200001440000000124014121112104017004 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-by.r \name{group_by_drop_default} \alias{group_by_drop_default} \title{Default value for .drop argument of group_by} \usage{ group_by_drop_default(.tbl) } \arguments{ \item{.tbl}{A data frame} } \value{ \code{TRUE} unless \code{.tbl} is a grouped data frame that was previously obtained by \code{group_by(.drop = FALSE)} } \description{ Default value for .drop argument of group_by } \examples{ group_by_drop_default(iris) iris \%>\% group_by(Species) \%>\% group_by_drop_default() iris \%>\% group_by(Species, .drop = FALSE) \%>\% group_by_drop_default() } \keyword{internal} dplyr/DESCRIPTION0000644000176200001440000000407714200434032013112 0ustar liggesusersType: Package Package: dplyr Title: A Grammar of Data Manipulation Version: 1.0.8 Authors@R: c(person(given = "Hadley", family = "Wickham", role = c("aut", "cre"), email = "hadley@rstudio.com", comment = c(ORCID = "0000-0003-4757-117X")), person(given = "Romain", family = "François", role = "aut", comment = c(ORCID = "0000-0002-2444-4226")), person(given = "Lionel", family = "Henry", role = "aut"), person(given = "Kirill", family = "Müller", role = "aut", comment = c(ORCID = "0000-0002-1416-3412")), person(given = "RStudio", role = 
c("cph", "fnd"))) Description: A fast, consistent tool for working with data frame like objects, both in memory and out of memory. License: MIT + file LICENSE URL: https://dplyr.tidyverse.org, https://github.com/tidyverse/dplyr BugReports: https://github.com/tidyverse/dplyr/issues Depends: R (>= 3.4.0) Imports: generics, glue (>= 1.3.2), lifecycle (>= 1.0.1), magrittr (>= 1.5), methods, R6, rlang (>= 1.0.0), tibble (>= 2.1.3), tidyselect (>= 1.1.1), utils, vctrs (>= 0.3.5), pillar (>= 1.5.1) Suggests: bench, broom, callr, covr, DBI, dbplyr (>= 1.4.3), ggplot2, knitr, Lahman, lobstr, microbenchmark, nycflights13, purrr, rmarkdown, RMySQL, RPostgreSQL, RSQLite, testthat (>= 3.1.1), tidyr, withr VignetteBuilder: knitr Encoding: UTF-8 LazyData: true RoxygenNote: 7.1.2 Config/testthat/edition: 3 Config/Needs/website: tidyverse, shiny, r-lib/pkgdown, tidyverse/tidytemplate NeedsCompilation: yes Packaged: 2022-02-07 08:29:01 UTC; romainfrancois Author: Hadley Wickham [aut, cre] (), Romain François [aut] (), Lionel Henry [aut], Kirill Müller [aut] (), RStudio [cph, fnd] Maintainer: Hadley Wickham Repository: CRAN Date/Publication: 2022-02-08 09:30:02 UTC dplyr/build/0000755000176200001440000000000014200154115012474 5ustar liggesusersdplyr/build/vignette.rds0000644000176200001440000000063514200154115015037 0ustar liggesusersSn0tB<[Zg|zA:KI9uo;\9㝕;;>BG#::**z(PP7$]l䃈'%s爧9,d 'Ћ$,`loaO{Aԅ>#Y$abӒe1/cIƳB\ӕ) L1DװȔf<>6go3Hb.< AXo*OMzZLW ҿ(d!Z" @$ {dt%ӹx N5|4.JW%az/0.ۭ*/;|^3 I)xc`]{d˕zv4Y ~~SɅF:lQ|mN%چYFTeM }dplyr/build/dplyr.pdf0000644000176200001440000104723214200154074014336 0ustar liggesusers%PDF-1.5 % 2 0 obj << /Type /ObjStm /N 100 /First 819 /Length 1249 /Filter /FlateDecode >> stream xڝr8z Cl,\TNgP,3췟S ,un) Vf0!dI.Ș0+4SY`J,+p[2 g8+:CbJeL0-]&.1˙-3Ì #*' 3l^_/ +zWHdE9laKB;"^ c^_K{ ^!NWU ^)q < xIL()7;%dR6hT"b%R% j,CdlB! Y@@@ 2'-@. 
tdMr*ȆnF|!BdP,%EȔJbbAA$E(2r("b";rrAF)LS(0APUJ\ATYU!@V9  F0C!qH+ D"qZ(??><MUQeSsFxyX5J=Q3ݿ\+#5a?,&:mXånqߏ~io糵}Ԗ;:^M`xxu]9Hp~T;9XUk]ma{Qx@;kDyl_4_)Լ,YK"̷-cSiq$AN^ BZv?miu@҄ߒ$qssx¼:!}۪ ʚZaSv vtSpݷQD7UHmӍ.XGoKqNR(t&xbau7kSxC]5dg- 7|M&yj|j6|5OnmÉ2xwͮMfkïjc>*Qp \eU]{"]9[NZS7.["([MQ*[xnሉ9 ^pϑu_gRa:S z۳O~mX߰R/ > stream xڕV]o6}ϯl,IJ-,i!6eΒhT Q"eD-7O(s= nnsC@)&fD 6E94ߧ%/0š:_n04@$ (!SBeMǷvD璀b!-R3Y,fGそU7y2LVhȆV3tq<`IuJ vOujgӆ*\4Ӭ,7S8cRy_ cS&SLLD$7g l{(FqYrG!7[zggI/&!wsVCq>,Q;֬-ӦpAd*gz9kR&Bz76vB\<=症kUݝ6v1F1tv4n—Hk[~[.4/ƀ YN i#d@{&Pr4J6'.W*ՋrBjvɹR).R-S?ŴG>jJz0y"e_Ktl?d > stream xK6:Z@EGf7@%) Yb|خ#ЅeYCPCQ輘=[w"w! ,S,\3zdH0b7{ބD` Z$ail ^J`ߝxF@{k膪몕Ysﯫ Bm%o|eR?Dk.^ ؜XOI@bhKJ.%yʛ1DJ!]Kҵ+*e62@/o^8eo}2Xƣ>|@C0͆;OP}eaVjE<|[v{;d}w^UuWQ¼R OFGU~g!<RTZlG,:_v]lネʞ.bs޴}^z!%>R.fZbw3k$/<Փ>INBdSY"" 3!j81'MժDsz B{Fѷ㐃SEËC0Q@BY''Ꮣi21lB`YvJ12]XDOLm.\Ւ mt(LNWGp h#[> X'ɇȨJ8m![@Ͳ*o3Vz=E? f<|C`%+ ؔ*%"n)> stream xZ[o6~$;{ذvXQ X>utUWPdQe=2C΅{~]‹P$1.=%1zW+/˜׫^cW-Ǭ 7{!0l'Njd=X$|4}c ^iG@K2sq$P0kq9&Ȯ|AI>h&MC-@yX 9b*ɗ,1W$ E@QtAgg Nm΃0اCQ^Q$L1I bw6qp2*8i+ CyT SQ1nSWs*{c?1'j{8NcF81nSgFn{:8rBT۽u4Qq:*-g $ 5|:*8H%IqjBΚ]IM1L;4U\ ▤h*L.rJ*6c09Wğ¿T!tR~&VبugutԜQrT]槃?u0BzeH#_M8&H+_9}ZtX"Co;| ^XzhqrSëcwu`$9et|u`"5( I;_6tgam"wEc4m ߎYmZ%TҲHYHH/ݡcCK62>` $!v\1kQov&T| W;[LѠ*ܬa);[ג kkd@)?MjY6nl 4 L[X'=n A><:zXQ$ܷJb$+"Z} .VqYlG 3>'&S0yFN3Q4W/M!;w˶U\5TF'-p eڻ&וŞ_> stream xڕYm6_aPTƭHlMCqw6wb!KW,zdqd+!9 ]_x \$" U, (jq/>yۿ~oCXҬ^|8|a5XYJ;[u8/V2aL爬.۫J$:FHb5HYᏈ 'wd㪵ͺK@DI0lzj[]*񺚈]VnvbҾ~Yڷl?]9gWD@ GsKQ7} 𓎅5YufP&,~GI;h ҦhM$|5:%9Cph]ߥݥ#u"b3p_uOgiR^?ǎtKuCͶMDҀiD!R >,@t]VDD$(M-6 =8np t/$f47roiBfpqAa~YVK{=L3Ħr}Zn lV̄AE,Gw6f/a 3fJ4"F=O}4Rު+nŕ2*$F(5zU- >49XR 2i:kAH=(3$\ѴǫhE+E\vʼnIcE_נMqG bWڗخo\2ED@Li.Mx;$[ъ ı>1p{s\v#P7}I BNۊ:ӿx72F,J0` lk4`š4)*g\:%od)r|^١g}sG><"2.TIz潑fr+"Ϲ J NRe:8-ec@g'UNfqI{do6C `{AAc? 
!_n@ϧ%6&!,C$Ck-L~eiY.үX Pr/bό%45bOώMk`hrqdrhoP0NJ;Xr_F4K%dy+H,7h *KɋL0˹mX:p__h+roCK~dߑ1o$uhe^ _9Q 9t JSF@&Q6՜P*.w -5Q")kK 6G:QZ|3* '/^-p/barSӨ\SZP`15ar V-񯂮njc$Tt^栱s(RSuSHB# ?͉ȡ&/V8AB H!Kk .,x΃Yc*ZCo)]ieZ2u<[izdi2GPi6{@sKN OO>@F]Z=>gq4^G 7]Yo̵Mց9~=Xq2*]eL|N;H~!-ó!ݬ[Zq䭭ly$IUtE,1-@|T헪-§5H zmd?T5FNFZxS0HA3J0tg̲z.*n)y- 8 Rՙ}0 nFhyG}@if_S:(h}_Ъnh;?,] NɻTa^hm m`TTke4<aVt}4d$4s:̀ia)U ւksPVRpls~xCfd{)1oKoiȐ$0*5*ddIJS/?1\ܭmXXtE=<}h͝#L}x2PgEEwީFmOh0X5tWC?h_i+7s(#V5.`%3 bG?`cp+'2 .mfU39h&&_C3p(%ޒ!y *UR;ɢ;5q3wNE;Os endstream endobj 203 0 obj << /Type /ObjStm /N 100 /First 892 /Length 2375 /Filter /FlateDecode >> stream xڽZQo~ׯc%9!Y6@ hĨmK}YڱXLa+-ٙ, ƙI&E3Gx-l/&8GAf!}D\Ld Ăq!HMQ"ƅ1>x `^`!qM#p`I#13e{CNtPT0⨣ 1Qa2,PL9 870K<Ĩ"R(hbRE;CzEg!ɰY@" Ybt@gBEgpY$A%F L. )1g.a)3rx5*U$%C/_ H&ΞLw٘DR$A;C:wg%.8ҙU *`rAY2C%Ȃr:JL.QG%SN[T , !RԢ!a.DY#ZRrv9CRr:T=r"铠Eq~y %AuR%w%ώ ^_WjًG蜻JO =-t9x0dy~3 B-aէPGvt'v}a?/~|GO[v{w~u6fݏx+=ӷP[3>_+lxznN>g/juDs6,ӹ6m~v~j0 HK@MWWW ~>]c{[RוMg6[-EFwO+`=h|tu F_V? ]}:_BC/A폲V0JrnUlgzdOÖH2^<|dˤۘtyg{ron թ>2a'1's8k9OoN^ ߼q5;Ũ҃ V97DO4186-6֖k+M͵q+WE2,a! v̡x 1M HvBx^x<%i{Xvav|K CpA&@f;xx,NMޖ-M+?_.nkBZbLw&բ?XiMHKO ME?l\M-ElZ_SmcmRۊ+x⅊*^x⅊*^xNG4L4L4LT6~Qis K6fP)lSe l|c}Srb4xyVE{مOa2wNGK.H[п"sܭUvBCp AkDaتk?rX V[uYOOb HJ# |lKFƟDsiCX3V(}n7Mf;`A -RIciHukЖ%-J58,<#B9D$-uZ |f [9(l,ٖV-.#M uH"b~4ȋX[kU$~1[\pZx~kpNbUGF)Gn7QF>o:dsIo endstream endobj 385 0 obj << /Length 996 /Filter /FlateDecode >> stream xWmo6_ul9 tXRl؇mm ЋARIe}LIy|x`ϳuS4Mh̖( `B6J]|66su&`uv59|,'lSȲ[ķa*r:q+TndK7ePǸ^ɦبL!!hj6YK0}5(U @~!;L,S;zK.o ӮxQD2'ށ|^ N"!{"d?ό&`j;%f9S/9D*د ~b_&R(Czԇî[pP%x?aG'#T:,z aCa>c'C2> stream xX[o6~AK-L\Qwx͖$~𜣸qw_&>s"1̜."p0QĹ J{$tO=Ix-ox)ĵ*/uB`cb[5߂! u)̦ NHމ$Y"";+j_[J#l{"iZѯ޼`H)z\=|r{q*$l!߁`Yv 㲐Ǖ/+K9{M3#n'x~vkL ͥP#v?Ev !,QfL727C\( x(A}܊ gmf JȬ!U)Eb~Į<"7oyǀ6o'C2x/>CXBDp. 
.#1)F=Z=cC ; ͊Dr\0uQ*0 f2.ܣ3Q䙹L]m\{ԝlq2˃2Ø#-[i61*[0WfEbKl| LXɎ endstream endobj 410 0 obj << /Length 1801 /Filter /FlateDecode >> stream xX[~ϯxrMo|[HveN;vfTud:h4r]]n>Vo(H8XF)2H` އU/vIcٍ nQY/ ;4sF˦?_I`RhƷ%Mb[Ꮴ=Y۪֞F-#/n2$Q{(\(qu]U5q_nNݲrsnT4K8<^JF 7&D^tl*w;mLq  Hq4hd 38砀պ{T󨊓oNaj(2[pd>|ɏI݆OYEklZ8iSS5zh4kZ$Z9;A2v-,?$-%X=aDa4% a!/^gwY/+z<{S}k(MFq.\)7yX+3a&kB&U%CIIl,W/cSYtg k.{Q^E#QM|zH"(} Quk yO"ngG% Q b`=0*Kby~C'bNH"XW<@׺pIyGZ 6QyK.8焲&Z)bGBŽHe˜1e?dsr*|˃ IûZ/>K9IS]]T>h_Q2K{>:ezmEm_{9WG pÿMV/uJa@fK8Q$@}ڿ^a#.PQm-s{ khF{,+U u9OOi۪h+<"AIׯ&̆H8.[پJ0o/.ZE<(2ne&wgw[N[kK-+)N]].cYA' N6ʻ]c$|0PN,8v̧弮{i[.w6cvBu<5KФkȍg$MKP='T`r"#qPp!p{{U;NN_Ŭ/MڋAg@顣:v8&Q^K7 KFYL$E/6ߡ;?FFF?S%#N, 2ϸzK/@p넪9=)s~/ȱG4xvXO^5cw\h*h܇ m.bjTF8*3,Pֶs# OIXjA^Bmp ޣ)6zWb?!7S@1etqx)" endstream endobj 432 0 obj << /Length 1561 /Filter /FlateDecode >> stream xڽXY6~R}tShQ4( }KhaUʳ=wi5Q#vXH*v;\|[b lt{CKb]Ej,Aث/a-l/DqvCjŎ0 -p sN8= (ۢ4v)Bu)_a܄IpWXa 3QB0ČfKTfzA8T*),:~K 9i &[L3r`d6-r`S8և\-WXx`WPްXk^ܷm{e^#W2+Ël{z,߼zd+zn .x!VwK|2fjZL2O;\`&jON\WOl />ErWY0JQNQ޽H(3gXk',B+zɌzqU%P?Gy>n7J9֔ԅK*goCcv8 (НS<9Y]?n0کzW9g`}ˡܽ=ziy@oͱ<؋VmQ~+:UIض`oE{(EiUi kq=h$G7NhQ'w41=)Cq3!UMDHwn&(seχhʀ!' 
n.O ذ֔nn0'Ԝ O!I<'~XC()tK [-h&1.2ĭ|,CY P˨,!Ƈb$Ӧ(H|2V#Vs5RBcPӪ\ >ײ~Cq:~qo S97:p jWfNT5C!o=Sؗ8&F `=z"(38i oh2rW endstream endobj 445 0 obj << /Length 1202 /Filter /FlateDecode >> stream xڽWKs6Wԡ&B$HL_vg2VOiQbw mT"gҞv[h%/mG 1JhD8IPѨ S-xhVW˷oAQ'K(*(VY⯸YaX&~NRD2I÷2Ma,uލNB@  p+#4Q@ V$Ob;IvroMLB,CMeRP2˵PU/ZvmI\F .};|&8^s neq!fv۸Ms[~Z[vV(4_d?8ޒLlZ@1(VFbna|7ߵTORZBݯ pQ\{K$y"O7M*s-ſ:قF"O0 bߊ4>OkX'i=-l(`OqKD%Iw}YZgG1r}XBԆGs3%_٪I^[);Yu kDOIOPbD1b|2YR22XI٠Qu-@,u,,or2՜$A;Ƅl#zmU]}hZ'ׂ!WB Rn:C0J̈́J90$t-< M7\lBCPA0) ={o0B߂w1x ,v"[#> stream xڝˎ6 Č(z!E6A A=lWm6N3Jfpf8o9[s,~[/#'eiGzpc`c"u\[~^# Cr r+Kc7?0^ZsEgc%U@GO ?[繱]Ss<D|R`7lhuY{#CcTzt7{[#繣>%5UIշ"kt%pV=> stream xڽWo6BlF[!6PbȊLhq~Ǘ,*n ɻw/Z[vrr~V ¶\/FX̺ICvR}Zz~ {(@ĘLl)hxlO\='6'x{~V́ǵY8D!$g ߶g?.ķiz=b-b?:_`g`՜;q{ՉERB/b%m"V:)VNԷ^w)¤#-5ؾ Je@cGTZn3ҪlxZoNW@R6ۜ$][IK$_NUyٕ;| ]G<(_vaD8>ܻsG룖>>>/kD_of3 D嶼+=jG>x "n(XU?VZ<ځ"@43Iq#2bX,?/N8a?0H*a$iUV'_bEtNDJP5-^φ`l*oGG_c_\jfn+ckz!3qHLO7dl<!'r~0֋\E*QE]c8b׃`X\-ߞ0dYar(mXıtuXˍL4iM75 U Q Qr [nXq`TtŇ̫3ǟ)/l #xbPGE;ȪזZ޾}LQlt@x&Q( -St՞nZ0)[!;ʬ{bɌKHH@l|K }M˻+X&.f%J| yg[:xiߧ*E֤dȈ? endstream endobj 487 0 obj << /Length 1430 /Filter /FlateDecode >> stream xWKo7W)H 8@Ҧ0|p^ ݬ(jr)Cp7)Ёp8Gd5'<yd,#!@χ^}aʊ<}jjn?}h<̜=%U|>9'aiP}ۦ܈jG,)yfMqR.qVM T]]l:̛iK͗7ˆt63O,MXTĐ"k6-Soڈw= wniSj\J=-*j[q[fNZ"e$,/Uryas BSQxS|[7Exh@=Vp Zr?1QJ#0 LC?CCP%?=Ti4 xbZ⠳&Nu)A^IAt״)c}ϣ߈w"D{4;, SN2hVh.`;4$GM ):wQt^h`dcCiHk%xD:*{rPz .`M#oȳ$rzMּ}x4<~f:K|{:18K}XelD?1Jp4LY3{3zb R.UYAscsshF-Y`"39mpK[ 1)~[|ZEXy~/=\Vx+w(d$h;K%7Gl'Wb)*R*U~mg$Ip E$_::,#1 K,8K^.vtGگȍlry ەBsB0<'ڮ勳AP_W(ƥ%4_>kt%Bvw}&Lg Бt}#~E%t}o__9JݵL'`iֵLv#E˚rΠ?3S 宩#$>?ʛ,~7|/Cf?Ћ2v5r˿z1? 
endstream endobj 381 0 obj << /Type /ObjStm /N 100 /First 878 /Length 1794 /Filter /FlateDecode >> stream xYMo#7W\(U$A|`vd3u栕{%Hr[VmEd_W"Y]pK$PPK ^WIvv\թZ] yĕ\Mgv1c*EJ&]x(F @KTQ-he0v}@A2*"q!=`ё0xMSdGAoTib,KtU}R$TrD[TleD웰(BޡFI#M`ʼnP(c`*PF8$g4\fWdT`MU9:m²kZ*+ ZFlptX0p̴B j`Ԁ2, NVtYL(5#͛[w͠ܕ_oM=w/rqo޸[6־}VLA{9oߠ]qn÷o}z/&?u7[lPU^ޭݺ9kGw3|]4HONIїMV%zn(ޏi_/W7ݪ>>nuliĘ}# 55 U3;7]}\t\?7`ISk['9Rp?p?N-5K>KEP\(u2%*}PW[/vA*pVT)R9lVM^O2>d>?t!^,ŃXľ|D6 /ԧ=֒=ֶ y6xY-eW̃z0 cX &C?To&INֿ? EG}AF2MIjOVF}*>=VטR Jrs D,aeۖr,dvaFNsB/;d?0:L `0U JKzqp8ssOS(s,]aaʻ:d\& wRomFEv8I"soBf%y{fX{P UC{\Xws|? =|21M!3]TG* ՞WvϱX={ֈMD;aN| K{^xٽ=݋؅ ٧{%"^8rZ{xoY7'ؙS_p9s4WW.%QFj#Pzi %)m)J raK,&(Cmt&EW7l1!pBI#$ '_$t@=ݬ_|{]Ҫ9[S`WK%_"A*/|V8Hn#rĩ,C%Hыc߹ȄtT}l͏k>{ֳv_wxp~ՁYTioTB݌/)5W=} 1|6m67`v(?Gس{ `~`C| ՗ endstream endobj 497 0 obj << /Length 1042 /Filter /FlateDecode >> stream xWmo6_!x"Yl-~OېmEj-K<ɲh-X0 '=w;?_8cg͒i̗N^%N^Ny,G?l׸s^]K3v B%r0v2)DJ P TW@]i͂LwƾEYͻ_~՛l0 B.p1;Vp+it5[E^1cy`x{x3|&uI-~bW/R+9b+Fu3Ļ5b( J<1>v3Z3F+A@JiK^?׫/V,:OF&K10(̢$eOqu@Eeזkq 'G3~r S+ Ï rz;W#S-Sb»aΉdk e!SNJokra`XØ"0bjRNXQ@Љ5bګ$yS"fBBX$KhS-+(T7pwjb +d? ؝5{&R1ѝ;o>˞AR=U6Sqʾy`ʤܷyٔPZ6bgOsg.yPLH[/JHt zwMo,H!NOM Ķ..7n7ϮpU`f~ңDy/ٓRdE%kLVk;YkK02ȔYaa'FI1r#1 ũllZb5S© dG-f ik!R?aO"F3/ 1șd3"m)z%)]׻eS*ٰSi D|o-B(Npx0 Ӽ; C/N~ĩd;/,J#PEF~  dt{v ]màCj=-α*o*>/_3O endstream endobj 505 0 obj << /Length 1620 /Filter /FlateDecode >> stream xXo6_!d(f1K>@&݆C/]1m %OwGRdˎۇ yЉ:,0=|'"l0J Dpי%Χ1&g??Ƀdq!3U F~zŅ! 
F( P+ o9S )$B# .Yx)Kת P&d*(-u,AĎIbpl⥡<"k,sC)T*1TWi{/3#Ѹ7sG7 M-km@rG $c"¨aʲg&D.c Ū,-O4@4ӆEH,UX6+YU*NUvj[{ Ь7YK~ ]fN~#A"™89"/JU}S4{~J2(E(ɓM]2AoMf`bc5ftJ~/l%6CN86nfmZE <ֲ<{lyJej9"OR]U xڔeQ.<6hT-dE\bn`z(Ը9cMUQzSW'ۢ٦$ dZx.B13٣vUęXxģ#͘|L'WVwE]mPÖig|FK;)5_[h ϙ,|hʧNe Zj{ћQo(12e/0ζfz){8 .o.]FRk 3׉WOGPk֕xXs3zO}֣!8El)ë'`&儁jstVU/́Jayj͝ 7P_ieƒ5Po҉+_&LwNTmC4Lq]/yɶ#HWf/K=4@yRmz!_Y,= 6YZ>%qQGj]TzÀ4x 84Qfil=Ƅͻas]^rq}yrlDkgBӱeБL \=ŕ\;9Dd 0y vLdX Ł<݇ > !)=xBWbuW~йz |h m[V#S.|\;ד\x;`w ""AІ!1f*T`tZ,AZ蟟xVLo`2> stream xWIo8WCe fDI`.L3AHMT%ۯ{\dIe ){h91gM֑9C=ađGX;UqZ 6GI.%G,<dWFxǸ|!*`NPa]4&Qj]-W/F,3Ӷc-9O<דB6xnpJA+5[++s2Ǽ v7|: szӊSRޜ>}J;oӽ5r'ڽGp梵.i[m]3Hۯ6x'fCg==1YY(#Ğs>Y'Ւz̅bg[Yz!-ፑى'xp_([9bDTBQ˃9ǟSw!_&#Ga(m 0M i6]ޚY۽Br#Qs,D4Cy}#I<Zz5ZKK\d!"@K uS7'6#1 04%?4y$<&Tn)-"ߊZrQvek*є4q[}t[*Ea:HQi?ݛ﹝e i=꘹}'JQ̆o }B=aG%zf;6U!ZYY LU`aJHhvV) (~d5=1+F%sveRX:Y"qPOAx:5,?h/ ߉׿d^SBF+VOsvF֟5FnϿٞ~PM;h.! ٶA̦z*0(㸄 Q\E5GtF3(Cf/y7(R{kɬ"q=iS:䪬a $YT+;,V rE\ d~>tq b}X#5ț #,P5385I^&#H`%1s}B JLO$hbݻė9VnojՋ 1)/r]aJ@GW`Gێ҈?N!$mCIs G %*!6UihNo(L+NRT}2k$)@B/L]qc`7bϱmʪI̶O="U(@doҮf:4AfCt1J6dE$OoϞc *BG9SNGV, )?{A{%Bd6|v/N endstream endobj 530 0 obj << /Length 1356 /Filter /FlateDecode >> stream xڽXmO6":iWb}yqP[r~cqB&F$q8;8!a_Byf23u6>u5{FNJȏ\0G. Y׹-x1 GA0 @DοZx#@xײ߶c=8_,НR*18RV{%=n¯lJIZ[ PEU-519:vIg2ʒg^,4EnsṥX39kZʢdu) :}nWLh@x;0y$/PJt2Ă8QԊ^~gUS> s (FQWfL* ]wmS>ɋ %W@Y~;c:Hw5G?̲3+#|#/W=<wى]e˥F=X{nk;ݬxuU%kQTATM|¡'=P5>dWA;pʢ!>(R#>ӐHZn2|%W]lLq˒{Dք 9_!lQ{h]f3;oH4$U\'EI4qhPc3s\>M&P@rJQf'i샩kI9oxھZ#AH\z)5&%B`Ϫ :Knq0-QƣCtXX $u#Bѫ a7%8ƹO96z֑xOVv݃6`1$:8-)HaeyO7b5$o s3 CĶ-d0 lz'`ZP[H=G:qte]U$ βϴ8S2Đ͛"K@: ]eaq+I\sXxo2WGu`y'}mfY/o,LĀ;b\ބFGpcaG4hvN60oʮF@{Dh ^E2EG3e)F tFtL)e(E&훍POvz#3"윲 &h4AC÷7fIb٠wF8FV$[{1c P {FӋ姳a'9cDv﷿PaEH$qp \97Y.y{^!M1 2$)* @>X@yLЎ1x`2WX+k}lfɕⲽ'+kK-]g<<_[^cۿbǬ,O9{Xi3[$U/Xguyy{;C6=7tȬg7O =h(\O1^^MWGimi兄k{w7d 2SӸQH>E~F0F T@#@xll ̦󎷹k%K&h!$]n]6Y>e K'. 
^;0ϜDoNM?> stream xڽXmo6_!`(`5+R)fnK E5tE [MUEHNaEъ&{ys\O77W>u8PZmx >F !BǏzRDjQV4_XX5nO70"\?WkLㄏ:R4Y0& CFnʦ|~\5vLe1kaK+9v R4ÏO6LS0/7p/>GƼͮ'uɢUzmDi2MbqIަN& e \>DY7EP 4ϵUwu6/tպ Br#IEk7Ez,/.r{,t˯~|e[YokVfԠϷ:liVu2!}__,.2|g->q@7xdв:׋?fì`:A p@wg-o0Dy)٦nW`)⌣8 FSCǂ :1j򋨶eR(fE KtT^",x#|y˯M'=Y^=kW5yf^ժjV32ªD&K;c.JXxk /}5^of\$SlvTg/x~ B!xֲh.n7ǾqC2JĶ) sHcC~pt"JJsrCvDmDbIJd ,._5e~x>a}ax<hvtTçJ%~E}J> stream xXK6W[!VHl"!)mѶYr)j!3|R^oO7N:7O_$'y$ՄQJ( %qLnɇj5(QF 4i)ju)Ι%/ۊZwLPrQt7iB,p|91\q3ZI`Y33O۲4l|jr|bV[#F7SsHч^f;tҌ,dCbLo(H6 |YLuh|Oe>[4ݪug)%t[3\˦ۙ0KyX0M,f\Kw'%`\oT !~@( J̘SۻׯGb9H'v/z0L >Aܗ0);`J<|biw:i98Ѻ.y[B)rs12:R4:"a?h}`nw84cnﵥ#0) ʹ1 gGDɨsᅴ7^}YUfnN8l:1jф0x~ʶ#ʼn-˦u^XN(f[*% D `̬EF'!&бLHLQ|0 `28OgqzٚIv¿jaSɟ>O$Hz"{5G9friu'$ԹmK憇E#E, f\Ƙ߈z}Gt _-kO`}qua뛹JE7x|0Ϟe ]>W~wV]my%-Ex [+ˠPgQYOI#%2,߷/΁ehю>}4^5{!畸XuMǦ[]CvfA` ܺhj1 ]5`D_Nh}/OzǏð\]_4jso%7{ܳ^\%,:dO֋'qNAn$~AoaV\^>sFͫ'[ endstream endobj 576 0 obj << /Length 1295 /Filter /FlateDecode >> stream xXmo6_u(fc7QRd@/k}iMeG,zqHJxvagx9Y9i|ryÅPP̗1x g g·/ڜDF>‚=z2Z( buU.K=<%ŌinR=,3ϛ^OYf|uh@srLOI鶂#[лy(6{(moKIh`xB|n{~R^ i/8^CÔjг,Ss4We;ެYb}͉2lrĚH3 AԻuĔHӥ"||D0W?*Mab+a'K]dC r}/x હAzUyqER OM1B" ɤyt"PqmFrlJ(M\mA,n{)iP7zn^M(J!f:)X^;M,r: ):yB:|e+$_UwD2|ֆe?~uG!MKw`'#'Da~fThaU"{PO2Y[)i$8ԷffURW$ +\ZWtK76?qhv'pi:u^ـ5ӢFz7W?qSN}(=GĹ #ςգ!;۰O@f~УhY%Y)9x(f IG$Ų:vGr*Osӳ)>CmkH:ܣMԲe   Z"B~0^.y4Ky+t%$G II/ (Lg.| !:S9ۺ{Bf+]2B}`zJVLzݩ4/5oQWc)!$s~0 Z样BvTOC:p<<7PSH"qԙ>s$צ6+ׯ5k?yۙofq`|#XwsUZP&*|MBLa@ob]LEwU ~%.GEylTecS @Mp;}Ɍ@h*CDyr&bsoʬjoR\KEƌh&.v}##/I< endstream endobj 593 0 obj << /Length 1403 /Filter /FlateDecode >> stream xڵWmo6_u(&5W]vkVS[D;BQTNqwXQ^6?fL_,KͻvgRDTN'SݬnYw|ͦ5vU[ոvkpTa(wdHK#1 H. 
1jMroѐBs\AL ?g*ɭ 1IR9pa1 $8={7d2RCɖŝ:]NrR׺흐g:r}aL]U]qX{X9o13O ^ ֌\h{= "#T=#4xvϮt:1's6΂ztЎQ-[ .LJmooz|??A+F>Z$'L^YC$!au7ʱ& QT98*naU}$5h/8ńr1c3uo3{HdfgՁx`, 1Xni'mMߌ1XtؒTy$R9HWp48#Xa|Fs\97K 8F4<][^GpY e;~bFg _] I;oMA}x*W{\ qN53z*m?|Z9 we21;8qNHNQB~ (n:ouS̜$%7'|@e|Hs?@fէRp*RPbkPr\&b>)d g++Ʈ5nŊk8wmR!(N5VaG4mjcmgxsyfy6$bҔ!LhvN#ۿb&W endstream endobj 494 0 obj << /Type /ObjStm /N 100 /First 881 /Length 1658 /Filter /FlateDecode >> stream xYKo7W\(r83$ #@p['NFɐ$f凬GmlK}f8r 8_UH3Zq)F'ǂ68E] -QɮVI%A 1aV(d.a$t2 bIcb2`#!1ivO?xt1>5uC)fZ|gODGDJi<gcwLRH \Sal&cr7 xؖ6[9*&=;QZc L%v@>"$GNpV!s!o`K"EJ:vVv!dfo9Ux=+vVeRr +v`J5|=)y$W\lTm4}-rEg%*{;Dd^y{N/v,c;6%V_6r,P=.O[]f]sq%Sdi ؛!% ?X ''$ v`I54uӝFp"lŖ &z#v[mwY]||hUP¾tIQ?$6E8dT,C},_OgOIPU[ۊӽ}#< ޗѰ ջN2 H|a%%oI~FEh'p*丈o_MSEm,\}m$vAh0'77Q+ endstream endobj 608 0 obj << /Length 1813 /Filter /FlateDecode >> stream xڭXmo6_!( 5#R^@&C w_ڢ%Wf~ɒ0(;=ўsq֙%dy5~LP GɄlD6j³Ap˪۳"4;/%][𲑣ŵ?cgcFηṢ%1錽[bPm*Y)U]O-bEBƊCO=2?aЛ 'z%LطJh2^ex5yZ M[lE-/{X繯V)W%I5\ {?S^egw-xJٰ$_?kNDLλZ/.0#F1ubdt"?{pQ0ΝşONzp04!ar2ÍP; ޴nE[-72nu/O31Mә8A!䧶 FT,1C!ĭ.,=_WKB]8~UaFL=b7c >* {JQژ ?;Q }D 1)HkTeGv$j,(>  ^VF;bU<$Rqr>HhE쪃sFu[_Xן3߳bzml\*4HGt:k\Շo r G;^i\Fl69d>9aVDfǛxP[7`eS zùɄ"?x2iC5t@z0J6<7i i7,(RT&r&pEA`) l(}.+ϯd܊rwJMPnk!rvj) %B9Ofp_x]R5k4\J"oB |vpJmh7ۀ3&SP_MS*6dD5OQF#k &<@5/`C%ۭr!u,$=ҮT͌~g3cu6Ld&iKf ha"=cS8)Q#DHo@{W)_'׾<:@5 j1YmN {j̠(*$Z1`:Iۈ-УCQ ykqkk?5U7Szx=p,g 4^4>+(?Vr6fgSnҸ&"Ձ׍pP+*J Dk#юL>V(guIVu6hvy6i($v E ;wA$~# B&LD~W!-:2흪@BBX*UӻKVW(IjSHM[>)ўW'5}_]Ӛ63.SR VF4a׺jӔ rߘбj6T0$ endstream endobj 622 0 obj << /Length 1095 /Filter /FlateDecode >> stream xڥWY6~KRtwMA,(ʢcmɡVd>pD:+:_ pbn,x~CJue,&,廒`x)#i^S,#X g\ThNϭw^D /bsFѕ-'O3y+ Ү AIFbg6K(~Ff BT-OʦZ7=ջzafY˭-_ț6/"kpjs $RGo5r,8bvRGonUј4+7K<}B jc!t&e98 =D0:_05Rϕ(EcA6ͱ]%PF{R<]$"=>ȩW%?>Qa`Tyyy'Q[1}G=З"{>D $Zl)ShVźp$k3Fk+帮dALk6m}ObZ!chRz c8~jJ\?؞-(^ۡǃ}ZN3}̢A6l/Q.tB78=VN4k938c.&3 *W`+jIyRK&c/@tc:_؅%J[7,90gS 0AkWxu endstream endobj 637 0 obj << /Length 1920 /Filter /FlateDecode >> stream xڥX{o6ߟB dYEIASE"^-p@ZZZD>pHiw#;;p83Ǐ˝Ý~Z}&NR 
gvSbcxU;--xËCy 'whn;"P8 Յ]Ni_xKPM{|pHġ{24mOqw';K3ѵJ=370aSV'@% L61-h̋8wXзXKz7)OA+$f)ۗ{4{$!߶uhǸWR2o< %&ldhoMQ چfo_> 99'[<ʬ;f9ڳ{Oq/P7Z0q,LF oH R4YS4/y/_N-f VmM A%?)sy[ɵwK꣌'Qd!qήgwg(#VxN^};,HAs8ةߟYrl8(mQ; ^*ECOay[ uC}V Ϛ$)APZ4҂(f%T ki0XQ8ŔYVd }hr @:31 Q-0rXwY;'~ T# bP8G6*FMYD89ˋzBj|Yh |CB1'=rb0֌S2FW3#j20IV.ZeN5lwY0zF ֎LIpBz*rH0ɧHrѵbS*a(Ԙ%VD|懹ޯ 7#GmΔ| bƘv/dgeo.ouzte> l j Ό/KЈG&}GdzDJlz]_p =#]5[Tni&`֫ߏɴ]t/ ][X | sx0fxFzl է7Rmۢ{#'{a2Y"";mgsVE|3cQn u-`X%Nq,"&1:o1I9 tGpk [p 54TnXsa \:B,hCi[EHMfxni8AM,MrL-$u߮IϦɾ05:iȫE4w`W_bbUc|,1ȿ)a^2D LAG0zh*&Hi|ܸs}k@h$x(٬~Sϧ]jwuc%?9jϺFUv{^Rx1*f7сMЋu3'#VzEݪb7>AS'̂E{DLLJʄY ,P% A*F-j@+I$YgeǞ :=vt= e?4zf3<5  endstream endobj 661 0 obj << /Length 2163 /Filter /FlateDecode >> stream xێܶ_1}H E @nNRlڇ(8wF$NEɛ=v=vZkQ$:|w/~(e,E wIYusQu/_,I2QDwWnHp|:YVfqnv~ˇ~N^,@Mt# x7yWfDWzF^i:; ދȻuqbQ"dyx 'l+jGVl E9cAa {_ز2YWG$Ad몯e ^g@u(v{*}ĆoMw&Ҩ ~Z,JUG9DVgqJLO%fe_~,&drf XkJ |)9IkyCv S0D.ǚ0_츹g4 .~ (Q 0l% wU1K箬KgjQ:F"EU+e;=o",]pܺ5OЪ,R -JBÍYXU%5O}ө0thfsO)"7/".L3?󉤿%=or_*=#kuwgI DѠQ sH9IFWY(sӕʼZH |!!UFzqb:)MTvBR6=(V݂!%ah{GY8X&yj]hls:Xuk,1b 4I0 EoCN_ V6w|2D.mH)K$-4vT캶tVJr\lz;0,,njZW`՘m Hy,lT;XX$ȥ5Vbj7_0Pji7l/i |M,;ʠ=<1/TKG3;ԏ),U&?bmp//G|U4~vLLhr2="a+,cKǪv>|ȡ״ȀFYѢDqG |w=L[`xnp@8|=Jњ"tx(>̣Uب>ͦk.n#f'"Yi$0k2Di[EkﮕV1۠;&̎>1j?h"o &q_bdğme7ddbr6(r"li+j]mZcǕ}%ڰ'VUjyM7/+ǴBW是Zſ絃4oêܠ!Ak V6f"pk`82upn>W!^m&YP䷌lx551(}ڶ5HJ2B|eSPZ+޺+9B\!J2pJz<{V7^U2Nd<Dq6xmv>ka ͸R˙We1$A j7EOR29uON8ʻ wqx/d QfFct$u6A[] yT:01U?Wl}2}]Ly=_ endstream endobj 683 0 obj << /Length 1363 /Filter /FlateDecode >> stream xXmo6_!dh1CR^Æ`mӶIt%ERl:utǻ?^.FA%( i]<1Kb|]OS]z1b#G4=iV2-*fNMwЮH}:.yDg^ à1<$B1%Z7gC(UF\7) 2UP{!(a1k W5/׼j)&)ټ}'r^8)?,N? UV`7iH =6*P`39O'fpJ3>y%v;;ps! 
[L(yifV#Nd>N 1mD_uܧ(ž cyr)J^D1l<ܤ"3^ML+Us^`p8m 8%7~.rU[l\!Ƈ!L[84:fRh4qX)F-EzV PJKHRuZYWmPO1L-YOsS-ݫ.L*/&q $KVpnYX(O"D`4´1Q4p?ިm~\#-cZ0-lV.zZm*T"0k_HCGchkjHxl ,KYUC7 T"FﻡAE!|Q$zon7Ip I Uly쀛L!W}Pd;a'/)J6Gg;ӭ kG縦/ qOfvYq_b(&OF.7a{kiJyFJUQN}HoQ{yGiTݎz6D_ļIcw?3d!ii2,8 ~ӃR0=Q⠔$MorFc%^ݖՎ ,B4fvE)UiPY0+SLBsm:x>f'Gv4@!VL" `j^F(1BrU;>x\,% {^,vpa$PXj=:߆ GsxnsvsuunenK3ncoz!FbzIEIĢo]wTe9s\ С3Uk[/;º?*x:䓽BQ/׽4s_ endstream endobj 698 0 obj << /Length 2636 /Filter /FlateDecode >> stream xڭZm۸_"f$R)ŶM-H6rWh^"K^3Po+h< Í{ӳ?{&n:p]0σjִweei.kZ=r{> ".2ZgI F]%jf2?a'͵„0ǒ 4#ed~~q #񉞛yh>}Kqۛ^ӏ1j׃lҽGS\춐4PUu 'K ψ}WԊbla7M Bӕj.w_Ž<ʃvsOi6؇K $CU ^`O97>ح1)|r* lJp(+EFfyκ&9]l yIat/+"kh#6V*mgr*`t5y'N1$ԫ}7 NGi3Ƀ"ǝfLcvG:nG}[sT(`yi+fjx9u&DA(;2,y.Y,:|}. 1_tsle4m}Q"Jp%\_k {|(ؼ 22&3<ɚ3WŃi OЏޮyr`gl8[m)@[B@LY[Brqq|l.γ K$O;p>j']I'إFuXw^`*(P%.F'8.;.|@F}a9ƏlM'}%T{aaU.LzTu(b\[e6f}}>4F!Ip><{. i:5[S]iVJqҕ{l86UpVH}_oȭmhgYI` iX6=3F93J棒o#9fmؑ %c9z#we]3ҙQ PImZE$2Ģc}ؗm>Jpn(&u|'] y&W[{bb;,.q@688+B-tJ g7Vھغ\EhT{@49A=}YʤsKkEcGV@e4b qg~^] ㋰c"2Bvݨ&BEI?=\;# Pޤ$4b#keSΌ岲1c>{nY+2tKXS esH߫kEW<ұ Q+7Ay:>)gl]&II.Tͱ%)rz+m>* MWͣp] ΃=>M3Ri<7ZMVPQZhYmsxIy{($Zwl3Tۅ٨Y6M^tiD ?(KteI>hfEvW%s%I3<ĻT[4ȐZ&B/soi~I/[To m݊ΖC[gp8 bt-j%V{+lja8J'wN% "M;QBlD(i[aA*/DbP\`i0sg6+J endstream endobj 603 0 obj << /Type /ObjStm /N 100 /First 874 /Length 1833 /Filter /FlateDecode >> stream xZKoFWloe;1hɡb3r@?PJơrWgΐ.΄`\o0u$'GCd(8"ÒLdAV!1 <`RҫBΐ.`}1;D@7\ń((ɬ3`Xg"n7:G?e$ %{#GM"C'pGVzhGP.m=9 &Jڔ.C y/着$ j&%Bn"u5()UCRP!{edL6{dԗݛA'iz5Y ȃ=Ymox0*H-7x5#,- kmcY]xGX^MԆ;,rFl6YL"]lgc f~`ѭ~X}_z7Om2?jJCpgùG{N3{>tǻcY,s +W5w]EtOHؼbi6e$p"+5kbA _&C~ D[vҙ YZR68"bhOL4oQ'Zn_<<W|)*;?\NrШ'[ endstream endobj 712 0 obj << /Length 1618 /Filter /FlateDecode >> stream xڵXo6BPjV"E=l@5{%ۗh5Jj~G)!I{x=vg4DSɣISVil#J ]Mwk(T.6
%(PHr #,P!!W6Y|N$ #Q|xX@3~cx#s{wuQb[ټr:^ΤI77{P{ezre !y]/~{XLyi`N^GyٴrsI|R',;upN d7 c`hn< %3|6pYQQ[/r;v*7^v$Ne+d,uNLo߽v~doUL⠟DBF9e Y 0d,Y<DyFǀDɴi4k}?-a;hfɋB(l4?Jd%Ryhjա 6U:!!TQ@X 1\^0MS]m!D^ fFX /OhX-^>tQ. MX`ԆR:J,]*ٜEYk㈄|g DXK,mFy%O&Ʃuh{鉑eS-篦W 3&O"cSy9!}vy2l3"sS$4ġF$w=s=s17qOjgl:VrkZ q][NP<8& F! c~lV!_1[ &aGX[fĬ3sJh}fҐ8|=-T$F*嵸#4R!xزe >L# K:fkq!' ߈.fg 3.([^mgZN)l1Bȡq9Q[ ]w;̮Ek8!!(3Wڳ } >{lMsm^暕xqSZ WY^Xfu_pk_O͠? J<V endstream endobj 730 0 obj << /Length 2142 /Filter /FlateDecode >> stream xڭYm۸_a8 ČHz z-pE퇦h^ 7Hn_j""9ÙgfѺǍݟ}$&fq a]y~ IOlͿǦ:_.ga"%\{mw;vKWܞV5¾ϫgU@,MUlwиϴ9 y`_&xj7P!~ݱJz>8׽'!{W[!m CAQ5;-[8d<;G#c{e<%=vѮh5 ,<-U)pf c$UQ:Z*za4h 'CUٸahӪDxch&"T:y +X 2 m7Z7ೂ~o-N $QZLι:@8FUpv;VӄGkըNyO̗ TMA:RY06`<#0b,+BA^AYuv&dL:}ʰ]{CWE X 'p,xzx60s>& X@.LvY蒕YDi_Gf@ EA  R}0}ʻ+" Jd49s3qdN5+k;0"-"CNh0R"p2.\8wVkKmcm1HX=~YpO^=3`1\HEdI΄65!)Nm~"R2@# FE}Ӷ?¡M20Jr w< yk/5^vm D%5)]=\.J~gi?ŔrP譺L39yڒnEݝmxWZٔ~s>秢쫾|XYrOk+}0ŴltwjJ6.xh4jy2!/$藒ׇC$?R7Y1CЎVXkVq`VoQ+QV!NgNdEkd4C`d>h1:!“|TX3 LA72YҬ DJND@ϬL3I*B yIhє٥D)`f<]&]eB~զJ_L&)A#@S,)Rbrj@l J:>ΰ ZU| B{ ONY-S΋f'#&">7M*^2#rۄEu7D-/2V/.t jBѲmpx¤O:IHo:ncɊ7D@)E)WYͳ0N!I(ѳմ4`c.<8/~7YmJ 9 B`\3PiQ&ˁo(-PiUƞ@SpPƳ r9{xEcs=ƚ:NђIl:,L#3bjGq=%S.u=tiv4kkZ ܀/ +{VdM!,tkbשo@^%־ZZ9Lŧu=zչ YPûL@ endstream endobj 748 0 obj << /Length 1032 /Filter /FlateDecode >> stream xڽWn6}W)Q[Ң-E}FPl-і*;IYmE"@Hù9אq!\?ڮkZ t V<eja}]Zh-qNf_9*.ph9\XvНژnOmAhӆ9p_ 7=6ks4]18@V5+ny!Ls~Ci+nql4hX&y٦~| \ ۙ"`T)@ʏ46E)Q~81&;|goʖu 6DE7?{/VsE DZ_Qh(B W)QLkIj.|b=y{ez4MZ{I %eM7 0+kP~a2#UO R^5h +]^/JD0*ͫ,Rz;HS51;ⓘWrAYHxC1XI-;qOoVH[9\<Uo#.5}$}ąP-!/\[Cy"g>amnqvs3n"R-eXmje8OK2*yE򊞔GtM۬J̓"ejkwwCN|%+v> v3ҒQ7Ҷ[f&O<$S]Cwa0ҌY7|hMSoHaVkN4l=ۂy=-ؽaoW Aei!7W2~]~|f endstream endobj 758 0 obj << /Length 1593 /Filter /FlateDecode >> stream xڽXm8_zH^*vV=/8 *Cv/`I.9U|{<~f晙`g`խ;7pWy4pX}|)ly|-|>l 4#j [/|i/]̖`ױխ7Pa_ɸ'bc<<+3Lx͟}M^o: )Ղي'>oUV(t[Xe%b^he&fܴs/5JsY %ZZj.?/*89Ov ן<bȳ"hBo~fA"V Md~zQ{2҄. 
nob4'ҜXoJ/⦪/k{o@#`GXEZ[!єDԠ;a r∏N^Y}ucF.pg$'v9g(k~H)W=Enڽ+B%*5vi6s 0'tƁ?dBeRd""7qBS=mE87O|[*g3NYEwS![ %eH\\XmЗ₯'?Ŷ 3pwhz]U֌M \W0?- rZWj=E!_3PpW28FJiR_)^"|mFaYEeƹڛǴޘB\$՞nվހ [YVojp.rogψҥz.qg$C^u(! ̹y4^t=1$b@C}D/NF.K6ɭGuaNJ6-,(&eQF[[LAUm2><~ nz.x@vsCA4UAj!;E8pGwh&w7{Qt|<aDʐz. U՝BHbm>έsEc5AD-~r]T`.lCa"`hiBQwŦ+Q ./),ԅP( ]\N_T?d]+ zC_Ip*5m`OpVt`A#N}abh݋F_y 3oE.aT0u`- 6797跋fKFq-ܜmy:ʼN?m9HO|䣀  >IRe^;' ׬GF:ЯH[WWb|JsHYFPB endstream endobj 778 0 obj << /Length 1795 /Filter /FlateDecode >> stream xڽXmo6_at+ 1Q/6`ڢC k/6%OxwQ(NܮC$ywϽtv5g_xFfGHg@?x%SDs,U]?ΰQ'NYt<6qB{x%KHGWP51bEHjopwtHxZ'sAb3vouXt;r=_pMU!QۼQsx2K6CjSԺ;kJP![L O `)*jIյRċL&TX9yZ^gɐV!U<#2xO d:Y5F8b*|8۔uYP!jDbdVPEg +4ݭwÊ93'|@2Hd2 by"!=w{M\HsPh*qbaFjł0vPH3,7(ܹיQfd"P*]ȣ}` ,XDdpU+kڬ0%V^§.XhxP6ʢ;Q YcA6>d(+Y8  Ji_ '1>|F ?0 PF (f蕔B@\KV[RvafQ?G>dz 1#wd(c?.[E`m5ܛ > 2\lp?9jd4fcrƽYY++ =z(vvnʍ4!e_dl=d Jb}Qeזd,c(;$Ti@uM{hI?a8*ui Kc KsJLnÓ&K(CGOr"^~sdE)mh0l;Nȸ nιڵA蛽u!ޔ &Dv ܆c6}yܶFFfr^ :;Fr~2v؅ +r=j͵mNjۡVLgR,|> +Uw{#&1{?%&(cdAvBp׮l'Ǐ?ykI#rffYLich9ycse\&FwB.N93M?]8s]I_P"iDhH x=LI„7:ٺ25LZekJk頚-\n~zԠn1tM@P8 J!+ʁ 6NbZ3R’ԿZCխCzg@}ߖÞV PjݝN0'z\pHسx2w=E^GLKnVO5ϋe]Uۋ&_ۣgdyʫ c}rׁ:nW]{dk +^ "Uؗ „zC`WWKA2&CAɁ{ٿgH endstream endobj 794 0 obj << /Length 1319 /Filter /FlateDecode >> stream xW[o6~.%tXڤOIQp֭߾Ûl9nl<߹;[{ػv=y{A,bz0$ƈw]x7hkז\ο\{ -QyZ7 [ o/B⥠GJwR'li dc}nj`VMW[L0VΪZUI# &{Q.TZi%hF1Hf`p;ZANJRjYYc{v~cҺ~(oZ)e&i>z-ޘh/+eb5 }E/%;;c:V73U{ZT[SF<{l(3JEm/8;hX$=E0U/4QrZzĊ8mB, O }:&P܌/˪Fxa+V,1O .P.V5[~^ _VGs.* ݽ6 (Yjs+J;ɰ޻}eLSTSvģw)3R2GtѯO"hכc{AՎ|V1uaɼJ8JG9]c%,nf"I XX(RAV^CBKjqhpORQKq{a eG[^(K}\" .JZOfrkk1 tڻh.GιuSdn[&xjt_C'֞?]6NP&.:{1qܢ6G0D%N;nI;?epd9 .`(O"zdwcږ S#=3€0zm:)%抩NُR8)pn R0.~'tx 6aMR#0Huc?QHI.ddf-gLUU-s'm[Ӳܚײ:^C*$bQLoG:m]]H^XE5Q oض;}C3E]mIy`PfԨy؂Ld*tGkd&y@W=-[أ!5蘖}enNyr!*8Q_S(0kvFdei[$BI/A}'g{s)T"Jbtdϸv&6v.ĩF[[g#t'`@W?@"irQ$٘L*OvVaҪ6ʣ`s"Q endstream endobj 814 0 obj << /Length 1647 /Filter /FlateDecode >> stream xXm6~bU$K(R($CSvW/}g49#9F4^^m6YVW? 
O\Y(+3_ }{$I@/ȧ#ܐ (vdp0 Z#Sr.!O<^Bq.|" ^[HP՝&iҴy:%gJJo1LCαu% d+ɧfoʸ챟?%]4FoV7:;Pf!0Jon1(D(dLKNe,aOU$'OBeP .6G)&QOfX P8c`粱L m?09>Lv97b,r̓ӝf8Uq&U7 Ea4D*3mN1a6msAv8^ozYLt[B bڎXr>It >]]1POD(EmK^ $oĒ0BL%=>p1^!hq>60f2QkK{qtN}=1 RքL9nK\~6B{Cqw6]^#b"Z!¡~iA*GDqʀ6HBer[QYⓋ}xBYI?\3}ΕikX_g^K_z| eKS|M)h_נ+y$ZDp( j^R܎WayG^Gp4oPz]&'ElrzvTĖ7l>ؗ+yCY"9Mݱpgf7(+1K;t9gr4Ү0]_Lǧ6.yEMit5z=q k0ڰ;S[.hdGlXCs˪pƨvpPS몍93sN+) PW} 7DXuXUh4Vfzs 5<7$ endstream endobj 707 0 obj << /Type /ObjStm /N 100 /First 878 /Length 2049 /Filter /FlateDecode >> stream xZKo7W^(r2 h"ɡ+Q2$h}Yutr?|W2M5zՠTqPʡTLibeBP~Cc@B`0w;RqE#J"\@^Gƌ6tė#1׆ CF0@VhRFrw JZF+@}15N Y)dԼC}WT0k#Mxp>BA 4H&0:,`4+E#%'1t #@iP CR|U }7ӉD S@`q_&A͇hqY Z.V`Hc6_(AVBIЧAE%C#V1 Zqn .x (R+ t\,a0'ٗqƗޜXzo]ˮBWTZF%S[TX-@[ tPK}meZR΍KY5B:yt2}8[O9On /pėNQqsἀ{1syžn2}xrGaaj:nM?lL_V٪۔o#y"%^mKL?6ߟ-0QD;oo &ӗogL?,'ezGplo(HXJs &L5Gņa<(%" "]*y~C7*|Up*>UF7-m[ӥ F$")zrdq;rqq_\㛏]s\lOf/|% 19jg1^vB\BJLb/Eհϓ5y5N6y7"$$"dRN@g4jGDS9Z-e<l1,5JI؍tD> stream xڵW[o6~ HQIbKL;Be!;r'R4ysx'V? QhD1!%)R9h@WJƨj=8퉄8^q+2!^(43%8^{WN.7U$3Rr=&hl9ދQ] y.'MA b$Fi1qy1r#3 \1MxTʭhtjʲH>m8+1^%cMNRUS08G8il$##0< S:J\yrJymHK7)n$i2w¸+^+nϓ0-JJRkdd,E]v3ygDA-.Wp,q,p D6 xC!Yao2UBn\_vO}"sV(L?.nٵl'OI i!VeHZ&X閺[- xFYS$+9 q!pB"Ql>؝\6ᔍNf[3e!{}8l{(_P>|@ o!ORxyI(./6R#YE-{1B ZNJsƅ.qAG/j7A.-BA&-򒾯_~7xB} Of g@^҄cFٻpa'4⌥ }[?P9h*D vnwq47'XA^h "(3al:wB;l2bafQDg0snób1uUh mXiZ\c ѳb5U E*\p)iV<~2wx.!!]lUY֋ *MT$IW@xî/ N< 0(Tjf(( endstream endobj 847 0 obj << /Length 1460 /Filter /FlateDecode >> stream xڵXn6}W.E}HdE@wCBʒ+I;CRȹ,Z#CG_s_yW|1J@Q"Ds);+E( 1k* Gԩfˌ8i7=ěmC^o H(3Iít겲q$[qk+p8`@E%?a`cmQ2/ٌw ?6"л7K7 |V}qAэc(yqh˨(dM~LcEV.)y))[Nrb$ 57Bn_dNIkBB bR8Ivbq?zkOF?03ަIkv Y<1ÚMQ: LLgL:Z5"_ #4h˝zqN|u)o)UҾ]7c-3.UѶo[m| urߛ!,ݞ\~TVE z:2uZ rr53K\W.RN@'!xwY+z:\XkIߦX.G0N4q&4_ُ)՗L?-n<2=Z{HUxQ$lLZS;u4sX.mѾ~qòBBMvL,={fH6Z.φ"W$ɭ9?< UwXb•KΞr'h#ДJ45=aj+-;R9wW.'YE>JWx TfȽ@?]`vdUarpd{g RLu0aP~:G ;57yL2!=5[,*/4\Lzfy@vuàqCcA,68QpKAqX1;AhYCfKku^ ]܇69!U&2sĶpaڵM,NK!rK ćh48 _7{y@vѯdH۞i ٯD`KN&J8Fx|Uu(G7Q1g 
Gq"\:f;*h<-TBá1^*O:U?DIl"^4c<'JOB75nt؁[oҼ+]޸5*;=n&E&gtߠA{gWþ4ŕ.{5Q eQ ꊝkwkj٬Ӊ"чñ ջ4I8nChĄIѾ6ON e;2%;ޟ>wQkC: ⫖n'c&>(mPLW~4г_1tF endstream endobj 857 0 obj << /Length 1249 /Filter /FlateDecode >> stream xڽW[s6}`/xfJdڇtg'mI_۔0rO7nƬ>(Hn|G[W߭PjE(b.y>\.>9'n`_cɅ^ 6nϊ[R06/J`SQhҝ0{0έVvO7 `|b8D,NBhD~(wkǥ5vD/+="Ur$"vdybwV@(ڡkms~ &$ &v!(tD]A0,v̩ OI\`V/ͷy]\+uYzeYch_BM0 N=':۸͛,W'!dЄC?y?[Su%(bmk25!D3sD19v5PtU6RYIm|gE#;7哠r^͓Ꙫnv.^+Z-{Hq ]^ (dؚxpeulUбyϷ@yڡxɑYK19uEY1OL J`Gʦ5(aoeIMv<[Γ(7cgQ:dR^@Y-|P?Ļ}%9#GLP7Fh,%.J!):ZL!c΅)s TԜ …VSX.6Z.V[zV q]I+˿hWEP0XkOPQ82~]/>\% 3D,sJ_xЂd4Vg%b5xJr+ 0N% בY=(8ňn:y`č(6m:a|I+tSl E!ˍOظ:i@L=4 endstream endobj 871 0 obj << /Length 1862 /Filter /FlateDecode >> stream xڭXY~_ALVI:IlAcZ1ʯOUW7/QMeuU-|LJtLK<9Shb%]!T_#NFLαwvV2 +q /"7E)۷~Bܘ4D=`7[vEu 2HT,aYy]W-qx?Oa8-X,c"MOesvM}F^\Dē$A'4Mas(;oa m~rGT@9%~^ !fyE|["2LcN#\olMhHұB *w?B +W`GeYF?,kK!]s2|2A_|A=O;;uCĶۖlb={\p[4'Ԉ aQR^:k-tƔ,!r8O,d e =s&\eƹͥ5ᥚVPIoR7\k&c>-347T މtM#r2U׾j+{&%hH`,etWI<_% 3U.͆U4ŮPi?E3bo믛syu(S50^}o\=;g#n ]1%s~cʚ\]58EٲY@e'd\`/,y}XKx_7`/֧}B~AZ$Q`twٱECN(j8}(ݞR}J*S0EӴ0. ZFOkfݑhrXlsQ^z6fG]:}sCe\X4qz$]q= 4HyK[IlqJUGĊg>}`= YmStKe6;Ik?ʠO~4&&Ns5aDQzudr16Bt)\n$f*.t Q4E.ghŮv6M}//?.\',W]uMuȾx12mX1E azr&P&B9 WA`a9k)Ky2M1]x8Y 3EbNz[JZp'g/:OfZi´otx,| ؙu͍Gzs!ܥyCD,$Y^"/x$Q'Xɒ$}XA!B#HHr-*a6RM3(!&h> stream xY[۸~ϯ2:f$R]M,[`;iŀh[]ZrEj'=H4siP 0"%眏p]o~} e f!"1[$,Dm[͵Xv:#p]F2Mo?!Iٴ1JmEkq\aohF! 
CYz.?zMnCdJ!ㆿ>`"tYWJH)CbHw_, ~e]q)KF#mtSK~'ᆕXbܻI^v_)7QnHPH @A9~84;a&QP2 J=ug);i+JZbBq=g(W^6շںW{ҥPF$Bi Cx1a|XF4_R9'3x$C)VVq36AqSvcgnϰ+Y/});t-:$Qx){jF7H^g.Q67DimD̘1wX(-v°3ڍqzE`E% >s2SB2#4#q@c )isKb,xJjߘ}D1!ΎF-q.HNKӦWf_>\l(%E׆A6Q׉ mD^!Kޣ ޔ|-\*LfA}ʵn0穸=Mafk(ĦC~=ѶMTY]U(fz1n1 3)z{M=5|.tn魮V?_o@QT(DG$/4}I>PCЅ % {b !!*9IRi;9dwTJWҥOeN^߂AxW88F)A7ǫ㷥't> +7WIXKm"NH%Vtːj-{1"NQRֵ;*@ޗ̐DG%>y Q!O/]@Ày/LʦI¼/-.ԍi'c?5RH(}@v_d+XW161 \јAiEfd"rN܈Ū4|m%up$L fn&ǰ IWYFJmʾM<һ.k^S̀4TQWi5wg뚉TF+n=WmD(ݣ&f]U7{/#Vdg_]wt( 栵&W lgIk1nm{ٽ: T kRq`TbG#et5CPz>:Vk.Kn љa÷ss mkxCшUo2;w$V7 endstream endobj 928 0 obj << /Length 1436 /Filter /FlateDecode >> stream xڽW[o6~ϯ2`@:`:`K1 eDd~7ٔ4yE~sHoxN^_|Ɠ1'<"Pe>kz/~&$$D1GT]KZ*N<Ї/> m[K*}V%!غKDh.( gBkP%cBy~q -i>T#iJ=Wl>׮`ͧ41hJDDx\K`Rqj`}=G7}Gw;e&(\lbYWCT/}h^ 3J-;" ȋ^(I8T|t<L2@ž@`~WlZہv#&N&^v,Mmڝ^t7H LmC.$i0^<6Z-'Bm"Qq95mob=?%֔?>莋2&8~Qhk-C yN$ԟ&T}6I$zosN2U|'7EW;UWlc{DˮMHYB%0s|71vWAZ.$QFFbP[P kJg} ;2yљ.rئ}w@㛲Fb=nWn7fG)iIpMG UzزUM[mNen't? > SI3SN[ɍy'1 p8ʦ7@ABũӞ^dHW=&߈FȮh xagy~Ă#zj$b 1wZВ\e7QO<cs1?E)%hIz'\ᝒ؛:VdS"%8 oV`` -3V:}.]:Eࡪ;^Wc3ҍ-0{sʮ7&%,?PIxQwOw'̹dSrjoG˓@J endstream endobj 822 0 obj << /Type /ObjStm /N 100 /First 884 /Length 2063 /Filter /FlateDecode >> stream xڽZ]o}ׯ- P$3@PHZppo<(" IFdٱ"&^px8<3}6ΰ/&&4lXЈ g|{&`B;CO&E6\CrFCg`CxP:Au"o1ؔHLgt1 <{7aJxHړ͢ EGLp _ ?x!1 |pFnya= ɠO}£I.*މCYo6a3b5KhAawp+tIFYQhfm $V+|VK1d똏"~D?@@d 8OA/;"$kc@Nb9ÆhKq8lfQh==uO {@H;pno׳ȅK{< ɥUu=^؃t]3ݞPWNv㓈֫r1$}]n} mϞb˕ i8v> stream xXn6}W)ۘ!)Qh"-]})mѶYrE9,ٌMt"r̙FKƜE>!#Ljθ\?$Og a=Dzب8t)١JxH_og!ø7E+K/|~lN ȝ4=DB?ʅ/uZ*;!(beZX:WVKl. IVjG H3zdskx 9pTS|% ^&yf>46fYBB@2Vd m"( I%cxaL f zHEV,1gyGX4¤R!Z pP , )"Q}q<'<5B:$0` YrJV "R`!zZS/y+1Mt}֎y6*vJvfjq+hgR@$o &<[#qíbn r? ^܈Z#;"6f^=W^y(tauӂX \M4YKPFFq>m]O33!J܎ Q*gS>% 4 =. 
T˘IdžI7*`U hTk k;o]~*J|> stream xVO0W4 x, 3oH Sh@`1}c9 e6~0BHϘ!H %x;ɿya,TxWBX/i%@#iqHųqT^o`QsXt0͔lx+% 5cqBA::Н4%/8 3 M0DžLŲ0s!aT22'2J-?"JZE!5)~X.>Ե/rw?iqA"|0pP5oL]~-B*yEFF"DV)Xd֡My- +F~RvfO`g?4YZJӦ4~ O2M|Sxl:2 endstream endobj 957 0 obj << /Length 2149 /Filter /FlateDecode >> stream xYێ6}0:D.a f.& YM#[.~}ɒHDY8}bd!běڟA%E }Y|8=޾L,0F)Xi/;U|9^ݻMwЋ, cS 7U#,A(J4y٫֛48,S?|z>8Dj69z@G`/z}NbڛtOA7nȀE I7`j6d,ss-H|?J8 hq e%P,8zjvz_W?G6 v[GǑ^ M iitPYZ= ]2SK6vM=QiJǼޡ@TRŜ\oZ+Jmѵ M'Ƴ tvKaVjUkooGzUVY?~[T.ӈl uQOm ":0$$$5zcB5ym~"2/Wvԗ,M G];׭cT{1*?G&)BsnW>We*!KM>`Hlq:0 Hc^x%540AUI%(N8lzUwH[RhG\"taTrdPuPB;_E$40_/Cڌb0hL=h,Zgʗm@)ҹ1'7_zɵiw0ِWy/1J7KYrSr֗PE99{Ɲ-ˋL"6Z=6|^)/<ܔ%=̐ೀ tvjBV1N:hm!1FS|BB+].rdi&)#뫥 u&߻Jм(—$&ѓRնؑnA2PgXޔvbY+Q ) <0S&1:' JR'iqH 6@a i+6SUAmH!.9" –I*D;Wn!{zx|oztf>sH{_/}V^ C)W.E 3k|4s'1G?pW,7mSH V}>^} _V}6dƾ[quUsRy\9@E5=k$];TXŘsg)1Q඿^8RFz;M}h:XfOC ߵWUۢ{nwߴ$A]ϐ 5<󕹻HݡW7E{׹WGn볹grӅⲨ;IrK%u4IKsgKuA}Bg[}ACE⇷S2Tԇi/4]i$|.%|ސq>|i'}LpTZ8΢Y#T*38J~FڿflJ.DE_90݂_gőt^O/+@ֿv@Tݰ1QI߾, } endstream endobj 965 0 obj << /Length 2143 /Filter /FlateDecode >> stream xY6ȡ Y(E @RpMv/mѣ%Vn>d+ˎ) [l޿/1lA=px -oK_q/%ĜMS:Z~3NX3!HǰV#4&ezPKC7#cGի5R+-?;5JvNROQcā?⎤nd.t|;&"Ê uZ:3%S;x"+Ci3#bM#7F,m7j./B6œvR1AR5[xy^6rS~~ $tҊ>q#5c"aCX_W1Ӗڙ!7@(^֛TufNE+K=DŽE9|~BM;(YwM#!AF?f',˪>J0Hb:'TH"1پQjV4X*{?}cqDlY!@MiԄ0nwf6D:kV5վr~9rNNe;cuA"~UOL"3B}iѥDeoZt!qx g @ʺjFE1EB7vh|y'q8x$=%#nw߇x5i/7OW ,,ϧkER:oPf}Q|p0 gZG Ӝ|A['q kFu ͣڢyѰ{"[<(5 Xx(T'/u'W[oD3 2(K8]+%A  b@FEv]:c s "mDlwS;`qDme`fҥLiqЉ-6\> pBO; p`z+pbV LJI?Q ;I]Ph€_v1:0M1:U3ÿVט NL@,.]\{8"&4&_EqN>X]+( '!شC5^;Z6]v嘪LEwwԓlMľa'xaDbp~OgQ.%Dc0l^0!;7HwJGAq/t v rЅd2jpgW?ʭ Fn>$2:'diT7źr,xW@¿GHC/Pz endstream endobj 976 0 obj << /Length 1276 /Filter /FlateDecode >> stream xڥWmo6_!x(f5CR^m5C @#ɶ;,ɲl{'V~[n8|;ԱK`lX/L<W70Aj.%kW76 z@nZ^1xe5 2 ,lVRi47QD-8AIJOjE׸W5v(l%TIWZԝdPWᰡ .EePuv0u<(:+ĬB-U!2zC1UF74Y{2";ڐwno? 
6#kzRb?T]Ŷe&zO+,7!GjԚ)M (IY\|b0@P2)^pxc6Cڬ9()P1ey9-\OXEn=vs]7!_W`SMR i6*͓2?CU,CRH&ԓMNSdcXx(Ҹ,?dvrP'@3V.UᾸֺ=d| 9 泳0K8Aس5`=/"s1G,I,*yރG?EwN9]ڣ(t!VYk{&DߙgyO 0(+.a:U4r,n:R{{؜MOQTqMDa&o3¨^z> stream xnF k< P +!1FTQd]}QL =w1/M)J#e@0F n#h0[a48&9R6*/5;7 D&2k)5Kd1raZRYpY6\,UfM-J:\d.i-ֲꂪSQSQD_$mF|( s\ 'm/j'}Gr[%-ſG)eىx-?ծ|E1iH JIsnS#\^^E2UӴ:=TB}u a! _ /Uޠl%j)i"\O"t޶le3-($֣qG4 vu8d϶JOS[k̋B'|ɉ]"Ҥ_X<WSU(kY4!xܜ6C4cm8Z&7*`c]%ũ l؟XJ mbo?=3㜼|t: :!|NƬeN,ޝK/j 8ވ却o嗭(dhbMe% טh ƢEҴ: miֽCԧՕERCc\Xvs sGN&{Ν**d-L:Y]u\)b r0ϳ_> Ceh?R_S,Cd?.HnO0|?S:Ɣ#TOzes{#ft׫~Z>OLXNIQ/Ve6I@|\[^~T%rhZ]^?5Lyp/_hnhf:dEgGMJ@!(عp46j-5)>e쥽lFw 8+&RmkX6,a*4jBxfbnbżO!9Y[8v2BRN#Ų+>AWT"`6AZl%]XzXvsF*hozrsI*]/2/Z4 O:o_ > stream xڭZێܸ}W /އГMz_[7Y8G"%FJQYXmStmspVUbuGvnc%52^U/3=O n"1&QpX{e:Ryoz lK howċ.BA!fDV}$nƢ o A??T=*V}- GW-iIZVV?6;~2++ %9;($8[`v 9v6]@!f iYMOAGȣ h?|RR]W㉁׆g=>6ʥrUgB'K(stp{V`;mDoaoA:cq[hXݰ:ξp}]KU}5{Pspy?RnmFZ2xܨSM+Ug :I$h +@"sσ{IM}^$6E2Q៭/e>gKgB~)_ڕrR7[hۑ}uH$We"/'bFR;Z}Q9F gv@{%Lo3D Ү߿TDcH " FZ}^_e1[,.9\%DÌe槣"O/be}q$YvYܑI;1`%N =6 czb%^PѲFt1Z -Mᎍ|u ل@ѐ-[S8u^s)06k 9Edl[%.``mND{ϯW(dBXs (_$=tjvor?0IժN:d٭Kƕb0&X$)l HSl!ovx%'6dm"0E&B49N2xqfW7= L02eiC~|hsa8N7x8H"M[8V<5uYG1XcnLOS?͒e-/uh`D`6DI 8E| Rq[vfW{S)]hiT&`l*8+u+ronf#h$7=Dx9AQp6u^Qc*$9f+כ>+]?nCU,1t*L)7vcMu>X|EnFKXs/,5~rIÁ4E6'B,GўWLz2(X:쌤،@Jʴ< BgpPPӑ3qFJ#4sH"N`^{]E}:k W;j3q2J@uw}ߨ.l eJMCp|.&0zҕf~06s͑sH05 Bbf'(UYf=q, ibZ3%61Hev]}d@PJ/c9I}YCw !01'K۔n ĭO`Ah˷~Xeql߾=xKF!#YM0VjEP%U;EgIûߟ8+iR9{!&:+QܖDvҖح6wّ7eu l HpC٦0N1iGU5: YǴ ou9*%6a~c+t-!]~Y{W.NpԈ7rloR! ӂ(bqnN=eՉKpw6&"0J6קQ;r]AcHic _IJ s@jhp:c?Zaoi9^2$aP/ekѶMcmӤebMk'P-6&+g:XaP4a8³ *N?A>h|m%vk sޤΞV[ח l`WMzȚ7Mej!E";ZeR-Ͳp^T,l>h2qi erZIwU|OXM>(*pNCJvJ(dYYvwkSqhR{"qE>c1jmvkGA4Q^-$[5} endstream endobj 1009 0 obj << /Length 1448 /Filter /FlateDecode >> stream xڵWK6W23 C&A4@M/ ZV\Q]ΐCY*!6Ù7OsoqꗫQ,OĻysNpwUz~?_}:Ga3.1дl] PHI%c f >jtR> ipPXxCjrs_?w@iQi ?O QSʿR+V{i?ʺW|G%[&P~IDӫ5 ~רŪ? 
;z b9O/ -G+iiUw&y&xk";emSrA5HfTvJsB_2,ŽL+$cd Ĝ`hn .ϝ*+]ֳ~{鮣 fМf )~Mp7Ut,+2b ]ЪdK•Dž$'C>> KeO^Òo+fU-g J<-{i{6 m yԖhC\iTLh@H`LPtݭ^jn )#†I>k3 FG{wcmay F ήc`jS |"J%kmZ \,޹"MdM'rH 唵RD 4/]icPhn47L#zd&$3F@8&q]w]]BIx2 x_4%<@ vy`KWͨROet*k y,fVteׁ)oC/ F~ @C"$"3x!xQ> stream xڵVYoF~ׯ[E.$V"ò}f/\^3|3CI_1U_<exC #TCy+Z#[3=/YVffD?w/n}[Y㌢{h.1KQ-TEe]M:a6>Id(ez/ͭrwCW#u+U.;;w)[ޫ %Mi8vӻ~3lbG2x|=u?H;1 SHά1D9!_a Gmi?pyzJؾ ͑P6}6d`YRLĐA{XkX>U6y: XG /6A1 E,I܉L#IJu=^7uJ~s1 e f˦˹8:&DF׃!;YшFJh&|R@8:kƌ/LY?c))0 ,Ru+ ^+KSjW[?L1Ymܥz蛡0%gmJ7PDoXtJ]a7eoƦ-TNe_U}[fb#:6:5ۚQC0}=d߿/}y0sb[ ?җ}{]#r9JM0kxKcK{QݾUW^[X(̑qpo1/йUY]PB/o\vw-:9/,F=ܞ *k`v>K:+H40z][+$C`])%^o'(Gۮ7Uyb6:'xƭP91]li3@Uͅ; >c =El+RDxHT=v&a> stream xYMo7W\!9$#@>@ NmT{@ -;eFQr7!U8g GC>Y 'owS“ƓMpbB$1>g$Fxɤ}>]LhW(:ِ#D{!r(Q hO2A1BV{;:C!1RXA0$jKU˨ Wl%F) d8xpP($Wz2M=9QIpq!"& ^%6ޫ3< 6 f&N'u|R!4hO1>a=uJ𒤺88} 8ëuEf4Q9Xd=EԽq,Q)#E{X\C0Raڇ%s x( E-ua?NΖퟳt1Fݦ${m=8&vҭc+2mJ 9[-ޛ>!m`hN9츅ޠ[ڬRؑiEī$##XqҶab'Yqed8&qU9Oy;חo/I_Y?xE Rw|oƖP;jb}jЄ np+52.0фLCWy3YG%5Ol}V`TqynRB#nы&08_/^x+1=8+N9)Yc9v̚eu )aCfM]Z>Y@;^A}bI|%es^) gW+7z_}If-m{]q􍚹$ 5ȇ60 yx~[V#WԄz:NO1f; nѮ6ʻk1b4'A vpG6ÁoEXn;2vjG0[Rߣy3j=$jy+l/VƍF%lu\wݷ=Y~:F 1= ߣIЂ, nrF{ endstream endobj 1039 0 obj << /Length 1452 /Filter /FlateDecode >> stream xڽW[o6~ÀR(YaͰK D;Bu+I5qo*nC"ύcizv'A$N#`_7k7=tDe "qr MWZa'ɷ~KR}h^޳p>>h$6m! jV\{8*a}mB kB 1hf[v232D663[sCH2xa-SS aLv!+:-Y fW38\,9h;Tr&`Z`#;GқDۜ^1eD 7Mɟ}gw)L֜dIՎ{<ݪo1on臋V?rcт嫗Շ;C(C)^ݼA E.3u!JUzef@!n&&PG1!( ^MH,/.Xq۪ o _F pt4FY[~Se,"<!!ajDP$צ.+]$ݭUk&j(F1ν4#>^@ |(So:s 9&*pB'WyQLGQE[;tYTqkV+ s7e3pLI;Wm!` FQ8TK P+ gGoA _\hI)1EQY=:&ZGm%;xh˷jɫXUT7:1 V=TĄ[/Ff{*؏:՜ԲQ&Ne1.?Y:'aP}+~Tbscmۀ+oͼad̽!2ţWU/ioXץTuhLj8dfj.CK:=`$]<pζ=(F[ӣqxo{y8k jҢb\ʾZ12C@df3Y~[@#0cf[il'pMKXCYoWmnSBI|VG+Żۯi$WfOvH_q^Xeփ;ؼ`kaW=\Kye3.W@9!f6qO'#k.<.~!Ўd9;{D0t!+kjߙj,1Sddm~.ӇfM qB?Y~܌#3O8Yv_F)U endstream endobj 1050 0 obj << /Length 1381 /Filter /FlateDecode >> stream xڝk7{~Ŋ ).NNm%p$H^r_߱g٤U>x<ÃMW_o'^id,ʂur$ 428բg/@ ВL8Iڱ9ϣ4۶Fc]! 
-v<ʲo O+ܱ yEFjUq+lH<tUeR4lP:݉Hv/d`d-#DOy0$Zmy, dKȗ35[iѴDɎqMoD4 PE?hDTƯjJ5}Giݸ7ZQYn\"MI\K-Bv^sz[18o{=K)ReO$`MUVCDXNeTߪ,yh|ڴgԬ۾)ٙ *2KŽ>_8  ,KϡUBh |3]Ao`\C5#)U? kڷ3qd-H;H|:\#Os]kY C~5ztĺ6O.bcama΢d}?QcY'|S9?@D,  z9[6[su-= AaY$YȒe'p Y\{GZ |Wp)>H`wA$,E6H| ?la}k;@u4-='?Ο܆Mx:7ZxUx 887w2Zժ8\Ȅټp&4Jͻ# g a~8ܬp<m`eW#.YD,_3g}(\z"$&NC\iV`hyP]tx, ]> stream xڽX{6?CQX")kHѤ!(-$Xh%&VJoYʻIZ5)rH8O"\rx,ffaH8$qHgr.l߿b@SAhaCeєB;l=\+V,AgjG GN l)S{h><:%zNm}I>!S+ $+N@~<%'+Ywa5vљt^q-n۔v |V\}E4VG_!Nշ GI) e>Cr 'u+~+uD fDe|h_n =nk{uc0߷VvG)k(a ~Nqn l6ytڦhvC'݃NE^z3|(HzV2 \]谧&q(FƿⰓj-"I&̠ƸP38*@8nSvt} "UD_7<ݬmW8>~WunEڶ`y/Va0DA$mǃֽo p-N_u*<"izus d}`Y݇pQ$Dx.t$2տ/Z)3F`1ddzݙ-!2Fƿ|8qJRƾD[b IX+joVXt*tY+<el<[ہ"=HЛ(R1k0^ c&*Qd,;Z/>UU:bhJi!E34d0 55ƭ!Xu^tґUk \ =d'x9$v7ʭD(-Fֈ@& A=gO4,q`$0 Tg@( ̘p^FL'bSl$֖hf4U+?G"GOs߸͌s29_4~j{{89 9y(!K@d~ñ=o; U##("u"^qG˵#Rq<8fMt}),S|c2dz|<XˣEUS>\VFSg'?ŽQ"PBfnrV&miϋ鸞>"{56< ξpV4Qe竣/;T~>H׹mEBZLSJԚ<Ð$n@!!L;T*G.1-ܮqk_Den;~vU-&Vneb_`O 弡xPM.JGv C2zPEٯ 3ė̈́+:?T@tܯywmru0?*9d ҟy7Nn7o_=Jz| E! endstream endobj 1076 0 obj << /Length 2058 /Filter /FlateDecode >> stream xڽY[o6~_I_d`̈ˠ] &]`0tX,y%99H8. Dxx.~|p{/"#YvAÐ(^$qHg_Az~= QHI+M&8|-ss^YKr,i<Zoy{==MIqÈ>=3I2rD@z;ec$t HIDD8Lw)nZeXnx $J  +c15ʱAnF5ϵld^֕y1hTMϘKe2q&#\VxktL9{qhbU݁QZKHeZY܃/=ԕ%B|uۢ+ܬ ̸!J@DP`JsbZ˺͌6<?Q矕2k J]+~EP)da,a,eDJ-" dLn;yd"Զ[C@<*7ɍY&Zi yjB /fk(‘aJ,I+K 7cۙ;qЂAE o%slި4f\ۙN\=hY3^tnd;1%TY=z hr:lV#B7=ea*; >y?NiO+"h^ ]ӥ/cOr4̫nWiPu1gv(uViybO[~I6M%mROc ,U;!⩨di@Lyׇ*NDX{zLanଭ<ڛ,p{e4f`?+j& 3^+֥S}WZ(ι1Hq`Y1 \? 
zX@@$.c&m[TOy!;*,YsLaW8[04q~VKcGbAj|Eɼ8o8#Q&Ҹΐ舽3#$?*@"2>plT^yOUe1ۗMD -^A l FɮGh:N!C˜0f"b{e1ũKA]Ux߫Q?qkR5/oVOkFW=49H\!Qt)1<#6=k\ԧ.q=x#_WgJ"YvBb]'jJы\_M l@f2ޒmxC3E ˶3azʶ~VPb;N`{cݥlx9}A9wr0 ɕ]uVɔP^K50.ocU3+6gc%/KH_yIIE9soY pԤܯ/Co^8?}Za#`:v~ - Ge)~#ftJQ<}5X7 +l097֘u?o0-쌆6qy`_Rh*xGKXFcqsmcص`㿑z曏NbRdtiBecSYNڍPsu)g)P)' KRl@?dũ>Nh$K[.CVnW MNn&@vݾ\ p,Rϼ]SS5 ei<Ԯ9'/o<)"ä$s3kwO@?~8R=7uK'֞'r1t7T{Ã}K9]Wvr5v/zJc endstream endobj 1084 0 obj << /Length 1427 /Filter /FlateDecode >> stream xXYo6~P҇&͈Խhn@fQԋ>AJt,D:x}!J"(z,D49{!i[wm0]'5_Yض3o#!|i]x 3(F,cZ1N9% dzB|IgFGOgm|7۳w*~^oyQEJn/|BW?4V)(:(E@X 1ajrH#gWWkڗڍ3D( 0Ad%eIe!qC%A{ u++&FZhqWoXV2mp 5+ŏN|yTI!aw'Qi,^Zh#V$2!kvc5ZQGC UV&fH\dj]2i%Mkg4-ndyNa^5+r6|IӤB߷{RG< #}3'a ՛(k_|F>(Re}PKJBFޫP|RXf&͐{'̏?O62sFNf1ZVx(8H;6)C-iEUwCHl3W./2]i3Vu 6I endstream endobj 1091 0 obj << /Length 1438 /Filter /FlateDecode >> stream xWmo6_ad( 5CIdu]]  Zln(PRRHeIQ4-0N'R+QJ#]۴!]iLi'b/DrNɹC=FuQb\ QO=N@p Ҥhɮ;r`K!K>5WͳQt'd A0BR5x{a uu퀎 5gH܆q2 ]ӱczpP<%W)ɸ\W+HL*K> IOQ"&ix8͕¨m傐ˬ=Zx*:o}n}67Ъ jZx<]vt^ ٝYwZ{vXYt'ׄڇWwq3ˊqSȮ/r] 3|KUdRYm6GKvz~MٺADO~hţv1Ā!q0A{Y}oЫ':z=Hp?bqe^4<{]y糸j39fe#S#_3%t!k|h pDCQh>ZS>"CqЗN,>}ueW>I<@z{Iҝ W3"P)bug"qrqt=a=kWWMRjLsܝ~-|NYrg=? ~Y?  endstream endobj 1108 0 obj << /Length 1584 /Filter /FlateDecode >> stream xڽX[6~_##e`ҶHɴ/Iy205 nj1;9 v-~, #/AID#o{cJam2Ϣo;) i=RT Ծ,^b SBya 2xރWڻ[^HCus}\7N`ċq LIF(JęXQ.CӳDI(G,BFעIeql)?1}(->3f(ҡR*t6{^D\^®չ*+EҬݪ. -):r4:2"69!tsZ匔uh 3oJgg1POJU\ O'6>7|'fzm4(-\1}.Y%dCyf`jc n<{F. wATms:Oz8|.7K-!=%xb6]5S$֊QɧrķGb PҼKiVN% (``mIܰc֪5% ͗ a>~Κ! HOό(kwˈipML(4 (+J)$c6]ķ*$ |R"q_ Ak֛Q'w~EhηEvZ5i{8#8.Č94By2ABM&6+h07LcnECc%!"$NǼ8g`< EhXܡi Qz}fkz]qI@8ΫǑQB=EPKj]LW $>&j7}.o0q<uWZ-h;Y974p٘n,@=Zk5izڵ{f]{֙#;+ #RA&&ߝ`獃JMrt$Ȼle9> Ơdr2p$؛Oe=E\p.3\*"8iu<|;y]jշeGYl ѼynZO[L)XVf1x5m@}HAu6{I7^V8Rdy[:*ZvIm)2ɺ;N펀GC|81Y ӡ Λw8Ɏfܰ-fEg4*Q! 
$E:מ` /ҽ xeŴ{ّqSnP1O5a<K*(WJ23t6ՖqPC)OKU]6T/IV]/,iq)֬"40q * @hQm؊=W-N.^g@ ʎҵ -O㐺/T-u ^›_|5 endstream endobj 1118 0 obj << /Length 1855 /Filter /FlateDecode >> stream xr6งJ3BLN;n)x 8H #o ҢR|p.cžp\%NLΡKO~u |ߥ$AF<[1 ܋:s{K;|,ТqieFc8 u[Y&?`)~A82+KEUIi"K+3.yde$sjsnJdw+0,Yv(nRK*Z +JUTwf:TO_܅g,98g͌]z/L?L67C["4d5uGƠ3zve\;1q#F8Hf#qDoGD8X>OZd.G>6${D;*~K󀓰q7R%e"<-{'`9J`${"6~!t)ۣ#xX1|paJ-hFD+JyzM.۠dҼCL"V T2)#~pH͕ 9 D)qs%d mĴgX!1ٶG#~5(I+Y!i&l|^lTԂ`׳z{=0_ttC׋wo.#>jrE3ID)K,AE x` ߮3ȾL9[>W d& g"At!bKB{dD~YV:/4} $oψ\!g 9[+M-K?su#> stream xYn7}W1y&08 hN:*K.M=C]"[R%45`{v,9 Qָ|IYY¯Of/7X9$$P`)Y0Fy;UpVE1ިLAr$XLSHcY$84Gֆ . D$ Or+8d( 0pW`Єl[bހ;dr3 sWN )@Je-kZatʹTIylMFƋ0i)` pX`f'" + BGX Fv`xȉT$SACIN!R)([yL&%22P`H1Gx.TDbe"d%#%>Ivg DHi*E%+!J^"2G@ *ST$&1QzΡE?z&DStF'`0/:/_~Ւ-k1B;4쨅huiۡ]¬-4q[4Fxd8#UU k'e2/$^rwasU{s׉ZNSǦSƴ`2 .ufOzVx R").]/\ac(nBb͠C E;=hMlKMONdߍo=Os:î;jovpZ67 Iq~ٌkѽ ݛ9ə[}:; ځ軛f&acs%jolH3>.\> R>ii4h)Qƣl [`7^@Ѯ\ övN]i_7q?e7ڣnvٕqa`]e)|n[4?ie!Dʪ1U##\ N. *멎qWʏ>L>_Tpԭ'c=]ӌpz~ 'Un"߄Fӣ3vKI$nF/SR\'-O%dBp ZThyQ"M68l/oMO/񵾄 4nL1lY9}GA]wMyb~)4۹kXQ5W cF ۪͢ŋQ_wgt:?<|L X];3Eʮ&hԐ:5d(jX]tj<)I6ȍe&|$nwhkj}ߝk]OonQ!|l/g ڴlFǬeK!%8 i;4b["KO>]cJm_7 endstream endobj 1138 0 obj << /Length 2049 /Filter /FlateDecode >> stream xڽk۸ !P]3$%J⦇"m]Ql,˻ʒOs|3( ~;w>\8@30asv]->R^1i8ǬʅT~).n|}w/=/eH:$)w#€HvW~!&k׭B g0=P3D25Cq.RnQ6lrKPd@ʶJ% %DU|EQtVϊ%x?sPnuA1sTLRy*95&4N|HC8!4BK79&1p-zzHT`&;)=0gkQS/Cp/{|YjDK#JuHʼui.IH|>aFY,mc&4T2A4uq]geZk,Oq}d#2y]fc]\qiORsa8b%r_?; ) 3 އ0 Ecx!I YfF"'d\L2;bs{s%#cSl3,8QA^0@_ds $vӛYW X$!|MQҏt$ٌޮKd陦q0n.zٙsToT"fN]ۖ(^cyeHOG$JK؊xe?Yq*zhbb>W:rAp*md3:O绞MEQ n*bPp׿TN#҉ ,]hHHY84_׳ `1Lb rU@0PӌKa>pgV(=(AB,`46  ƻ&h5CPhOpvblgd 4odRplzDKxZh&vm< xyY^ i;E_S}p&|Ȫ"o%=;ЮxF^qn_= ~ :(Ot7X k0"@$fEZ/0ysIIGbhH͐/,q<ǫy߼=@FywW?_av$Ԟl?sg ٛ[LJ.ʝW8ͫQɡn}Y5-HmOj+v];oh>'" OzU3",lG.*ۙ ^hM+3]y'r Iib`iFlel-vh* wXэjtw{ =ݓڤ)+ٖpym]u{|Pj/sl o8u-:t׻Ph_\V5Ma:^#ֶӡԖ"nORRAe]?9գӁ?S  K6}UB,7&AfÄmϐi~0Մa8]?7G%4>@#ÿ.ZY؇> stream xڭXK6W(@٢i(rh!-ZzC~{g8,je{ Mq3 יΜOϮnzn63qD8rX{^8s)=\8aW9Z_K/3"C[aQZ_-e J?Y\,CǙ3՚[ 
/bawu])V&a8bi]W],!B4l\ng!k/F_?o,rSCRBd9aa40Doԛ\;MWxۚFH/p.tߊ.g(r(fA-'&3ΚRX~8撴U@%bPG=r@aBGz5prW*iL7ٖvbQ8g>"E?\MT5|O_/;,}ۦv_VǾX"uGB2Ϋ'Ї,Sƾhwiy%[E!9AUT}m+=Z,tl SKI܋n!y HAN@,#>. GyM8# {XoRi_P*Fn]Vp)S愾1Fcf,qaCja햷t"16]gyUF#ɫ.4zT{I)qLEY1 +,}2'M9(1ɡ对lY]te%K~\w2ʑK"xJX4c'(Gj ^܈FZ2e K P2QʴZ#z6:P:vw kJ* hy>3μjLZOY\ˮ,<2Jno@{4OW^DܬVEbA %yJᴡxյT;~BQljMfON | ;e? #[V< w-(P lbd: WMtFs#,pܴRh(iI)wš1+@RLff"P |4<,ts+Fޝ"T?>pB^:Kas~m=fI+5:w:X?O_*VJ:}GKx VoJ jk4+.xyo?#֗mfycOجZ7n/qoݮx  2M&8<IajY/Qф&ʵUKl_oF`UkCadrLT-p׵#W/6s]ecYy/PL endstream endobj 1185 0 obj << /Length 2293 /Filter /FlateDecode >> stream xYmo6_!(`k._D,ppv(6c*K(op3R'A@yyf6Wz!NLHp gZz}Z$|o?h=TYx>Ʈ t$+$nJ ;m˕|_  :'/kn=4bq/ZDjv8M MMv'wW_ 8GRj&cW>h Sy;} XA>^δuPC P(B=#fpdy&^u0REљJjhjj j̺+hsG^q[c2˓3ŲD7vݖ𦡍DΙL5-״fc6TGFƑ0$DdZ|A٘O J7Ez vRrFnA&1Xr dny6X(t~jRL$Xoo'>,NeXWtt[|c,c`˕kh uJ98By]|):y߬ ㄵ愗MXj>I6dKGkލt`"ͼ 蜡ID?vy*}y3$aڢp# e{,7]de(xjҼEFcL,ɣ}xyGVYUbHF pWVi/7bo͓GyHM= t 4Ú8lq.,p 8vk2á-;;;&H6Lq yߏ+e#NBo}w׃JwN&0uJ~&.KE.yrSzm`<+7!7YȔG\uتߖG kg *nVLCvjLHg. )הFUf9T!?mC{$wp8 җu rk YM]wZfa]a F_7qqrw.zWcbSU 1٘*݅`yT\xGG (N`SlpJ"EFT[a&Bs?49ե/Skc[3:R]^{^090qgNE?X)N 5UGF5%Q7ƕOr#bG|"zl{SvivnGVm =}wF9K^~Sz7ǯK󘉱3Ց0kPu`H:2=T[MqNR9d3Ȟxdt9~/1ǀ7n,AbRq/s.+9l ,ؚ!^3%)D墄!(Nq䆬 ~ UܺEG `I>58e[~EN{myᇧ퉚uLFH<R5T&qIJ:sD >S>Wʽ!0K1huV )>0eF e4u?!tBeb?="=u/ yȌs,SF=WGš% :EKD/Bݻ+.s  ;z06mgiC/l✁P֛-6F_Q,m\CƾN8P KGzA_8M<9Dy.,B^6'WLY]:zGRo{GS͛Al:TXm$Jٴ՟  endstream endobj 1231 0 obj << /Length 2331 /Filter /FlateDecode >> stream xZ[~_ᾴ fHeff Lʒ+;1Ëٝ&bW4Ex_w}c eqv+1hJbX/OVSLPQvYL̒;~:L̜}}OҠ:d_ _ x-jp^>$)Sw⏕pRFj 0CT'Qs-RWPN+b&u zA;|=$^N0;XBFXpM&VTN/̋VNV$B5}l8Um~FQ<)qkI^׼܋ϼ(>aĴ}%,i6. JQz M$;VI.'r7K.H)1ѻY`+&uDٍ& S6h_k>ő7BnFBz y ˂ 3'~)dwVͻ77:H Obէ7H x/Ea:jGͳغ1\ 7j7PPL)x[!=^XƲQDMFfݑ. 
B3ɠcm%C)K@A¼8Pε['QnaY vvrBaRrN(aX- Xt(Ԗ=qtY~9f}ն+뙀g*6"&0חf*g}=6M0zb[#m`p筛\j'=`z7q R>h(Fj\ybvQh\ܚ\( q5_\%r}Zm;wQeމ p8c\Y.)] CWl7i?n*[6NZ6r4sr_V\jߋe&w K4DӠMvr`zS)/6u- 1hc'i*)l^'{C?frlM܉XA/A$Fis!i(Tq(~Bڿ3MiKW;{9zZPe;Xb&v3R1{7rJ*un(T[]1&b˭̽  QȶňݿUi/^Yʏw-r%]=ۥj`Cz5I)Y# vgA@Ho%|ɫ|l2YJڧ)~lД1}9y%ixt*;JRh|^Py-Oirѩj̦',hOp0RZBOyf+w?\Ѯ߸qn]^U~e)/g4myɿT+bXY֕$t{Vi^aFFKLS?af30Q2= *i|fwuu!e]PBe*mzMb[Lgqv.EIvM3* @r}"M r8E!}+J.|;#HǞaaSQ3*䦹aWh{vaQAbJG_x=j)W64N\CBhb9Rnvyg?5gL녭 _~D#E$v4û/wqFu2+O ѻ @si?' )$8 t 9Aܧ2"q(J 汓؟ D_C MoV7 endstream endobj 1129 0 obj << /Type /ObjStm /N 100 /First 984 /Length 2435 /Filter /FlateDecode >> stream xZKϯqBU"i N$ayF %Cl>_QffײrE6'v_ F385m@ڀF*qX:|AF\\ Fեd઩ rtU2BCØXHm&\q-mgYi0jWo ImPh<>$ARm &5412(KI.H%?.542 1Gʎ*S$D48mW2Q9P4T*NHfrP6+͡4bD/AH2^"&gnʾyf7M[ P`r'E,IKG]nnۅy3 $t!'l[std~PK!$h~E=N&Պ@lLpLH|I*⏥ !Ru!A}+uVU |E])ЀI}B!G-z7`3[>LjP{Z$Pz9oջ 眎"Y6c+x\TH~Z1Q9 qI-::-:de-`Ο% Tګ/՗K{"4A笝vιsΝss;99wιsu/ 2~SIpx^6e뽻X]_nwCZPz(Q:FyL#'_QlGs<-1PrHA29yEqee{#&3L l; A39ک4 ~ 3v$/$Bp.# \N: Gͽ7|~m~* O繁 nt+/rAW~9#q &d[DMvogAOvδ;S9]hҰr u;L87gv. ڇk8;n|9FjI{v/B4ARmeF2G/S5/!#!{;<~CK a0\aƯK-Ν<Ա^v11)GlOԳ萈Q+jΗ` d3 endstream endobj 1266 0 obj << /Length 2000 /Filter /FlateDecode >> stream xڭXm6_a>TjԻ6pEt/aAIZWYQT6_ )KZb!zD o~xW_ MƲ؏7@Ib΢ܗ߼__ 23U ^q=W"vA87;?A"4t'-!`D8u*4AQ" YEv(MqPvGܓJW(u_ҭi0TbKf'"+H=)yRW2^{eavGDV- eM:|>˺~h/y4sGX\)ȣ{Y5Ds~m"$^36ǡ7DQ: h :-IϽjeOlfwy;4uҺ>,tW|j7;2N,M}3X^ '8)¦B},Tg]5 hSmpn^0*拭H=3XƦbsɢԜgf55*sX~h SͭLO/*~A8;;\麺oSbҟ/Ǭg&wMIG϶“vLX`t{^ O+3R Iy7Nҟ*=;%6iH%Ky[&0Sn5F +cl% e<(Fk]Pc Vd+*e 3$W{ %hM%%QM?zIxm<S/x} #"G-(|/C˼O6m]u( P0M)l =k?*޲TBuAUMiG6էcmA-UݳCQ>"Yl{k$bj&K}݉,~j.H[jh7F/FLO[zA_{2/?ޢ=%kgVǶh@Bdk خ+" = ÍjOo&z'>)mOtm_n8}$P%Co`'S)xwTK OrwtA|rnw ?*]n3lz`W;紡OŜ[XE0D\1\,]T&4y`U9~˾mW#:x0VWP`W0深[#n0q5i~fR XO}+'{ ! 
CG?2gw {B@70N«O01ja/iuj2ʁ t<phC37uIaLDS"By?.*f }ߏE?7*d0j inn(:Uʿv X=/#-y~<")#>]G 10[֑YU8^`sTyAgQ:GKC ?gX{G%+N# :Yҩ>*CU*౫b>i #|AI,er8;λE-+H-I"Ƃ Ϫ)+7H;S[EAXX5ƈCDG7BNҢ)ʘP ndUM_T⳶.e?߅hUۏֶխ,]Uq~Vb5iDMB@E# ahFyk2=HL7i7ːl(j{o61@N%@h`4_rr 99Ŏ=LdК4o> stream xڽV[6~_X:KZ蔖-Lm)E+dWs:N6ۧѹHd5~X? 1IPpx$ 1NϠ/Ov1AQlMj!s*w'vibhּ'ֻn3q- TeAM=߀Qe)0#@ fo ς}Ql!>GxdaW583TKhޖFmJf$xRJ- PX~LI,BaA9 ^1FB(:5.kU3t94^Jmujtr;|4#Zt]`h^`Ehq8F{/dfz"5Z'u Qt MɟPtmTp a Nv`i[eFߋ$(տS N~gh 2*XOcD;)NMꝶ~1,fCyȢ 6M|Yxf(X%]X1X&'PӗI&T+HGg>./('@ !~am(Ƿ!,5\C0k~+⽺b, ?I(&+E6]7p#`}!DG{=ء 40IAԩ)l(W^D7 { qmf1448Or&!b߼ endstream endobj 1283 0 obj << /Length 1085 /Filter /FlateDecode >> stream xڽVmo6_h!wJA[ C}؆iK,<7@|IH 'x<>`1@~]21X1)P)g?弥I8D<6~N3=/ni`D # FDI{s=q ÇՃi]GYI$l18MPGGH8B6c*x ޒ}2nTY' f6tBBvP0a6"۲(}~b]t.z!xh܄Q#ܫ^BKU"8 r=/~0d̈́rP=y- ~^&$5`ҪQ_803IK^e6QOS]IidaȔȨXp5dN[KgO&-O&D0Ɋ|fw6ٚt%fzeQ)lcA\lVEMWYj\ eY-U޿3̄9%K(xYV>[f4fA^8${śHg}`K[LZ7XiJmRlB>IQشhG+@QB%a33oٚtNd3[8vplL)Fv6esPS!vnKUT_u 4[VtU}}tN3wj٣JGO`LZ{(fj:_9|f?2 C=]/tΚv&sJ-ىi}Ӑ{ IZ]T@;fMo=dݎ}n߇1}xc f:@uoگ"_;!XFQH *gF!;T΄Y;՘nƘ+o,6X@kX^t,x0zIuu4uYfWFe?*{#V=)Se;@ݪ,jWY(?~ gnG5zQ/LnM SP*7 endstream endobj 1293 0 obj << /Length 1285 /Filter /FlateDecode >> stream xڽW[o6~Pu!{PH0`xCE;tq)zI/%GV+@\|<*%! !#((HgW^E1jaT&:?^ J9ؼ]X?J;itNJ ߞ)3}C5bұ~k 17fA}&f'sWcDuحs`W>M4p\reeK.9/2Nj̪*Yqs_E(J ?!?+/$Dq:yj{;A&($== Ld9o LzIo\5l `Le+KҚϣ,2[.s~n`yhZՉ_x8w2Kۊ#CǛĆQLP[ܗy]R$(8BM [6$pXXܛO8$ s GN$&:%TwzX/Aq@5lL(?lgEW&uW p4beBO>\誑V;) $vjđ>w'?i*D'|8~ JAӖA3ꮹ4ͧ. Lj#54zIM:wh6K0?z%5"$t׻\RuYrYj͟DͶEj$Yi%k"ʬ~0 /V桮Fz1]eCͻ6yyJ qS& Hؠ!\%$kW4%7npk!QgiC@P(\U bynIJ10GD6)|A<D;D190@Ko@E@h`w%$6*f%.a\A=tcYzAg Z03㬝4:UR~ ] 7^&LBv~<  ߀BN@yLE3[&lh]U٦.r^&j0^S( .d`Fr$/-kh E jSj|Xb+ab+M7[('+жT_Ba䊉Jyn nގjΨ.IKSlz;d Q2=`PS)̚#X-nҡ22J{隢^Q,eң9'-Ӭ0o6 endstream endobj 1305 0 obj << /Length 1337 /Filter /FlateDecode >> stream xڽXKo6WbFD=Cn-8dam Õͦ3HPHj8fY8=IGXĺ{k){N07GY+$ڑVKǗҁC͵Kiߣ"bgv<F? 
g6 ehF%QGMĶU[q¼h1ܾ0{n)q\y B NFU$^iMGSC̾RyM%fKt*"vW;)uw@UjHtU*bV& SdNwEYFlJm\ο\9$͓yea㝝XMeEX™n9$׀g'+jj@BEC,iL%YW*M]SIhp[k)e@e~kD9+G5..&y';E-`.zL=$]!sc 6QTW?f`ۢuN'x n`6ĻRG!;W> R)n k">)߭hg>|Cws=͞r=ճhnaDUm2Lr0U턑[D93ф]<`exvVA$<TNh-LR#e  )v[\m"U'YĶ.Y˜)c НFd{Q:>xkM )#Npq5$ls &ۍqW0HhVYQE1͑'Z,9]iF7lvG&x=\Ven@!jbP0 nJѿ/:`S52^9"YOQ'[x=湼:'|8c endstream endobj 1324 0 obj << /Length 2407 /Filter /FlateDecode >> stream xY~?Ś'A"mn+Z ۴b![,zfgCeEX<3C]OKYxa˄eRxtNiX컜Pҫs m|/lD$p q2&{g#Ϟs)kgB]ŕ.Ϣ_z#,Z ?I9O6ٜKV"czIX :&rI^8;h-6p9 q@~=@k*Ex#;Udmt"b Lڒ濬AJUw*=Ե\ks;.+H;( pjAAFΩkZVь Is!s9_vA `RL4g>EJ[ۣ!Zt/ 6$zZ&k\mlVV)A[7Q啪+,gQ5CwU4N0.%=O=i3) dQ3_gfU _I劬UhzQiVF0XsiW!⇺*4H|/3 'jM#ynE;?R{ ֤[H&pꙴ'Y)ִuk|V$uyjMQ=I8iэBOY Q3b ]8Pe ebOqjv@* c`ܸ2bb)W kNR؞@hc}N0䥣qSY6# ٮi2{SK~4W=LwKx,q`,$Ai|[sVX%sp w| ֖KYÂevY, ˞ ū?ɚhf+Tjů?[ )?Ȉon纯Չ:%0W:C~ p 1q%MQ|0MH4h-É^iI dR:Zk ] hjTQ @&bGu%晐ǁ 6"w/I i,垞 shhx.Zs34ԆXo\?Ç0PkDMw"RH v_0V|B?ą*kcMIJ4RJSU?noOr_sd- lLfʹӧ$x^Biq=>m|>d'aHL^?GubYj>L O3 jUVa&^ i_ۯ9 לP<Br+ $ bdk˰{7~g˕~Ko@J˄ _ į{`DwdTB&ϓu?o:q>*Tb5#ր'(Hla-pR4Ii:С4K*S1LC\JMz|po >A{JZ-gt7j‚CژE: 1@wGߐC;AD]he]"}*NvI%hS."iiJv_vᤚ|y4Z2:Vb,€qv`_$ge5vJ yc=="%8Ng,XM#uz_R*[^ժ-bmL$-Z2͜%f,;KqLZi?\[jk1k~ g?_,hB< ׍/^{<$W3De&XgxY˷(ڧoףnzMٴ~r endstream endobj 1332 0 obj << /Length 1353 /Filter /FlateDecode >> stream xڽXmo6_a(6#z ( K\6dDD%Q%8ޯ%ȶ`M{yd3&~]]-\wr=- L[;e2/^~Zx^g<7tz Kqeg.$wTrB3dz.nRF1dfG})7ET`8ߤJ+=| DWZ JWZ^n|c'ìHL$L Dm)Jƌǐ˙gY(ubWVw.!jf[ B\3B*?pn){*'Jҵ!*cByaɨIʕ{SRb gBIQD~am Oj ؘc@2!v96l!̕!)S1}('3f\I5KY]E0{=!:2H0bgeփ$LK6Z_Q\3_(I,6#",-r . 
& }*r/Rķ:(.".%5Y9G=N`:Ϧ2JH`77iڡOU*e]u9GpXĩ(Mt bMOXw$5^#x~c5 QJ1›Z!Zb;rN @1^[Ò @4&R|s6 Avaev1!{'aS3)VǶ b j,^bdjć͢u=  kzwżԄRxUq\`\a\\X́y4=GE\VqȱpT X{E> stream xڽXێ6}WQ\RwCɶ I\A̵RÛJI ̜{;{-֋8r'Ao<1 K0[/UWװ10Air$WGJ~^|[bB%qgma[yڋBW޻śAW[N,'%Ra>|'wgĽݛUcɂv-I#-$y1fG`JC^w=o3& "J2s{]2CsJ4S*z25v]Kڛ۽ho}xk_*$^f?t+Hx !+yOpDؿSnaB*tlflo=G2+̀֍icߊ-Zv+zdW A<4-=SoNyM !3%f n΂y͠(y&Q&;˝_K*h3a2>*VX|]`ϝ#6mWL n=:RL8I8P)NLR%=1c(Y R-žU&Nx22 "2\O#|E9a[Ӌ*±#,{$Džӻ s潢 ^GQ(կ`=PʾKG[vw9Y$rw%Ƭ SL bKj Y*%y`YƊ`0!Qlo\M+GFQQǙc֥\䄙lL  ucc,OR[5tNsK:|q̱罅1K-ұ?.$rTp "иo<ʃ܆|Ck6I HSGGOsWV܊D($e%㻲3"$(ݜZU)jWlΪZ(Tٗ')թ|{)_j2 7ȓٕH5-T@j8.l~ԋ`)wX՞Q[ 6\c$5 arںE!yuL~1+v k(0`-xAB&Cșm83ݨ4&:1'cSگi24l".\jƊWiN1oF݊CeҤTZoZ[Ca b4m\ڪda05mMhƐ XQa=5hCAe 7HE.#hץ0 ,WqO#[El'ڝuD'Sybӳlklᐜ'v%Hs¦fM3cg#Ԉ3"ndLP>%yzQ4H짪sUPk?e4N0> stream xڽXKo8W6jӤ*CvEhUc,v9m9'QcSoQoӳɻ02~Mo=F) ĔDMޗABߦ'h3H#’XVתluN~1R!SٗoԛǏ|Rom]K/d$\xgm>-`c!$Edi>R֥rm& ~DZ Y4Ч-\sh~q0N&?܅.qJ|"KJ6ZU/]RL1Kk=]J?C6 {mWVh4շhKOhpq ]J-WJɜW[Coęhxe2 7z nZ7d^jwNc`8Zת,껉Yq@g$ ٰ#Yj.Z|ӝCS쒳jZ+93RZn!MI. ȓ)GQ}lQ;J#*+4){MX@$e2G[-QE:E48,=LF4="m+RC3LMVuu@hᡋ֎]cD1ZZHmwߚe2Rxeٴ68r6)|6hGBakiO12D`?>gq:6g$JWc~~էكP,+â>Pue ^+'v:Uwu8BCSu4jD tQH[g`p/n9Ws OwPʢ3NKC[v[w97s ui=OX/) P3J^پb@QȞF()^q>gU o^Gj%EwY;&pO gYQwwtuWe1 ؽuYr4ꗄ6d!еph_7`g]-Bom[G׽#*7,;]Vl5#y]Nojd5lRB8-=-8 |=A0|{d6`秣6jݏї P8hZH<-o8°#tSd$ďT4ms#b0+/FB|9[#3< ]$ژ.pC`8"௷n$?7û97y[mS@뾘&Co%3-y =z"2n;窮đ( ,(zoaZ$ajOc`ПOG͏3 endstream endobj 1259 0 obj << /Type /ObjStm /N 100 /First 975 /Length 2032 /Filter /FlateDecode >> stream xZ[o7~ׯ /rH^FA@@w>-TCͿwh|[v@"?+G9VFeU֑S6E-M$_"A(`1HP *fH*S4 |Ȗe#6,Feam΂ DpBaF$/B*֛cg!ހV YPbuG*8q'*W \&`\]b H`+52XSq <"BΣQu7ټUVgmyqsUUN?nϯ/~yZ}[Ht$띪G嬋B"iט&_w2[cn׌ +6ǣp!-JypT̓Ӹ~ y zhpz>6jᾯ/QmfRfrjqޔ1;b\;]c[2} kb9K_*}M5lGBq{:i%4eS^:Ϫi=_7ղ/K}z::#IT_??](Gp?}3 endstream endobj 1370 0 obj << /Length 2003 /Filter /FlateDecode >> stream xڥXYo8~ϯ$5KJdiѢ[`7C[LʒW~RWd'mf:yq; IB/tn7AD!%۵Ε~ֹ>~C@bWFL- H갞 |'_@5Lr`*;t >ЅssgoG3J€9 fy2$f&7ݞǩ+lïm[l*N>uVaIBoQr0g0 9Y*G/?V[,YLxSg3Ъ+EXan)i"ӽ e0ecwyQ]Y)R\UB SHZh'7vm jTnԕpm -Xz eBȟ`nf'jdjR9euOHuq+7`%i 
;y-T~;4T4i;De2P+2Pc7aDgZȶhz'IvF_Q}fii]HgPzhF<>^N#Õe0?k+6&0]6՞ Ƿ祊/'K{_/֣g2&arUyEq%)SEgxdw&`l?>}R,皑 =ڴl\<Ɲ*A{7) Hka CkuaW*=ǽ8",&gI{w \ʹ0CPkA]mִ)@PP@Ñܧ~"3U@ ƣ q*tZRδ@T.q*R9 ∔lK r1 x n1{͛MPZv:7QeozQ*Q J: WclGjMNS&GJIMN.l9J*ga΃<գ-Ħ xAÓ!Eħ,ri;(v T3V=Y.Yp9bֻىrae4iwj2~ 1hv4 $إٮDzJtV0̐ W`W]pTmL8 0PԿ4>ZǞ <ڲk´d>9E^}} WT0/VRg饲U |wC#/?8jhi}$τyhx% G`omV9LX ,-2*r]5ħ˰R H3+tS|hBi#_Q+[U~m3ƽjLvQ B b"OWN]oqUlZ9bċsO3\bsQ!:]ƪWL GxŒ8۴ZűoS2Zd+Qlky2zePTfU+M,ESާ~x_,G^8tZKTNsdo/dNz endstream endobj 1400 0 obj << /Length 1927 /Filter /FlateDecode >> stream xڽY8_Q K%jtR$N{Nwˇ]9j9 |E@Iϒ(\w^NI2KR|̚*U+q pRfQƳ0E"xh,}ZNgnWzm뵝T-M,eA[w 3lg4-4ȼU_ɺ{ l[ݺh\g'EBm X6xng,dlЙ;ٲP,n0=qwfIR"3>nam6ӅZjQI6^3s7, ۵ qa쉮j1 }dKBK2x@Sgf]4Zβ4vkZRLē[+ f,/?8"-IY ˾.(-]$R[MVK7rO8Ktl?b8ȌׄiJ޶rٗvg_jp(` edlUkJ:6r6JzS +s_ ͳ`'P'䝊Ji$fݎ2Q\zvVD0F$Y,a*EPZ+y.`GF!'S/N2,Iyw* +X;kI@e|~W,yҾOo׵KuYn7φ(rp832 v rEy,frQ^b1K`)_h ᔰQ|΂q!d]P/(TtlMQơ;`d3.F]KӦ18~7X8 z};,V1T5}w j')*+ks]|ꮅ̱Óܨ)TYaRqfΕ [EMs IeMo,]9vțjjGݨZY>F5M+MVIhossC{Vh8D +u )4tehe9I=H A2Pyv u @ad R,3?Ƹ JoyX 56Zu W%>rʓZ`B3F-vێl`/G4lN._r`K]ٲ-*8ɼiB@Po>e>0/m``ݪCcvnv}NMZ֭hY%͵iPpLvz{U)AE5/\;F_*OCJRHP2km6 B,YJZ:4qgXXb9*XCTڭŅ)$ (Z|{#%)  h~HfI.tыh A2QcR"Byjh05"ZtوBڜ4#bPgY8)<}y2\y;Hh/(Ikny.»VKUv9bM 6d}b1^(p+K*.T׮:~sS&u^},ogxWéieϐCץB,,#%/JhöiY4f`F.z#F1Ўg[#<^4tj|G6BA:mc: @cC [xT[Z6Ɛ@ih*pg߄aaa|[R7b]n5 3 M;fwj\6%݈ws-{:ZrYoܡgIP -Fnc!{,Ai]oczLݪ|(gLSٹ :lXCS,vEӣՃ endstream endobj 1424 0 obj << /Length 992 /Filter /FlateDecode >> stream xV[o6~:V -0 CxtqIɎ1I GӤ"P|qhHEA K}\}YӕcJ_z7yWӇ7l'ڵlRڴԛdqu5,VӨzԪGo{Xw9(?gFn>*c<+f{j[r0r$qOI+s>bG˪ ^{є*H̑e#]e݀c (R=DV>==Yyt^,0j,҉|9AƂ=kYwW_\) dP fGV`$Z #WfTvWs؋A¡Őfw~υ늼װ:Ova3͡6ib`A(CȈ4`C"?9'&#~f}5m߼5<,q﫚IAHoFCqނ7֭Dv3gCSC8GwŮ:R2Pj94A"tBhQz\Rf Ik/aUo_OGhP0=|-!3B y-|,Lu2uSKދYOڹ9dk9,#! ^ 6{/q g.sQw@?wzZs/+vEPpQfSn=QYo9rH 1U/MG39WF AK؞cu iIXa!4-CZJ⼍"$y6qOR׉^@6s/21߆QHBC{@L8b젟=KI2MQ΂x/U/jf~<=q0G# endstream endobj 1438 0 obj << /Length 1707 /Filter /FlateDecode >> stream xڽX[o6~ϯ$5ËDI0 ڡE[C[Ld$~y(YR)HE MG.~|H/!һ^{R"@IJBޏח/p0SHF)yUfu'tI=vܜD. 
_{0޽H o~ӯ@ b̋hD"ݥX-%$Yg[UY,yH<-5G& ) RƄ6V|U9Rh[hRcxx FTBUYL+KVjuU*ww ϡP~g[ڦ N*U^ݫZ=LhkTk#WogH΀Ig\Օa1pR  @x>/ *cC88`9Y,V)D}YZv:MF՞eUqk1!xpJ5i^hbV 'QA#$qm+"oUp6;Y^6^Sw_WƤ|p _ZZҲ3ݯ?~F=ں9$:[F)Y+4U7iO6@€o;-ȮmIk,9 &%aavguG=A' f(-q{XAhD A'm|L;rє v*_e @bţN΀M*l" f{$ǟQ&UڤJC /wuy(u~rOMmx輥9ɬ!!V 4)6lyH Ik'pшl-_k6e,CR8ߕE3fq_حond:hܺ>x`#6/ݏ B:%mFF`-3h=4 ƎD5Aeڙߏ(H8& ~2AE iq-K&Bd&cѐ?{fF%Ρ> stream xڵMO@MLcvzhҚ'*PVFdؙwfV[c !>(bCK$%x?'zʳjW SzR!1PӁkFlZq +pbdH^^ z&Ae*[&Qar\+.|>XX6E "$az7=خ.\lUm=OY Z{D3tQD-`$/J$<:HYL@_?F g*a6T/]mjŠ3[sЦׁgRՊX3b(-)i.Um8\ST0O¨Y,Ubx:_(T-80^4 ' endstream endobj 1551 0 obj << /Length 1556 /Filter /FlateDecode >> stream x[Io6WT$h͈;5̡@3hEn!JV!ѓߗ C1--J<a"}`)`Dj]^߼A4@k   Cܜ>pnϬb׉sDP TE\y/{ExxFsKǪDP5~fQOYG倷DkWG@PpfGo*7A#uVl8#$:s>TWP\uZ<\m7Re1haޞ/= Wy6FZ;zz*<#wKܤ+ eLܽ2Oi&jvi)KZF2YHj9o؄٭܊ w*gdI5d=eR-p"νS ^'Y>tҥQp%ـ-={q։vٷq Dka{m}Uxٿ^2^x19n*fpf槏5BUY{)sj4T)u>_ < '`^ZDhuYrStwNvnPaލ_ߘy1퉁 Uő8˞f%W1=-eټ<-!&a쇧 bmQ&aąLze߮ԛfKp=p 8.2z3!zt`]DE&jwr+fi2^U毕2}E/EbbRԞ@ʟtquǹ.^uoeMb@]J!\D OͭyEH.?+k\eXdGTBUj#>bT-U9)MT5`4NˍTmk ,ĺ#@]1'Aq_ 8Oɳwt=Hc>Jhl|>p]/i2#rZQ*Wfk4vR{A[.,Ǫ[a?+ʛii. 84.2C :{GNH>^Ӱkk L(@/`]*~*6?xۻ#K?0I\x)XMNɯ!31ZZVojnT5!qk {) EGvN"λ=6SoWq  @)@a')Ov{/ endstream endobj 1362 0 obj << /Type /ObjStm /N 100 /First 977 /Length 2518 /Filter /FlateDecode >> stream xZ[s[ ~c"AeL;Mm&R*%ڲǻqxp ( .&ǢkQVWh."GGtDQ\jHNJr3T]Ւ me!7x1vJ1N#6!a o3.s#+Gi{I:JD́%ScRVqDQ᳔jqIz:0%Q-@CI!+_#ׁeqJc.Cˎ bYZ@t6P~c+%rP No$ N2v) !K;a($b+ "r" "CelE04#5E.cuJ5 :_pc6:+A#N#[ySr%9M2PM4J%asRD'"KQbN pFVv1S! MK '+jsx[\U-Gѷp A5Mv.dkSB҉U! C B2pGb(!xq6ѽDl.wg~8sGy w>b}ø%oy(opa6wju/^Kzg/w*"MS?)ZY |zuzuo_ɛٗ; /ggan4 ljY]g]c]̧߯$M/_LN\N7ˏߨFpc?2|bՇB^]}>ͮ7tƬ:%yDl6a;S\C%K%rq,=wʯuD8b1QDrF=Q2\6W5$W\Mr3$7Lr3$7ʨq%5O{%O^gR ǽwv - &C@WN߿_F jKQ R%^|3">êxZ, 莢M#"d/{'!qc|d3662QqM3 ?ՈAkup!31q9SEgF. 
p2#$||T 'AL%pJ;f: Wv5N`q|∦^Kx:|َK-0yOXLGE!ݠ@ƨOXn3dL AJJ}T|yACYzTovL^%lK~ b{(z̸zps= b6prܐW(HyfNqݵ!^,0~TJHd^Ođ@ŧvFs('`+Ip:>rD c$Xcx f5Qp\: c1?ݻUhOUh@C<[pաroC~w5WpL|Toi@}o曝@nYSՊ F`l18e(7'_!]3 zBR* u7ݑ~͑AvFaHF2 'F5_Hq0P<^tO׫fă( NU;&/AkdM' j'NY+=Vu= 8z5j5$c Śco]Ba&&鱅9f*bfA[་)c9͘U( I4@؛`;Z:D0ܼи\Ԯ h7׿MْAm;[۽) e&8^H.clS>7N a'K JԤꔇN>o3LŰwON%ߑ!zbX'bD6ayX^-̖EflleֲdkYr6$\Lr1$\L>ZlO֧ L>ZlO'[듭dk}>ZlO'[듭fIQB0"AF$#1"QFhI&9hI&9d9jC2d$I&L&L2d]jEv]$dIN&9d6l$If&M2ۼܯ5b-N/?#6U.VT7xHtvrB-ڼ_j#!? ZKj=b8Au܎K@ ?+(9j8lr% l3lR9nP84|YGm؋o8.##(A,1)8Y`8}.4hc endstream endobj 1711 0 obj << /Length 1932 /Filter /FlateDecode >> stream x\[o6~ϯc ,x/ڢ{"Pm:*ˁͿeIPe}Թ`q~z. fA0 FbY/~߼Su|0N?XUPn|]?|XoWVr2JPC@5  zs0YB~/4gax(s/57dk 5r  GoýFF9%7d0ї6#FBTAT?ձvr L1`;d*1QQ^4,Jh @eyPޞA<]>2l#N:| #uTvȻ6JCT dL}^O0 $|Hi2!P7bDJ]yBgN<2>+e7(*bxYT+8&T`sW&N y Ipnbid`{$0V~³  F0a >)+^|Vl|s)gdYB R>Q![(?t hQ]T~\C(JU8Or>brB6{ػ66QB}}Bܠ>sq<0 3롺צ/" E=L8v˵DP|B= W$PuF>e .Mx;u|k؉F:=Ptu6U\{i1[54ʾ3not T+5%Nu*!D\y:G:OSuqbpB+i @:8ȇÔYSnb sF<}YC7inoO\U O,*^\J󖐥ry>~:}fP6ҟbګ/5vCFn_Ѱw(8B,(,MWԴl=ǡe]wG\usJu Z?6Lܴ0n]Tvr?z9C|FwzG&QɃTfCݺhƦ[Gݭ2;o@ Ŷ~4 ō Vk#9H#_f4=hQUj\"EEUy0A)?3QNS"t]:44=:5t)&2Z_ҹ#44K 8qhdxC~g]\7iĖ64%e|ʉipN9˫A>FSwn&ΈXt` 8v#'1cgVQ"s77fݕ?Rv1IZfy.՝ͼݼLx$lx}(ki$&B#.LWP[8ȐCD] |N--A#9Q+-p["njăKj5d?>;oȷލ VE \~SՙNϲfxňacn˲] lvrJ-Mر[11tiٗ+DL 6"w;l4≔ϸW!կt$M gCl(1)[;8{a K#NG a$X¼mtgx\ endstream endobj 1553 0 obj << /Type /ObjStm /N 100 /First 1012 /Length 2825 /Filter /FlateDecode >> stream xڽ[M1XU$ HA!c/#֐V5COv~]W$qI!V@ eSA/5 8W;0=B?4@;[ :3|ThZXu*5l%( U݊@HG QذQΎg !4RZqh1;4#+-05 i(x"hE+QQRyacH61jEU9FZZ1=,Ջ(XI0+ZLZLg pyT>;8Ip9[p#qC<\%+MV<~C[WEp5tP V7> tJxoǘ!E1.C 9.jcaN ȱHsc`ǔ*-ʐp{.QaR5 i"ST.Qa;hN$P`7dJ{+gl(xѐ<r@1ž5wH HPpcu$ۅD[7I("N\1m sZl{YFĚnrhgKȸrqkEBwLhch)Dx)}*l%vEG+u!a.1KLǐX%-R Ɍ1Y͈\&qLXpLBv'-"bZJc6.$PT1mkw̹:elֹ]-N'n!k UsM6k=աqPGUZ?۶JmBc ixmh8Rmm %x,#rgZh#= ]'KF@eӶ6ws˕C_5=-hKq:el5OVpɈ 8s6n~Petэ΃б>kZ(P t  Zh\MVs^KѰ~?˻/^LQ2jO'qv@|y[y$~hοՇ'fd>}1~Vg{yL+y80~9Qy:> stream xڽ[M9ݿ_%l|]߶4HE,BJFsʯjngn)/uۧN*<2 k!lXaEFFF/GhD+CB4 -|# {0R-t5 g ( 
:6%~Z8zx^YΆ/l"B9oETs^ *9(ZT$(t WFBEraEYODQkiM]EV"oh-*ijZO~V,<_tzܔQyМӦNT\jN\\x-ZǕ t |A,`&[:m>᥋'tٯ(=${OJ9x7Zԅ  6-C[+æ`jp8^Ɛ[K~bFpOHMY:cp-=&stu,mĩhԝ}ы&eٴ~&i2X'4(5 f_bDG61Ec0@r$1ޓ"<# }?*?u~˿?//ǟH㯟|$J`(WGyw@?xo|P?,% `_DN@tp) ȧ K6:0lRuu=/;/@h[M/O1/!U?vܐ@POp[1@<@ FP\|DZ=yjoe[*!vAp 8ǎa s >qj844 @ D;Jؕ]ӹ^ڝaZ7:6 @::Ç=l9f*X*sg<$}}Yef6Ѹ'kdE dLּya [BϻK|Ai)NU|Sn°aS3 ~!~@Wî֧1u`\tR./‚mP6<!+W}ty(sJK>wv;U li[ V2O7"tBHiT-vD`Ng%@5FľS0R'J[;4aA[{l3l3GTSm=P]h!Ij߀aJ 7-;+) &,* ]t3Dg\*ƇN3Bb3@8`W[}_l1w[|X5ubSbSW, RnOB199, yr)CSk7y(0=[[Z@J(@: 0Pwk܃F L<ѪP GzzUa/ Ƥ` ChFǎѴ @ Y)=x~A6N07)㕄 swAQU.&t!T};GCD[JFő#oRXĽW~yuj)P ӑ6X:)nƮF5XoWVg]yM>]GjX^X( aVy (d4-l8zp#slWjCoc=b&]C CF% y [sS99 mN@ly R~Hl񙗰xe^ָ^Q~uh"w.^{#Sye-[=-Uy0,/ݺH|P0 endstream endobj 1846 0 obj << /Length 1955 /Filter /FlateDecode >> stream x\[o6~ϯc ,DqX;l{0VHT׏Xe"s/D]z՛w>^N^ܭMvRew0K)XKc$"ɘmGp4aUڪ7~"]9Şo9uqL LE,3kZg#ĊYд8 A*Eɷ"t{* U49 >G2ޱûhBs)ШfKe 4s+<7MdyOTztҤ;Yt*,jr'*d\#[  ͣI~=Pyq2±:$atʋ:]qvɩ~O,.Ӏ`MpSwHG?U\<*ǟi$՟.,ŒNelS@2 X qY+_lZ?LysCW]T{ԋzu Œգh3 \Y31}y ;S ߝ"Ӵ^lyPN2t7g>~-BK"_fݖc3ՄLf?kS@BB8\<8jB3|vI0ϧn9jUUyj ح> fP,MfO= :#ϯ欴LuSuXxF"0`eC? 
b8umP=[?9Ԟ_{s,UdsZ@:Oճk(%#,h{pGKBGP;DRUv_^vhlpcPk\\K1uK 8qkk0NOb#Qu8:` f@䱊m"P_2~<6] lv FYZor|M5?mXQro¨ j*Y.)-EߺPjk<f秧} M0@nIZdrWL~}| juKLJ69yaFivM endstream endobj 1714 0 obj << /Type /ObjStm /N 100 /First 1022 /Length 2668 /Filter /FlateDecode >> stream xڽ[ˎWplxYU,l J$`kDBcp>8]}兺ۂ4a>]bPkJj"a_hbâ%拞TF@ 4+qEUpO:Ot԰7M$f%J0qg5q74:+\&ds%FZn$Q JbC\2jD+MSU(je{)*8)uJ 2դUWT]+L澁?)NtRRN'<HVX($(P8 sJE皬aZ=nci;S`# 8 G6`4m1 < 4ҝpvHE:1Aͽ+p)`i40uӫ:Wͽ!%Q}5sr~-1M-Yu:ULY܋/ܧjxL5)~$44.Tѩ M]5|᛻(s`‹=oǻˏq~/| "Dyso_pwZ: y+$Z[p 6={.?__=>?o8(gq<i5Ww",]rAeN$YV .%F :s]C*RJjJbq]Fإ5TQ 9M^p2_&tup.㬴g̨VeLl =0lT+Q̪}) d TN+ OahIh;58&- FJBFf ljsrK}]"yY .yodP9>N@ˡ#}f -Z࠺Pe{ n.͕﷍JD-Z Q @K $|* wٌblnHN" 1{D7D-hUꡪ$ d - Cb IЁK(K HA['%^kSgK,mx%Fz(!0ZzA̺hi[/RإBʆ]DXlbġCĒCw9ńy~.X[JK T8(oxhmǷ=$űU8(:刈* uK+{>P"U@G㋪ E>?X:}~HR[ç_}_<>|/•|ľ|w?ރ||"]^1<^{Yz޿߫OϿ4%C疬Rx?o~| EEuX\bAXH,j,YYYYYYYYYYr 5k @\r d d d d d d d d d d -[ @nr -{ @r={ @@@@@@@@@@@<y#G @<d+% ĢBcbcad d d d d A A A A A A A A A A A A A A P(ēLR _rgjḥ8LP) eLx*F D/t .󙭝W 6${|!L椠fߓJQ7QĂN<'Tl(`Sqm4Gje48@)Oچ*7_+( .p D0 YFV :[/;@XЮ{!Ps~ [ʑ )ܞ2Ί&j-VI$1AA sΊO9\=sڒl beERZb"s[dĒ5vIy,a5`WV3.ڨHlFi=>FT?y#jyIc"Vڵcu1U!tC3$j9i'UT VChoFtD*j4ɶ5t|hT>E$BhfgL[6>c#9,?.~V4bj[g7!!ݧVgj9) r AG zkJ'l3vE,ۜ_{?u${%1k] Ǧ1<֯ z`<|1z`['%XX~v2#Tm4J\E9I;Y9,U.[<+%qg尴<nhyvIԖ+'qg%<$Af endstream endobj 1848 0 obj << /Type /ObjStm /N 100 /First 1014 /Length 2021 /Filter /FlateDecode >> stream xZn$+L6l$0<^8"q122}tWg*-ZEs.՚JV[SzjdbJ HU84KCORbN$2%װǜדDoqWj]pnIՃ#Oj\HRG@s^K:lw0JP7J2i7$.rJÓB#u3#s^K^Ɯדy#aKI$4Y/UȒw,(9RqTxk)\缞"b [Ӧ]$ŠMhs7rT-DJy-s^OzZRhsj.a3E{ `C-^&\JP0b"z3lz?0;R" pnkixaXO#a#Ym%a3̰T)Ψ)=b%DW.iⴄÞyu̫\,t^pj*!X wSr*wu^nVF2TJ:-KyM*o<\ϏOOy~p<~PP//oxN=LҞe&dV=uz&]ާ{J7?ퟏ~? 00 6 fyBA BD] {. 
Ĉht1Rl!AZ%}AXօq ?@!&Puja Z%:N9ƟG0Z!f}r!.~Z+rS d}^ٷr+%dy0\!G!$hχ R( }Ju3PHZ;R"@ȑ7@bl;@7̶'Ӈ7[p'캮 7$@;W>,UGwdomo[gyV'^Y97rN NuKiw |DLG֓BezC[!"PR3#*KԩQhs ͟+Yo͟uɻ]Qd,Y]RQF' g>0(f8\t|g)RyF/ m '=ظE"5"t*'`+dcVlpɬ<*)Rz;/m|& vO?}M~y.3X F}y 2%.|~#7oߥw~Ns~|{_ f6 endstream endobj 1851 0 obj << /Length 113 /Filter /FlateDecode >> stream x332V0PP06S02U01SH1*24 (Bes< ͸=\ %E\N \. ц \.  33qzrrJi` endstream endobj 1855 0 obj << /Length 122 /Filter /FlateDecode >> stream x-ɱA($ \vTSHB $:@\#Q_TQUE&MG-nu8M [Yð,ΐV]'v=WN;S3uz3x:cE_ endstream endobj 1869 0 obj << /Length1 3085 /Length2 22447 /Length3 0 /Length 23991 /Filter /FlateDecode >> stream xڜT% !}݂l`.-Kp \Cw$Psꃜ2!HƉ ecdkhktz+L3;"9B$W98p/:%@'3 t09Y8 &f(:p̍̀ +H.bk`nj { 22qЃYJ@;3,d*sG'5 Bhn`dkMM9df?Sk/%+/SvBјdL5Ys##lc r(KLKVHl@V MW|N&NV&7#/;_[ & 6 'FjplElA6NLcs#'!+wpc_X`fb`bb0{ nouyps"brbRVux299L\:4wRL lLlI?E<տG +y['pTOMѿOe' t:=GK˄Aƛ_:_ oTC doo-ANb[_ઍFdl$;g_k߽`f&k53gl0Ak|V?no/` s?ڶu3,XwoП*r~n NܯEpck"fokg+'s;~ '`2~ldlm}4#Qfpmd33|v@?Xo?9 Nu*ǿd涿O n;+?eKr9R7/-CSէn9IL_: ?w~2U `3:9MN}8p=^FL? 
8a?F鳀w`O 2a>(}6_`slx,j\w9l.%I/]Vs|d0>&M[r7x;]Skc!& SV)ɾ>4q@[\ؾ*BQ Yt!u#K) ;X\Z%9؀ƦL`r HI8AoG_= Hэǣ IBc\j4{^rlA]&H-Ɓtdmrc&"5@YjO}- DIe`(."Oj[t3*$ |jE(~Au% ٴx8Ė2$ fKL?7J2 ()okV/.Z(SyPw(Xը.`UZ:uW>CK \x;F0HNy=qz CM'*sй u$ :t˽PRӱB+AL=z"mFcH u1T ןnIVEt3Fi,HtX8!KȻ6U|F0Qr?6ı9=M3\yR2]"-cudR\ZK4w9my6gݎyȪ )Q wlﰍ9E$.tl>`e,:H+ F8~ +R"R89lҐ$d#Te-pM!-Kgt:khނ|X8G%kI!5npV2(HD\q7V ެLG ,EnjxNꧻw.Px)fsd4X)?&$`rzA pB;Qa`Pjڮq*i\3v=&{X*'EX+8V>l3UE>+W{DiD{z]}|q׮?'/Z }?~[pW- Uͽf.{87Jә"o?[ƒ'gyU3*uv^^QC@yw`: D&%cȲz-6;/ZK c#j5cۈb"uo Ԡ[^p-'tjE#mk;SG{pSB.Yテ(NJ`׼#9Ee ^y{T&/GQM5>*bx-v.8.Vi)yn>`.Z” r}Vל@wԘ3I iEV<rͤ8e^\Gao7% Cvp/cZc(L􄧠mY%2΢(:0~G=Wڵhq \JadSQ/sgtQ v`ȭ$UηrU--m6 42R Ji1)ؚ^Upv  pih^uM`\2C,C6w!^Olğg]GaOQ]gsyJQܧ.'iHBYY]e-Edl]yxo~/;RM{r(طgƣW@t9i:mC)8~/̗;wg6WL]h1{S+BPN< w%fxؼnsx|.jՄtp+QWaՑ+(@0 >U3-G`i2"Ħ]_ٍNuګ2GDcÏH)l4Ci4O.to ~ƕd0F[;ģEלK>)D=X<N)Y;1Ǜl8{ʩHթVyn Ic-ԨӕhUekH[:gu!N QUˮFz:,Pr﷥r-`"|\f^|oaGej[=9K{{`'MjԞ}Mܗߐ@ae\ۃM ҷŻe}ǼfXtɕMZ9Arq)_1o5enK ?Y7S(ĿLb KXI3?;cΡþ*bUDZeuwp>bEfE@.?FhN<1r}jP"*-pq R(N剂:TcX`UgԧcrnmMZ:"6gE9b'-öNb)ޕ_ Zr7} g`("F\?׺)`.b jဩ5;;]MV\?-om {6 ׻;:{>!3ȼ3mh(sRj%5Byf:=҉IO*:Ye=̗4а|{ڵ{T P( ݻ3Fi]d-MPA ܈B ֎2e/ObUMw=Ve=9e{+vCء+Uz:vFa1^ItR~2b9٭$@Ru`lp BD ! ,k3hb)Zi/(ۯ߆] t~m+cHrbݥm$f--l,7E$8ҘCn*{qSٍVy ᎓V;2&9?Ol{'Z<RW@g~[iD'a-}`ˇ-j54#. `Gծ*XP[/Xg b]ss,i̭[/ȝ~3+)5Z`g%&7OC$;?Iplornr&PW֛s19=q2KMht!u8lM|s88.H>-K1o}eliه)OQb}6;2Lknc&F>Jq3[)c1:}.Srn˽!܊֭&(d9଩?OlQ(xIU+s_S7BL[p.xָEja Yeiy# h!&yC~+R݃w*ACN)iwU)}PҐWY&U7Շ/m,?4G fB)4ܯ'Z8 )5uX r" ,"̯D/G4inC~48"U֝Tb'ЛmqvW#nL$bA fPf4S?2. tΟW~Zy+<u1ަuYàj}0HP腂&JV0OoV!!5"Ğݝtzpb C.d71ԶB1Q[xNI>k%273Ο$ (7('=F7dFC[ӵ8،:l X8dY"@s&9#4v&u\/xuDhOoW(v8!Q846aWɯ=WKWԍ>!8(͆'nXU=,%4V%rvonx#,P\cn hޮ\5 zS'5e6Dr+H@YV'#( -y= 1ҟZCeŶܐssSKYd>{Т~hB-H.xğZ0Me,7iJNX}!Ӽ }ocMF2iEGaK;C(-tj:'ャ˓UBc`HI+K>A^7J Edކa˼@j$bOU : w<[zd/ WKq{wo\ T02J8MCU/lF~Ps< n+/)Z WG? 
r%$6@ڊ3uH+x92֍x#!ƽ ƛج&sЏnzu۱=DZg"GBkێ^n#-lORC$&U̐ gRv@\|܉_Z=(^a|ID'J#"N4߾$ɸƢ(52`;zȍk(dዏIi[zZj^Lt-_?iK Qu􈨚vKMOKҺY͟u꾖xGLQ<6Œm˫>MTRNk> 3ruZ;k F;sGOAPQvs:q$|+~bN*@sTՑW~yɾ@$hoDBvS/eaȅO+|O A[+olNU /DK79 ["P'WOI{b+.O#Oll˽$@=\C Ud~i]q`cc{Q󣏂͝/"+W sP)fA􅓅/3!O*qgOk9WsK~ Q;kkW\W!tUuBJi}P){ZF05җîe0lY-Lۜ`m75o`@HqtKݛrtWZGҴHW=۫'dU#>z$@(گviѰ/yeodf0 #a 'P}o#:; EbN@#&$?vي먞HIt'ѱ|m (]QKzR&y~4NQLq8<Ќ%~%;x=1#lZiCs,f(lǥ^8[Ҭ##Z[Jūy=QQqOZ:+ 8?Q0*w A`b꬛fH  NNo ~TbJñn4l.Z&::mkuR?jzMDXZ6=١U~fy0>m|QBN51@ B3u("i!k?t؍X0#qnH<?,M ߵE鿱])&II1XX#A+Br{@khڠ£K̫0et#SQsWPs"O{w`I""GEq2CO=S7JF]5 HKj1 a\%y&1dVkJl4Pk'`gI_,Ey[lϔrIa9l?-XsϫCPDe'kl^̙KOyݘ8W  (%/~wBA 0hitꧣ' h£՜{)G9F.P`dO|׭}/h7MgZd˔c̫5z>oS|?PK'Ոr( Kv)c ?ҾDiIF{M1_}h.SK] ܆:*|[wB^B=U<]r(|'$<[bN]FKgIr}:~!ŻR5uz|Xej joפe]|+ඳ74C]Q?]%ڂ)1:5Wa dlS"O/74?(5 !Us搽b`m>-Mesg~@*?} R9Hiƃ?L\nbbVO6Cs}պV6([roT^2 B0 G@R{iMNL&v䦤4Jk50/ؗ(ϚQ<|A|o f0M¾7vTObh؜oбX 3D|OrA MӺi t97Nu[fpmJ hͽer|4}]QD&0{B>Ɨ(w6V. Te ;9'e]M2D aۓ UE[֍#ZEʉj+Yz{Cln{U9Ƿhwk #?{ ,0 $ًۃBZ=6Qu'~exXQ?[.8v,w#ADA!׉acb.;P1p5ۇ1(~? X%euzvJe? q]`Ivk|xpl&.J[f`eNh܈7`X G媙tmx/9ȸƭBO<˅~icdO#HՀnłk̶] j֓~d 6eb2" G"te;QE Cл|n06ooosq6'`KNQDK e7PeȰXYBGMFw,ƨ0+D϶jcƉ~} 9PCVR:ap=ê1PF1YbH;Vk4"Eed/x}&,5ث2O# pæW\16QkXp'̝󪣂LI1@5558-u]234E/TSVcnw;5o5_hÜ@u-H0jJ~3ăZ0+oD(d +q`kG{:Vw l~\jL6"XHg՟@s?Й(ykQh޼La_TI3s #aICZU}? 
ۼFV/tFnWܡu/R]߮T_uAo݁q}EyYa|k#m5A MgMᛅ1P2  ]0da+xۙdPAahh;4oS[9q45nv&,/uF5ydrxe-<j}fm_ P)BKu2ՏIލ6lq:\YP1~r sJ|ߓ} +ںAhRkyQ%>aը2B mB% U$Mξ!r'^ iR|leEPL`-{'vvu-9e|B(r'9R#lOc9ORTc-=H˲}ٗG;T } _ نy롉)L:n$"/fiB"|qyv ZHOEIዡ|5!1cA\|a%|Fk=&| :BЃk7Upx ѤBa> {~l2IS0߿ej5Z~[ Ը*QH]wWwA*u:dLXѕP㣬D1m yJd ^s-ԈbI:43<ϥ9n1q j,?/pz^+,ƒި8%ZAWF/<)w)|L$ݒ8fԀ̙ƨ;=bFX[;ͼ/P!-ZIsOl53.)}ϵm?o\in *a_ F[N#Jsd5 !k瞽%\b3?K[T|l- _ PDc/`U v(0/ōmIqZ9nؼANa)w|2ˏ*hhyCˎöK*# w } 4dR( ɇhchXzr'0}{YgLf.Al(Qtm+)OL!cnΑWܵO3)!*łC2;(/fM`•GC*R"rZ\13]4a9TrW v,8q)@AL̊8ٗ7FԂɱ |oY#4u;@T+I-OcXFq4}*9%I <.| lL˦Ei8'SXeor)ħ$+8O@gt$YUyD?k \Jx/d; X= @wR,zICf@ @e\S-iψYA<AtgI-0@?z<'۾9QW3ê rfoյ0D!mg%d@ݐAUW-+ȝ.p5lϟ(c( 0}}EUb neN({pf4hFfv#:C fڋ; m_2r<v0$iOIAx.WAK[| @Iî95=nZ?4cF6tՅ2`_5ێ8WӬ ʏ{ِ< xO*jp} Lj~>W{{LU} N5VqR:\J/8kɑ/īCm{3W?2im08X3TmaWD@qAK-߬{!BDB7R,s''gjP ƍavI9".ۨKDڇwocqt_UNH"RӬNs0cyxbe`f/E#$a3ZiVvӲ鹽F@g zNxr &hn>%L&Hu'Y JVL%uK"cfrykqz<)( ,  )WY|("YҠK ZC-(-JKeՃ-?<)^ ?q&֙qLC S"6R-TGgMŽV B2 $ge~m\A跥't35(W:QXIlUe7txŷ2 {}Wˡ[bÙ-WM{giq.2z?qBM״Pڵ.,s2j'rնo!&zVK֫^M2j7c7ttC} `bTU*#t $AU!^ە_p4f1B[3pi8^O˪TX.8`,?TĐ4Z3 kf < z_#$toXpYY'veӘηC|w࿻W1?8 1 &ʄ;Af:8~[Mg(UKTYO";Ot[h7S(wMi5D!I3,'ʌDZD0o2CkV,Fy0Ug-VE4KAO6u9jwT;( 33t& WQ띲u ycm TdBBAv:&O_km}L0>t8y?%~B3y[{4(iҜ X 1˾)) '~׵0Ši*WTv^Nttz8}B*c=MRgXĜxMg5B' j6S{( > zڦ3)'6@x_/UhZ,R(ivsAђmQX5,Q.Ӭ#SD{b~HZ45+w tY Z-mzmZhH'7Wi,PWV)2' E{cƾn j2k|A'6 waѾ't|:o0o0plj^-?,К%J5.VD[}X9qO"fS@7Ңu2V7z%zG4F㺷Ѥ&3[e9}M>jjB;>5ҙF[dUfϽkFz8+o-a9d͛~E8K5g*sL[O;Tus }3B}GEZcshsuX`J!/d?J{; 3RT,^CZ'NE[vJ'iDmq޺ sǪTFSx9~CFĻ jv+dRޕdP{0&F<X/Bn%H߁J`{Q)d!ߨ埯Xs?$#pu&f׼KœLj^/3GUm.dGr,H8ċ06 i xѡC`q|nq;WS4xYqj0K+0zv$~&#~ ͊)`txxUĶӺt]꣔Η1|/-3MI e׍;_, =/ܰ6VuԀsC8Pd$ۈ/aR ļ~b-B 1:A-3,9^RPd8^1hZy,FfFYw$U,PS\{j 1kIQNnX§Ɠa_\<^is,ךZ+KVx\# F월8CJ8;c ?WYaXiM= !E/|-Tgf0pMVg>+hukn@hTtr}Ċ/^RS 8LWT~MݭZ=*쎗5M5. 
׽ltH\ScB `Qih/}#uB>}Iq0M7O>&:<#aS)[ǝ%Z)SG}~qG5dz?/ZΥ+ߤU!MNG >u@㷈 -V6:0gJZ2.c@P׃o\%+"e,w)W0蝔;~X* 'YzH`#PaёūQz0@<nb%Qnj k1qkVz:yRq*uh):7O=`B99]5%ڝK0zZ ^0|5GbWz1 '11/zXOfol"iv vުg?xOf{i/"(ť|dU~/jdہ'>ӬOo&%qBݛD5h)!b^CM;q?h[Ѷd$*6S<=c^,4(Z ѷ,=+˶%0bB:fam 5U޶_'kqGU2ۺ:Ct f$c:͟o5+VzԾCd9ch;_+yt:5Y !y I$Uƹ!ThMqE-vfeb%1/qSε˼#b8E|;Hw!eQ޹6_RYw*s=ƧWbw5QY{8?/ pߤM#9) "[&Kg !رBcєA,S6lwumoa 7[$?eīO2Ld^<MoPIb$}îstl ⑓gY!Y5"Ϸí[LO q*ۍXuqY*Dw[F=OT'Z1ړ vm`)8 J ;yh¬jXUBm|g4«%2<([)llȷ| M'2l*ؘgԭ~)ՋAYO^V[L"yC4 כZPU.|1 ?W{6Qj4P+8謪1Pqc|U|M*r!7ږ\tɺr7q Hx";/E3tZ>ǝ?_ ߒX^oSih(p;5)]CYu@)yy)F:lگzt$Z'gK~aGȎa$R[$8bapcX6zJbuˤU]>#鍣#?FDҹ# iX2B’N#vd`R@mj)l2MJkr@_ d2fMؤ\[3H@1|:I튧QOI#:0խ;ÄĥiWg bM~rfQ*t”zJ$;`%+: S_,.'8LG+>Ba-^1R+2(b)#"aRDLӄ%d@EwͥՇ6TBi>Z$~W?*m𶚬:|4$ $~z'租xm<4'{ ! @4!xH"3^iElq*."68Rvm>k@NJ.1>IHsnBg(8# Q lMR:,oG NHR&>/<xY@T^bPs?:?ω,MܐW2磱x\~[`N4h\9g`F5fp%?Dt5d$)+ r[cl[)7j.,q$-JQZ8=sG*פT&Giޣ;&+EaQb4 aW7l>,qFq R1 r^?bu4 AfZ~QNF0 Vt %FĸV=?9E,$++rDb/ ,e0 +дC̎o&ImϨL7"J2A1,h˸fj\&7cQ1]qVԁ;ͥA|2:U`-zi6BKfelD8_Jg<͙ۊlܷV U{N4mZK.h8 T$Y x< :z>竨ZWA:M귓x;%ZM!}̱Gi^ʾ5ebrW%d~Stw0!apHЖѧ63 VF zVާ9\CT\N[#&q6 h>R/Q\"5.b1YRI"0HI1@lCv4/g˟oU%P=Ll^ PmY0jtیn ]ǿE{qϊX;C\ nF4'ȷ.GV D+5HuS'2<4ύVg{+U<nYqB?ytlHuvi; =fC`?+|y#G}[_@ P(H# 4O\rV/g;d}ZSެ;#XPFt4Gl@V7,>yTαǀԄubK# D2U `Nm,T> D7k‹ MSf]$;{3I L*[gI÷Aפ4NI6ˮI Řþ =' xA7eҗCUe13,yO??X4XXr:Ē'p5k).sS\k.WN/߉!eVr䫄q*3s`.Y l!aOE2=w ۊǡZfAQD2wR[+xP7?~x$Vd ti Ua5χV(:5oRژ;N@ڨT*`=TVƇZuc/_Tla#,GZJU9~~47=W$p bkDzn~vc{UH%USՐm ƶE~< CbM^Ĺ{gVNz~됄:xFw6eeE[S%U._MmIH[u~Cv/Wf-i=OSl~9\wF8KE{rJB׵1>|dd(Bt -UV"0g}շiu!1-3a#OǤ#{:0;38l.bw|֠.Ʌv]l*˓YMǣ#Z(\{| yA6(jue&=A6~ Q\%'v"Pz[BtB\z}nY>XFSx L9l<#ʿR9a"#yZ9\B\mַQ>NBJ7H/lͅ*kO}j6V #U`?uer$@UrěNZ3*\;-ܳ-8 vDZr=tԽ q sO{r/BbNHGvI ] Q~y1!׉mvr4rwPK3~mH&56Q$~[y (FH"#Kr\*+Ù]TK]}&I^SߛpSɚ딕OU9;gȂO \({UFQAeD "<2# ϖ?gX%K A~|ɘ$PqC%96: 6.ͤS ?CO;;*<Õ*Nm:$l20 6_ bBԛM&p:;J b7e,4'o7b[YcH[ Ɗv)lz`V6S_CSFbFRsyeĜA seV<Zj#(5AEi$;{Zb6?g%iTөEZwZ7XSyouM#~PZ/_!`BG8%bv$8%~v\F6󸀣>GiOnF:n~ǶhSݸ{ ftesy~SBJY4rU떤273swbVZtF~i"ۆJKkxU,m*R3<yME+]@kxaSq>sPy9At>Oryq/9+jlen56+æ£[jyP.2i_KAal\b޾e*a Xʖ]1p . 
d(LڍYLuFmjHS7S.i+':/g&d78trC#C0+azCY*sF 1 뾅Bhg%iA?{s\,bT>VSj擜qErW -#:wA .k"(Hl$,7tC2}6X!lCPܒʸ>3¹PxM];4)jp*G rئ0^ug38^H/2:΂+7+%aYDWbN[}ᩁƁ")2kp%F,Kgx}a 9O8{r7zHKyP:]B L2[QˇA fU)Ldm5O|r‰U`7ǚ<)m8VSkf`Z2$aAv=9k"Ɗhغ/pzg*ZRGw@yrHZ{ [ ]mA1ؓ-6zs|[l1$haŤ4iYe, .~_2C/ "[oJ "i*\6ByK- `|9r"u3MOQ\u3Re-Td}QMfVѼc}TG`^kKI%EP-s"|[fEI%cNv $Jk{55Ӕz0m͇>/$\!ywp\%auK!!5njԛ[2/r%~` S?D<\%' %?yiU9[cfAs?:g$[#divBI@t;E%tiiRH?Jw )V=oԊeENʊ*ܨt(ܬ{SANU'~N[P~#€ITV9V2` FI`M>Ď`EXv'1Y݃VЊdrD=,8ҟU̫U: j^0D)L.le0?>F$ Oz)Lӹ1 Cr>#C:OT*c6Z\>DWq Ae}Sk{BV`g|^W<݃l]/>ZN6 Z/zI7 g"{Αnm/mw 4/csW`֥&1)`IlUm'ōN)*Lȫ|1܀d Fa&#&F=.pB0p3 !AgD{ԙ=Dn߄TM7rEe~qF;> y~2dI,Az [L&A`r,ȳb*ގ ؊U*6»^oDs.2z_aH<?C7oRuBY,m) tu1jۚsd Knf  SK3b10})Unbn.]H+vɒE [r̸Ԅg.=+EU~t %k~ɔpa@f+k(c'ŻcDW)m-hei0^(zǑ zZRB+#1U)ڱe< |[ endstream endobj 1871 0 obj << /Length1 1386 /Length2 6039 /Length3 0 /Length 6990 /Filter /FlateDecode >> stream xڍxTSۺ5Ҥ#H7 & wAjHB$t^7J](]zQA^x}kk͹d30T`($F$[ PT!4c$f04 T,PD@4HBRQU C!ahN% III(<0 8ܰ!`0H#K {{{ B('y^7 0a^0(=hB$g8/1 C!H(0Ðul$W?ѿ#p #H/ Fa^`8n PW2 cBh8׌¿`Y UA4ɯT0v}+{GBt6Ex´T`&ۜ`PJ\\ =| ¿ ~;31`pG 0@tsEps#Ik9ƞ`0( 7 iݷ43(@PJ@ 1@?X-# W}e?#^?s顰̅xMtk;_YWwGo?_v#| `UjPs_ՅAn€jPB:a-+Vp /e77 3@0( |XA\w4]0YW AAMDL`|~ ,Da!쌁GɯؙhW98rLV{[0 B2?Ȅ8UbP欁gՈ" zX]tQeg: MqDmLПg'Dl* XG.d44Zxzl.˞#wN+-n"7Z^w D8N$Ytfom%7k2SiCu&'NwiW`O4(4zgGl)ð {x1)QMmX㸅ȣc7RՙݵwۍF=UsRպ\RfAd'dPYcBA{hۊQK,Uw ^4mu gxš? D?|p{jn+Aݥң"ę7Ej:"v"7[Q$[>S 7;<Qdnef&NJ[DVҡ5r=gUw8(BJ3{9Πsuwo!!|_mTEQkWM%i݈{1:O;̴LVAOE;747LE?!һ$}MaR4͕zWd'~ 3C?~ՖSv[&-Nn䃼@jie5{左[F׽Ts UIȧFr):]JZY4%P!M?WșhϏ$ءaSzGQ4cQ˚]WV?X[t8 4"Se =y<#0lZp\7.E{:pU"U^hzzIǶHaITX>oxYPb'yq)F~Oi7&lT?ˮge(l~90qV9]\|>\*Zdxv]W}[?+gM)e Pjo}q}G.Aj`{ƴ5=G3WC*IDzZ3+W- u˳m7fHqw0LgJ+hR7RI[<]6C3WILggdgltyͱJR%5j0[0r'm>8i(s>{meǏlp|in|;ԙvgn]I0S? !0j)n-R}E:/!#G㨛U9:o۴?5f>b?^\sNMܥb=!ڌ8wnc\6΂'2,Uϼr`}Ʀk^%]q[9NJ [x;N&"- 5z.6B<{5B޾K~'\}BЄeG4lz}]g$-!JXo*T2.?`gl`)V !d~oѣnW?wݑH ]@ O7}oz]y)1X R|[727r4UE]zaEi-U'U7yYhc-b0kx'8tx.Dѳkx%{@! 
f njuɁby蕋Iv|Ho J8 3$%ͽl˾&wIbpa[rfR cG(]S6!bs~P^Ξ}<ѐ&A$㰓[v²s&>'+Su oR!Oωm") gK[A!ţըC~moC| [P輱:Rǯ.n"cd67wK6Ù_'Sp|,F|a.2))9 \++ĺ| ,"bBnUhME3ƢQ/~;XT悔 MqwQ,;[П!%7QM9J0XHtvdK.8JpS\dYiہQļ J)N|[!=͚QbY%F~=Q?cґF՛^gl᦭*Ҫd_-Ei;·'Mc]L]ecgz z 6R kSHXܕj^TQ J̐e4>c V/cbje`rbqؙaΌ O`kn_EkV2BDKW i7Y͎rK%ȑ/ɷkhԵW{|Czn,)v_-vwı{ e yѼ5OR d;, ]kA\8]vn>&אY8Ca"r7q֚啢s;<5 Ll@.Or%Ռǣ==+䂓6sS/n2~ }URڈV0fo0pj22fm˨@.g^pdt,Pb쎆DY0g+*mռ?sngS~)nFXN`fLe鳨N}t2m `^uyu'cS]0 `%O)Ĕ J(RK0)a䫌  "MO-5Y@+횃-aF $O8fh1*N>niȩ.38Ep:Z=g\P_kn+:Xh߄oqʑxXv:#-"]SY 4{r#}1E(BuY0ՊcyOB4/rky8H»rCo 27n'EPf^X|;8Ԃ&Q`YKFY4@F3nfyXܤE)b /c=u1r5|!*x]m:1LJukgsC:!a\ ݅xVfO^z3z:G/NT+t kNQg7ʯ62OWNm7w|PlU((?=$F_d2R^_EU\UE"||wp_*IA؅ӊ)AĨq\ݱD?jTI?"+!r S ;/B،1ПKfv#{POlduk"'r OP5KֺAyY9XbiD*NQz)hrM3Sv{COEW=U#sSc/$.gK!Aj Cb%\cV 1B&m.T 2@"fUR_B>kqQy'E w؋,%t=/齗AA]ޣߑRFɓfab<Șp[Ci$q6qnyQ 7(%CYFXfr9bR3ȓPW@яPHVrJU͋7p,lk_*Oh}'yIk|N-LKR}şua sjR8Ė8w_noUmNf S`{*js,W|ƩI)i"flvX=5S]j}1w,oPN5b* ]*"KzKM%)։u.MCI.LDb#P3pAk˪kSE]u.z_|>M`qX>u"9=zڳaz s}%p^5`,hoN~Jxd~;B jwgTFCVclSd,iRоTsIXa-s*:EG-t>ğJX"[ss=d_SK hǧ'y~{j2K` ÍexlTI&yʞZԁ~᪸ nUmV}BWQ9MD`Ͼqn /ο`i$TעKr3ݬk-=mxA] Hb`#b\ ^y)Dgw06|bNmP`f&2E%{ E{S0d3)Fy!Pש݆mO/O&h@*-.>͍$lmKPYg5PCk-Ǧ *\Z&_&FLX?o-X=8~8 .+"=`Yδߜ7W@Ce+37q㼮Tw;?Fz0| /|;ܘ:o) Ds =K-a鴨\gWE > stream xuSyQa"AXHx\dDg"B+1+|&WY#]AĆ#t rt&TA>Z4s:¢gBvP#X4L,SB ]3i̜!>@͝[q?,fδ6Ptw'alPXp+c62@gH4Lx`Ѹp;џb B;E`B !@5|SGa5 V ku^(o>H0fn_T06x)"o1WB;Blľ  îWALd3Ep?5wO-47˝dq\xӽsiiWsYw! 10uL 2)5,fμ87 `px.1"`P @7C0sN0aB0 Q̯4xf.=eςAp+P/AIg'ϐc0nYXm,Zn+t^fD6r)m`9o9L{c" j湥i0=gCT~Ф5EkcϝWFWO;T&#񺓛Qz|%1͏(u#%[҅S.x^Ѡ[ꨂJvU}E*&6޼d(۴dzt̬]ӣ뫻5S^ّX}Dkm60dx0t~zli^Kɚv󶞆{k'֩#%ILf=?x$6wjVurhu(237k<]iu4Mтָ'" ^&?S^PZo#fn=q-ޞ'IS 6Ɖg'v5+:+E-%F#/7삯O$1w_H\W8PAݓҨ@BT9>2hZJ?U7[qf*L&\꺪#oXl-Aih\Fѹw)}ʭDءx5{b 2+: M%w:~uxe[ؤ=j*/ާ z:V]q[e"Y)sa@&YDtd[~Lwp[:eMY1uX|ƹڪ~9qluL,a$+o[{$mr>[4|x~p7>Qi\XZT< 0\8e@<2}llDUޭ\Q=D-)p#1ve9k|U\3)J)}AؾގWuЉ<گ4kli3[}!FW7=81&A[%E R9etI犓%?Hd)g֍{}:drވ>~s@ҞhReQ? 
{#nq69WxKKԇn7r겜p=*VmI.xu$ #c|?M>ՙe:Y`{Yt2C eͺiۍ{6i8U捞5 K֭^]%+ ڍ#VE\~E"Pk~%lLs+ęyoj UVHF`iͶ8QO 6kKZ$M sSC] ąhv~B1Ja:`:>LcKRa-4&w([nR(UK}5*a㧬'R4>o R:`4V̷(2語rnxjo \s͓T҅ اPPhy`#qRãvEjA fR[SiNuC%eNy՝թsG9޷h{cdE>!Gm,)hi|-M7Q21dՈDZêhEm 쩒\h endstream endobj 1875 0 obj << /Length1 1626 /Length2 13646 /Length3 0 /Length 14492 /Filter /FlateDecode >> stream xڭteTے-Fwwq6!wwwwwwAsߟ~7ƷJfͪYkPj0[8A`fv6#"΍DC#48>#`GH::yXYtZ:L0OϟLW+Ϗ;_'j5`icH)d2@ffocP1\KG?sG_w\6Ҁ@\L'+3#dnf?vKǿ 98p v5wqTU(`kS_]m"-jo?^ zeXظ:ٛzon6 2`L],쁮``5 oݛ:9{wq-Y94meBbkW@v-ܜt{@t  {OU$2˿O-K{Z^ydLA? @C:{5Z8,l,l0ڸJx-TmKS?ۮۀt{fv6iZۘہ. _ojJw%kz:(9Z/ G73;C(7?J`?F dhhMA6 \\y hh.lh0=T֤Y\P_.yVsi?Ĵa&yٸYa9"#N!a<'9S_D2=0z^kWfp%B1R]n3s7vҡn'tckK)^vko"z"ldZU3o L$ZxA&. br,%LZ鈪4o 0CJ2vVho˅ܷ-KKϿHK?%2+~VRYJ?&C2 &.s_iVqA ͎5yD-N/T~H NbaՌotzQS&tR3' w` Yacx:-iIڄ5z`^igoi~/SyciSyż۵Nrs.e^F ՅPVtR+3>]%):=>b!RyLX ;rj:,0cԂvJ6Dƅ=z1e4Vy/+({W*ߏ'/%4I7,Z{Lߓf6ȪӅ?űI12e}%*<9O@X,=w*(oDUn0@2P[<~U|xV.4쎍R`B8d]E9+9.K2~5)A}pdS=溌I,MdH_abliu)cI| ,ۑXf ¡Nv^-”u%XTv<(Fo$QWb=tufDa#硊OрH "H8 ߙq*X@CW= IKnz3`l\+l爃/މ~+D 4}")J)t7&P= xJ+ïӎ`ͳۖZ鴗+gp"\ gC,j2n,a%Y n%3b J)nF{DOG+0X#/\,ȗ>&z_[p0)b4ֱ4,md/})\-W{jbLT8yU}D$~չ23Lm1rLCh y!8~ʎ#?NGI3_ױO*5q_ٳ0!H;yHuWHZi uPx1D粱e0ԒmL׺ ,v\e9:"O>͸C{חxv]hZnS^P|3 al{;s0U3g>E g<%݂]J5g7C5b7T@M4*1A.)4d}STm}/7Ѹi t@ 8?nvƺ!Rbe:wPP>_'20Ԑ:(GrSZw1o<)U)0m"HH҆9j}=C@z-a9IW]3ǔF觙1[L<ax C9%BKm == = kɟn#b~V(T0KٲLcXx62{:kf/?fv0f%nƜj%:)rWwxyZI$ `YRImb/t.VQ PoBTȖIvrYkRwea&EFM@?yS±3ims]瑛" d >*gTGޱ"EG_vxnXv0G_3TZh:Uu}hpuqX-"~5τkuMh g{KɸSfET {o`{MMʶ[sѺqtos'B2͛U(~Tl߯P?%_zϏ4 gSO Nk%+觀 JrhFc6W9+,> \i4Ɇ>>'[7QyR7Lt\fV;I`8{[͍~]|FYm/$ʧϲ+m潨B[bυDk&иލf$#yA7zUz é !:`i%Ù>QPD["K.H渷\u'>2\xʄ14'^F1JH]F6ː !mp4-U7U( !ع"Tt1u{!*OH2KlH Pfa%wiM>+~:dF"‚ŴN;|\v<#2PfVO+lR}$şz3DqIJVM!?3;zuz0: ;?dJaΉgW2Il~ ;6`yk Lp՜T ИhIP.Y02Y=X w;-x:2L9PA (^kfSCd56+N<;_l%̲dL#`GGNm ( a-5*E`ySyw.rȬ@:#HIHE&?y)ڴ;g[^Jx:Ic|ӟ)N_Q'kY~Q^kO׾kCuY.*Ǫ<@?LE:? 
s2g-p'l~<UƯx.ĐGϑxzRMG 8ך0&nRe WbQEMv^I6?AW%ޘat6.ա!# \oIr9p$ZU'zc  ~gu5f=k?nMP蕧'${Ć[8fNH6\:we&]neA4J־,:3D6{Dj2ȖS(#3Lk+n }%q⒊" ]SMoijh5+ܯU⽇^{Ae X7VNu(t:1wz/ذwW -ዦHyIh(ids9=Br}X,0/p󰿠%Bcr+ "H>P62Sթ5OvQpoDg7'pm2a$\X3.K^w/Ru{<@JJ|'}tNYa…57so>$j0tzɩ{)TU_8!"bq\b/0'=WAM$JFl_ qz vV+ۣ!)aEXWU+sPsÿ&|l8F7WXU \C w08va.e:lWT.p.kNnL~.)YRsMސH)_zGߤݸy KS!V>;࢕jяb.C U7r>:hkUvb"REL(ٓiMR[?&Cl7m-[";?0z_f%R u#RFEbKlOMR<#}baUa]od#@Rï=76-cHlS39a#0YOiDqCZej E&}ʨ`mnٸ%$?6wۚ[ ROuҾdxUtX՛Äˏ>fTP5T}Ed0Ԓ@Wi1*y4/Q- _,?|Z''$язc-ǻpX)k2ʵTJ^2!3]*>t7l+QD̈́TNxD:C2WӣK5?(ke6w|U X*M$h%pB@uԬ!BA3&^X-2fbr SkQ{ׄEB!]"7@smI)Z{6I75^ .k^3llkL{i]?q\#*s'jud%:pIDcEEjf\pX1}H>47=IP$St%GD b'~ mOXz>>~桖YFjD,\]3!pO }L8/ ,Դ939,CCyE 44QOc T(6Rm"Mѳ@5ES!M .owPmhڐ$W>e$fT OPomzRw˘E`H[sO,h~R %,ߨ=vr#h+ _T ?N϶of&u*ˣ)0 P2@!,> NeSŸrw6;S*4|㶯xR?( ܩvMI6E U!pBs6u3 7; ) ;0Yo̫3+F9GvEnnʃGE48J(3jMϷĺ* !y3J|k@:VJb9F WF;0E-/8 Yed0nJxI.E&sF|U=sdE92yqN$}VYv+S7&c;4U\4n mR#J\ g }my:ka'!!tEK`gN5/շ?Q!3S6_뤔ЙC'o!I> os xp=my`p2‡uR|PB4̅a4qwJ$Ĕ׃hLO8=wC[ 6 U6oזa;9qJW=<붎]jF]VeM`y;s95 !en`T'MI!4*:MboٝjDx-%`aw}>z=7-qi=A \-Ňo\ЙEGM贐4 23n Fm`(j=jjEOX|^eТf|U,X=]U+ ^GN`%387b. c/:.'|yjlme޷U(C;d}{L;^TXsuV;JP@)7E/6(/ XazN/Z<&A2TVm˅V=k)d3񂨞XBe"R,u oDᒯGpѦ%"9&.Gz#ub?+ "hglzMOL(4~壎d?c䗗':UoUnp@;LG6l*S~a@ՎpȚg[M5aNҲg"@+7ڬQ0ٓ:{1ªIC?N* @$8d|;rCم89;c' K[3Zomkm*"f :JDnSY>0 7YV1Bɩ*Yq5ߵH㐦rAGpa,s T O6H?`7&ip f z~Ekq[6#ɚ9{:M4ߴuD&~n2:枢 }#5frdѓVž/&.ߎe4;IJ\F/ZF~TFx75p/@Ი\O0hCS^* l#ia"u4X w;=ˍ#bR`1FFۼ9HM{0/~׀3R~g2XwpƘ/'gv06^%2|~+TtŹZ9.> uau+0HPTtqa c2E'm?I&`B$fwv=LSpá&r5bKgŏMTӊ/v'iϬ5S<z"X e,} Ev*u Ĭ WHnHl#Ƚt͕v)X="5&:|aZ3EVXUd ,$Npd@dϗ\}j~NG/_,Ib1m 3~N'6ŔH ~y{IdVnԳ`JlH. 
:C#"Nd7zeyt9CzO'qjș-$jZ@c0΄s(#i.>[F^\8$+!ߚO5h Ow;ۈ?~'bc P )#:20nF '& 6b\*hOD&Qg3ūFM X%yLR5R.Mbe)ɟ,s#|ĖP|SY+Ohy-) }a|=d}O1q ^Vk# nZyg6i'[Eu<*iwLQiڶGԺRkGm1x8wvp׭^x_>PSƪljDZWatO ~&T=D֌x+kd,)@%yC6lL'n=S|fAR+ɽtf_BI$|Tc^7r;%կğاb8JTy":h'*b(<#`iwհRҠ'W:"^dBgzSXHی$|Hb."^L4U*߱ѬJkp}Eު/ET"ﷂX3Hà|S9aUxA;٣`h~l-y'O;@s %/=(|AD!P ĕn(kC{&Ҟ?*,u\ZMw\L0?Th*Mb:B쮜/6l_|aL5 $d$kle(3M!}𯈤m+fYވCcϽg襇,i4v i OfBI7$f [K]q5}BXZŸc<$n;Jd:RcBHR;<R2]%*=oeRdWǴv mi\'VşYaA%#iU|{)gv.+K֝iǓ,d{R9O~:M٧#X߹cqC-4}Brxaטt$7`7/k^ds g˕Pe; 9b11ೌ},x=,vҘd]Z.1,_ iB+ulY0F Zث)ޱ~^w+]0RnKM44E =9O  ' w̠#±*h OFwi"aWWֶ+oe5';Y#__n*l4~C~F~AfHRZW kB,Y$ϣڐUtSip'Gy&,/? v b#\Cs!#YNAG5U'4dհ7GEV2z0^LF`nx8.U8X&ͣ=[3+{Hgߢ֓r#exE%B2GBǗ H-hw8B0;a`hϒ$XRLXR)XVzeF MtVo縡`Ϡ̑;JSa",1z\Gej #Tv9!W%2$J^g+]H&ZL.{zܸK GM~[>L~Vcz\BiY-M;Adf ӕi30E'> |Vhtڋvۥe e7䃬 (' N>` `m"5͎6s_x^[݃f^~&R43{[JQtuڗ'N> {G j^&iڞ endstream endobj 1877 0 obj << /Length1 1630 /Length2 17882 /Length3 0 /Length 18717 /Filter /FlateDecode >> stream xڬctem&۶mۨvvl۶mbFNW|Ϗ5ƺ'ksȉUML۹330[:+s+Ð:\,.:2w >pKM K='Nq4tzh2st^iӨENVs}_q%${ʜyJ`- |$b/~R$@G8!:*н"uƱq(ag {"AZ¾N%$9h"ą?ր#H..-T03eO4VŗF5WC"%(},{ԃ#UEr%[>Hv3iꅑupH:wr7pdmfB|{T> q EAٓf=(Ϧ-+ h8?TO%f:SU9"i}Q)DO0) &<Qǁ ͙lΤk5xt_ mAe? EAARܷ :)}ܲ kIz?C\[S1Ku2f r6,Jnjk5QUzZq+=%h~S |rRO򚡮-gJaQc-zsnwfEg:m£YfgeI[p4s>#+;˭PP c*l` `@8Ii*=2eϽ_zl=ek?7EREyTQ#|XgܒAB 9*u/ ȍJR4TrUEǎ|DQrJI|'2D3Q;#3Nw@b j JǎzkmI/#@mG)ߕq^d]p-1>~G 尖1^x Rl36"D%X!=wy^ӃD 77rI ӛI Gjl<Ϩ\m6eZ,, ҊMSvi_L^ f ҏ$b.T4Z Ï\\3Xח/yF̘ /Iw/Kܻ`6H- _uyg#odg/'{x2_^`olF4sLTeP:Vd 47~ s5^QV5m(EǷ6b\s{ xRDF*ښG җ׹a:66%^Daӑ"@30Po*= *v0D ?X\r?QwO)#HJ^S]+eu`muhUFz )Y{U*3HsWն8tXed[e+ڢ/=V bP74fk][]0{F\k}2~qRA-H @S&Ϡ9`[O]qYA F. 
V-Y:e`ςLUM u[<"sfL&B@a* V 9iX 5}dU3onꀙ9@cщ(Й‘I!*i95-)qn l>$*ɒ"NY.P(ΚgߒG>`&"I$ #Mr-`2B8IFtʽpzPz!3~Ԡ k*;DX롪p3S֍^zT<MDP{z13~ d\s ػ#uU]ѻd\M[^ELb§T e0]֝[kgUw)a핏a2܋[p%/FmA*>H(ޡ&j̄Md\N5uྎ(XS]mʊl5h5MKO (SJ*:rH 똋²/Q;O!b4qp+_fYj3?w^cI&X4@q@4z\^Y,p "Ks\X<}fSqZ@1Lk"}w5Ȝl}#3> x$LF h>Fk<0lM뷹s3cY` Վl.َ&eE*şkV~`8wAgެj2-UYQ,s *Y@*xzPy:kScI%8x_ng+4i`{xruS|7,gBf(6jp86@0c|m'%0]'헠 _TK D 4IrKP"* EzE7vWR Z 6`%[L kH"o*ۣϹ?~#87X}c ZWީ y)t#A/W{<.{F)ʰ]m=2l=y$Q ra؟Ձ UYW`z皘k{fSԴuR{3&V!<^ 1yQbOc+~~,ba+x0nxPz c դۇO6Eztbe}IX{mI udsC֞܅?Tx/x|IoQ6T)\Y{噞F|cF$a`ZNʈ4DƉ?S+57HhI`kD{xzfҔ#SJ5k@ .HŮ}-;_ L:8oB=uҟJҷ@S_bpNM^y(xRt4,tF6{(=}!Ou@ᭆ94L1iv,!rAF[хѾ71~y65rp/sBm /wBz hY\F,.g |䴞RXQW ? Tܲ48^<m=n2ۏAh5č* .h|[izg7Zbl\Tn2iQQOлګm F2CelE!> yP<`sܵǗﭐ#6,biPCB7 \3D Q(n签Mձ>վmzPef1!"sPpdn36 ws(8ƭ- 7۵p n ;k[^IL`BRy0s[`ăiHzރR֫-m{HWEV9Q /31i0 !͍>952`XyyCWɅ8|ZUYQ;/)_FgrBL0zP&+|}"z0Gbun@?_5B5@zNv;IrbHUShWnkh[ChTN!m{?dmVf a4]0J8 c<ۀAAJMkYse)Xƪ{j,?U2YCG*ҝ8[L)v,!!iWz7Ŵ˴$T=Zc-V%hn-/- gU I]Q3Mݥ4W+.+CIɏ4z|X aMP.S̠̊~gs8ITY:{'L*{4 |:rgv45`MĬTD>I r 3?_rC` ?5yJ3k@|z6|)dN?b$yǦdiRgjXLUc=ױ._e;dOa@NzRo'x^-D=mozF  mF#1t*#.=U"݊)gKujpo'aiXsǨژGE,Tg5cEoi CR#U׷IӿnY"$T4; *O`fsX*0o!,'f4Lt`8$Z={s)궑2tyj7#F }cmKXY*GBv G8hư|ٷ -tt/F9PjtPfaen2Nyq~>`W60Ka.>iGX% ~͸)^z-w*IM9 8i$,rDpx NVq~kFbb#1][5'־j5, ,}WgCP!>Xk | piYH3-7:'˩:ݻ 8 $%1}ˬ) Pq@Z_ n$|1TucUz|Y߉(#~E+3_vx[llxA#8% ɢ`Sghֆ9CM]15Ώ5G9m@ ;2 'hv̑|MoU mx yBѦ iX5SjTH'M\HP ; >|ݎb" ]Xc8yS,X@*ک|iZ='E#F("ol@y %s7IAdLVW\^srqgIJlѩ872K>)m.0.l3-#Jyf4j@idI^f4z#8[?dSg9oˆxa ~ੲ[ l03匆i !(7z ŵy H]sJfK!: Q*1Wsx/2 ]|afÝ#g1lLHUPrr'c[ۺ52ypZtߒvՅ D- [,(]]-j 2*ژjVۊ`j |[UQ$ qqd^V5lM\t5Rڵ2_ @Q#%PIԋ]K4g!۳'DY5Jbۚ HF޾gE" r.cʏY؟]&yňqg#)z(CٮD֡Ѳgt⏇ Ù-JՋ%6 (KWcG(y_tvܟ_7 ak5bh&WfUև[{\,)xѿ^=Hī4FD%5"c ?"eroV4b+$.#F%nЫ^[]><ذʎgMмᚱr]K<6=mNhMTXX\xt7P[o{wsEćݾ7Q$5v@EF٢ED -ΥpKpAh87:ɥBD[*yb/Ͻ$L]pGA p`~^g>Xa|rЯGI2Sw9er<`TP4mǂ 0u;!@+LČDy9bGdؕ9:q]7s#!ĸGp8jOv3m<)&PIdxInƋ+ BUO34zO|Ӏ{Mb*mo2p=P`b]/8QI~#kgB:#.r:6J'5 c[qRfZH iVTd^ru}S9[l/t=Q)H'oVx,oG9<΅OC@ȒwRPl'9؎ 7 2O;H&f],.KHfX6!PJ&GQAOJVҾAwu35a{u-u ~s(y^C`̳6\">Y wn=lŒ'Ol6)#ᬅJLMCGZ!buBZS^&\9*D'I}Vn 
vՖ2nN͍P:JjHl b:ٳ{fJq6Ac,m5@e1K[4GA_kOdzgk΋d=rs_6uQJpZ{^+Nbi gBHvu XI\}.Uib!uR_Wezp0_Lă)K9G vQELamS։L@Nr4&9rP@O#(qDvR Y>տn붱t;@.7MNu?eP+&{5F,Z;J3cﵰ7)(q>7# YO 9wy!BϴNB)VycX*jqnAFrpt,)J˞9Ү͝2Ͽ2S^WQ=`G:DQ{%V ΖwdBʇ CPAwC%+S$*|H"o֬e3>'Bu5L.ʘFۖ!Ŷצl9mڲ؆ܔRr䫷U !7ˏ*1j% p&靍K=Ue٬\ʹ H?wk'GZ=:BK<i '|$+?dNږDV}7r%jc1] % %Kk}ɑH}F\2~&+LIBfX@J93!NL tPU{.}/kJ=erUl8uWytחabma.ŧ6erV{;`.dpېdd3 pd0-UPa=cT*͟7015CV:N{pЎ3t~2t., Ƿ2bniin_PÀq)dL!iLPz\NkRcOh^Ԯf,^?M eLO#S#C*m[v` Oq2{)ؘH(1SLDQw"L q0SMr^޽tDwɖ ?Sg~k6ǫ]-37eNaOSJ_ %tS/8Ixc0YAZ7uG[cd__yo^ : ̌OHV>(7dhxH-` ;TQC&ɍׁ±m+bpVI{OD!`\>ݘ^4:RO&180- H֡|Ә{@CMSgsO=RF7l?eהug)BZ(gب5tu*yf4Wy@c%sn&c?&OLR!t1Y<2z!|I戽un1&e Kbpf%p. 㾔Aź?RP.lWl4ز,Lv"Xf>/ucuhfH9COOκv+͖W[Qp (\q]3t'"<0zΑE]| CNuS[~^XO Rmi9e12C-gtRU<ՃQZ~l6U8J.T5\lH1knX蟫2k0Ob >:#%ӈ.oE&ENHƶeFF}Ks93 xI8Ir%ځ J|g|1wljȼ}9s[;`"FQvqnAW֟B C 'J tTo%Mh]>MobM)9q%E0?#*JC:apbiتW(S7-Pc"ϱ;~ml` d:.`70]vP`/XI;etROeJ##7P3jKsOuH,4rP1 3`YpU_վ- tT4+WҦAd)췔#ׯZTRo(t Q A)֟nn?~#|MdIE1ZAFjKpña~Q:H4Σ .uԀii 5CHRo8(rg˚HHPHl-DBXQow,Rzyss! Zb[TF*\cnԧl r~ :c BsэcYb \ͮlp@,8u2ipt}gSFԠeesm\xx%Wj Ytr5 }Dr\9O XG=+u6=剽E;=e4Q0]tO ̏a 4=5@s~s7.Ǡ39 ӼȢC! u`e'C]+tUp IF9OCq<&*\œHR *Д~h't7xJ@977lBkֲL@YX-|@N(oC]P'QܩDW"|@A kgd2.-??ЗnE? 4ON wZa(9YHS޹W:00 {RblS)8_.KD]\aҜ93|21}uYBi|rfM ֌IɉhZD jAN4L$<; @Fh؀ssF0׉ #DZVu}k3N)e5?;ݝk?n\Yl G g d惥|gjr1}|p3t eVT&L{IlvVIk#ܒ5w6S{7pE%J3y_kfjvqgu6;T#Ǘ F;W~-%Qd ne2"@L&j62 B< ˒ u% w Z"!eџa}k-Hw?iu}ij /'ߜTQHkXx9vw#ޣqm3y lECcoiٶ\A)BL1<'riװFCWlݩpD!JkjauPdo>Y2X빋*]N/ P (@3U{)yNuJJD kxq"ReCT *NԍY(:1u8"7Ίy}-ccGt<mအ;ɛ0ɷ7z,egdbHHA 枘ư&IAb*XXh[@0LzE=ЗgM,k(hLgȤD?kn?Y8U3/ڨrk-G\R{D.b]\)v^jjU%J>4at.yҰmH 1L[e|D#UI DR׾c[Lnmߦ@Ez) ܍mJe؎1%|Qt~\!9 D_3mZ XX;F&~!EGqAa .P VT4ɣQ w4Tֺ~{fC_s9‡Λ)aQ "I֛=*dJUADmyAzdeW4a8lweP{RwK5nI,ofR*39(!8܋.؆)J ?vcd7{pm;q :D--(Es7Vɦsqb[IƬat~.] 
x7vZ^@Aav,˦'s,' JaxiQ;GK F\[8sNpw]*79K0UW5Nua]sD-0;)V01XX9FbZcu:SsVۆ㦾툣/aJW2iH$[k*蠐e@LPхdpi,ꢌH *D*H XFgyo.SIU Vpb6&{h/@z"3J,ߊzb*VWyڑSG2іx GsQP ۞JTvaX2^*ϋAS(j-M E 3>}`h-~(Mn 0gGUAfhCB%.|a*tey6EGW&^HLZX|qe߿~ɒ c%W^ cqWB[ >?)тBr¾u[Pa'gOP]VjgN6ti H.)uy!0S|_Yn?rg+<(μ$TDx\N0B.2 jj0@T''Wk*b= MIl`}Bk[:Ё'@P?  "$$)"DJU(fU4=Οcf墮^QhdOIEs[3{i$Ɂ]@\{WnH1:+[rӴd.X~|cã-qR-ȩ]1TLükΚw&6mU= s4dWqpf Iߓ6!jQ%V8lQ=$D9SlFpDZJ] N؄zv}cc!6Z#BŸs:[ω00Wbtws3k0s%@5o\<;o^XPeJY·NLU$;cpC74Z#bٶhͭW%&/\z=T7A6Q?08W"NTO^JNKĢǢI/cuw ɚ倎xCs< Kr)=)l`Z-gnCSc?'Œ3lȕ1: uI ܴ_#^lĆ bG-*ϳħ2yюBy"s/;@+!" l #rʆ(Qt\* CVm m¸nr-HV^'\`b_脕'L3%2Gip&zV+AZD=XW`up;JUm|,j_%hM7ƸF&VRvߜv(IՍ]7 ep6_HPZq9߇E&%m- @I|&9:q)Z?kߘ!'ˀk{h-3YJȵ eу Bcph>؜USIo'/Aj _8Gco|*\jsw%s,+oxT 'iƭwr7Lft&?Ԯ\E'аAcqm|ʹ-nЊCq>],$iLĸ)Cu*qGQ 8m #}i CNTuG)%k/|+jyiYBzT-Z*ީtlIU-D9dԈk3ΖMC b];~Is@Ez$L޶x;;^judWrm5Ϲ܊N hwޒA٨?R6Nu24Bx8NHZuBEtk d:NapWPdy#阬\QXlrA紂usx"ӈY)3:ȃ鱈cHK!qqIt]s [\יU nal)c}b*eZSFr/4H [7"t^J@ i]JkJD ȷn6T蹌mRy@8m0=2-s=AdH)8Wg4Uo_x-% 5Dp[KlXK|1U^b2% R5dKo=I weATYtL4pgX],nJBOi*e^e* 9"|`,TQB| 8{@Vp?ؑ~Jsn Kn1b"&9nJ~o%H%`1)W#5ҡٯ VLl`h[ 8-ܷ#BȯjZD(WAY]Mv3ߌ9ȅ*aP Yۀa E^nm w\:~+eJ9#t4~#ӭ]_iqXP/ԯ AFkVU=M*xLx/BBsl樫~.uS ӑs9 p%EФ@D8`_d}_#AR)Gn'\ }P^B+-Nx1w`QxY.:G;\Co?r`>x0sӦMb ?^)5ۤ{XG珵HJMDDSnxH/A*PI^_.-.& K/(R}C~}JBe̲[Ei eM>kl{[h#Ίqj)>VdMfQ-ҜeNJKVVS4.?ػ-99 >CQ[딊Sfm endstream endobj 1879 0 obj << /Length1 1644 /Length2 11126 /Length3 0 /Length 11978 /Filter /FlateDecode >> stream xڭvUXݶ%ww7qw'8! 
.ANpmw>U)cʘkVQ3@RLl̬% .Bw%L]Am 2^`K+W6I̼CPl@ώ  ۂ*JZi%M4lj Pq3 {`[ ; 2(o!?rrU2{15u|L^7+ {[D-ߩace"UV S#״mw~i-tV`sDpK{S4dm߇͘_.*cxڊ<s0qyo{j|~Y gefee{d?7w$uWS{{q@ s_ ӳ2\%A:6j8GlU<׆27M{9kKӓ :+GA[FðbT!X;|NaFUkwk\Uͨx.½0>?bdfH<ݙb )wx"3bTH, .f9r0E@%fL:ƀXnC Ip8]cW -ſ'!~~Nm :S6E݂[\ a~@nMB+;h 1+ kaGK# 07pࡻr+M}ް_u e넡I?ӅI|p}1d^r G|k$91;[V=v=fBkzwhR>shػSDv1fVESZ72[@CLi5m K\ L}2y?P{'it![wkE.{#7Np``}h(%Eľ?< 8ˋY9y#jm/hH %reki/.vJT+m(Tź[KvɡE_ ]c|T$YRWa9jʌ8No66(%bH7"fQp̰U0E1 6YO3;9ߡi\&ڎYȎ>JeM!}Xi;~6aшcC@+Ԡaq^q5WJOE\eqo!zv1i;$Xo2xx@nSr N.3Q¬ֵ.Ma]HWH()Օ[h?e>s2Eap LD^+}p )p#YRTZ.~E9m؆Y*ll=8LN}wD?慩x-Õ؂yU/M /۶em 函8yWDmT艆&bN_ۅ&-6+irΩ-Y v8I/J{ӡ}\S3V`kP⟄roH*,X'Pp$)[i! 6q\%nYOʩ}KG([("ẛN{AǤ0ziFMF?ihp| 4~V7F-S+ .(ǻ㌛^9:a@m5:Psy% g1wpj^:/l d,'v'sY}qbIiaz2IswOrJ|)eg1:m)|-SoB4 T;ބXd2.Ź`92 @l!@EtUǹe4u@ɉ%# gfz|s[=#8ǺnLE!dIZ -Κ6=4` Xѣ.0<:Fe2sBA Nmi@b|/.&Iu%cc]jJ 4AEpÎTDxyTʓ[*W%čx*kCm $ʤov)׻F|+jblmyíX]`h0Usz|&$'R6o78YLt-OկeX_e1r?d$_jlԻEӢ,uC0%xҸHflz >Pe[AZTɖ%R7H7j%bdCAKKN"gdEzrҌ[Fʦee7(3 3E$%ŸCL2y"M*a}= i! n1ͿѪ W Ӵ&&x"VkP^>@1n ^.2s5(âXD0-~'ow"⦒ѣ|^watVqo*":ߚRm=hԺ¢GRK-w9~)Ch} 9_@Y4UCNItOkɴ6Bp1㓔. ٤4e/D|A$`:ҁwxRYnth>LF'-XvM6Rz^(i4g-FNdgq d$OB&qY()Z #r]:Ci_҆s9WYP: #@i k@̀1.{~v-| ~/x0 $WNxIFD,IʫLeX_a ~0ꍰz_B}5D~iЯQ Yt~O? 
ٱ C'Uhm~ .'rnjbn#5E@w%{E͖.sFBʁ\:3b/ +'.Q 7T QA[e1tUm*"BbŎ竟S[y">*4wVԊ@%fm>em^g)jθ!:4q5}FUW``5"2d:2?+|E ]"I |F#iNbqK8K+nt_G.z'ׅ:^ Cy*/r-7)d&qSUjEKP6#A>2ʿOd KP/ڐ K8aDǏgֺg>1m}jLNs5rBz{G8iKGJI2E92#&4OqU ш!VJuދbNTlIQYS]m}b"F~H"S؟\(/pNnf8Cj<+F9<ԁ??h;T2(pZe/"b~,6$:'H ´ԖX3۞i|iD2:J)CK& TrRƢBˏ ENҠjkHPV#%ݩ>d-ԮNcm~;GRl6+ p B`쐹dOJБHniue7hxF!6Y s٘]`jEr &D.zBDh`Ї&|}_uTfN+XO|koṄw[\Q[DЬbE~JCM$f3\DOXKXv!8`lsˈ" Y1|QUjXT' (F/p7MfG (F_ ZYLaZ6653b^3E@v%.lN7 ӒAx$y#|BWɄ,~ `TQI,F[U~ͭJH9&Zuo`D0|i; ܑ,Zz(\ 3ֹAMtnzm|~ (0osdtfaȬK]w5M(F,ߩ5BU:^u -g"A?43P- H@(tnW0TxLނ?Y$!ni\_c#~mUlkk!XBf+gd8lv>*g Zcg5 >km=c2V7'4R<[\ُAQAvS)f\kD]RcZ'P6\ )?r.i,u$q3%H|+,7ihzfxaֺh̲~[,MEsN%y&Zؤ H۳:+DDD=eui,C|B9k_:XQ\7?pm.Mj}- 4Kgm2sxgŎ"#?Ȼf5dO/[uկb)jUHsꅵce{-Zq1 5X筟Z ?j;bPqqXGpTLڳƢ|GBK&9] T1k %gx0'@= 84,H"P*znRp-x09%hpAAiXFIuvh&o~ЃOΘrNZiXėeV "qkd*y/:EJ,t&{)qv#>Ҧq&×Ϲ(0VoQ~J-l4,DWFіH.sd~6X[F0!d/JB͟ uy~}?#g7"_mP(:}ȤHjkvLCDTp}DT=z?U=֤`?æ(1gRb-}@Ex3KZG֓z؝>4Mm"#'ۄ"cDߥ12JCk$RҔSQǢU 80_S}ϑ#ضȧ5g4l /d*DU49|ֻtxӝA|xL(Ñk0 ạK/t7/2<Ъ\Y1 Ό65XZ EˏC!&jljXn"E+J9Zσ_Dzx3.+9yhP!%#v한;dtkt yC&]\tW/ %ƍztWtYŊ#ܟ EIeG@O}TRG ޸4V":KzR!6I0dݎDΔT;4>)!%5Eg>.-P:.es QJ[aT}3VUlxf3vף,迯">Iw:2:6wO QVDBH*٦2EWHY &c=R<~7K`SV_.9_16 qˮ~ޞ[XqԈ?v.g, " D<,h>.*IzѬ2Ws6Òb2focU XH0_R_yY+@>yG[jncLC?}&/]N'C{7pMC  )mtѻ~xn_m֗0j`(z }ܢ6V5&~ Xb I֚v0haE6U q`yO:T.*4-[Ҫ>ceC+V9E&y_C,e7Ho)Xg_$vIBNK bvT5rMน#zn G|r|Cr*@ ii?Vܦ&",tK]ʌF%`/c__4vm\2Se>U؞qC(R)ޱ]~<h~{PrC7JO[##Ux?~YxnE p ]saK.(ƥqQ uW~-J29BdAR-ƓF4V.to%ohQ,KeTQǍdEҜY]Yyw\N`p!G"/"6f"GEaY[s1IAxjҢKYŐ2h}'59YS ?jk~m|<2~fi=. 
sV<no$~)ʡ7͚ Ϥ>뷘~+K+ol\Kn+&NgU-iÝLsnwf<*vQWPk41^;eʲM|NȀR|aS Cd yX;;[͗@mx9s/PrjɑF!zvG=^\g12.~mgkawG-$$vD[3gz$w6p(- J#XQ\chƅvūS)[3C5R䬽6;0paԍ!ʊH=JaROoy5~6vcHMpwx.` er=Hueݏ*r!)ſGE^W0t8gس =2^ڻ=A;}']܃TnEmAƶ-BeYܹScp=lz:lx>}\ #JOEq"\ { P ugV}pExj\j 飭+d{6GnwI:/pQ3ۗ<֥4<*|=~a., ܓ>K_'E3;ת/@s m)c^RD1 melP= M;Md`[蟐٫bV|m(2\מ8!*Z/M/>Fz `Z}hiµ4V@qyT͐hvs'E1oۛ(p$H.ialʬVoEOF7qnģty 3jot+Eou8p<kNgoq5,ZFCRn2F2;<^K tXӺ !vD󓆓LKM1mle8ա0[N c/ͅt'M*>#[`|M\7O0B7( *{Z\4tsb4`HcOнiZ&:y'!<, ׊:<")TGmjcOL|p3- u<>払?]@9nKAj2_U9M''h6lG;i+5PS>~߾ npEwӦ҇a_z.#| XNf3"r sSXo!mB}{ׅV׉X)[2ZaL 7YDZsQZƽKtb{F LiT"IF3'^'vI.bm擕4ʝQnkqTbzMK/~@xuy bm^21&s e ?*ht*q`hsn,e0읁~5&>* 12; zJ#8;=mvr%DOKX2e/P >'lGVEg,n5-jLO@Ow}QUˤIWsNejZ׭!ٔճl-ʻmnWa@to.Ylh~X(Ctρ endstream endobj 1881 0 obj << /Length1 1647 /Length2 15015 /Length3 0 /Length 15875 /Filter /FlateDecode >> stream xڭsx_&cضmضݱ͎:m6;ꘓΙ3gWUZMFB'dbgucg[8)q)\ _,#V Dff,@@AECC<t0=m_׎*@ 0D%j -E#k c1 H0sX{054'/.!'!hlt7쁎6NN_ '8,l]LInjv|a_dvNNƎ΀N_0k?% B -l@wb&Nֆ_-f@ p:Xh9Uohoo/o_9X8;Mab;6e_lML tQ3T_IZ{L v_!w* -_5.}.6_ 94Y3l   ;_5Rf 'q w9eW5:Z[D~912L5 mMk _uYuI%ôfůpVg$ 9;JXEc`_ב'쿈s-ghagdd|}?WzF>Rq65je6vqtR_X@c%;c`˴tܡ QoC!% w ުC?Z=Gz0)Sx>$T(4 ?J5dw'AO8B_=R>#bt 7'<>P  v@b&%9{8@r!xىz=3S Nu_5o`3#ᅼepxא^##1%oHetjׯ x3Pd\Hij7Q*g©<(5}GO^vʟ&/?yM,t;,VfA&C}9%~pVR1e?]/`,.\|3RFBG1 U=|TU jXd,g}&21Z5Y\#&xy%V3s';Lߨ ~z9hN5Z7 \*NyHd%_#?0u1Xa&cDa pk}σy5t6@wET_}v2(qH!΄e.~3rDx"yKZsfXt~^C ;o)iF7-͟ d^쩩"2eΟҾn( im*jD 1ˑw,pAG=vsq1TXXuj]2.ղt`>ki?۲օNNAEma Lr=>Tpüj/81.y(wU:#Ȱ)`A,MNؒ*ra w^7@,f_ R-a w&6f .H!aOjY% Ԝ1f]]{O؂'acߎDU!6I RիJ2eIuo EEbЏBzj<غe`kd<JYuޚTS ?-N|?|:T9Y2Ho,t6z}w%ӝzIm@Z!3s(gaIWA):L;\=‹(>n\|UD8$=E"Cڌ+ĕx+Z[qiPԡދ#s z?mYk{(0f+p:L:v & vy!-e.L ;1 ]gE/ɶT|?{{Vr>w7ov1y|vo=!s74IME$֌~cum}SW65tʀqM?w?,~N]wZμs -KİEQcw`h波r҇Yyp0 P~S(j܀\Vµ)rK HVKU1 ~hLk+$=>X'pЗ9 >1`6 Ւm5(Ni F2gLε]qNVe'Xtmܷ*Eve8 ;n؂h3hyܧrHUc67qV~ChIz>MG!o:@U-jnecSWP\uֻmgX)_2=V8WM3φe,d6sJ{FiW8_O-s*Ƶ{k:U(DcÍ#i8)_|O^e(lۉk#⏍+<ʴ{|8o;qwCżMb̈A=`NGᨀt{,GVV1ND Vޟ 䥖,A%רb[y-`O'10%rU(!Oܖ&(*5]v m5;Fyk6caꉀ;8eX=^.WNhg]M݁3B] f7ԗ+ :͓g!x.!dFfC 
Ȋy[1C;}n¬"X6|]1>n7XSB1? nhH~V_{xsNttlYDcWK&W vsM+{"\0w %~dų3 \%8*81h8eƪ+5PC8yi}1%ٝեMoG, ۱ ^QmcڅCig/cm!:[jbsʔ+rV>dPQ iQ ̋Ru5CkQh?EՐ "m~0xeOߜI6_cMǴK{R jo +QB{l5QID!Γ\ION?-9h+ሦ~*/wӢGƣD؂O%tE=PIg bWNeK8h:0R3R@?9L鄝߯`yʔ!ʭ5DGMx:nC<. 9F !+;C`jkrM_GUĪ𐂾_ԋ~amO債%E!d[Qׯ",ù ^.3OsW=1(h13ʳi~,PO}@1Im(CׅbK bSkBvu V{3J>$UN \!)ЉI!ՏN>]/ g-\MV3ԘD"%@KZN>U1c0$+nNp{R1l%3?17DlW2KdƜM!r6r3ߢ AxnRkYum@͛4uoKNPÓb'9~Mɬ93 MWf?\Oc %PLpJ\ehI(%55;jrG0*t6)#! UXjP#~ EWuUJj)[w9B J`f|go=S%OB$rt/ h9)R]%Bk9WP8ߒk-E>fe΀\,g]4nkrSnNhw4{[[n_6 ȝOLi-JN+¹G @#Mv,ZW4#ͧ1杚Ge$27? nTSBMHlE/~:W\ULD=$:nN cG-Rҵ)8pxsOr< 9>e*;H,~Vc\J/ kW8+{tM՝9%A]FMGg9.MrԭG}A,Z(LF6>1k^t;61]w?.UOy!I6EJchAPfo ߼V9^< r%R<}M9,U)+(jr1B!*2jǯp xlP)D7&T~Do$DXi3luU (7;L#Pq)*T/ #ar)UhȔ''7cWu5%8FnfJh6Ż!$8Dբx ?ydT`ȴ_0ݱtx7c=S%zg"<DLn-F:gXhEF)XoM"i{gBt ӓ7$@3 K㟠Y_D0-&YL ' Sw:b{[neXvS*r`H4bSr͋*<*m4wƨLr<Ɓhag=d:CAIƻ,o(wp/y'υ%E@u{]w ^YġO:A s]5iFR'r yBUMn'v~=go @uQt joe`i摢ARLYNIAw(I\Qn(t}@L6 .9LPz%j_w[˒Ap񺙿k&ȩ cCȂ4,d?wgAIS멓N}DED\ۨ5x_6+zð/, APKP6}J;߁f7hƦ77*1ZI+(p4AS[o?m$I+!;^绣[ba|syc1ȡ;N\9Q ;zԯʨ%s)PElKvw׀P*į޽c]v$?)RE._pBϷ魧 eYPf,5=ׇzA ӭn/eFͽȤI(P!J`"b\֑lsWy[D:dQG0]ܖZɞ*̒M+5d48fYOy}3A!m"`aU̼* L+F`iЎ4z0Rzõ kztxA_o_mvY.Qؓ, &ja/6a~@{ DkB01K߫vi3ڬ%>onǕ9WR7YG%Y2#fHв\ySbAD-s1n| gp[LPmG %-x(w$Hf-X|FōI'5Rfzv춎m6is/͸Q$uOfDQ'eݍ 5炨hv-eLۨErgdapHў%WTaXW*Sln` b#ݞPFښ"xn vӕo NNVux]nH:vyKZMvL*񻚒%LL@ن =jgZ^Xlu Qi6D!m>ca!8BGK0 K{Lb#h8M|2<њvL4XSv^ivNP+4KtuU7U e3?278\(2E"Iv<[0 }8tmrs7No#o_Hp5D*"=fb9e,$1&{/t1}Bż{"tkge+Mɇ2bd%ȋ :6>sI2q8vY[x382z:oh}moeZǠ] 6sY*=ܥN;D}]Ѕza=6r7Cp?/hGw yAH l6({@{?^0eLJ"]a ?tbT_fҋ惥,9⿱HS t_eCꨦÀ ` _KJ[3N$!&uii?OJ?pAkDn[d2S"Xkg7qm9o>YzRM܄|-+f>UBovp*,bA+ڜxt2ztṯ rn^P aw 0p _鉸֍,zW-3M f|Ѷ%j?rGSFlOHuY7qdLsz ΓP IB+ARLNmʎYmX:BX|CHO}ng=O\st@*8Y2koDmK=%q eqaŤW%@8cp#f^ΑAp#HE_Lw0 `c72>O85q/a#Wx(̆@V7DA\&!#ҎCV L7TxviYtQ b~,<- fOW[wv7z fX+_z/u^p mT湃}E\1DP1`X6FQxP`!~'{[/ n+ZedtAd|\I,k2*d\1Vц8MATak q#ѕ+b:[%'+G Qo0rW\ KA\sm܉H(Ar,mi&Tk'F%˞3.psa0^gHw7ktnH:eAi @)q~,T]\!3ׇ$i~jxɗX8֊^&Gp|O sA+g [Fk>X6gNYYZ}R51/~P2Mզu L5: 8=;Rv_wJɯy>9g<灯p??{Ɂ A)1 xQN"|Rm+8 Hrjj)ca ]Q:۠We\fsg,J4C 
<ځ[|waVmFBMpM`)tMȅi03oz~oW9oޢ4s^U PD@L\=DvoCL;5QHa2&cG9)w_̰\ScuW+d>MhwCeG3uO5-eLxXڻϦd)7Zb情N_c5!Gj`πb%6i1QM,ԎH v_vby ۇͯ{C sێX{ q vB4Mv6s Ty16Y2U;&eUeeޣGJ^8+$)>NO{5ۛ}YycטQ¬҅f?lŪu)rE M^MQJ:PTEmRcP[/m1# gRѳ$pk ԧKt/*Mcxꯧh!7W,R7I iOOMR7.H2X;j%`G4gI~9g2hfԗ'imCs Yf2Fnzi % .nm>g J>ZAftxB>cV8[PmuEv&lZLK##"{<6f֓c5fjAA`co 4dO`-P?L+-y&J0247@J-}sѱ:i<{fQX'.,@aB^ZWu\@:4 T-E+~z9ܻ";aAKta`[ˣ7H|5@1X݂z69$W :UX~I7nTh~7ZlCKSތuF Uw("9.^K& Y!e[nUunmŀ8N-dFjR,#ojh48t3©"떞ldaW8+O jwClZ"LA`=͈پ`ؑ0\սNB7}CɼuBAe̷mቌK)+>cIA^hA:鉃pgPC wHʉ?Cjts.K%x 5М+}+pX\j+(E?ӭژoW?+Rl~gcYUqޣFn'=8^[f$!oԃ6)3y:qgx: BM&^1=`/6ۻT8rOk~9f5/RmTb瀙3 _P!{:TnqSu`K^*l` ¶7h?;nQtS:Cշ':L|}iu bn 0 i0 :<uȈ)^N# BHR蚺  FBZSy ^lشRTnDXչJmi,Ke1y39;}??6] t>vef50>!K{% TXϒ/J㐛xWYG.}xj{8ܟp-rޢAО3r~.EH-T#bmʣdŀ|_c7$5|Q YQ`y{zٴ7ńz&ݔY{.I2^b QɀHy/"r@ 7Qs*]dpIA*Ҵ4_-vm9o"Fu@ɟ^RӺz WN9_[VB0oc.Mdy/sXu1j%3Jm}"vf !E*3~c)? lJ2LD| 8%q{d)]Б- A3)pWA$jԫ'Qq$KfwyѴ5}f/eC2B EPfiP 9_ 5/}Eccaz6v őu nvBC JrV%N@pyoWMټ?CkʚQޅ.OY#t ;y54nӀ]`Ӭr$+ ! <k"9YxKyRX6)̡R_zƠ{K{9J@̴-EXSZ OS [wLaΈC3dOS?YYp'Rϛʂ> dpc͹ m6W5v%* !&[= U6$ !bfpynTc~9qAڍVJRv#TX̸#;̤J.5Ef IgۍݣZ Iq*[:u$Zd<,=Ud&Gfd)Z:4]P,ؖrXcrO9 > Yt\]}ܬC PU( ݩM);:z¹IKgkbE^`M ;Mw Π[vFl͢O$ KuDYv6,{*-TR.a-~_:!A|]|[,!ɽa ےيHzy_P!nOme&% Rt$\Z-̳Yհ:7qY(k!;# U_@\ZEw Hq] W2[={j맥Fho˩w*2*ӤY) 7t:64U4By=tm^vjPa [оfu g*kVtԻVYIyllb-S,Eƕ(7K45.GfljۿynƐ{Ctv=8 OI]N]yol2Ȉ(LPq'E#9);7C,T ^%%͊W}_dkZ>Zf6Yz*sjj\AJQHTȈbjn"X^Lr,L֕r9If7Q¤ZC*S٠yV1@3*rIj]3Tk&Dۊ-&Sd>^0{mbWE^մgǒ" ~8A5'j]L4g[le ybtee}eR/4߭7 &ߵߑzbÐuFy i5Š@~h㤩ņ^8uQ]'shE[zd {k&u`hW01Wu&ҟ_ꓫ3%*xDA%١8[T$u~ ZxpC qʟJI, RŢpbL >dHe}տg͟sesz avIR,XVpyVʓ,oXz$ɄYa^6fgd;E0kwkw0@=WqЕccQBo7<ےP\g؀<㺴]zbC0J,#-> G'nvp!1O$$1C!(FOBCY5620R+*^?ro4պE7vr>$9do8قF*J#(1x$QMozs3!rIxMڃ 9 :dHk'^wXY3uAbyתS .z_?m|gGyd!I$1|,}ܼl63"8/UU7?XX' =%:i񰏵eXm+s$ml8l0ݻX?-]+,ޡP@|kM2t䙬5lLCGb/^s(qGpo).1Jo|9! 
a[ pQ &E֩ykT"' iw7(v]%?0a=*7!_q hyX6 97뉮!8l$r-Vq84ʆha]'FI )l;(?יP<@:|Tn5ox4yٸT+x }P 7Ӯ:sf9챒#c8an hk|rv;>آ00]V8hϮA!N}m_&ȱdݘ =ݥˠ(xx8t`8&9(Β)h)vaE'l0FK4w.>_a;e<.ᢡb(K=^ucP yNkN{SBO#9&)Yt@1w9*}(ϙU@Ӵ;8=E)6eNPձ:RphiC݁Y ^_rvg>R֚tvi( ZZ בlJw=rqTh Oa0ϱa%_i]!ю*Ea#*iG9@mYCwX@( \C6W j,j-l!h-hA,r"cs6(bّ$&N!G7\CF_1 `% ) zd9|6.Pk:;t֧"hڿ8Reu`-6W7vļ)حwrDX&i݉ݲ@ya@ endstream endobj 1849 0 obj << /Type /ObjStm /N 100 /First 884 /Length 2248 /Filter /FlateDecode >> stream xYmo8_(wXisIMvvJK7C i.pE33 IJT S&|3 ԗK˜/%j uj?'ez gyIRNXDXFRl*/,5^XjQ3w;%eBxdBz`B{ Zb3$a0s$\i& 3G0'svIœ$aNe0'f 1; d S(G.qL!^X ӈ֢55X4B51`AoiGQA^h:Qq"@m,) ߭;,Hf6Q?E8.8s0:ħ.)8*tC| rBPHԈR1z&R&$y$ABBx#-y)8Q#)&q@X$DYG2m` Loo3CZ iV#U8 [D#e,[IN&ZTRGPR ׇhČ-4C7XK;5s $aYb3+c]$ggE]ʦ4O8e}H&\RY3kggse@c jZ={r>>k_1M|FŲ_\eK\]pEŤeml<f="Lfe j S\|bóxY\\|F>8 g6)A}]&n,Md粶2N ep +|ov>r3)BiKo'GZ׸p}fkSz{^S) IN;!p7Clt%W& ٦ُs@mXHtn>I6m^AW_hIӮOm<><8G߅e/o,vFzGǶQ 7k[Aa;]^w%yce8)RQZI (JA:vDI7Bo^gpNF;m@D1ȳMov]b{L{6=vO"Ba%^YEM%S[ѕzK5;dwî^ཇ% e:B-IK[41ɟ^IGwѪ,.]H26$]Mc]o+2oH4扲|}Y_'=wϴm۶xL c< !tWi,o$y|2bQϗA9=:=f~Vˬξrx_ʌ&ed|4IؐLġ &ϫq>YglWCk:+eSΧkKeja^K؅=apG   )w2Ȧ|Ye dո(r6('96|YTγ%/q^uZ םr>4 L%zr!^e%e6 (>p5e ,%yU.0[Mѐrs` Vo5Zr.݋媂k^U] ռ'eӨ S |Z4T'T* jysXj6A(2O|/O  'o8:yϓ^~ Sdž.ϱꛈbFL[qC!wriC.dqMkӬZ7vx::=yULWI6;Io!7 S59I8sJD&46w;S.$U7ToX%bVϧpxO-iˆA1Lع$6,q[ k|{E%@dkLلVNrgW[&6?6Bw;GsԆФ2 kQ|s>3(ʷ/ez[We/>M%.[Zc݃Ó_z2<)YoC\{h2BrJV^{'׆HtMȶm.UMi2F\TCvrQjonGTJQ*׿o,[~gN%K>J׋+0>紆4 7ལ˼aƧM!CPx0mnHf]\x\Vq'԰&xk Eaٽ"$Dw1޶{谑6W_حYnȜgp҉^C_ endstream endobj 1884 0 obj << /Length 900 /Filter /FlateDecode >> stream xmUMo:W5?$R. d9M eCkmCp;;w~>|3E_?O]5߶w]Occ]=~?}Oyh9%?۹׬B|Ɯ>);vw%g43>\ 6 EJ78 1{~`W(-;]%=xe_,b+-O;q\L}UI--=BKE1p[! Mߊyu>.N5K)Wb٬8i[_uʕMzQ)V(Txޢjy!Z2P="Zd0\ÃGR\).2*Шa!U,H`+j.5Nα@VK-x%3%AYӀzΚ>kP#5m0Woþj.ZT$X/)n)#Wo(oRZ $Kp4Z-b\1ܰJ P"GXQi/8k^Zq:Zs9dB )sL-7xJ`aɽ)f$1 dъcCZC<73JgznHȰYɚTa,_-O87}KԴܗLloK+gJ.GZyVc48Wt]:P~`rZq.n1] S/Pu7Ue:?&?!d&1yHn5)yғBx#1ޞ]Go׏M?X endstream endobj 1885 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTMk0WhFG*! 
miʲVZCcYy#9햅ļ{3񸟤e&Oo]&C]]Mq>zwt߉Ǯ)n.pCx?nڽVgx=itO"i [\l\WM}'ԭ̚t4pXeȉeU oq yM\-CnCW_Ey}wP dZz891euB)] W-\v\]~[S!8&+Zce"'2Ɍ5I@|"B2AQhSlLء28a}ɑFq5ҍnnbfǮCG= Wܢe$g;A,:sx l=NOTƘ$0_س/vЧQ%~Zx pX2]$^qnaK??q FqMyc0=) &l(mi,3|d &\c ]͹&ӈ9w{d-tx\ \cΜekqLJs?<@>qhx .׷8wl~1V<*m"mmDa endstream endobj 1886 0 obj << /Length 664 /Filter /FlateDecode >> stream xmTMo0WxvB+8l[jWHL7RI;onDo3ތ?n~<&Y$ŝK_IsE77E[^N\5sߖ;7|[lzmS_*7F?h3΃;mc-bB`ew\_7oK׽;(2Z.ETz}ܟ~o9V^MVK7-\f\S}[S!pcSs|TXo1/ȡ aeuC> stream xmTMo0WxvB+8l[+ML7RI;onDo3ތ?n~<&YվI|/ŋ;t硋nn\3<:Wj\=?-wn6pGۦ|Tnʽgxté7~qzxKlqrnX7UޞMjuSAxHiQ,'wͱ 1}hW7q{UEݥ-rG*F>NNL7u]tNhWS;wE )b,#TTHy=)9>*QKr7P:MȡQ^s$LD6aȑ*s.$S56`>ƄmÁ#TL 5kd}WXssc*zRh/#? bE$L|ږ8^y>eSQc̯bV̯cNa'_OAJ195kd3EH@8ܰ%~As*=F 0`{RLPh33Y$LƹǬ oqMsȼ tx\ \cΜ-eksL ?"@>qhx ׷=l~1֍>*]!MBa endstream endobj 1888 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTn0C6U@"mTt@;olvR3ތm~<&YվI|+œ;t羋<]3;Wj|{}[ mmᆂMv{Kt=c_~B?zxoBS6wBJ)X7UaMuSxHiQV,4$O;nC-bD/OCnC_n^ѻs׽9X2Z.ET~{~ʶrn_~߼h!R,6ew*ؔb%k e+Kӄ$a"1x*s.$S56P>Ƅm„A Fs 5577vرϾ+uaя6R:!,əCxg+ѧy*JcL|*m:fvuiWUꧏɩ\g%<Ϛ"sÖ0_:3x0kjhyIYx0aCnOg3$cx0<<v5O#ܵu7A 6*sZ ZcΜ-ܠeYksL ?"@>qh|tngk;dGGM@c endstream endobj 1889 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTn0C6U@"mTt@;olvR3ތm~<&YվI|+œ;t羋<]3;Wj|{}[ mmᆂMv{Kt=cߚ~B?zxoBS6wBJ)X7UaMuSxHiQV,4$O;nC-bD/OCnC_n^ѻs׽9X2Z.ET~{~ʶrn_~߼h!R,6ew*ؔb%k e+Kӄ$a"1x*s.$S56P>Ƅm„A Fs 5577vرϾ+uaя6R:!,əCxg+ѧy*JcL|*m:fvuiWUꧏɩ\g%<Ϛ"sÖ0_:3x0kjhyIYx0aCnOg3$cx0<<v5O#ܵu7A 6*sZ ZcΜ-ܠeYksL ?"@>qh|tngk;dGGMc endstream endobj 1890 0 obj << /Length 799 /Filter /FlateDecode >> stream xuUn@+HɁkc{!5&Q^ үル!zya/W7/~jyld}{9}N=C'u\W;oέO*k`~?''3Ɖt3\;WS]Q?SVk ]{9FSѤoG^ 32j$WC0h޼O~wC4Sy<&>U]Rn·ÛB~,{_=ڰfhm_}4zu|sH]Wb MLD!E!B FAC\dQQ(%T<#h^QqKƊL0cF F͌a._Q mPG9'+X38)+ι7\'~5:r%%Β뤧$1$܋a %aN*Atg&W̡`92/X[B|fAlI)dKdgI$[d$[H$[hv-|9~ddK%[w-t--d ~)BO)Rd dK|ɖNK)K)++Ζ]Rd]Oz͜|x8?<ᤥNO]?p@}_:h? 
endstream endobj 1891 0 obj << /Length 550 /Filter /FlateDecode >> stream xmSˎ0+$1$#8h@Bܰp`~ +8*=SJ]sCM&ESݮ`w z\ħmbo'ޚr028~}uHXz_z.XA_`1o"xR:bct\$7҈٘TmH@ ]W0ywznͩV+1r]oś}X 6g1ͭnm{!^ ' bނP48YhC`୤\UY=0ZĎiơ 7([4r;"A"e"qDgs"2dK$#В%#KDDNs5&]J[/G endstream endobj 1883 0 obj << /Type /ObjStm /N 100 /First 897 /Length 2873 /Filter /FlateDecode >> stream xY]s7}_G**11,lBmz3ޙ^I=f4]su:jC iQaZ7ʆqpKiIùmi,goJm8L (؉4BCԼB@V֊(Il"ت, V#l(a H(L!BD d`fKlA%RRʠT0@JM #KH:(-ɵlH r!SiaЀ TFP&TF <<,Dh`% + ),0P< @EL4 <@  HO`L`d J A} @yRN`I\XMZ )t2E2@U@hځ@Nbcv@* %)T(46gPg8 QUTDQhJ 0n4ujhlAH*`kНZR[g DAqDaæTf-(Ԡ:(%ƵTcA SAOPhRӸ-5V (Tor{5'Oi<UnOYBu*NnN>L@~\.ɫ~b'Ю;4'>9?zwJ&nn}o $9zxYۿF ,]÷@;1:{}f]=87%jba 7%qMʻ峟/ϗnڗ(])c*IJ|euw<ʇVT) R֟GYߝϯ^@thgچa6w٫g/~j.ȓ*yFSۻ'Oy^ڡo/Y*m=ڜgU[- o8]Sdt[ON-7E g+~ &g~W] _BS=ަ^}4.,є'dv,w3.g[2PqN|?6=^^>x5aᯐί;5$1i];ơ&~c!'gv28GNĎӗGx͛#D{ZGB.9VN%c,fκ՟<:?BvXSa9Trwrǎ_`.K [ܺZXMBU Uۡ)烸nkFQ?}}hik+Y" ˠ  eH0\g:Uh~wBׇ0c c[9u8X4f}3+5wK).E7|:B&VE(5b #Ȼ0̰d% ܲ*)q7x) !S,%, x1w(e;֗[%Qn,UݝTm97kҺ#e7Ǧ):ZYbhaG4y lx\*pAL/xkZI: _!;}Nc3wԺ)^b.A.:kib#v]@qmiP:fH>ݬt34> stream xڕ\M#9nF7Mvl7;|={PGvT3dA*S2lCR2x(Cc?:<;DD ٳv'z CF\ 4_7WK,Q8QyV#^6ɄmܵɭmOEĦQQ5IDpbguw|)ݬ[^ফݓ .z6gJyE)?8FӼY[N .EaEm#$=^qElEv{ 5Raiܩdԕ ~۱VlCF,t~Xnߜ|yo $;В8lֶ~ h:6:H}sXݯ' {+hg%b|z63b`n/3 / 4wل$ZƆ'ҏȡ%srú+ʴqǛȳ{Va@)ҿ|\fh)UZ޶)L_3U cG>ƴ1ZŭOX't^M+7FL#Lu[߶ܾ|ɨÛ\-c 3bU/'b?GYeP"Q\9r.Y?";7X,?nby`ڭz's^i2 ˝k "b:"ׄ鬼-@:+o \SҥHpůit,׮#*C(5(]Q 󨞯%5긵(hUgEAFt<{`GQΎV aY6*9/BuF͖znBԉ+.c c25Vsgg:t00^a̽v1="|q?JPXZ4hRk"NQ\ťg\'z0cgUe=cjDW [af}0D`Uc= *c30l+^eQ۹c=72O(zAGWeVk篯u~yLljb@ljGRu~sHceEeC1/=~1lY=FOT䢖Zz+%ZQ~t2@]_vG'i:!Vq$doNn3xe*5ɌJ1 ̘2f\¢`E y3!Ww763zۡN 5Caēˆ[qu{cQёm,[U [Vy:mVb 0ެ\*zur㵦GKG=^mk7ο)J/_Rj[;}^~ko ^m`Wq/ȯWW Gq^^vYf T٫ҫ#|'ҝ>>W%TxˮJRc݀Jzuq՛Z=zz:C BGA7cߩ* :EZt_^u' #`>. 
XlP6AڀBACp nN@yJ1`A%֠1>PgHk'T2#xnP9XnP (aPT5A :cU^-fxmP#Aqy|ފeLCZߗR BT6 hPoFN\44UW\AU$ A݁C3U43TܫXW.LU&猪fUY{UHjTM1`tQݮiF<3 U*;AL"zQ0h;V*WF^Ȗ,ݖz| >zìĔ[ЃYT'~s2}EM^!,qyHC5_iZwۆ)8enQl_vb_kE?go7_`gX,] v.gDGw͋v->~XܟO4mn-^+ܟR $ endstream endobj 2035 0 obj << /Producer (pdfTeX-1.40.23) /Author(\376\377\000H\000a\000d\000l\000e\000y\000\040\000W\000i\000c\000k\000h\000a\000m\000;\000\040\000R\000o\000m\000a\000i\000n\000\040\000F\000r\000a\000n\000\347\000o\000i\000s\000;\000\040\000L\000i\000o\000n\000e\000l\000\040\000H\000e\000n\000r\000y\000;\000\040\000K\000i\000r\000i\000l\000l\000\040\000M\000\374\000l\000l\000e\000r)/Title(\376\377\000d\000p\000l\000y\000r\000:\000\040\000A\000\040\000G\000r\000a\000m\000m\000a\000r\000\040\000o\000f\000\040\000D\000a\000t\000a\000\040\000M\000a\000n\000i\000p\000u\000l\000a\000t\000i\000o\000n)/Subject()/Creator(LaTeX with hyperref)/Keywords() /CreationDate (D:20220207092843+01'00') /ModDate (D:20220207092843+01'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.23 (TeX Live 2022/dev) kpathsea version 6.3.4/dev) >> endobj 2016 0 obj << /Type /ObjStm /N 19 /First 178 /Length 590 /Filter /FlateDecode >> stream xڥUn0+xL 9Cͭ+`j J=KJ~Km^' $ P@?R@2ΖƓ@FFLey~,&Th.ä\t3b7dyYCާȐH!ʵIqP KuuUMgQܚ (Fo}yݪo7߭}]YT2 #zI5%0;gHHH 3C<~jEVfh [ [![B4'u9€€€"FM\.ML\ph \NVႋ.Ri+ W."R2u_."\$HH5AOZ~v%H0`"!9iyk8kO,Av0;C&8ä܇8&*UYn})7\l39 2 @sXzE9q݌22Xp'zqq)٪7M'Y^ѻCc~?V|owxӯ~!>n| ~_C{30 ] /Length 4727 /Filter /FlateDecode >> stream x%Y$Y~33?ϾzfFED#C\ŋh \(H ^ԅA\ (Hi#؟Ǜz~ӽ^~^oO}ML?PK=q}c0^Y0[]0G-Scs.S;v$|v[@mZͅhTNݰPns),SۡvrX6vJXvAհFeka]ן]6Uhs#lR[vfآYͭM9a6w®QP;cs7QDͽOmڧ6jlCjwN< G%jGqRۦvf S;8vR->:r1qg—8sjԎ، vA-|Í}I!jq(q6Cj 2\֪bQ;8 !v| OVEOZa.9j6Z\P U |bL `_W=OlR3۾Z`oNmin QΦގ28>8j 0```YzS.W2.I 0U`k >( -9N&5h]Uk]unJY+Vȭ[Z|i2^ɖ-[zAk|So8mp?{.x2O3gb'`'O8 \KpC+w|&܂;p"r>G О+xz7{6^r(.2DSxq(\J Aȗ߈͘1} 4s ì{.^ǜfȒjHI%7pq%^>C e2_ Qː/C Y2<Rǿ;pm@!-<`ȃ&ˁȆN>: DJ17d}2 0$Ð Kb(؁hJJzx?~pC6 1?%D{(ǞGF|K1$؟`lku>j]c,50?~Z<RX+`%vuΣwNXz_je#Dmhf{p 
8]WbW[}Z_#}=f_ו}}l_3By\p4xe| pnw/IdNç5mpg3 4esz_V|=<'4Wڇ^7>[xe/ϥw?Mz "5Dj\`SÜF_<06Ti,4|i!C8yy5Y j8l#0~ h\q;rǜf3a])5'̰QC&hѸ -4#.7D!nsd&b3aXða 4_ 5kl/!a 5 knF4WS?PscIÒ% / _ A4]HQןg|PA>8Jidzs`.̃}>H)[m),V*X kaM64) `/>{S=ޱ!8 q_{ 9n7RZ4r ܪN7Sgep9uu7DR߅{pNuJ0Sx5Oi0w{x)r8 v 6:%15lS:OwpMXɍrڛ60jT)RR:o9Rgc*%UJT)]2ZŰRRJIr Jijw/%UJT)צt,J6{&Mv}-^ TdSɦr/*Srܓ6*9Tʺ2z2YJw$^)cU4e[Ь Ҕ)ISFy%{!RIH%J"A+ҫсrPɡC%iʸIzqqޘ._b/?xѤ>, ӟ+=Ȥ۪y)oEm>,EAع~KUրɭƆ`7쁣#Z`M!8 G|F ~;I8i gSp B2r(/_\Xj  q!b>Cf4sN>k7wSݣi4H5n}iCJ_Ӹh.| 4h>gv^b9."):vvq;?%#fkdb&:[?úW?X{Q:"Y{Ѭ?Xu幰 XUn>,JXa u6Fa lmvNa}p_ )"Q g=3*{p{Y8S8{s3Iĥ\+KztM܅3xǿo{OLKx!7"X{`ŵ 00 f́dO`>,X `9 VX `=l 6 `;쀝 v `?p8 8L `'| sp.E \X޾7&܂p=!<"'b;%;x|%J+W_ |%J+W_ |%J+W_ |%J+W_ |%J+W_ |%J+W_ |%J+W_ |%J+W_ |%J+W_ |%J+W_ |%J+WO)<@J+W2d^ɼy%J g2e^˼y-Zk2e^˼y-Zk2e^˼y-Zk2e^˼y-Zk2e^˼y-Zk2e^˼r-:Fh|\[/})PS@M5j )PS@M5j )PS@M5j )PS@M5j )PS@M5j )PS@M5j )PGmN0fL3c2cfLߙ@3h+N5ӳfIF4<ӨgZLi3 }w-5B ~#߯l+D>%+15l}`90#cf3,:`*Y,)g3̙xf:K9e~1. \kg7jw4 x) } g <- function(...) { x2 <- 2 f(x > x2, ...) } res <- g() expect_equal(res$x, 3L) df <- group_by(df, x) res <- g() expect_equal(res$x, 3L) }) test_that("filter handles simple symbols", { df <- data.frame(x = 1:4, test = rep(c(T, F), each = 2)) res <- filter(df, test) gdf <- group_by(df, x) res <- filter(gdf, test) h <- function(data) { test2 <- c(T, T, F, F) filter(data, test2) } expect_equal(h(df), df[1:2, ]) f <- function(data, ...) { one <- 1 filter(data, test, x > one, ...) } g <- function(data, ...) { four <- 4 f(data, x < four, ...) 
} res <- g(df) expect_equal(res$x, 2L) expect_equal(res$test, TRUE) res <- g(gdf) expect_equal(res$x, 2L) expect_equal(res$test, TRUE) }) test_that("filter handlers scalar results", { expect_equal(filter(mtcars, min(mpg) > 0), mtcars, ignore_attr = TRUE) expect_equal(filter(group_by(mtcars, cyl), min(mpg) > 0), group_by(mtcars, cyl)) }) test_that("filter propagates attributes", { date.start <- ISOdate(2010, 01, 01, 0) test <- data.frame(Date = ISOdate(2010, 01, 01, 1:10)) test2 <- test %>% filter(Date < ISOdate(2010, 01, 01, 5)) expect_equal(test$Date[1:4], test2$Date) }) test_that("filter discards NA", { temp <- data.frame( i = 1:5, x = c(NA, 1L, 1L, 0L, 0L) ) res <- filter(temp, x == 1) expect_equal(nrow(res), 2L) }) test_that("date class remains on filter (#273)", { x1 <- x2 <- data.frame( date = seq.Date(as.Date("2013-01-01"), by = "1 days", length.out = 2), var = c(5, 8) ) x1.filter <- x1 %>% filter(as.Date(date) > as.Date("2013-01-01")) x2$date <- x2$date + 1 x2.filter <- x2 %>% filter(as.Date(date) > as.Date("2013-01-01")) expect_equal(class(x1.filter$date), "Date") expect_equal(class(x2.filter$date), "Date") }) test_that("filter handles $ correctly (#278)", { d1 <- tibble( num1 = as.character(sample(1:10, 1000, T)), var1 = runif(1000), ) d2 <- data.frame(num1 = as.character(1:3), stringsAsFactors = FALSE) res1 <- d1 %>% filter(num1 %in% c("1", "2", "3")) res2 <- d1 %>% filter(num1 %in% d2$num1) expect_equal(res1, res2) }) test_that("filter() returns the input data if no parameters are given", { expect_identical(filter(mtcars), mtcars) expect_identical(filter(mtcars, !!!list()), mtcars) }) test_that("$ does not end call traversing. 
#502", { # Suppose some analysis options are set much earlier in the script analysis_opts <- list(min_outcome = 0.25) # Generate some dummy data d <- expand.grid(Subject = 1:3, TrialNo = 1:2, Time = 1:3) %>% as_tibble() %>% arrange(Subject, TrialNo, Time) %>% mutate(Outcome = (1:18 %% c(5, 7, 11)) / 10) # Do some aggregation trial_outcomes <- d %>% group_by(Subject, TrialNo) %>% summarise(MeanOutcome = mean(Outcome)) left <- filter(trial_outcomes, MeanOutcome < analysis_opts$min_outcome) right <- filter(trial_outcomes, analysis_opts$min_outcome > MeanOutcome) expect_equal(left, right) }) test_that("filter handles POSIXlt", { datesDF <- read.csv(stringsAsFactors = FALSE, text = " X 2014-03-13 16:08:19 2014-03-13 16:16:23 2014-03-13 16:28:28 2014-03-13 16:28:54 ") datesDF$X <- as.POSIXlt(datesDF$X) expect_equal( nrow(filter(datesDF, X > as.POSIXlt("2014-03-13"))), 4 ) }) test_that("filter handles complex vectors (#436)", { d <- data.frame(x = 1:10, y = 1:10 + 2i) expect_equal(filter(d, x < 4)$y, 1:3 + 2i) expect_equal(filter(d, Re(y) < 4)$y, 1:3 + 2i) }) test_that("%in% works as expected (#126)", { df <- tibble(a = c("a", "b", "ab"), g = c(1, 1, 2)) res <- df %>% filter(a %in% letters) expect_equal(nrow(res), 2L) res <- df %>% group_by(g) %>% filter(a %in% letters) expect_equal(nrow(res), 2L) }) test_that("row_number does not segfault with example from #781", { z <- data.frame(a = c(1, 2, 3)) b <- "a" res <- z %>% filter(row_number(b) == 2) expect_equal(nrow(res), 0L) }) test_that("row_number works on 0 length columns (#3454)", { expect_identical( mutate(tibble(), a = row_number()), tibble(a = integer()) ) }) test_that("filter does not alter expression (#971)", { my_filter <- ~ am == 1 expect_equal(my_filter[[2]][[2]], as.name("am")) }) test_that("hybrid evaluation handles $ correctly (#1134)", { df <- tibble(x = 1:10, g = rep(1:5, 2)) res <- df %>% group_by(g) %>% filter(x > min(df$x)) expect_equal(nrow(res), 9L) }) test_that("filter correctly handles empty data 
frames (#782)", { res <- tibble() %>% filter(F) expect_equal(nrow(res), 0L) expect_equal(length(names(res)), 0L) }) test_that("filter(.,TRUE,TRUE) works (#1210)", { df <- data.frame(x = 1:5) res <- filter(df, TRUE, TRUE) expect_equal(res, df) }) test_that("filter, slice and arrange preserves attributes (#1064)", { df <- structure( data.frame(x = 1:10, g1 = rep(1:2, each = 5), g2 = rep(1:5, 2)), meta = "this is important" ) res <- filter(df, x < 5) %>% attr("meta") expect_equal(res, "this is important") res <- filter(df, x < 5, x > 4) %>% attr("meta") expect_equal(res, "this is important") res <- df %>% slice(1:50) %>% attr("meta") expect_equal(res, "this is important") res <- df %>% arrange(x) %>% attr("meta") expect_equal(res, "this is important") }) test_that("filter works with rowwise data (#1099)", { df <- tibble(First = c("string1", "string2"), Second = c("Sentence with string1", "something")) res <- df %>% rowwise() %>% filter(grepl(First, Second, fixed = TRUE)) expect_equal(nrow(res), 1L) expect_equal(df[1, ], ungroup(res)) }) test_that("grouped filter handles indices (#880)", { res <- iris %>% group_by(Species) %>% filter(Sepal.Length > 5) res2 <- mutate(res, Petal = Petal.Width * Petal.Length) expect_equal(nrow(res), nrow(res2)) expect_equal(group_rows(res), group_rows(res2)) expect_equal(group_keys(res), group_keys(res2)) }) test_that("filter(FALSE) handles indices", { out <- mtcars %>% group_by(cyl) %>% filter(FALSE, .preserve = TRUE) %>% group_rows() expect_identical(out, list_of(integer(), integer(), integer(), .ptype = integer())) out <- mtcars %>% group_by(cyl) %>% filter(FALSE, .preserve = FALSE) %>% group_rows() expect_identical(out, list_of(.ptype = integer())) }) test_that("filter handles S4 objects (#1366)", { env <- environment() Numbers <- suppressWarnings(setClass( "Numbers", slots = c(foo = "numeric"), contains = "integer", where = env )) setMethod("[", "Numbers", function(x, i, ...){ Numbers(unclass(x)[i, ...], foo = x@foo) }) 
on.exit(removeClass("Numbers", where = env)) df <- data.frame(x = Numbers(1:10, foo = 10)) res <- filter(df, x > 3) expect_s4_class(res$x, "Numbers") expect_equal(res$x@foo, 10) }) test_that("hybrid lag and default value for string columns work (#1403)", { res <- mtcars %>% mutate(xx = LETTERS[gear]) %>% filter(xx == lag(xx, default = "foo")) xx <- LETTERS[mtcars$gear] ok <- xx == lag(xx, default = "foo") expect_equal(xx[ok], res$xx) res <- mtcars %>% mutate(xx = LETTERS[gear]) %>% filter(xx == lead(xx, default = "foo")) xx <- LETTERS[mtcars$gear] ok <- xx == lead(xx, default = "foo") expect_equal(xx[ok], res$xx) }) # .data and .env tests now in test-hybrid-traverse.R test_that("filter handles raw vectors (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(filter(df, a == 1), tibble(a = 1L, b = as.raw(1))) expect_identical(filter(df, b == 1), tibble(a = 1L, b = as.raw(1))) }) test_that("`vars` attribute is not added if empty (#2772)", { expect_identical(tibble(x = 1:2) %>% filter(x == 1), tibble(x = 1L)) }) test_that("filter handles list columns", { res <- tibble(a=1:2, x = list(1:10, 1:5)) %>% filter(a == 1) %>% pull(x) expect_equal(res, list(1:10)) res <- tibble(a=1:2, x = list(1:10, 1:5)) %>% group_by(a) %>% filter(a == 1) %>% pull(x) expect_equal(res, list(1:10)) }) test_that("hybrid function row_number does not trigger warning in filter (#3750)", { out <- tryCatch({ mtcars %>% filter(row_number() > 1, row_number() < 5); TRUE }, warning = function(w) FALSE ) expect_true(out) }) test_that("filter() preserve order across groups (#3989)", { df <- tibble(g = c(1, 2, 1, 2, 1), time = 5:1, x = 5:1) res1 <- df %>% group_by(g) %>% filter(x <= 4) %>% arrange(time) res2 <- df %>% group_by(g) %>% arrange(time) %>% filter(x <= 4) res3 <- df %>% filter(x <= 4) %>% arrange(time) %>% group_by(g) expect_equal(res1, res2) expect_equal(res1, res3) expect_false(is.unsorted(res1$time)) expect_false(is.unsorted(res2$time)) expect_false(is.unsorted(res3$time)) }) 
test_that("filter() with two conditions does not freeze (#4049)", { expect_identical( iris %>% filter(Sepal.Length > 7, Petal.Length < 6), iris %>% filter(Sepal.Length > 7 & Petal.Length < 6) ) }) test_that("filter() handles matrix and data frame columns (#3630)", { df <- tibble( x = 1:2, y = matrix(1:4, ncol = 2), z = data.frame(A = 1:2, B = 3:4) ) expect_equal(filter(df, x == 1), df[1, ]) expect_equal(filter(df, y[,1] == 1), df[1, ]) expect_equal(filter(df, z$A == 1), df[1, ]) gdf <- group_by(df, x) expect_equal(filter(gdf, x == 1), gdf[1, ]) expect_equal(filter(gdf, y[,1] == 1), gdf[1, ]) expect_equal(filter(gdf, z$A == 1), gdf[1, ]) gdf <- group_by(df, y) expect_equal(filter(gdf, x == 1), gdf[1, ]) expect_equal(filter(gdf, y[,1] == 1), gdf[1, ]) expect_equal(filter(gdf, z$A == 1), gdf[1, ]) gdf <- group_by(df, z) expect_equal(filter(gdf, x == 1), gdf[1, ]) expect_equal(filter(gdf, y[,1] == 1), gdf[1, ]) expect_equal(filter(gdf, z$A == 1), gdf[1, ]) }) test_that("filter() handles named logical (#4638)", { tbl <- tibble(a = c(a = TRUE)) expect_equal(filter(tbl, a), tbl) }) test_that("filter() allows named constants that resolve to logical vectors (#4612)", { filters <- mtcars %>% transmute( cyl %in% 6:8, hp / drat > 50 ) expect_identical( mtcars %>% filter(!!!filters), mtcars %>% filter(!!!unname(filters)) ) }) test_that("filter() allowing matrices with 1 column", { out <- expect_warning( filter(data.frame(x = 1:2), matrix(c(TRUE, FALSE), nrow = 2)), NA ) expect_identical(out, data.frame(x = 1L)) }) test_that("filter() gives useful error messages", { expect_snapshot({ # wrong type (expect_error( iris %>% group_by(Species) %>% filter(1:n()) )) (expect_error( iris %>% filter(1:n()) )) # matrix with > 1 columns (expect_error( filter(data.frame(x = 1:2), matrix(c(TRUE, FALSE, TRUE, FALSE), nrow = 2)) )) # wrong size (expect_error( iris %>% group_by(Species) %>% filter(c(TRUE, FALSE)) )) (expect_error( iris %>% rowwise(Species) %>% filter(c(TRUE, FALSE)) )) 
(expect_error( iris %>% filter(c(TRUE, FALSE)) )) # wrong size in column (expect_error( iris %>% group_by(Species) %>% filter(data.frame(c(TRUE, FALSE))) )) (expect_error( iris %>% rowwise() %>% filter(data.frame(c(TRUE, FALSE))) )) (expect_error( iris %>% filter(data.frame(c(TRUE, FALSE))) )) (expect_error( tibble(x = 1) %>% filter(c(TRUE, TRUE)) )) # wrong type in column (expect_error( iris %>% group_by(Species) %>% filter(data.frame(Sepal.Length > 3, 1:n())) )) (expect_error( iris %>% filter(data.frame(Sepal.Length > 3, 1:n())) )) # evaluation error (expect_error( mtcars %>% filter(`_x`) )) (expect_error( mtcars %>% group_by(cyl) %>% filter(`_x`) )) # named inputs (expect_error( filter(mtcars, x = 1) )) (expect_error( filter(mtcars, y > 2, z = 3) )) (expect_error( filter(mtcars, TRUE, x = 1) )) # ts (expect_error( filter(ts(1:10)) )) # Error that contains { (expect_error( tibble() %>% filter(stop("{")) )) # across() in filter() does not warn yet data.frame(x = 1, y = 1) %>% filter(across(everything(), ~ .x > 0)) data.frame(x = 1, y = 1) %>% filter(data.frame(x > 0, y > 0)) }) }) test_that("filter preserves grouping", { gf <- group_by(tibble(g = c(1, 1, 1, 2, 2), x = 1:5), g) i <- count_regroups(out <- filter(gf, x %in% c(3,4))) expect_equal(i, 0L) expect_equal(group_vars(gf), "g") expect_equal(group_rows(out), list_of(1L, 2L)) i <- count_regroups(out <- filter(gf, x < 3)) expect_equal(i, 0L) expect_equal(group_vars(gf), "g") expect_equal(group_rows(out), list_of(c(1L, 2L))) }) test_that("filter() with empty dots still calls dplyr_row_slice()", { tbl <- new_tibble(list(x = 1), nrow = 1L) foo <- structure(tbl, class = c("foo_df", class(tbl))) local_methods( # `foo_df` always loses class when row slicing dplyr_row_slice.foo_df = function(data, i, ...) 
{ out <- NextMethod() new_tibble(out, nrow = nrow(out)) } ) expect_s3_class(filter(foo), class(tbl), exact = TRUE) expect_s3_class(filter(foo, x == 1), class(tbl), exact = TRUE) }) test_that("can filter() with unruly class", { local_methods( `[.dplyr_foobar` = function(x, i, ...) new_dispatched_quux(vec_slice(x, i)), dplyr_row_slice.dplyr_foobar = function(x, i, ...) x[i, ] ) df <- foobar(data.frame(x = 1:3)) expect_identical( filter(df, x <= 2), quux(data.frame(x = 1:2, dispatched = TRUE)) ) }) test_that("filter() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) stack <<- sys.calls(), filter(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("if_any() and if_all() work", { df <- tibble(x1 = 1:10, x2 = c(1:5, 10:6)) expect_equal( filter(df, if_all(starts_with("x"), ~ . > 6)), filter(df, x1 > 6 & x2 > 6) ) expect_equal( filter(df, if_any(starts_with("x"), ~ . > 6)), filter(df, x1 > 6 | x2 > 6) ) }) dplyr/tests/testthat/test-deprec-funs.R0000644000176200001440000000654614151641776017750 0ustar liggesuserstest_that("fun_list is merged with new args", { withr::local_options(lifecycle_verbosity = "quiet") funs <- funs(fn = bar) funs <- as_fun_list(funs, env(), baz = "baz") expect_identical(funs$fn, quo(bar(., baz = "baz"))) }) test_that("funs() works with namespaced calls", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(summarise_all(mtcars, funs(base::mean(.))), summarise_all(mtcars, funs(mean(.)))) expect_identical(summarise_all(mtcars, funs(base::mean)), summarise_all(mtcars, funs(mean(.)))) }) test_that("funs() found in local environment", { withr::local_options(lifecycle_verbosity = "quiet") f <- function(x) 1 df <- data.frame(x = c(2:10, 1000)) out <- summarise_all(df, funs(f = f, mean = mean, median = median)) expect_equal(out, data.frame(f = 1, mean = 105.4, median = 6.5)) }) test_that("funs() accepts quoted 
functions", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(funs(mean), funs("mean")) }) test_that("funs() accepts unquoted functions", { withr::local_options(lifecycle_verbosity = "quiet") funs <- funs(fn = !!mean) expect_identical(funs$fn, new_quosure(call2(base::mean, quote(.)))) }) test_that("funs() accepts quoted calls", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(funs(mean), funs(mean(.))) }) test_that("funs() can be merged with new arguments", { withr::local_options(lifecycle_verbosity = "quiet") fns <- funs(foo(.)) expect_identical(as_fun_list(fns, current_env(), foo = 1L), funs(foo(., foo = 1L))) }) enfun <- function(.funs, ...) { as_fun_list(.funs, caller_env(), ...) } test_that("can enfun() literal functions", { res <- enfun(identity(mean)) expect_equal(length(res), 1L) expect_identical(res[[1L]], mean) }) test_that("can enfun() named functions by expression", { res <- enfun(mean) expect_equal(length(res), 1L) expect_identical(res[[1L]], mean) }) test_that("local objects are not treated as symbols", { withr::local_options(lifecycle_verbosity = "quiet") mean <- funs(my_mean(.)) expect_identical(enfun(mean), mean) }) test_that("can enfun() character vectors", { res <- enfun(c("min", "max")) expect_equal(length(res), 2L) expect_equal(res[[1]], min) expect_equal(res[[2]], max) }) test_that("can enfun() purrr-style lambdas", { my_mean <- as_function(~ mean(.x)) res <- enfun(~ mean(.x)) expect_equal(length(res), 1L) expect_true(typeof(res[[1]]) == "closure") }) test_that("funs_ works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( funs(mean), funs_(list(~ mean)) ) expect_equal( funs_(list("mean")), funs_(list(`environment<-`(~ mean, baseenv()))), ignore_formula_env = TRUE ) expect_equal( funs(mean(.)), funs_(list(~ mean(.))) ) }) test_that("as_fun_list() auto names chr vectors (4307)", { expect_identical( data.frame(x = 1:10) %>% summarise_at("x", c("mean", "sum")), data.frame(x = 1:10) 
%>% summarise(mean = mean(x), sum = sum(x)) ) }) test_that("funs() is deprecated", { expect_snapshot(funs(fn = bar)) }) # Errors ------------------------------------------------------------------ test_that("funs() give meaningful error messages", { withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(funs(function(si) { mp[si] }))) (expect_error(funs(~ mp[.])) ) }) }) dplyr/tests/testthat/test-deprec-tibble.R0000644000176200001440000000030314121112104020166 0ustar liggesuserstest_that("add_rownames is deprecated", { expect_warning( res <- mtcars %>% add_rownames("Make&Model"), "deprecated" ) expect_equal(class(res), c("tbl_df", "tbl", "data.frame")) }) dplyr/tests/testthat/test-window.R0000644000176200001440000000522314151641776017033 0ustar liggesuserstest_that("If n = 0, lead and lag return x", { expect_equal(lead(1:2, 0), 1:2) expect_equal(lag(1:2, 0), 1:2) }) test_that("If n = length(x), returns all missing", { miss <- rep(NA_integer_, 2) expect_equal(lead(1:2, 2), miss) expect_equal(lag(1:2, 2), miss) }) test_that("cumany and cumall handle NAs consistently (#408, #3749, #4132)", { batman <- c(NA, NA, NA, NA, NA) expect_true(all(is.na(cumany(batman)))) expect_true(all(is.na(cumall(batman)))) # normal usecases expect_identical( cumall(c(TRUE, NA, FALSE, NA)), c(TRUE, NA, FALSE, FALSE) ) expect_identical( cumall(c(FALSE, NA, TRUE)), c(FALSE, FALSE, FALSE) ) expect_identical( cumall(c(NA, TRUE)), c(NA, NA) ) expect_identical( cumall(c(NA, FALSE)), c(NA, FALSE) ) expect_identical( cumany(c(TRUE, NA, FALSE)), c(TRUE, TRUE, TRUE) ) expect_identical( cumany(c(FALSE, NA, TRUE)), c(FALSE, NA, TRUE) ) # scalars expect_true(is.na(cumall(NA))) expect_true(is.na(cumany(NA))) expect_true(cumall(TRUE)) expect_false(cumall(FALSE)) expect_true(cumany(TRUE)) expect_false(cumany(FALSE)) # degenerate cases expect_identical( cumall(logical()), logical() ) expect_identical( cumany(logical()), logical() ) # behaviour of degenerate logical vectors mimics 
that of base R functions x <- as.raw(c(2L, 9L, 0L)) class(x) <- "logical" expect_identical(cumall(x), x == TRUE) expect_identical(cumany(x), c(TRUE, TRUE, TRUE)) }) test_that("percent_rank ignores NAs (#1132)", { expect_equal(percent_rank(c(1:3, NA)), c(0, 0.5, 1, NA)) }) test_that("cume_dist ignores NAs (#1132)", { expect_equal(cume_dist(c(1:3, NA)), c(1 / 3, 2 / 3, 1, NA)) }) test_that("cummean is not confused by FP error (#1387)", { a <- rep(99, 9) expect_true(all(cummean(a) == a)) }) test_that("cummean is consistent with cumsum() and seq_along() (#5287)", { x <- 1:5 expect_equal(cummean(x), c(1, 1.5, 2, 2.5, 3)) expect_equal(cummean(x), cumsum(x) / seq_along(x)) expect_equal(cummean(numeric()), numeric()) }) test_that("order_by() returns correct value", { expected <- int(15, 14, 12, 9, 5) expect_identical(order_by(5:1, cumsum(1:5)), expected) x <- 5:1 y <- 1:5 expect_identical(order_by(x, cumsum(y)), expected) }) test_that("order_by() works in arbitrary envs (#2297)", { env <- child_env("base") expect_equal( with_env(env, dplyr::order_by(5:1, cumsum(1:5))), rev(cumsum(rev(1:5))) ) expect_equal( order_by(5:1, cumsum(1:5)), rev(cumsum(rev(1:5))) ) }) test_that("order_by() give meaningful errors", { expect_snapshot({ (expect_error(order_by(NULL, 1L))) }) }) dplyr/tests/testthat/test-colwise-distinct.R0000644000176200001440000000165214121112104020761 0ustar liggesuserstest_that("scoped distinct is identical to manual distinct", { df <- tibble( x = rep(2:5, each=2), y = rep(2:3, each = 4), z = "a" ) expect_identical(distinct_all(df), distinct(df, x, y, z)) expect_identical(distinct_at(df, vars(x)), distinct(df, x)) expect_identical(distinct_if(df, is.integer), distinct(df, x, y)) }) test_that(".funs is applied to variables before getting distinct rows", { df <- tibble( x = rep(2:5, each=2), y = rep(2:3, each = 4) ) expect_identical(distinct_all(df, `-`), distinct(mutate_all(df,`-`), x, y)) }) test_that("scoped distinct applies to grouping variables (#3480)", { df <- 
tibble( g = rep(1:2, each = 4), x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2 ) out <- df[c(1, 3, 5, 8), ] expect_identical(distinct_all(df), out) expect_identical(distinct_at(df, vars(g, x, y)), out) expect_identical(distinct_if(df, is.numeric), out) }) dplyr/tests/testthat/test-n_distinct.R0000644000176200001440000000516514121112104017636 0ustar liggesuserstest_that("n_distinct gives the correct results on iris", { expect_equal( sapply(iris, n_distinct), sapply(iris, function(.) length(unique(.))) ) }) df_var <- data.frame( l = c(T, F, F), i = c(1, 1, 2), d = Sys.Date() + c(1, 1, 2), f = factor(letters[c(1, 1, 2)]), n = c(1, 1, 2) + 0.5, t = Sys.time() + c(1, 1, 2), c = letters[c(1, 1, 2)], stringsAsFactors = FALSE ) test_that("n_distinct gives correct results for key types", { expect_equal( sapply(df_var, n_distinct), sapply(df_var, function(.) length(unique(.))) ) }) test_that("n_distinct treats NA correctly in the REALSXP case (#384)", { expect_equal(n_distinct(c(1.0, NA, NA)), 2) }) test_that("n_distinct recycles length 1 vectors (#3685)", { expect_equal(n_distinct(1, 1:4), 4) expect_equal(n_distinct(1:4, 1), 4) d <- tibble(x = 1:4) res <- d %>% summarise(y = sum(x), n1 = n_distinct(y, x), n2 = n_distinct(x, y), n3 = n_distinct(y), n4 = n_distinct(identity(y)), n5 = n_distinct(x)) expect_equal(res$n1, 4) expect_equal(res$n2, 4) expect_equal(res$n3, 1) expect_equal(res$n4, 1) expect_equal(res$n5, 4) res <- tibble(g = c(1,1,1,1,2,2), x = c(1,2,3,1,1,2)) %>% group_by(g) %>% summarise(y = sum(x), n1 = n_distinct(y, x), n2 = n_distinct(x, y), n3 = n_distinct(y), n4 = n_distinct(identity(y)), n5 = n_distinct(x)) expect_equal(res$n1, c(3,2)) expect_equal(res$n2, c(3,2)) expect_equal(res$n3, c(1,1)) expect_equal(res$n4, c(1,1)) expect_equal(res$n5, c(3,2)) }) test_that("n_distinct() handles unnamed (#5069)", { x <- iris$Sepal.Length y <- iris$Sepal.Width expect_equal( n_distinct(iris$Sepal.Length, iris$Sepal.Width), n_distinct(x, y) ) }) 
test_that("n_distinct handles expressions in na.rm (#3686)", { d <- tibble(x = c(1:4,NA)) yes <- TRUE no <- FALSE expect_equal(d %>% summarise(n = n_distinct(x, na.rm = T)) %>% pull(), 4) expect_equal(d %>% summarise(n = n_distinct(x, na.rm = F)) %>% pull(), 5) expect_equal(d %>% summarise(n = n_distinct(x, na.rm = yes)) %>% pull(), 4) expect_equal(d %>% summarise(n = n_distinct(x, na.rm = no)) %>% pull(), 5) expect_equal(d %>% summarise(n = n_distinct(x, na.rm = TRUE || TRUE)) %>% pull(), 4) }) test_that("n_distinct() respects .data (#5008)", { expect_identical( data.frame(x = 42) %>% summarise(n = n_distinct(.data$x)), data.frame(n = 1L) ) }) test_that("n_distinct() works with `{{ }}` (#5461)", { wrapper <- function(data, col) { summarise(data, result = n_distinct({{ col }}, na.rm = TRUE)) } df <- data.frame(x = c(1, 1, 3, NA)) expect_identical( wrapper(df, x), data.frame(result = 2L) ) }) dplyr/tests/testthat/test-join-rows.R0000644000176200001440000000215414151641776017453 0ustar liggesuserstest_that("repeated keys generate Cartesian product", { out <- join_rows(c(1, 1), c(1, 1)) expect_equal(out$x, c(1L, 1L, 2L, 2L)) expect_equal(out$y, c(1L, 2L, 1L, 2L)) }) test_that("inner join only outputs matching keys", { out <- join_rows(c(2, 1), c(3, 4, 1), type = "inner") expect_equal(out$x, 2L) expect_equal(out$y, 3L) }) test_that("left join contains all keys from x", { out <- join_rows(c(2, 1), c(3, 4, 1), type = "left") expect_equal(out$x, c(1L, 2L)) expect_equal(out$y, c(NA, 3L)) }) test_that("right join contains all keys from y", { out <- join_rows(c(2, 1), c(3, 4, 1), type = "right") expect_equal(out$x, c(2L)) expect_equal(out$y, c(3L)) expect_equal(out$y_extra, c(1L, 2L)) }) test_that("full join contains all keys from both", { out <- join_rows(c(2, 1), c(3, 1), type = "full") expect_equal(out$x, c(1L, 2L)) expect_equal(out$y, c(NA, 2L)) expect_equal(out$y_extra, 1L) }) test_that("join_rows() gives meaningful error message on incompatible types", { 
expect_snapshot({ (expect_error( join_rows(data.frame(x = 1), data.frame(x = factor("a"))) )) }) }) dplyr/tests/testthat/utf-8.txt0000644000176200001440000000157214121112104016102 0ustar liggesusers# UTF-8 tests that can't be run on Windows CRAN # R CMD check will try to parse the file anyway, # we use a different file extension to avoid this. df <- data.frame(中文1 = 1:10, 中文2 = 1:10, eng = 1:10) df2 <- df %>% mutate(中文1 = 中文1 + 1) gdf2 <- df %>% group_by(eng) %>% mutate(中文1 = 中文1 + 1) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(df2))) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(gdf2))) df3 <- filter(df2, eng > 5) gdf3 <- filter(gdf2, eng > 5) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(df3))) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(gdf3))) df4 <- filter(df2, 中文1 > 5) gdf4 <- filter(gdf2, 中文1 > 5) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(df4))) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(gdf4))) dplyr/tests/testthat/test-near.R0000644000176200001440000000012214121112104016411 0ustar liggesuserstest_that("near accepts nearby fp values", { expect_true(near(sqrt(2)^2, 2)) }) dplyr/tests/testthat/test-nest_by.R0000644000176200001440000000165614121112104017144 0ustar liggesuserstest_that("returns expected type/data", { df <- data.frame(g = 1:2, x = 1:2, y = 1:2) out <- nest_by(df, g) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "g") expect_named(out, c("g", "data")) }) test_that("can control key col", { df <- data.frame(g = 1:2, x = 1:2, y = 1:2) out <- nest_by(df, g, .key = "key") expect_named(out, c("g", "key")) }) test_that("nest_by() inherits grouping", { df <- data.frame(g1 = 1:2, g2 = 1:2, x = 1:2, y = 1:2) expect_equal( df %>% group_by(g1) %>% nest_by() %>% group_vars(), "g1" ) # And you can't have it both ways expect_error(df %>% group_by(g1) %>% nest_by("g2"), "re-group") }) test_that("can control 
whether grouping data in list-col", { df <- data.frame(g = 1:2, x = 1:2, y = 1:2) out <- nest_by(df, g) expect_named(out$data[[1]], c("x", "y")) out <- nest_by(df, g, .keep = TRUE) expect_named(out$data[[1]], c("g", "x", "y")) }) dplyr/tests/testthat/test-coalesce.R0000644000176200001440000000275314151641776017307 0ustar liggesuserstest_that("non-missing scalar replaces all missing values", { x <- c(NA, 1) expect_equal(coalesce(x, 1), c(1, 1)) }) test_that("coerces to common type", { expect_identical(coalesce(NA, 1), 1) f <- factor("x", levels = c("x", "y")) expect_identical(coalesce(NA, f), f) }) test_that("finds non-missing values in multiple positions", { x1 <- c(1L, NA, NA) x2 <- c(NA, 2L, NA) x3 <- c(NA, NA, 3L) expect_equal(coalesce(x1, x2, x3), 1:3) }) test_that("coalesce() gives meaningful error messages", { expect_snapshot({ (expect_error(coalesce(1:2, 1:3))) (expect_error(coalesce(1:2, letters[1:2]))) }) }) test_that("coalesce() supports data frames (#5326)", { out <- coalesce( data.frame(x = c(NA, 1)), data.frame(x = 1:2) ) expect_identical(out, data.frame(x = c(1, 1))) df1 <- data.frame(x = c(NA, 1, NA), y = c(2, NA, NA), z = c(1:2, NA)) df2 <- tibble::tibble(x = 1:3, y = c(3, 4, NA), z = c(NA, NA, NA)) df3 <- data.frame(x = NA, y = c(30, 40, 50), z = 101:103) out <- coalesce(df1, df2, df3) exp <- tibble(x = c(1, 1, 3), y = c(2, 4, 50), z = c(1L, 2L, 103L)) expect_identical(out, exp) expect_error( coalesce( data.frame(x = c(NA, 1)), data.frame(x = c("a", "b")) ), class = "vctrs_error_incompatible_type" ) expect_error(coalesce(as.matrix(mtcars), as.matrix(mtcars)), "matrices") }) test_that("coalesce() supports one-dimensional arrays (#5557)", { x <- array(1:10) out <- coalesce(x, 0) expect_equal(out, x) }) dplyr/tests/testthat/test-nth-value.R0000644000176200001440000000214214151641776017424 0ustar liggesuserstest_that("nth works with lists", { x <- list(1, 2, 3) expect_equal(nth(x, 1), 1) expect_equal(nth(x, 4), NULL) expect_equal(nth(x, 4, default = 
1), 1) }) test_that("negative values index from end", { x <- 1:5 expect_equal(nth(x, -1), 5) expect_equal(nth(x, -3), 3) }) test_that("indexing past ends returns default value", { expect_equal(nth(1:4, 5), NA_integer_) expect_equal(nth(1:4, -5), NA_integer_) expect_equal(nth(1:4, -10), NA_integer_) }) test_that("first uses default value for 0 length vectors", { expect_equal(first(logical()), NA) expect_equal(first(integer()), NA_integer_) expect_equal(first(numeric()), NA_real_) expect_equal(first(character()), NA_character_) expect_equal(first(list()), NULL) }) test_that("firsts uses default value for 0 length augmented vectors", { fc <- factor("a")[0] dt <- Sys.Date() tm <- Sys.time() expect_equal(first(fc[0]), fc[NA]) expect_equal(first(dt[0]), dt[NA]) expect_equal(first(tm[0]), tm[NA]) }) test_that("nth() gives meaningful error message (#5466)", { expect_snapshot({ (expect_error(nth(1:10, "x"))) }) }) dplyr/tests/testthat/test-deprec-lazyeval.R0000644000176200001440000002236114121112104020564 0ustar liggesuserstest_that("can select negatively (#2519)", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(select_(mtcars, ~ -cyl), mtcars[-2]) }) test_that("select yields proper names", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(names(select_(mtcars, ~ cyl:hp)), c("cyl", "disp", "hp")) }) test_that("lazydots are named and arrange() doesn't fail (it assumes empty names)", { withr::local_options(lifecycle_verbosity = "quiet") dots <- compat_lazy_dots(list(), env(), "cyl") expect_identical(names(dots), "") expect_identical(arrange_(mtcars, "cyl"), arrange(mtcars, cyl)) }) test_that("mutate_each_() and summarise_each_() handle lazydots", { withr::local_options(lifecycle_verbosity = "quiet") cyl_chr <- mutate_each_(mtcars, list(as.character), "cyl")$cyl expect_identical(cyl_chr, as.character(mtcars$cyl)) cyl_mean <- summarise_each_(mtcars, list(mean), "cyl")$cyl expect_equal(cyl_mean, mean(mtcars$cyl)) }) 
test_that("select_vars_() handles lazydots", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(select_vars_(letters, c("a", "b")), set_names(c("a", "b"))) expect_identical( select_vars_(letters, c("a", "b"), include = "c"), set_names(c("c", "a", "b")) ) expect_identical( select_vars_(letters, c("a", "b"), exclude = "b"), set_names(c("a")) ) }) df <- tibble( a = c(1:3, 2:3), b = letters[c(1:4, 4L)] ) test_that("arrange_ works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( arrange_(df, ~ -a), arrange(df, -a) ) expect_equal( arrange_(df, .dots = list(quote(-a))), arrange(df, -a) ) expect_equal( arrange_(df, .dots = list(~ -a)), arrange(df, -a) ) }) test_that("count_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( count_(df, ~ b), count(df, b) ) expect_equal( count_(df, ~ b, wt = quote(a)), count(df, b, wt = a) ) wt <- 1:4 expect_identical( count_(df, "b", "wt"), count(df, b, wt = wt) ) expect_identical( add_count(df, b), add_count_(df, ~ b) ) }) test_that("distinct_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( distinct_(df, ~ a), distinct(df, a) ) expect_equal( distinct_(df, .dots = list(quote(a))), distinct(df, a) ) expect_equal( distinct_(df, .dots = list(~ a)), distinct(df, a) ) expect_equal( distinct_(df %>% group_by(b), ~ a, .dots = NULL), distinct(df %>% group_by(b), a) ) expect_equal( distinct_(df %>% group_by(b), .dots = list(quote(a))), distinct(df %>% group_by(b), a) ) expect_equal( distinct_(df %>% group_by(b), .dots = list(~ a)), distinct(df %>% group_by(b), a) ) }) test_that("do_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( do_(df, ~ tibble(-.$a)), do(df, tibble(-.$a)) ) expect_equal( do_(df, .dots = list(quote(dplyr::tibble(-.$a)))), do(df, tibble(-.$a)) ) expect_equal( do_(df, .dots = list(~ dplyr::tibble(-.$a))), do(df, tibble(-.$a)) ) foo <- "foobar" expect_identical( do_(df, .dots = "tibble(foo)"), do(df, 
tibble(foo)) ) expect_equal( do_(df %>% group_by(b), ~ tibble(-.$a)), do(df %>% group_by(b), tibble(-.$a)) ) expect_equal( do_(df %>% group_by(b), .dots = list(quote(dplyr::tibble(-.$a)))), do(df %>% group_by(b), tibble(-.$a)) ) expect_equal( do_(df %>% group_by(b), .dots = list(~ dplyr::tibble(-.$a))), do(df %>% group_by(b), tibble(-.$a)) ) }) test_that("filter_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( filter_(df, ~ a > 1), filter(df, a > 1) ) expect_equal( filter_(df, .dots = list(quote(a > 1))), filter(df, a > 1) ) cnd <- rep(TRUE, 5) expect_identical( filter_(df, .dots = "cnd"), filter(df, cnd) ) }) test_that("group_by_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( group_by_(df, ~ a), group_by(df, a) ) expect_equal( group_by_(df, ~ -a), group_by(df, -a) ) expect_equal( group_by_(df, .dots = "a"), group_by(df, a) ) expect_equal( group_by_(df, .dots = list(quote(-a))), group_by(df, -a) ) expect_equal( group_by_(df, .dots = list(~ -a)), group_by(df, -a) ) }) test_that("mutate_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( mutate_(df, c = ~ -a), mutate(df, c = -a) ) expect_equal( mutate_(df, .dots = list(c = quote(-a))), mutate(df, c = -a) ) expect_equal( mutate_(df, .dots = list(c = ~ -a)), mutate(df, c = -a) ) expect_identical( mutate_(df, ~ -a), mutate(df, -a) ) foo <- "foobar" expect_identical( mutate_(df, .dots = "foo"), mutate(df, foo) ) }) test_that("rename_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( rename_(df, c = ~ a), rename(df, c = a) ) expect_equal( rename_(df, .dots = list(c = quote(a))), rename(df, c = a) ) expect_equal( rename_(df, .dots = list(c = ~ a)), rename(df, c = a) ) }) test_that("select_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( select_(df, ~ a), select(df, a) ) expect_equal( select_(df, ~ -a), select(df, -a) ) expect_equal( select_(df, .dots = "a"), select(df, a) ) 
expect_equal( select_(df, .dots = list(quote(-a))), select(df, -a) ) expect_equal( select_(df, .dots = list(~ -a)), select(df, -a) ) }) test_that("slice_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( slice_(df, ~ 2:n()), slice(df, 2:n()) ) expect_equal( slice_(df, .dots = list(quote(2:n()))), slice(df, 2:n()) ) expect_equal( slice_(df, .dots = list(~ 2:n())), slice(df, 2:n()) ) pos <- 3 expect_identical( slice_(df, .dots = "pos:n()"), slice(df, pos:n()) ) }) test_that("summarise_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( summarise_(df, a = ~ mean(a)), summarise(df, a = mean(a)) ) expect_equal( summarise_(df, .dots = list(a = quote(mean(a)))), summarise(df, a = mean(a)) ) expect_equal( summarise_(df, .dots = list(a = ~ mean(a))), summarise(df, a = mean(a)) ) my_mean <- mean expect_identical( summarise_(df, .dots = c(a = "my_mean(a)")), summarise(df, a = my_mean(a)) ) expect_equal( summarise_(df %>% group_by(b), a = ~ mean(a)), summarise(df %>% group_by(b), a = mean(a)) ) expect_equal( summarise_(df %>% group_by(b), .dots = list(a = quote(mean(a)))), summarise(df %>% group_by(b), a = mean(a)) ) expect_equal( summarise_(df %>% group_by(b), .dots = list(a = ~ mean(a))), summarise(df %>% group_by(b), a = mean(a)) ) }) test_that("summarize_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( summarize_(df, a = ~ mean(a)), summarize(df, a = mean(a)) ) expect_equal( summarize_(df, .dots = list(a = quote(mean(a)))), summarize(df, a = mean(a)) ) expect_equal( summarize_(df, .dots = list(a = ~ mean(a))), summarize(df, a = mean(a)) ) expect_equal( summarize_(df %>% group_by(b), a = ~ mean(a)), summarize(df %>% group_by(b), a = mean(a)) ) expect_equal( summarize_(df %>% group_by(b), .dots = list(a = quote(mean(a)))), summarize(df %>% group_by(b), a = mean(a)) ) expect_equal( summarize_(df %>% group_by(b), .dots = list(a = ~ mean(a))), summarize(df %>% group_by(b), a = mean(a)) ) }) 
test_that("transmute_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( transmute_(df, c = ~ -a), transmute(df, c = -a) ) expect_equal( transmute_(df, .dots = list(c = quote(-a))), transmute(df, c = -a) ) expect_equal( transmute_(df, .dots = list(c = ~ -a)), transmute(df, c = -a) ) foo <- "foobar" expect_identical( transmute_(df, .dots = "foo"), transmute(df, foo) ) }) test_that("_each() and _all() families agree", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = 1:3, y = 1:3) expect_equal(summarise_each(df, list(mean)), summarise_all(df, mean)) expect_equal(summarise_each(df, list(mean), x), summarise_at(df, vars(x), mean)) expect_equal(summarise_each(df, list(mean = mean), x), summarise_at(df, vars(x), list(mean = mean))) expect_equal(summarise_each(df, list(mean = mean), x:y), summarise_at(df, vars(x:y), list(mean = mean))) expect_equal(summarise_each(df, list(mean), x:y), summarise_at(df, vars(x:y), mean)) expect_equal(summarise_each(df, list(mean), z = y), summarise_at(df, vars(z = y), mean)) expect_equal(mutate_each(df, list(mean)), mutate_all(df, mean)) expect_equal(mutate_each(df, list(mean), x), mutate_at(df, vars(x), mean)) expect_equal(mutate_each(df, list(mean = mean), x), mutate_at(df, vars(x), list(mean = mean))) expect_equal(mutate_each(df, list(mean = mean), x:y), mutate_at(df, vars(x:y), list(mean = mean))) expect_equal(mutate_each(df, list(mean), x:y), mutate_at(df, vars(x:y), mean)) expect_equal(mutate_each(df, list(mean), z = y), mutate_at(df, vars(z = y), mean)) }) dplyr/tests/testthat/test-generics.R0000644000176200001440000000726014121112104017275 0ustar liggesuserstest_that("row_slice recomputes groups", { gf <- group_by(data.frame(g = c(1, 1, 2, 2, 3, 3)), g) out <- dplyr_row_slice(gf, c(1L, 3L, 5L)) expect_equal(group_data(out)$.rows, list_of(1L, 2L, 3L)) out <- dplyr_row_slice(gf, c(4L, 3L)) expect_equal(group_data(out)$.rows, list_of(c(1L, 2L))) }) test_that("row_slice preserves empty 
groups if requested", { gf <- group_by(data.frame(g = c(1, 1, 2, 2, 3, 3)), g, .drop = FALSE) out <- dplyr_row_slice(gf, c(3L, 4L)) expect_equal(group_data(out)$.rows, list_of(integer(), c(1L, 2L), integer())) }) # dplyr_col_modify -------------------------------------------------------- test_that("empty cols returns input", { df <- data.frame(x = 1) expect_equal(dplyr_col_modify(df, list()), df) }) test_that("applies tidyverse recycling rules", { expect_equal( dplyr_col_modify(data.frame(x = 1:2), list(y = 1)), data.frame(x = 1:2, y = c(1, 1)) ) expect_equal( dplyr_col_modify(data.frame(x = integer()), list(y = 1)), data.frame(x = integer(), y = integer()) ) expect_error( dplyr_col_modify(data.frame(x = 1:4), list(y = 1:2)), class = "vctrs_error_recycle_incompatible_size" ) }) test_that("can add, remove, and replace columns", { df <- data.frame(x = 1, y = 2) expect_equal(dplyr_col_modify(df, list(y = NULL)), data.frame(x = 1)) expect_equal(dplyr_col_modify(df, list(y = 3)), data.frame(x = 1, y = 3)) expect_equal(dplyr_col_modify(df, list(z = 3)), data.frame(x = 1, y = 2, z = 3)) }) test_that("doesn't expand row names", { df <- data.frame(x = 1:10) out <- dplyr_col_modify(df, list(y = 1)) expect_equal(.row_names_info(out, 1), -10) }) test_that("preserves existing row names", { df <- data.frame(x = c(1, 2), row.names = c("a", "b")) out <- dplyr_col_modify(df, list(y = 1)) expect_equal(row.names(df), c("a", "b")) }) test_that("reconstruct method gets a data frame", { seen_df <- NULL local_methods( dplyr_reconstruct.dplyr_foobar = function(data, template) { if (is.data.frame(data)) { seen_df <<- TRUE } NextMethod() } ) df <- foobar(data.frame(x = 1)) seen_df <- FALSE dplyr_col_modify(df, list(y = 2)) expect_true(seen_df) seen_df <- FALSE dplyr_row_slice(df, 1) expect_true(seen_df) }) # dplyr_reconstruct ------------------------------------------------------- test_that("classes are restored", { expect_identical( dplyr_reconstruct(tibble(), data.frame()), data.frame() ) 
expect_identical( dplyr_reconstruct(tibble(), tibble()), tibble() ) expect_identical( dplyr_reconstruct(tibble(), new_data_frame(class = "foo")), new_data_frame(class = "foo") ) }) test_that("attributes of `template` are kept", { expect_identical( dplyr_reconstruct(new_tibble(list(), nrow = 1), new_data_frame(foo = 1)), new_data_frame(n = 1L, foo = 1) ) }) test_that("compact row names are retained", { data <- vec_rbind(tibble(a = 1), tibble(a = 2)) template <- tibble() x <- dplyr_reconstruct(data, template) expect <- tibble(a = c(1, 2)) expect_identical(x, expect) # Explicitly ensure internal row name structure is identical expect_identical( .row_names_info(x, type = 0L), .row_names_info(expect, type = 0L) ) }) test_that("dplyr_reconstruct() strips attributes before dispatch", { local_methods( dplyr_reconstruct.dplyr_foobar = function(data, template) { out <<- data } ) df <- foobar(data.frame(x = 1), foo = "bar") out <- NULL dplyr_reconstruct(df, df) expect_identical(out, data.frame(x = 1)) df <- foobar(data.frame(x = 1, row.names = "a"), foo = "bar") out <- NULL dplyr_reconstruct(df, df) expect_identical(out, data.frame(x = 1, row.names = "a")) }) dplyr/tests/testthat/test-pull.R0000644000176200001440000000124414121112104016446 0ustar liggesuserstest_that("default extracts last var from data frame", { df <- tibble(x = 1:10, y = 1:10) expect_equal(pull(df), 1:10) }) test_that("can extract by name, or positive/negative position", { x <- 1:10 df <- tibble(x = x, y = runif(10)) expect_equal(pull(df, x), x) expect_equal(pull(df, 1L), x) expect_equal(pull(df, 1), x) expect_equal(pull(df, -2), x) expect_equal(pull(df, -2L), x) }) test_that("can extract named vectors", { x <- 1:10 y <- letters[x] df <- tibble(x = x, y = y) xn <- set_names(x, y) expect_equal(pull(df, x), x) expect_equal(pull(df, x, y), xn) expect_equal(pull(df, 1, 2), xn) expect_equal(names(pull(df, x, y)), y) }) dplyr/tests/testthat/test-sets.R0000644000176200001440000001073714151641776016510 0ustar 
liggesuserstest_that("set operations use coercion rules (#799)", { df1 <- tibble(x = 1:2, y = c(1, 1)) df2 <- tibble(x = 1:2, y = 1:2) expect_equal(nrow(union(df1, df2)), 3L) expect_equal(nrow(intersect(df1, df2)), 1L) expect_equal(nrow(setdiff(df1, df2)), 1L) df1 <- tibble(x = factor(letters[1:10])) df2 <- tibble(x = letters[6:15]) res <- intersect(df1, df2) expect_equal(res, tibble(x = letters[6:10]), ignore_attr = TRUE) res <- intersect(df2, df1) expect_equal(res, tibble(x = letters[6:10]), ignore_attr = TRUE) res <- union(df1, df2) expect_equal(res, tibble(x = letters[1:15]), ignore_attr = TRUE) res <- union(df2, df1) expect_equal(res, tibble(x = letters[c(6:15, 1:5)]), ignore_attr = TRUE) res <- setdiff(df1, df2) expect_equal(res, tibble(x = letters[1:5]), ignore_attr = TRUE) res <- setdiff(df2, df1) expect_equal(res, tibble(x = letters[11:15]), ignore_attr = TRUE) }) test_that("setdiff handles factors with NA (#1526)", { df1 <- tibble(x = factor(c(NA, "a"))) df2 <- tibble(x = factor("a")) res <- setdiff(df1, df2) expect_s3_class(res$x, "factor") expect_equal(levels(res$x), "a") expect_true(is.na(res$x[1])) }) test_that("intersect does not unnecessarily coerce (#1722)", { df <- tibble(a = 1L) res <- intersect(df, df) expect_type(res$a, "integer") }) test_that("set operations reconstruct grouping metadata (#3587)", { df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% group_by(g) df2 <- tibble(x = 3:6, g = rep(2:3, each = 2)) expect_equal(setdiff(df1, df2), filter(df1, x < 3)) expect_equal(intersect(df1, df2), filter(df1, x >= 3)) expect_equal(union(df1, df2), tibble(x = 1:6, g = rep(1:3, each = 2)) %>% group_by(g)) expect_equal(setdiff(df1, df2) %>% group_rows(), list_of(1:2)) expect_equal(intersect(df1, df2) %>% group_rows(), list_of(1:2)) expect_equal(union(df1, df2) %>% group_rows(), list_of(1:2, 3:4, 5:6)) }) test_that("set operations keep the ordering of the data (#3839)", { rev_df <- function(df) { df[rev(seq_len(nrow(df))), ] } df1 <- tibble(x = 1:4, g = 
rep(1:2, each = 2)) df2 <- tibble(x = 3:6, g = rep(2:3, each = 2)) expect_equal(setdiff(df1, df2), filter(df1, x < 3), ignore_attr = TRUE) expect_equal(setdiff(rev_df(df1), df2), filter(rev_df(df1), x < 3), ignore_attr = TRUE) expect_equal(intersect(df1, df2), filter(df1, x >= 3), ignore_attr = TRUE) expect_equal(intersect(rev_df(df1), df2), filter(rev_df(df1), x >= 3), ignore_attr = TRUE) expect_equal(union(df1, df2), tibble(x = 1:6, g = rep(1:3, each = 2)), ignore_attr = TRUE) expect_equal(union(rev_df(df1), df2), tibble(x = c(4:1, 5:6), g = rep(c(2:1, 3L), each = 2)), ignore_attr = TRUE) expect_equal(union(df1, rev_df(df2)), tibble(x = c(1:4, 6:5), g = rep(1:3, each = 2)), ignore_attr = TRUE) }) test_that("set operations remove duplicates", { df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% bind_rows(., .) df2 <- tibble(x = 3:6, g = rep(2:3, each = 2)) expect_equal(setdiff(df1, df2), filter(df1, x < 3) %>% distinct(), ignore_attr = TRUE) expect_equal(intersect(df1, df2), filter(df1, x >= 3) %>% distinct(), ignore_attr = TRUE) expect_equal(union(df1, df2), tibble(x = 1:6, g = rep(1:3, each = 2)), ignore_attr = TRUE) }) test_that("set equality", { df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% group_by(g) df2 <- tibble(x = 3:6, g = rep(2:3, each = 2)) expect_true(setequal(df1, df1)) expect_true(setequal(df2, df2)) expect_false(setequal(df1, df2)) expect_false(setequal(df2, df1)) }) test_that("set operations enforce empty ... (#5891)", { a <- tibble(var = 1:3) b <- tibble(var = 2:4) c <- tibble(var = c(1, 3, 4, 5)) expect_error(intersect(a, b, c)) expect_error(setdiff(a, b, c)) expect_error(setequal(a, b, c)) expect_error(union(a, b, c)) expect_error(union_all(a, b, c)) }) # Errors ------------------------------------------------------------------ test_that("set operation give useful error message. 
#903", { expect_snapshot({ alfa <- tibble( land = c("Sverige", "Norway", "Danmark", "Island", "GB"), data = rnorm(length(land)) ) beta <- tibble( land = c("Norge", "Danmark", "Island", "Storbritannien"), data2 = rnorm(length(land)) ) gamma <- tibble(land = 1:2, data = 1:2) (expect_error( intersect(alfa, beta) )) (expect_error( intersect(alfa, 1) )) (expect_error( intersect(alfa, gamma) )) (expect_error( union(alfa, beta) )) (expect_error( setdiff(alfa, beta) )) }) }) dplyr/tests/testthat/test-count-tally.r0000644000176200001440000001060214151641776020034 0ustar liggesusers# count ------------------------------------------------------------------- test_that("informs if n column already present, unless overridden", { df1 <- tibble(n = c(1, 1, 2, 2, 2)) expect_message(out <- count(df1, n), "already present") expect_named(out, c("n", "nn")) # not a good idea, but supported expect_message(out <- count(df1, n, name = "n"), NA) expect_named(out, "n") expect_message(out <- count(df1, n, name = "nn"), NA) expect_named(out, c("n", "nn")) df2 <- tibble(n = c(1, 1, 2, 2, 2), nn = 1:5) expect_message(out <- count(df2, n), "already present") expect_named(out, c("n", "nn")) expect_message(out <- count(df2, n, nn), "already present") expect_named(out, c("n", "nn", "nnn")) }) test_that("name must be string", { df <- tibble(x = c(1, 2)) expect_error(count(df, x, name = 1), "string") expect_error(count(df, x, name = letters), "string") }) test_that("output includes empty levels with .drop = FALSE", { df <- tibble(f = factor("b", levels = c("a", "b", "c"))) out <- count(df, f, .drop = FALSE) expect_equal(out$n, c(0, 1, 0)) out <- count(group_by(df, f, .drop = FALSE)) expect_equal(out$n, c(0, 1, 0)) }) test_that("ouput preserves grouping", { df <- tibble(g = c(1, 2, 2, 2)) exp <- tibble(g = c(1, 2), n = c(1, 3)) expect_equal(df %>% count(g), exp) expect_equal(df %>% group_by(g) %>% count(), exp %>% group_by(g)) }) test_that("output preserves class & attributes where possible", { df <- 
data.frame(g = c(1, 2, 2, 2)) attr(df, "my_attr") <- 1 out <- df %>% count(g) expect_s3_class(out, "data.frame", exact = TRUE) expect_equal(attr(out, "my_attr"), 1) out <- df %>% group_by(g) %>% count() expect_s3_class(out, "grouped_df") expect_equal(group_vars(out), "g") # summarise() currently drops attributes expect_equal(attr(out, "my_attr"), NULL) }) test_that("works with dbplyr", { skip_if_not_installed("dbplyr") skip_if_not_installed("RSQLite") db <- dbplyr::memdb_frame(x = c(1, 1, 1, 2, 2)) df1 <- db %>% count(x) %>% as_tibble() expect_equal(df1, tibble(x = c(1, 2), n = c(3, 2))) df2 <- db %>% add_count(x) %>% as_tibble() expect_equal(df2, tibble(x = c(1, 1, 1, 2, 2), n = c(3, 3, 3, 2, 2))) }) test_that("can only explicitly chain together multiple tallies", { expect_snapshot({ df <- data.frame(g = c(1, 1, 2, 2), n = 1:4) df %>% count(g, wt = n) df %>% count(g, wt = n) %>% count(wt = n) df %>% count(n) }) }) test_that("wt = n() is deprecated", { df <- data.frame(x = 1:3) expect_warning(count(df, wt = n()), "`wt = n()`", fixed = TRUE) }) # tally ------------------------------------------------------------------- test_that("tally can sort output", { gf <- group_by(tibble(x = c(1, 1, 2, 2, 2)), x) out <- tally(gf, sort = TRUE) expect_equal(out, tibble(x = c(2, 1), n = c(3, 2))) }) test_that("weighted tally drops NAs (#1145)", { df <- tibble(x = c(1, 1, NA)) expect_equal(tally(df, x)$n, 2) }) test_that("tally() drops last group (#5199) ", { df <- data.frame(x = 1, y = 2, z = 3) res <- expect_message(df %>% group_by(x, y) %>% tally(wt = z), NA) expect_equal(group_vars(res), "x") }) # add_count --------------------------------------------------------------- test_that("ouput preserves grouping", { df <- tibble(g = c(1, 2, 2, 2)) exp <- tibble(g = c(1, 2, 2, 2), n = c(1, 3, 3, 3)) expect_equal(df %>% add_count(g), exp) expect_equal(df %>% group_by(g) %>% add_count(), exp %>% group_by(g)) }) test_that(".drop is deprecated", { local_options(lifecycle_verbosity = 
"warning") df <- tibble(f = factor("b", levels = c("a", "b", "c"))) expect_warning(out <- add_count(df, f, .drop = FALSE), "deprecated") }) # add_tally --------------------------------------------------------------- test_that("can add tallies of a variable", { df <- tibble(a = c(2, 1, 1)) expect_equal( df %>% group_by(a) %>% add_tally(), group_by(tibble(a = c(2, 1, 1), n = c(1, 2, 2)), a) ) }) test_that("add_tally can be given a weighting variable", { df <- data.frame(a = c(1, 1, 2, 2, 2), w = c(1, 1, 2, 3, 4)) out <- df %>% group_by(a) %>% add_tally(wt = w) expect_equal(out$n, c(2, 2, 9, 9, 9)) out <- df %>% group_by(a) %>% add_tally(wt = w + 1) expect_equal(out$n, c(4, 4, 12, 12, 12)) }) test_that("can override output column", { df <- data.frame(g = c(1, 1, 2, 2, 2), x = c(3, 2, 5, 5, 5)) expect_named(add_tally(df, name = "xxx"), c("g", "x", "xxx")) }) dplyr/tests/testthat/test-mutate.r0000644000176200001440000004347214151641776017073 0ustar liggesuserstest_that("empty mutate returns input", { df <- tibble(x = 1) gf <- group_by(df, x) expect_equal(mutate(df), df) expect_equal(mutate(gf), gf) expect_equal(mutate(df, !!!list()), df) expect_equal(mutate(gf, !!!list()), gf) }) test_that("rownames preserved", { df <- data.frame(x = c(1, 2), row.names = c("a", "b")) df <- mutate(df, y = 2) expect_equal(row.names(df), c("a", "b")) }) test_that("mutations applied progressively", { df <- tibble(x = 1) expect_equal(df %>% mutate(y = x + 1, z = y + 1), tibble(x = 1, y = 2, z = 3)) expect_equal(df %>% mutate(x = x + 1, x = x + 1), tibble(x = 3)) expect_equal(df %>% mutate(x = 2, y = x), tibble(x = 2, y = 2)) df <- data.frame(x = 1, y = 2) expect_equal( df %>% mutate(x2 = x, x3 = x2 + 1), df %>% mutate(x2 = x + 0, x3 = x2 + 1) ) }) test_that("length-1 vectors are recycled (#152)", { df <- tibble(x = 1:4) expect_equal(mutate(df, y = 1)$y, rep(1, 4)) expect_error(mutate(df, y = 1:2)) }) test_that("can remove variables with NULL (#462)", { df <- tibble(x = 1:3, y = 1:3) gf <- 
group_by(df, x) expect_equal(df %>% mutate(y = NULL), df[1]) expect_equal(gf %>% mutate(y = NULL), gf[1]) # even if it doesn't exist expect_equal(df %>% mutate(z = NULL), df) # or was just created expect_equal(df %>% mutate(z = 1, z = NULL), df) # regression test for https://github.com/tidyverse/dplyr/issues/4974 expect_equal( mutate(data.frame(x = 1, y = 1), z = 1, x = NULL, y = NULL), data.frame(z = 1) ) }) test_that("mutate() names pronouns correctly (#2686)", { expect_named(mutate(tibble(x = 1), .data$x), "x") expect_named(mutate(tibble(x = 1), .data[["x"]]), "x") }) test_that("mutate() supports unquoted values", { df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) expect_identical(mutate(df, out = !!1), mutate(df, out = 1)) expect_identical(mutate(df, out = !!(1:5)), mutate(df, out = 1:5)) expect_identical(mutate(df, out = !!quote(1:5)), mutate(df, out = 1:5)) gdf <- group_by(df, g) expect_identical(mutate(gdf, out = !!1), mutate(gdf, out = 1)) }) test_that("assignments don't overwrite variables (#315)", { df <- tibble(x = 1, y = 2) out <- df %>% mutate(z = {x <- 10; x}) expect_equal(out, tibble(x = 1, y = 2, z = 10)) }) test_that("can mutate a data frame with zero columns and `NULL` column names", { df <- new_data_frame(n = 2L) colnames(df) <- NULL expect_equal(mutate(df, x = 1), data.frame(x = c(1, 1))) }) test_that("mutate() handles symbol expressions", { df <- tibble(x = structure(1, class = "alien")) res <- mutate(df, y = x) expect_identical(df$x, res$y) gf <- group_by(df, x) res <- mutate(df, y = x) expect_identical(df$x, res$y) }) test_that("mutate() supports constants (#6056)", { df <- data.frame(x = 1:10, g = rep(1:2, each = 5)) y <- 1:10 z <- 1:2 expect_error(df %>% mutate(y = !!y), NA) expect_error(df %>% group_by(g) %>% mutate(y = !!y), NA) expect_error(df %>% rowwise() %>% mutate(y = !!y), NA) expect_error(df %>% mutate(z = !!z)) expect_error(df %>% group_by(g) %>% mutate(z = !!z)) expect_error(df %>% rowwise() %>% mutate(z = !!z)) }) # column types 
------------------------------------------------------------ test_that("glue() is supported", { expect_equal( tibble(x = 1) %>% mutate(y = glue("")), tibble(x = 1, y = glue("")) ) }) test_that("mutate disambiguates NA and NaN (#1448)", { df <- tibble(x = c(1, NA, NaN)) out <- mutate(df, y = x * 1) expect_equal(out$y, df$x) }) test_that("mutate preserves names (#1689, #2675)", { df <- tibble(a = 1:3) out1 <- df %>% mutate(b = setNames(1:3, letters[1:3])) out2 <- df %>% mutate(b = setNames(as.list(1:3), letters[1:3])) expect_named(out1$b, letters[1:3]) expect_named(out2$b, letters[1:3]) }) test_that("mutate handles data frame columns", { df <- data.frame("a" = c(1, 2, 3), "b" = c(2, 3, 4), "base_col" = c(3, 4, 5)) res <- mutate(df, new_col = data.frame(x = 1:3)) expect_equal(res$new_col, data.frame(x = 1:3)) res <- mutate(group_by(df, a), new_col = data.frame(x = a)) expect_equal(res$new_col, data.frame(x = 1:3)) res <- mutate(rowwise(df), new_col = data.frame(x = a)) expect_equal(res$new_col, data.frame(x = 1:3)) }) test_that("unnamed data frames are automatically unspliced (#2326, #3630)", { expect_identical( tibble(a = 1) %>% mutate(tibble(b = 2)), tibble(a = 1, b = 2) ) expect_identical( tibble(a = 1) %>% mutate(tibble(b = 2), tibble(b = 3)), tibble(a = 1, b = 3) ) expect_identical( tibble(a = 1) %>% mutate(tibble(b = 2), c = b), tibble(a = 1, b = 2, c = 2) ) }) test_that("named data frames are packed (#2326, #3630)", { df <- tibble(x = 1) out <- df %>% mutate(y = tibble(a = x)) expect_equal(out, tibble(x = 1, y = tibble(a = 1))) }) test_that("unchop only called for when multiple groups", { df <- data.frame(g = 1, x = 1:5) out <- mutate(df, x = ts(x, start = c(1971, 1), frequency = 52)) expect_s3_class(out$x, "ts") gdf <- group_by(df, g) out <- mutate(gdf, x = ts(x, start = c(1971, 1), frequency = 52)) expect_s3_class(out$x, "ts") }) # output types ------------------------------------------------------------ test_that("mutate preserves grouping", { gf <- 
group_by(tibble(x = 1:2, y = 2), x) i <- count_regroups(out <- mutate(gf, x = 1)) expect_equal(i, 1L) expect_equal(group_vars(out), "x") expect_equal(nrow(group_data(out)), 1) i <- count_regroups(out <- mutate(gf, z = 1)) expect_equal(i, 0) expect_equal(group_data(out), group_data(gf)) }) test_that("mutate works on zero-row grouped data frame (#596)", { dat <- data.frame(a = numeric(0), b = character(0), stringsAsFactors = TRUE) res <- dat %>% group_by(b, .drop = FALSE) %>% mutate(a2 = a * 2) expect_type(res$a2, "double") expect_s3_class(res, "grouped_df") expect_equal(res$a2, numeric(0)) expect_type(group_rows(res), "list") expect_equal(attr(group_rows(res), "ptype"), integer()) expect_equal(group_data(res)$b, factor(character(0))) }) test_that("mutate works on zero-row rowwise data frame (#4224)", { dat <- data.frame(a = numeric(0)) res <- dat %>% rowwise() %>% mutate(a2 = a * 2) expect_type(res$a2, "double") expect_s3_class(res, "rowwise_df") expect_equal(res$a2, numeric(0)) }) test_that("mutate works on empty data frames (#1142)", { df <- data.frame() res <- df %>% mutate() expect_equal(nrow(res), 0L) expect_equal(length(res), 0L) res <- df %>% mutate(x = numeric()) expect_equal(names(res), "x") expect_equal(nrow(res), 0L) expect_equal(length(res), 1L) }) test_that("mutate handles 0 rows rowwise (#1300)", { res <- tibble(y = character()) %>% rowwise() %>% mutate(z = 1) expect_equal(nrow(res), 0L) }) test_that("rowwise mutate gives expected results (#1381)", { f <- function(x) ifelse(x < 2, NA_real_, x) res <- tibble(x = 1:3) %>% rowwise() %>% mutate(y = f(x)) expect_equal(res$y, c(NA, 2, 3)) }) test_that("grouped mutate does not drop grouping attributes (#1020)", { d <- data.frame(subject = c("Jack", "Jill"), id = c(2, 1)) %>% group_by(subject) a1 <- names(attributes(d)) a2 <- names(attributes(d %>% mutate(foo = 1))) expect_equal(setdiff(a1, a2), character(0)) }) test_that("mutate() hands list columns with rowwise magic to follow up expressions (#4845)", { test 
<- rowwise(tibble(x = 1:2)) expect_identical( test %>% mutate(a = list(1)) %>% mutate(b = list(a + 1)), test %>% mutate(a = list(1), b = list(a + 1)) ) }) # other ------------------------------------------------------------------- test_that("no utf8 invasion (#722)", { skip_if_not(l10n_info()$"UTF-8") source("utf-8.txt", local = TRUE, encoding = "UTF-8") }) test_that("mutate() to UTF-8 column names", { df <- tibble(a = 1) %>% mutate("\u5e78" := a) expect_equal(colnames(df), c("a", "\u5e78")) }) test_that("Non-ascii column names in version 0.3 are not duplicated (#636)", { local_non_utf8_encoding() df <- tibble(a = "1", b = "2") names(df) <- c("a", enc2native("\u4e2d")) res <- df %>% mutate_all(as.numeric) expect_equal(names(res), as_utf8_character(names(df))) }) test_that("mutate coerces results from one group with all NA values (#1463) ", { df <- tibble(x = c(1, 2), y = c(1, NA)) res <- df %>% group_by(x) %>% mutate(z = ifelse(y > 1, 1, 2)) expect_true(is.na(res$z[2])) expect_type(res$z, "double") }) test_that("grouped subsets are not lazy (#3360)", { make_call <- function(x) { quo(!!x) } res <- tibble(name = 1:2, value = letters[1:2]) %>% rowwise() %>% mutate(call = list(make_call(value))) %>% pull() expect_identical(res, list(make_call("a"), make_call("b"))) res <- tibble(name = 1:2, value = letters[1:2]) %>% group_by(name) %>% mutate(call = list(make_call(value))) %>% pull() expect_identical(res, list(make_call("a"), make_call("b"))) }) test_that("mutate() evaluates expression for empty groups", { df <- tibble(f = factor(c("a", "b"), levels = c("a", "b", "c"))) gf <- group_by(df, f, .drop = FALSE) count <- 0 mutate(gf, x = {count <<- count + 1}) expect_equal(count, 3L) }) test_that("DataMask$add() forces chunks (#4677)", { df <- tibble(bf10 = 0.244) %>% mutate( bf01 = 1 / bf10, log_e_bf10 = log(bf10), log_e_bf01 = log(bf01) ) expect_equal(df$log_e_bf01, log(1 / 0.244)) }) # .before, .after, .keep ------------------------------------------------------ 
test_that(".keep = 'unused' keeps variables explicitly mentioned", { df <- tibble(x = 1, y = 2) out <- mutate(df, x1 = x + 1, y = y, .keep = "unused") expect_named(out, c("y", "x1")) }) test_that(".keep = 'used' not affected by across()", { df <- tibble(x = 1, y = 2, z = 3, a = "a", b = "b", c = "c") # This must evaluate every column in order to figure out if should # be included in the set or not, but that shouldn't be counted for # the purposes of "used" variables out <- mutate(df, across(where(is.numeric), identity), .keep = "unused") expect_named(out, names(df)) }) test_that(".keep = 'used' keeps variables used in expressions", { df <- tibble(a = 1, b = 2, c = 3, x = 1, y = 2) out <- mutate(df, xy = x + y, .keep = "used") expect_named(out, c("x", "y", "xy")) }) test_that(".keep = 'none' only keeps grouping variables", { df <- tibble(x = 1, y = 2) gf <- group_by(df, x) expect_named(mutate(df, z = 1, .keep = "none"), "z") expect_named(mutate(gf, z = 1, .keep = "none"), c("x", "z")) }) test_that(".keep = 'none' retains original ordering (#5967)", { df <- tibble(x = 1, y = 2) expect_named(df %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y")) # even when grouped gf <- group_by(df, x) expect_named(gf %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y")) }) test_that("can use .before and .after to control column position", { df <- tibble(x = 1, y = 2) expect_named(mutate(df, z = 1), c("x", "y", "z")) expect_named(mutate(df, z = 1, .before = 1), c("z", "x", "y")) expect_named(mutate(df, z = 1, .after = 1), c("x", "z", "y")) # but doesn't affect order of existing columns df <- tibble(x = 1, y = 2) expect_named(mutate(df, x = 1, .after = y), c("x", "y")) }) test_that(".keep and .before/.after interact correctly", { df <- tibble(x = 1, y = 1, z = 1, a = 1, b = 2, c = 3) %>% group_by(a, b) expect_named(mutate(df, d = 1, x = 2, .keep = "none"), c("x", "a", "b", "d")) expect_named(mutate(df, d = 1, x = 2, .keep = "none", .before = "a"), c("x", "d", "a", "b")) 
expect_named(mutate(df, d = 1, x = 2, .keep = "none", .after = "a"), c("x", "a", "d", "b")) }) test_that("dropping column with `NULL` then readding it retains original location", { df <- tibble(x = 1, y = 2, z = 3, a = 4) df <- group_by(df, z) expect_named(mutate(df, y = NULL, y = 3, .keep = "all"), c("x", "y", "z", "a")) expect_named(mutate(df, b = a, y = NULL, y = 3, .keep = "used"), c("y", "z", "a", "b")) expect_named(mutate(df, b = a, y = NULL, y = 3, .keep = "unused"), c("x", "y", "z", "b")) # It isn't treated as a "new" column expect_named(mutate(df, y = NULL, y = 3, .keep = "all", .before = x), c("x", "y", "z", "a")) }) test_that(".keep= always retains grouping variables (#5582)", { df <- tibble(x = 1, y = 2, z = 3) %>% group_by(z) expect_equal( df %>% mutate(a = x + 1, .keep = "none"), tibble(z = 3, a = 2) %>% group_by(z) ) expect_equal( df %>% mutate(a = x + 1, .keep = "all"), tibble(x = 1, y = 2, z = 3, a = 2) %>% group_by(z) ) expect_equal( df %>% mutate(a = x + 1, .keep = "used"), tibble(x = 1, z = 3, a = 2) %>% group_by(z) ) expect_equal( df %>% mutate(a = x + 1, .keep = "unused"), tibble(y = 2, z = 3, a = 2) %>% group_by(z) ) }) test_that("mutate() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) 
stack <<- sys.calls(), mutate(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("dplyr data mask can become obsolete", { lazy <- function(x) { list(enquo(x)) } df <- tibble( x = 1:2 ) res <- df %>% rowwise() %>% mutate(y = lazy(x), .keep = "unused") expect_equal(names(res), c("x", "y")) expect_error(eval_tidy(res$y[[1]])) }) test_that("mutate() deals with 0 groups (#5534)", { df <- data.frame(x = numeric()) %>% group_by(x) expect_equal( mutate(df, y = x + 1), data.frame(x = numeric(), y = numeric()) %>% group_by(x) ) expect_warning( mutate(df, y = max(x)), "Inf" ) }) test_that("mutate(=NULL) preserves correct all_vars", { df <- data.frame(x = 1, y = 2) %>% mutate(x = NULL, vars = cur_data_all()) %>% pull() expect_equal(df, tibble(y = 2)) }) test_that("functions are not skipped in data pronoun (#5608)", { f <- function(i) i + 1 df <- tibble(a = list(f), b = 1) two <- df %>% rowwise() %>% mutate(res = .data$a(.data$b)) %>% pull(res) expect_equal(two, 2) }) test_that("mutate() casts data frame results to common type (#5646)", { df <- data.frame(x = 1:2, g = 1:2) %>% group_by(g) res <- df %>% mutate(if (g == 1) data.frame(y = 1) else data.frame(y = 1, z = 2)) expect_equal(res$z, c(NA, 2)) }) test_that("can suppress or catch warnings from the outside (#5675)", { # Check that basic warning handling still works expect_no_warning( suppressWarnings(mutate(tibble(), warning("foo"))) ) f <- function() warn("foo", "dplyr:::foo") x <- tryCatch(warning = identity, mutate(mtcars, f())) msg <- conditionMessage(x) expect_match(msg, "foo") # Check that caught warnings are instrumented. Requires # . 
if (can_return_from_exit) { expect_match(msg, "Problem while") } }) test_that("mutate() supports empty list columns in rowwise data frames (#5804", { res <- tibble(a = list()) %>% rowwise() %>% mutate(n = lengths(a)) expect_equal(res$n, integer()) }) test_that("mutate() propagates caller env", { expect_caller_env(mutate(mtcars, sig_caller_env())) }) test_that("rowwise() + mutate(across()) correctly handles list columns (#5951)", { tib <- tibble(a=list(1:2,3:4),c=list(NULL,NULL)) %>% rowwise() expect_identical( mutate(tib, sum = across(everything(),sum)), mutate(tib, sum = across(where(is.list),sum)) ) }) test_that("mutate() fails on named empty arguments (#5925)", { expect_error( mutate(tibble(), bogus = ) ) }) # Error messages ---------------------------------------------------------- test_that("mutate() give meaningful errors", { expect_snapshot({ tbl <- tibble(x = 1:2, y = 1:2) # setting column to NULL makes it unavailable (expect_error(tbl %>% mutate(y = NULL, a = sum(y)))) (expect_error(tbl %>% group_by(x) %>% mutate(y = NULL, a = sum(y)) )) # incompatible column type (expect_error(tibble(x = 1) %>% mutate(y = mean))) # Unsupported type" df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) (expect_error(df %>% mutate(out = env(a = 1)))) (expect_error(df %>% group_by(g) %>% mutate(out = env(a = 1)) )) (expect_error(df %>% rowwise() %>% mutate(out = rnorm) )) # incompatible types across groups (expect_error( data.frame(x = rep(1:5, each = 3)) %>% group_by(x) %>% mutate(val = ifelse(x < 3, "foo", 2)) )) (expect_error( tibble(a = 1:3, b=4:6) %>% group_by(a) %>% mutate(if(a==1) NULL else "foo") )) # incompatible size (expect_error( data.frame(x = c(2, 2, 3, 3)) %>% mutate(int = 1:5) )) (expect_error( data.frame(x = c(2, 2, 3, 3)) %>% group_by(x) %>% mutate(int = 1:5) )) (expect_error( data.frame(x = c(2, 3, 3)) %>% group_by(x) %>% mutate(int = 1:5) )) (expect_error( data.frame(x = c(2, 2, 3, 3)) %>% rowwise() %>% mutate(int = 1:5) )) (expect_error( tibble(y = list(1:3, 
"a")) %>% rowwise() %>% mutate(y2 = y) )) (expect_error( data.frame(x = 1:10) %>% mutate(y = 11:20, y = 1:2) )) # .data pronoun (expect_error( tibble(a = 1) %>% mutate(c = .data$b) )) (expect_error( tibble(a = 1:3) %>% group_by(a) %>% mutate(c = .data$b) )) # obsolete data mask lazy <- function(x) list(enquo(x)) res <- tbl %>% rowwise() %>% mutate(z = lazy(x), .keep = "unused") (expect_error( eval_tidy(res$z[[1]]) )) # Error that contains { (expect_error( tibble() %>% mutate(stop("{")) )) }) }) dplyr/tests/testthat/test-join.r0000644000176200001440000001677014121112104016503 0ustar liggesusers# Basic properties -------------------------------------------------------- test_that("mutating joins preserve row and column order of x", { df1 <- data.frame(a = 1:3) df2 <- data.frame(b = 1, c = 2, a = 4:1) out <- inner_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:3) out <- left_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:3) out <- right_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:4) out <- full_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:4) }) test_that("even when column names change", { df1 <- data.frame(x = c(1, 1, 2, 3), z = 1:4, a = 1) df2 <- data.frame(z = 1:4, b = 1, x = c(1, 2, 2, 4)) out <- inner_join(df1, df2, by = "x") expect_named(out, c("x", "z.x", "a", "z.y", "b")) }) test_that("by = character() generates cross (#4206)", { df1 <- tibble(x = 1:2) df2 <- tibble(y = 1:2) out <- left_join(df1, df2, by = character()) expect_equal(out$x, rep(1:2, each = 2)) expect_equal(out$y, rep(1:2, 2)) }) test_that("filtering joins preserve row and column order of x (#2964)", { df1 <- data.frame(a = 4:1, b = 1) df2 <- data.frame(b = 1, c = 2, a = 2:3) out <- semi_join(df1, df2, by = "a") expect_named(out, c("a", "b")) expect_equal(out$a, 3:2) out <- anti_join(df1, df2, by = "a") expect_named(out, c("a", "b")) 
expect_equal(out$a, c(4L, 1L)) }) test_that("keys are coerced to symmetric type", { foo <- tibble(id = factor(c("a", "b")), var1 = "foo") bar <- tibble(id = c("a", "b"), var2 = "bar") expect_type(inner_join(foo, bar, by = "id")$id, "character") expect_type(inner_join(bar, foo, by = "id")$id, "character") df1 <- tibble(x = 1, y = factor("a")) df2 <- tibble(x = 2, y = factor("b")) out <- full_join(df1, df2, by = c("x", "y")) expect_equal(out$y, factor(c("a", "b"))) }) test_that("when keep = TRUE, left_join() preserves both sets of keys", { # when keys have different names df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(x = c(3, 4), y = c(3, 4)) out <- left_join(df1, df2, by = c("a" = "x"), keep = TRUE) expect_equal(out$a, c(2, 3)) expect_equal(out$x, c(NA, 3)) # when keys have same name df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(a = c(3, 4), y = c(3, 4)) out <- left_join(df1, df2, by = c("a"), keep = TRUE) expect_equal(out$a.x, c(2, 3)) expect_equal(out$a.y, c(NA, 3)) }) test_that("when keep = TRUE, right_join() preserves both sets of keys", { # when keys have different names df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(x = c(3, 4), y = c(3, 4)) out <- right_join(df1, df2, by = c("a" = "x"), keep = TRUE) expect_equal(out$a, c(3, NA)) expect_equal(out$x, c(3, 4)) # when keys have same name df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(a = c(3, 4), y = c(3, 4)) out <- right_join(df1, df2, by = c("a"), keep = TRUE) expect_equal(out$a.x, c(3, NA)) expect_equal(out$a.y, c(3, 4)) }) test_that("when keep = TRUE, full_join() preserves both sets of keys", { # when keys have different names df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(x = c(3, 4), y = c(3, 4)) out <- full_join(df1, df2, by = c("a" = "x"), keep = TRUE) expect_equal(out$a, c(2, 3, NA)) expect_equal(out$x, c(NA, 3, 4)) # when keys have same name df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(a = c(3, 4), y = c(3, 4)) out <- full_join(df1, df2, by = c("a"), keep = 
TRUE) expect_equal(out$a.x, c(2, 3, NA)) expect_equal(out$a.y, c(NA, 3, 4)) }) test_that("when keep = TRUE, inner_join() preserves both sets of keys (#5581)", { # when keys have different names df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(x = c(3, 4), y = c(3, 4)) out <- inner_join(df1, df2, by = c("a" = "x"), keep = TRUE) expect_equal(out$a, c(3)) expect_equal(out$x, c(3)) # when keys have same name df1 <- tibble(a = c(2, 3), b = c(1, 2)) df2 <- tibble(a = c(3, 4), y = c(3, 4)) out <- inner_join(df1, df2, by = c("a"), keep = TRUE) expect_equal(out$a.x, c(3)) expect_equal(out$a.y, c(3)) }) test_that("joins matches NAs by default (#892, #2033)", { df1 <- tibble(x = c(NA_character_, 1)) df2 <- tibble(x = c(NA_character_, 2)) expect_equal(nrow(inner_join(df1, df2, by = "x")), 1) expect_equal(nrow(semi_join(df1, df2, by = "x")), 1) }) test_that("joins don't match NA when na_matches = 'never' (#2033)", { df1 <- tibble(a = c(1, NA)) df2 <- tibble(a = c(1, NA), b = 1:2) out <- left_join(df1, df2, by = "a", na_matches = "never") expect_equal(out, tibble(a = c(1, NA), b = c(1, NA))) out <- inner_join(df1, df2, by = "a", na_matches = "never") expect_equal(out, tibble(a = 1, b = 1)) out <- semi_join(df1, df2, by = "a", na_matches = "never") expect_equal(out, tibble(a = 1)) out <- anti_join(df1, df2, by = "a", na_matches = "never") expect_equal(out, tibble(a = NA_integer_)) dat1 <- tibble( name = c("a", "c"), var1 = c(1, 2) ) dat3 <- tibble( name = c("a", NA_character_), var3 = c(5, 6) ) expect_equal( full_join(dat1, dat3, by = "name", na_matches = "never"), tibble(name = c("a", "c", NA), var1 = c(1, 2, NA), var3 = c(5, NA, 6)) ) }) # nest_join --------------------------------------------------------------- test_that("nest_join returns list of tibbles (#3570)",{ df1 <- tibble(x = c(1, 2), y = c(2, 3)) df2 <- tibble(x = c(1, 1), z = c(2, 3)) out <- nest_join(df1, df2, by = "x") expect_named(out, c("x", "y", "df2")) expect_type(out$df2, "list") 
expect_s3_class(out$df2[[1]], "tbl_df") }) test_that("nest_join handles multiple matches in x (#3642)", { df1 <- tibble(x = c(1, 1)) df2 <- tibble(x = 1, y = 1:2) out <- nest_join(df1, df2, by = "x") expect_equal(out$df2[[1]], out$df2[[2]]) }) test_that("y keys dropped by default", { df1 <- tibble(x = c(1, 2), y = c(2, 3)) df2 <- tibble(x = c(1, 1), z = c(2, 3)) out <- nest_join(df1, df2, by = "x") expect_named(out, c("x", "y", "df2")) expect_named(out$df2[[1]], "z") out <- nest_join(df1, df2, by = "x", keep = TRUE) expect_named(out$df2[[1]], c("x", "z")) }) # output type --------------------------------------------------------------- test_that("joins x preserve type of x", { df1 <- data.frame(x = 1) df2 <- tibble(x = 2) expect_s3_class(inner_join(df1, df2, by = "x"), "data.frame", exact = TRUE) expect_s3_class(inner_join(df2, df1, by = "x"), "tbl_df") }) test_that("joins preserve groups", { gf1 <- tibble(a = 1:3) %>% group_by(a) gf2 <- tibble(a = rep(1:4, 2), b = 1) %>% group_by(b) i <- count_regroups(out <- inner_join(gf1, gf2, by = "a")) expect_equal(i, 1L) expect_equal(group_vars(out), "a") i <- count_regroups(out <- semi_join(gf1, gf2, by = "a")) expect_equal(i, 0L) expect_equal(group_vars(out), "a") # See comment in nest_join i <- count_regroups(out <- nest_join(gf1, gf2, by = "a")) expect_equal(i, 1L) expect_equal(group_vars(out), "a") }) test_that("group column names reflect renamed duplicate columns (#2330)", { df1 <- tibble(x = 1:5, y = 1:5) %>% group_by(x, y) df2 <- tibble(x = 1:5, y = 1:5) out <- inner_join(df1, df2, by = "x") expect_equal(group_vars(out), "x") # TODO: fix this issue: https://github.com/tidyverse/dplyr/issues/4917 # expect_equal(group_vars(out), c("x", "y.x")) }) test_that("rowwise group structure is updated after a join (#5227)", { df1 <- rowwise(tibble(x = 1:2)) df2 <- tibble(x = c(1:2, 2L)) x <- left_join(df1, df2, by = "x") expect_identical(group_rows(x), list_of(1L, 2L, 3L)) }) 
dplyr/tests/testthat/test-rank.R0000644000176200001440000000543314121112104016431 0ustar liggesusersntile_h <- function(x, n) { tibble(x = x) %>% mutate(y = ntile(x, n)) %>% pull(y) } ntile_h_dplyr <- function(x, n) { tibble(x = x) %>% mutate(y = dplyr::ntile(x, n)) %>% pull(y) } test_that("ntile ignores number of NAs", { x <- c(1:3, NA, NA, NA) expect_equal(ntile(x, 3), x) expect_equal(ntile_h(x, 3), x) x1 <- c(1L, 1L, 1L, NA, NA, NA) expect_equal(ntile(x, 1), x1) expect_equal(ntile_h(x, 1), x1) }) test_that("ntile always returns an integer", { expect_equal(ntile(numeric(), 3), integer()) expect_equal(ntile_h(numeric(), 3), integer()) expect_equal(ntile(NA, 3), NA_integer_) expect_equal(ntile_h(NA, 3), NA_integer_) }) test_that("ntile handles character vectors consistently", { charvec_sort_test <- function() { x1 <- c("[", "]", NA, "B", "y", "a", "Z") x2 <- c("a", "b", "C") expect_equal(ntile_h(x1, 3), ntile_h_dplyr(x1, 3)) expect_equal(ntile_h(x2, 2), ntile_h_dplyr(x2, 2)) } # Test against both the local, and the C locale for collation charvec_sort_test() withr::with_collate("C", charvec_sort_test()) }) test_that("ntile() does not overflow (#4186)", { res <- tibble(a = 1:1e5) %>% mutate(b = ntile(n = 1e5)) %>% count(b) %>% pull() expect_true(all(res == 1L)) }) test_that("row_number handles empty data frames (#762)", { df <- data.frame(a = numeric(0)) res <- df %>% mutate( row_number_0 = row_number(), row_number_a = row_number(a), ntile = ntile(a, 2), min_rank = min_rank(a), percent_rank = percent_rank(a), dense_rank = dense_rank(a), cume_dist = cume_dist(a) ) expect_equal( names(res), c("a", "row_number_0", "row_number_a", "ntile", "min_rank", "percent_rank", "dense_rank", "cume_dist") ) expect_equal(nrow(res), 0L) }) test_that("lead/lag inside mutate handles expressions as value for default (#1411) ", { df <- tibble(x = 1:3) res <- mutate(df, leadn = lead(x, default = x[1]), lagn = lag(x, default = x[1])) expect_equal(res$leadn, lead(df$x, default = df$x[1])) 
expect_equal(res$lagn, lag(df$x, default = df$x[1])) res <- mutate(df, leadn = lead(x, default = c(1)), lagn = lag(x, default = c(1))) expect_equal(res$leadn, lead(df$x, default = 1)) expect_equal(res$lagn, lag(df$x, default = 1)) }) test_that("ntile puts large groups first (#4995) ", { expect_equal(ntile(1, 5), 1) expect_equal(ntile(1:2, 5), 1:2) expect_equal(ntile(1:3, 5), 1:3) expect_equal(ntile(1:4, 5), 1:4) expect_equal(ntile(1:5, 5), 1:5) expect_equal(ntile(1:6, 5), c(1, 1:5)) expect_equal(ntile(1, 7), 1) expect_equal(ntile(1:2, 7), 1:2) expect_equal(ntile(1:3, 7), 1:3) expect_equal(ntile(1:4, 7), 1:4) expect_equal(ntile(1:5, 7), 1:5) expect_equal(ntile(1:6, 7), 1:6) expect_equal(ntile(1:7, 7), 1:7) expect_equal(ntile(1:8, 7), c(1, 1:7)) }) dplyr/tests/testthat/test-colwise-group-by.R0000644000176200001440000000203414121112104020677 0ustar liggesuserstest_that("group_by_ verbs take scoped inputs", { expect_identical(group_vars(group_by_all(mtcars)), names(mtcars)) expect_identical(group_vars(group_by_at(mtcars, vars(starts_with("d")))), c("disp", "drat")) expect_identical(group_vars(group_by_if(iris, is.factor)), "Species") }) test_that("group_by_ verbs accept optional operations", { df <- tibble(x = 1:2, y = 2:3) gdf <- group_by(mutate_all(df, as.factor), x, y) expect_identical(group_by_all(df, as.factor), gdf) expect_identical(group_by_if(df, is_integer, as.factor), gdf) expect_identical(group_by_at(df, vars(x:y), as.factor), gdf) }) test_that("group_by variants can group by an already grouped by data (#3351)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(c(1, 2), each = 4), x = 1:8) %>% group_by(gr1) expect_identical( group_by_at(tbl, vars(gr1, gr2)), group_by(tbl, gr1, gr2) ) expect_identical( group_by_all(tbl), group_by(tbl, gr1, gr2, x) ) expect_identical( group_by_if(tbl, is.integer), group_by(tbl, gr1, x) ) }) dplyr/tests/testthat/test-rows.R0000644000176200001440000000616114151641776016520 0ustar liggesuserstest_that("rows_insert()", { data <- 
tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_insert(data, tibble(a = 4L, b = "z"), by = "a"), tibble(a = 1:4, b = c("a", "b", NA, "z"), c = c(0.5, 1.5, 2.5, NA)) ) expect_error( rows_insert(data, tibble(a = 3, b = "z"), by = "a"), "insert duplicate" ) }) test_that("rows_update()", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_update(data, tibble(a = 2:3, b = "z"), by = "a"), tibble(a = 1:3, b = c("a", "z", "z"), c = data$c) ) expect_error( rows_update(data, tibble(a = 2:3, b = "z"), by = c("a", "b")), "update missing" ) expect_silent( expect_identical( rows_update(data, tibble(b = "z", a = 2:3), by = "a"), tibble(a = 1:3, b = c("a", "z", "z"), c = data$c) ) ) }) test_that("rows_patch()", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_patch(data, tibble(a = 2:3, b = "z"), by = "a"), tibble(a = 1:3, b = c("a", "b", "z"), c = data$c) ) expect_error( rows_patch(data, tibble(a = 2:3, b = "z"), by = c("a", "b")), "patch missing" ) expect_silent( expect_identical( rows_patch(data, tibble(b = "z", a = 2:3), by = "a"), tibble(a = 1:3, b = c("a", "b", "z"), c = data$c) ) ) }) test_that("rows_upsert()", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_upsert(data, tibble(a = 2:4, b = "z"), by = "a"), tibble(a = 1:4, b = c("a", "z", "z", "z"), c = c(data$c, NA)) ) }) test_that("rows_delete()", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_delete(data, tibble(a = 2:3), by = "a"), data[1, ] ) expect_error( rows_delete(data, tibble(a = 2:4), by = "a"), "delete missing" ) expect_snapshot(res <- rows_delete(data, tibble(a = 2:3, b = "b"), by = "a")) expect_identical(res, data[1, ]) expect_error( rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b")), "delete missing" ) }) test_that("rows_*() errors", { expect_snapshot({ data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 
+ 0:2) # Insert (expect_error( rows_insert(data, tibble(a = 3, b = "z")) )) (expect_error( rows_insert(data[c(1, 1), ], tibble(a = 3)) )) (expect_error( rows_insert(data, tibble(a = 4, b = "z"), by = "e") )) (expect_error( rows_insert(data, tibble(d = 4)) )) # Update (expect_error( rows_update(data, tibble(a = 2:3, b = "z"), by = c("a", "b")) )) # Variants: patch (expect_error( rows_patch(data, tibble(a = 2:3, b = "z"), by = c("a", "b")) )) # Delete and truncate (expect_error( rows_delete(data, tibble(a = 2:4)) )) (expect_error( rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b")) )) rows_delete(data, tibble(a = 2:3)) rows_delete(data, tibble(a = 2:3, b = "b")) }) }) dplyr/tests/testthat/test-DBI.R0000644000176200001440000000064114121112104016070 0ustar liggesuserstest_that("can work directly with DBI connection", { skip_if_not_installed("RSQLite") skip_if_not_installed("dbplyr") con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") on.exit(DBI::dbDisconnect(con)) df <- tibble(x = 1:10, y = letters[1:10]) df1 <- copy_to(con, df) df2 <- tbl(con, "df") expect_equal(collect(df1), df, ignore_attr = TRUE) expect_equal(collect(df2), df, ignore_attr = TRUE) }) dplyr/tests/testthat/test-colwise-select.R0000644000176200001440000001325614151641776020453 0ustar liggesusersdf <- tibble(x = 0L, y = 0.5, z = 1) test_that("can select/rename all variables", { expect_identical(select_all(df), df) expect_identical(select_all(df, toupper), set_names(df, c("X", "Y", "Z"))) expect_identical(select_all(df, toupper), rename_all(df, toupper)) }) test_that("can select/rename with predicate", { expect_identical(select_if(df, is_integerish), select(df, x, z)) expect_identical(select_if(df, is_integerish, toupper), set_names(df[c("x", "z")], c("X", "Z"))) expect_identical(rename_if(df, is_integerish, toupper), set_names(df, c("X", "y", "Z"))) }) test_that("can take list, but only containing single function", { expect_identical( select_if(df, list(~ is_integerish(.)), list(~ 
toupper(.))), set_names(df[c("x", "z")], c("X", "Z")) ) expect_identical( rename_if(df, list(~ is_integerish(.)), list(~ toupper(.))), set_names(df, c("X", "y", "Z")) ) }) test_that("can select/rename with vars()", { expect_identical(select_at(df, vars(x:y)), df[-3]) expect_identical(select_at(df, vars(x:y), toupper), set_names(df[-3], c("X", "Y"))) expect_identical(rename_at(df, vars(x:y), toupper), set_names(df, c("X", "Y", "z"))) }) test_that("select variants can use grouping variables (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( select(tbl, gr1), select_at(tbl, vars(gr1)) ) expect_identical( select_all(tbl), tbl ) expect_identical( select_if(tbl, is.integer), tbl ) }) test_that("select_if keeps grouping cols", { by_species <- iris %>% group_by(Species) expect_silent(df <- by_species %>% select_if(is.numeric)) expect_equal(df, by_species[c(5, 1:4)]) }) test_that("select_if() handles non-syntactic colnames", { df <- tibble(`x 1` = 1:3) expect_identical(select_if(df, is_integer)[[1]], 1:3) }) test_that("select_if() handles quoted predicates", { expected <- select_if(mtcars, is_integerish) expect_identical(select_if(mtcars, "is_integerish"), expected) expect_identical(select_if(mtcars, ~ is_integerish(.x)), expected) }) test_that("rename_all() works with grouped data (#3363)", { df <- data.frame(a = 1, b = 2) out <- df %>% group_by(a) %>% rename_all(toupper) expect_identical(out, group_by(data.frame(A = 1, B = 2), A)) }) test_that("scoping (#3426)", { interface <- function(.tbl, .funs = list()) { impl(.tbl, .funs = .funs) } impl <- function(.tbl, ...) { select_all(.tbl, ...) 
} expect_identical( interface(mtcars, .funs = toupper), select_all(mtcars, .funs = list(toupper)) ) }) test_that("rename variants can rename a grouping variable (#3351)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) res <- rename(tbl, GR1 = gr1, GR2 = gr2, X = x) expect_identical( rename_at(tbl, vars(everything()), toupper), res ) expect_identical( rename_all(tbl, toupper), res ) expect_identical( rename_if(tbl, is.integer, toupper), res ) }) test_that("select_all does not change the order of columns (#3351)", { tbl <- group_by(tibble(x = 1:4, y = 4:1), y) expect_identical(select_all(tbl), tbl) tbl <- group_by(tibble(x = 1:4, y = 4:1), x) expect_identical(select_all(tbl), tbl) tbl <- group_by(tibble(x = 1:4, y = 4:1, z = 1:4), y) expect_identical(select_all(tbl), tbl) }) test_that("mutate_all does not change the order of columns (#3351)", { tbl <- group_by(tibble(x = 1:4, y = 1:4), y) expect_message(expect_identical(names(mutate_all(tbl, identity)), names(tbl)), "ignored") tbl <- group_by(tibble(x = 1:4, y = 1:4), x) expect_message(expect_identical(names(mutate_all(tbl, identity)), names(tbl)), "ignored") tbl <- group_by(tibble(x = 1:4, y = 1:4, z = 1:4), y) expect_message(expect_identical(names(mutate_all(tbl, identity)), names(tbl)), "ignored") }) test_that("select_if() and rename_if() handles logical (#4213)", { ids <- "Sepal.Length" expect_identical( iris %>% select_if(!names(.) %in% ids), iris %>% select(-Sepal.Length) ) expect_identical( iris %>% rename_if(!names(.) 
%in% ids, toupper), iris %>% rename_at(setdiff(names(.), "Sepal.Length"), toupper) ) }) test_that("rename_at() handles empty selection (#4324)", { expect_identical( mtcars %>% rename_at(vars(contains("fake_col")),~paste0("NewCol.",.)), mtcars ) }) test_that("rename_all/at() call the function with simple character vector (#4459)", { fun <- function(x) case_when(x == 'mpg' ~ 'fuel_efficiency', TRUE ~ x) out <- rename_all(mtcars,fun) expect_equal(names(out)[1L], 'fuel_efficiency') out <- rename_at(mtcars, vars(everything()), fun) expect_equal(names(out)[1L], 'fuel_efficiency') }) test_that("select_if() discards the column when predicate gives NA (#4486)", { out <- tibble(mycol=c("","",NA)) %>% select_if(~!all(.=="")) expect_identical( out, tibble::new_tibble(list(), nrow = 3L) ) }) # Errors ------------------------------------------------------------------ test_that("colwise select() / rename() give meaningful errors", { expect_snapshot({ df <- tibble(x = 0L, y = 0.5, z = 1) # colwise rename() (expect_error( df %>% rename_all() )) (expect_error( df %>% rename_if(is_integerish) )) (expect_error( df %>% rename_at(vars(x:y)) )) (expect_error( df %>% rename_all(list(tolower, toupper)) )) # colwise select() (expect_error( df %>% select_all(list(tolower, toupper)) )) (expect_error( df %>% select_if(function(.x) 1) )) (expect_error( df %>% select_if(function(.x) c(TRUE, TRUE)) )) (expect_error( data.frame() %>% select_all(.funs = 42) )) }) }) dplyr/tests/testthat/test-group_trim.R0000644000176200001440000000117014121112104017657 0ustar liggesuserstest_that("group_trim() is identity on non grouped data", { expect_identical(group_trim(iris), iris) }) test_that("group_trim() always regroups even if no factors", { res <- mtcars %>% group_by(cyl) %>% filter(cyl == 6, .preserve = TRUE) %>% group_trim() expect_equal(n_groups(res), 1L) }) test_that("group_trim() drops factor levels in data and grouping structure", { res <- iris %>% group_by(Species) %>% filter(Species == "setosa") 
%>% group_trim() expect_equal(n_groups(res), 1L) expect_equal(levels(res$Species), "setosa") expect_equal(levels(attr(res, "groups")$Species), "setosa") }) dplyr/tests/testthat/test-bind.R0000644000176200001440000004435714166004113016433 0ustar liggesusers# columns ----------------------------------------------------------------- test_that("bind_cols() uses shallow copies", { df1 <- data.frame( int = 1:10, num = rnorm(10), cha = letters[1:10], stringsAsFactors = FALSE ) df2 <- data.frame( log = sample(c(T, F), 10, replace = TRUE), dat = seq.Date(Sys.Date(), length.out = 10, by = "day"), tim = seq(Sys.time(), length.out = 10, by = "1 hour") ) df <- bind_cols(df1, df2) expect_equal(lobstr::obj_addrs(df1), lobstr::obj_addrs(df[names(df1)])) expect_equal(lobstr::obj_addrs(df2), lobstr::obj_addrs(df[names(df2)])) }) test_that("bind_cols() handles lists (#1104)", { exp <- tibble(x = 1, y = "a", z = 2) l1 <- list(x = 1, y = "a") l2 <- list(z = 2) expect_identical(bind_cols(l1, l2), exp) expect_identical(bind_cols(list(l1, l2)), exp) }) test_that("bind_cols() handles empty argument list (#1963)", { expect_equal(bind_cols(), tibble()) }) test_that("bind_cols() handles all-NULL values (#2303)", { expect_identical(bind_cols(list(a = NULL, b = NULL)), tibble()) expect_identical(bind_cols(NULL), tibble()) }) test_that("bind_cols() repairs names", { df <- tibble(a = 1, b = 2) expect_snapshot(bound <- bind_cols(df, df)) expect_message( repaired <- as_tibble( data.frame(a = 1, b = 2, a = 1, b = 2, check.names = FALSE), .name_repair = "unique" ), "New names" ) expect_identical(bound, repaired) }) test_that("bind_cols() unpacks tibbles", { expect_equal( bind_cols(list(y = tibble(x = 1:2))), tibble(x = 1:2) ) expect_equal( bind_cols(list(y = tibble(x = 1:2), z = tibble(y = 1:2))), tibble(x = 1:2, y = 1:2) ) }) test_that("bind_cols() honours .name_repair=", { expect_message(res <- bind_cols( data.frame(a = 1), data.frame(a = 2) )) expect_equal(res, data.frame(a...1 = 1, a...2 = 2)) 
expect_error(bind_cols(.name_repair = "check_unique", data.frame(a = 1), data.frame(a = 2) )) }) # rows -------------------------------------------------------------------- df_var <- tibble( l = c(T, F, F), i = c(1, 1, 2), d = Sys.Date() + c(1, 1, 2), f = factor(letters[c(1, 1, 2)]), n = c(1, 1, 2) + 0.5, t = Sys.time() + c(1, 1, 2), c = letters[c(1, 1, 2)] ) test_that("bind_rows() equivalent to rbind()", { exp <- as_tibble(rbind(df_var, df_var, df_var)) attr(exp$t, "tzone") <- "" res <- bind_rows(df_var, df_var, df_var) expect_equal(res, exp) }) test_that("bind_rows() reorders columns", { df_var_scramble <- df_var[sample(ncol(df_var))] expect_equal( names(bind_rows(df_var, df_var_scramble)), names(df_var) ) }) test_that("bind_rows() ignores NULL", { df <- tibble(a = 1) expect_equal(bind_rows(df, NULL), df) expect_equal(bind_rows(list(df, NULL)), df) }) test_that("bind_rows() handles list columns (#463)", { dfl <- tibble(x = list(1:2, 1:3, 1:4)) res <- bind_rows(list(dfl, dfl)) expect_equal(rep(dfl$x, 2L), res$x) }) test_that("bind_rows() handles lists of data frames #1389", { df <- tibble(x = 1) res <- bind_rows(list(df, df), list(df, df)) expect_equal(nrow(res), 4) }) test_that("bind_rows() handles data frames with no rows (#597)", { df1 <- tibble(x = 1, y = factor("a")) df0 <- df1[0, ] expect_identical(bind_rows(df0), df0) expect_identical(bind_rows(df0, df0), df0) expect_identical(bind_rows(df0, df1), df1) }) test_that("bind_rows() handles data frames with no columns (#1346)", { df1 <- tibble(x = 1, y = factor("a")) df0 <- df1[, 0] expect_equal(bind_rows(df0), df0) expect_equal(dim(bind_rows(df0, df0)), c(2, 0)) res <- bind_rows(df0, df1) expect_equal(res$x, c(NA, 1)) }) test_that("bind_rows() handles lists with NULL values (#2056)", { df1 <- tibble(x = 1, y = 1) df2 <- tibble(x = 2, y = 2) lst1 <- list(a = df1, NULL, b = df2) df3 <- tibble( names = c("a", "b"), x = c(1, 2), y = c(1, 2) ) expect_identical(bind_rows(lst1, .id = "names"), df3) }) 
test_that("bind_rows() handles lists with list() values (#2826)", { expect_equal(bind_rows(list(iris, list())), iris) }) test_that("bind_rows() puts data frames in order received even if no columns (#2175)", { df2 <- tibble(x = 2, y = "b") df1 <- df2[, 0] res <- bind_rows(df1, df2) expect_equal(res$x, c(NA, 2)) expect_equal(res$y, c(NA, "b")) }) test_that("bind_rows(.id= NULL) does not set names (#5089)", { expect_equal( attr(bind_rows(list(a = tibble(x = 1:2))), "row.names"), 1:2 ) }) # Column coercion -------------------------------------------------------------- test_that("bind_rows() promotes integer to numeric", { df1 <- tibble(a = 1L, b = 1L) df2 <- tibble(a = 1, b = 1L) res <- bind_rows(df1, df2) expect_equal(typeof(res$a), "double") expect_equal(typeof(res$b), "integer") }) test_that("bind_rows() promotes factor to character with warning", { df1 <- tibble(a = factor("a")) df2 <- tibble(a = "b") res <- bind_rows(df1, df2) expect_equal(typeof(res$a), "character") }) test_that("bind_rows() coerces factor when levels don't match", { df1 <- data.frame(a = factor("a")) df2 <- data.frame(a = factor("b")) res <- bind_rows(df1, df2) expect_equal(res$a, factor(c("a", "b"))) }) test_that("bind_rows() handles NA in factors #279", { df1 <- tibble(a = factor("a")) df2 <- tibble(a = factor(NA)) res <- bind_rows(df1, df2) expect_equal(res$a, factor(c("a", NA))) }) test_that("bind_rows() preserves timezones #298", { dates1 <- data.frame( ID = c("a", "b", "c"), dates = structure(c(-247320000, -246196800, -245073600), tzone = "GMT", class = c("POSIXct", "POSIXt") ), stringsAsFactors = FALSE ) dates2 <- data.frame( ID = c("d", "e", "f"), dates = structure(c(-243864000, -242654400, -241444800), tzone = "GMT", class = c("POSIXct", "POSIXt") ), stringsAsFactors = FALSE ) alldates <- bind_rows(dates1, dates2) expect_equal(attr(alldates$dates, "tzone"), "GMT") }) test_that("bind_rows() handles all NA columns (#493)", { mydata <- list( data.frame(x = c("foo", "bar"), 
stringsAsFactors = TRUE), data.frame(x = NA) ) res <- bind_rows(mydata) expect_true(is.na(res$x[3])) expect_s3_class(res$x, "factor") mydata <- list( data.frame(x = NA), data.frame(x = c("foo", "bar"), stringsAsFactors = TRUE) ) res <- bind_rows(mydata) expect_true(is.na(res$x[1])) expect_s3_class(res$x, "factor") }) test_that("bind_rows() handles complex. #933", { df1 <- data.frame(r = c(1 + 1i, 2 - 1i)) df2 <- data.frame(r = c(1 - 1i, 2 + 1i)) df3 <- bind_rows(df1, df2) expect_equal(nrow(df3), 4L) expect_equal(df3$r, c(df1$r, df2$r)) }) test_that("bind_rows() is careful about column names encoding #1265", { one <- data.frame(foo = 1:3, bar = 1:3) names(one) <- c("f\u00fc", "bar") two <- data.frame(foo = 1:3, bar = 1:3) names(two) <- c("f\u00fc", "bar") Encoding(names(one)[1]) <- "UTF-8" expect_equal(names(one), names(two)) res <- bind_rows(one, two) expect_equal(ncol(res), 2L) }) test_that("bind_rows() handles POSIXct (#1125)", { df1 <- data.frame(date = as.POSIXct(NA)) df2 <- data.frame(date = as.POSIXct("2015-05-05")) res <- bind_rows(df1, df2) expect_equal(nrow(res), 2L) expect_true(is.na(res$date[1])) }) test_that("bind_rows() respects ordered factors (#1112)", { l <- c("a", "b", "c", "d") id <- factor(c("a", "c", "d"), levels = l, ordered = TRUE) df <- data.frame(id = rep(id, 2), val = rnorm(6)) res <- bind_rows(df, df) expect_s3_class(res$id, "ordered") expect_equal(levels(df$id), levels(res$id)) res <- group_by(df, id) %>% filter(complete.cases(across())) expect_s3_class(res$id, "ordered") expect_equal(levels(df$id), levels(res$id)) }) test_that("bind_rows() keeps ordered factors (#948)", { y <- bind_rows( data.frame(x = factor(c(1, 2, 3), ordered = TRUE)), data.frame(x = factor(c(1, 2, 3), ordered = TRUE)) ) expect_s3_class(y$x, "ordered") expect_equal(levels(y$x), as.character(1:3)) }) test_that("bind_rows() handles POSIXct of different tz ", { date1 <- structure(-1735660800, tzone = "America/Chicago", class = c("POSIXct", "POSIXt")) date2 <- 
structure(-1735660800, tzone = "UTC", class = c("POSIXct", "POSIXt")) date3 <- structure(-1735660800, class = c("POSIXct", "POSIXt")) df1 <- data.frame(date = date1) df2 <- data.frame(date = date2) df3 <- data.frame(date = date3) res <- bind_rows(df1, df2) expect_equal(attr(res$date, "tzone"), "America/Chicago") res <- bind_rows(df1, df3) expect_equal(attr(res$date, "tzone"), "America/Chicago") res <- bind_rows(df2, df3) expect_equal(attr(res$date, "tzone"), "UTC") res <- bind_rows(df3, df3) expect_equal(attr(res$date, "tzone"), "") res <- bind_rows(df1, df2, df3) expect_equal(attr(res$date, "tzone"), "America/Chicago") }) test_that("bind_rows() creates a column of identifiers (#1337)", { data1 <- mtcars[c(2, 3), ] data2 <- mtcars[1, ] out <- bind_rows(data1, data2, .id = "col") out_list <- bind_rows(list(data1, data2), .id = "col") expect_equal(names(out)[1], "col") expect_equal(out$col, c("1", "1", "2")) expect_equal(out_list$col, c("1", "1", "2")) out_labelled <- bind_rows(one = data1, two = data2, .id = "col") out_list_labelled <- bind_rows(list(one = data1, two = data2), .id = "col") expect_equal(out_labelled$col, c("one", "one", "two")) expect_equal(out_list_labelled$col, c("one", "one", "two")) }) test_that("string vectors are filled with NA not blanks before collection (#595)", { one <- mtcars[1:10, -10] two <- mtcars[11:32, ] two$char_col <- letters[1:22] res <- bind_rows(one, two) expect_true(all(is.na(res$char_col[1:10]))) }) test_that("bind_rows() handles POSIXct stored as integer (#1402)", { now <- Sys.time() df1 <- data.frame(time = now) expect_equal(class(bind_rows(df1)$time), c("POSIXct", "POSIXt")) df2 <- data.frame(time = seq(now, length.out = 1, by = 1)) expect_equal(class(bind_rows(df2)$time), c("POSIXct", "POSIXt")) res <- bind_rows(df1, df2) expect_equal(class(res$time), c("POSIXct", "POSIXt")) expect_true(all(res$time == c(df1$time, df2$time))) }) test_that("bind_cols() accepts NULL (#1148)", { df1 <- tibble(a = 1:10, b = 1:10) df2 <- 
tibble(c = 1:10, d = 1:10) res1 <- bind_cols(df1, df2) res2 <- bind_cols(NULL, df1, df2) res3 <- bind_cols(df1, NULL, df2) res4 <- bind_cols(df1, df2, NULL) expect_identical(res1, res2) expect_identical(res1, res3) expect_identical(res1, res4) }) test_that("bind_rows() handles 0-length named list (#1515)", { res <- bind_rows(list(a = 1)[-1]) expect_equal(nrow(res), 0L) expect_s3_class(res, "data.frame") expect_equal(ncol(res), 0L) }) test_that("bind_rows() infers classes from first result (#1692)", { d1 <- data.frame(a = 1:10, b = rep(1:2, each = 5)) d2 <- as_tibble(d1) d3 <- group_by(d1, b) d4 <- rowwise(d1) d5 <- list(a = 1:10, b = rep(1:2, each = 5)) expect_equal(class(bind_rows(d1, d1)), "data.frame") expect_equal(class(bind_rows(d2, d1)), c("tbl_df", "tbl", "data.frame")) res3 <- bind_rows(d3, d1) expect_equal(class(res3), c("grouped_df", "tbl_df", "tbl", "data.frame")) expect_equal(map_int(group_rows(res3), length), c(10, 10)) expect_equal(class(bind_rows(d4, d1)), c("rowwise_df", "tbl_df", "tbl", "data.frame")) }) test_that("bind_cols() infers classes from first result (#1692)", { d1 <- data.frame(a = 1:10, b = rep(1:2, each = 5)) d2 <- tibble(c = 1:10, d = rep(1:2, each = 5)) d3 <- group_by(d2, d) d4 <- rowwise(d2) d5 <- list(c = 1:10, d = rep(1:2, each = 5)) suppressMessages({ expect_equal(class(bind_cols(d1, d1)), "data.frame") expect_equal(class(bind_cols(d2, d1)), c("tbl_df", "tbl", "data.frame")) }) res3 <- bind_cols(d3, d1) expect_equal(class(res3), c("grouped_df", "tbl_df", "tbl", "data.frame")) expect_equal(map_int(group_rows(res3), length), c(5, 5)) expect_equal(class(bind_cols(d4, d1)), c("rowwise_df", "tbl_df", "tbl", "data.frame")) expect_equal(class(bind_cols(d5, d1)), "data.frame") }) test_that("bind_rows() accepts data frame columns (#2015)", { df <- list( x = 1:10, y = data.frame(a = 1:10, y = 1:10) ) class(df) <- "data.frame" attr(df, "row.names") <- .set_row_names(10) res <- dplyr::bind_rows(df, df) expect_s3_class(df$y, "data.frame") 
expect_equal(names(df$y), c("a", "y")) }) test_that("bind_rows() accepts difftime objects", { df1 <- data.frame(x = as.difftime(1, units = "hours")) df2 <- data.frame(x = as.difftime(1, units = "mins")) res <- bind_rows(df1, df2) expect_equal(res$x, as.difftime(c(3600, 60), units = "secs")) }) test_that("bind_rows() handles rowwise vectors", { tbl <- bind_rows( tibble(a = "foo", b = "bar"), c(a = "A", b = "B") ) expect_identical(tbl, tibble(a = c("foo", "A"), b = c("bar", "B"))) id_tbl <- bind_rows(a = c(a = 1, b = 2), b = c(a = 3, b = 4), .id = "id") expect_equal( id_tbl, tibble(id = c("a", "b"), a = c(1, 3), b = c(2, 4)), ignore_attr = TRUE ) }) test_that("bind_rows() accepts lists of dataframe-like lists as first argument", { ll <- list(a = 1, b = 2) expect_equal(bind_rows(list(ll)), tibble(a = 1, b = 2)) expect_equal(bind_rows(list(ll, ll)), tibble(a = c(1, 1), b = c(2, 2))) }) test_that("bind_rows() can handle lists (#1104)", { my_list <- list(list(x = 1, y = "a"), list(x = 2, y = "b")) res <- bind_rows(my_list) expect_equal(nrow(res), 2L) expect_type(res$x, "double") expect_type(res$y, "character") res <- bind_rows(list(x = 1, y = "a"), list(x = 2, y = "b")) expect_equal(nrow(res), 2L) expect_type(res$x, "double") expect_type(res$y, "character") }) test_that("columns that are OBJECT but have NULL class are handled gracefully (#3349)", { mod <- lm(y ~ ., data = freeny) data <- model.frame(mod) data_list <- list(data, data) res <- bind_rows(data_list) expect_equal(names(res), names(data)) }) # Vectors ------------------------------------------------------------ test_that("accepts named columns", { expect_identical(bind_cols(a = 1:2, b = 3:4), tibble(a = 1:2, b = 3:4)) }) test_that("ignores NULL values", { expect_identical(bind_cols(a = 1, NULL, b = 2, NULL), tibble(a = 1, b = 2)) }) test_that("bind_cols() handles unnamed list with name repair (#3402)", { expect_snapshot(df <- bind_cols(list(1, 2))) expect_identical(df, bind_cols(list(...1 = 1, ...2 = 2))) }) 
test_that("bind_cols() doesn't squash record types", { df <- data.frame(x = 1) posixlt <- as.POSIXlt(as.Date("1970-01-01")) expect_identical( bind_cols(df, y = posixlt), new_data_frame(list(x = 1, y = posixlt)) ) }) test_that("bind_rows() only flattens list subclasses with explicit inheritance (#3924)", { df <- data.frame(x = 1, y = 2) lst1 <- structure(list(df, df, df), class = "special_lst") expect_error(bind_rows(lst1), "must be a data frame or a named atomic vector") lst2 <- structure(list(df, df, df), class = c("special_lst", "list")) expect_equal(bind_rows(lst2), bind_rows(df,df,df)) }) test_that("bind_rows() handles named list", { expect_equal(bind_rows(map(mtcars, mean)), summarise_all(mtcars, mean), ignore_attr = TRUE) expect_equal(bind_rows(!!!map(mtcars, mean)), summarise_all(mtcars, mean), ignore_attr = TRUE) }) test_that("bind_rows() handles named S3 objects (#4931)", { df <- tibble(x = "foo", y = "bar") fct <- set_names(factor(c("a", "b")), c("x", "y")) expect_identical( bind_rows(df, fct), tibble(x = c("foo", "a"), y = c("bar", "b")) ) expect_identical( bind_rows(fct, fct), tibble( x = factor(c("a", "a"), levels = c("a", "b")), y = factor(c("b", "b"), levels = c("a", "b")) ) ) }) test_that("bind_rows() correctly restores (#2457)", { df <- bind_rows( tibble(x = vctrs::list_of(1)) ) expect_s3_class(df$x, "vctrs_list_of") }) test_that("bind_rows() validates lists (#5417)", { out <- bind_rows(list(x = 1), list(x = 1, y = 1:2)) expect_identical(out, tibble(x = c(1, 1, 1), y = c(NA, 1:2))) x <- vctrs::list_of(a = data.frame(x = 1), b = data.frame(y = 2:3)) out <- bind_rows(x) exp <- tibble( a = vctrs::data_frame(x = c(1, 1), y = int(NA, NA)), b = vctrs::data_frame(x = dbl(NA, NA), y = 2:3) ) expect_identical(out, exp) }) test_that("bind_rows() handles missing, null, and empty elements (#5429)", { x <- list(a = "A", b = 1) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(a = c("A", "B"), b = c(1, 2)) ) x <- list(a = NA, b = 
NA) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(a = c(NA, "B"), b = c(NA, 2)) ) x <- list(a = NULL, b = NULL) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(a = "B", b = 2) ) x <- list(a = NULL, b = 1) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(b = c(1, 2), a = c(NA, "B")) ) x <- list(a = character(0), b = 1) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(a = "B", b = 2) ) x <- list(a = character(0), b = 1:2) y <- list(a = "B", b = 2) l <- list(x, y) expect_error( bind_rows(l), class = "vctrs_error_incompatible_size" ) x <- list(a = letters[1:3], b = 1:2) y <- list(a = "B", b = 2) l <- list(x, y) expect_error( bind_rows(l), class = "vctrs_error_incompatible_size" ) }) # Errors ------------------------------------------------------------------ test_that("*_bind() give meaningful errors", { expect_snapshot({ # invalid .id df1 <- tibble(x = 1:3) df2 <- tibble(x = 4:6) (expect_error(bind_rows(df1, df2, .id = 5))) # invalid type" ll <- list(1:5, env(a = 1)) (expect_error(bind_rows(ll))) ll <- list(tibble(a = 1:5), env(a = 1)) (expect_error(bind_rows(ll))) df1 <- tibble(a = factor("a")) df2 <- tibble(a = 1L) df3 <- tibble(a = 1) (expect_error(bind_rows(df1, df2))) (expect_error(bind_rows(df1, df3))) df1 <- tibble(b = c(1, 2)) df2 <- tibble(b = c(1L, 2L)) df3 <- tibble(b = factor(c("A", "B"))) df4 <- tibble(b = c("C", "D")) (expect_error(bind_rows(df1, df3))) (expect_error(bind_rows(df1, df4))) (expect_error(bind_rows(df2, df3))) (expect_error(bind_rows(df2, df4))) "# unnamed vectors" (expect_error(bind_rows(1:2))) "# incompatible size" (expect_error(bind_cols(a = 1:2, mtcars))) (expect_error(bind_cols(mtcars, a = 1:3))) }) }) dplyr/tests/testthat/test-colwise.R0000644000176200001440000000204314151641776017166 0ustar liggesuserstest_that("tbl_at_vars() treats `NULL` as empty inputs", { expect_identical(tbl_at_vars(mtcars, 
vars(NULL)), tbl_at_vars(mtcars, vars())) expect_identical( tibble::remove_rownames(mutate_at(mtcars, vars(NULL), `*`, 100)), tibble::remove_rownames(mtcars) ) }) test_that("lists of formulas are auto-named", { df <- tibble(x = 1:3, y = 4:6) out <- df %>% summarise_all(list(~ mean(.), ~sd(.x, na.rm = TRUE))) expect_named(out, c("x_mean", "y_mean", "x_sd", "y_sd")) out <- df %>% summarise_all(list(foobar = ~ mean(.), ~sd(.x, na.rm = TRUE))) expect_named(out, c("x_foobar", "y_foobar", "x_sd", "y_sd")) }) # Errors -------------------------------------------- test_that("colwise utils gives meaningful error messages", { expect_snapshot({ (expect_error( tbl_at_vars(iris, raw(3)) )) (expect_error( tbl_if_vars(iris, list(identity, force), environment()) )) .funs <- as_fun_list(list(identity, force), caller_env()) (expect_error( tbl_if_vars(iris, .funs, environment()) )) }) }) dplyr/tests/testthat/test-tbl.R0000644000176200001440000000062214121112104016252 0ustar liggesuserstest_that("tbl_nongroup_vars() excludes group variables", { gdf <- group_by(mtcars, cyl) expect_identical(tbl_nongroup_vars(gdf), setdiff(tbl_vars(gdf), "cyl")) }) test_that("tbl_vars() records groups", { gdf <- group_by(mtcars, cyl, am) expect_s3_class(tbl_vars(gdf), "dplyr_sel_vars") expect_true(is_sel_vars(tbl_vars(gdf))) expect_identical(tbl_vars(gdf) %@% groups, c("cyl", "am")) }) dplyr/tests/testthat/test-funs-predicates.R0000644000176200001440000000071014151641776020614 0ustar liggesuserstest_that("all_exprs() creates intersection", { expect_identical(all_exprs(am == 1), quo(am == 1)) quo <- set_env(quo((!!quo(cyl == 2)) & (!!quo(am == 1))), base_env()) expect_identical(all_exprs(cyl == 2, am == 1), quo) }) test_that("any_exprs() creates union", { expect_identical(any_exprs(am == 1), quo(am == 1)) quo <- set_env(quo((!!quo(cyl == 2)) | (!!quo(am == 1))), base_env()) expect_identical(any_exprs(cyl == 2, am == 1), quo) }) 
dplyr/tests/testthat/test-group_split.R0000644000176200001440000001005014121112104020034 0ustar liggesuserstest_that("group_split() keeps the grouping variables by default", { tbl <- tibble(x = 1:4, g = factor(rep(c("a", "b"), each = 2))) res <- group_split(tbl, g) expect_equal(res, list_of(tbl[1:2,], tbl[3:4,])) expect_identical(res, list_of(tbl[1:2,], tbl[3:4,])) expect_s3_class(res, "vctrs_list_of") expect_identical(attr(res, "ptype"), tibble(x = integer(), g = factor(levels = c("a", "b")))) }) test_that("group_split() can discard the grouping variables with .keep = FALSE", { tbl <- tibble(x = 1:4, g = factor(rep(c("a", "b"), each = 2))) res <- group_split(tbl, g, .keep = FALSE) expect_identical(res, list_of(tbl[1:2, 1, drop = FALSE], tbl[3:4,1, drop = FALSE])) expect_s3_class(res, "vctrs_list_of") expect_identical(attr(res, "ptype"), tibble(x = integer())) }) test_that("group_split() respects empty groups", { tbl <- tibble(x = 1:4, g = factor(rep(c("a", "b"), each = 2), levels = c("a", "b", "c"))) res <- group_split(tbl, g) expect_identical(res, list_of(tbl[1:2,], tbl[3:4,])) expect_s3_class(res, "vctrs_list_of") expect_identical(attr(res, "ptype"), tibble(x = integer(), g = factor(levels = c("a", "b", "c")))) res <- group_split(tbl, g, .drop = FALSE) expect_identical(res, list_of(tbl[1:2,], tbl[3:4,], tbl[integer(), ])) }) test_that("group_split.grouped_df() warns about ...", { expect_warning(group_split(group_by(mtcars, cyl), cyl)) }) test_that("group_split.rowwise_df() warns about ...", { expect_warning(group_split(rowwise(mtcars), cyl)) }) test_that("group_split.grouped_df() works", { iris <- as_tibble(iris) expect_identical( iris %>% group_by(Species) %>% group_split(), iris %>% group_split(Species) ) }) test_that("group_split / bind_rows round trip", { setosa <- iris %>% filter(Species == "setosa") %>% as_tibble() chunks <- setosa %>% group_split(Species) expect_identical(length(chunks), 1L) expect_identical(bind_rows(chunks), setosa) chunks <- setosa %>% 
group_split(Species, .drop = FALSE) expect_identical(length(chunks), 3L) expect_identical(bind_rows(chunks), setosa) }) test_that("group_split() works if no grouping column", { expect_identical(group_split(iris), list_of(as_tibble(iris))) }) test_that("group_split(keep=FALSE) does not try to remove virtual grouping columns (#4045)", { iris3 <- as_tibble(iris[1:3,]) rows <- list(c(1L, 3L, 2L), c(3L, 2L, 3L)) df <- new_grouped_df( iris3, groups = tibble(.bootstrap = 1:2, .rows := rows) ) res <- group_split(df, .keep = FALSE) expect_identical( res, list_of(iris3[rows[[1L]],], iris3[rows[[2L]],]) ) }) test_that("group_split() respects .drop", { chunks <- tibble(f = factor("b", levels = c("a", "b", "c"))) %>% group_split(f, .drop = TRUE) expect_identical(length(chunks), 1L) }) test_that("group_split() on a bare data frame returns bare tibbles", { df <- data.frame(x = 1:2) tib <- as_tibble(df) expect <- list_of(vec_slice(tib, 1), vec_slice(tib, 2)) expect_identical(group_split(df, x), expect) }) test_that("group_split() on a grouped df returns a list of tibbles", { df <- tibble(x = 1:2) gdf <- group_by(df, x) expect <- list_of(vec_slice(df, 1), vec_slice(df, 2)) expect_identical(group_split(gdf), expect) }) test_that("group_split() on a rowwise df returns a list of tibbles", { df <- tibble(x = 1:2) rdf <- rowwise(df) expect <- list_of(vec_slice(df, 1), vec_slice(df, 2)) expect_identical(group_split(rdf), expect) }) test_that("group_split() works with subclasses implementing group_by() / ungroup()", { local_foo_df() df <- list(x = c(1, 2, 2)) df <- new_tibble(df, nrow = 3L, class = "foo_df") expect <- list_of(vec_slice(df, 1), vec_slice(df, 2:3)) expect_identical(group_split(df, x), expect) }) test_that("group_split() internally uses dplyr_row_slice()", { local_foo_df() df <- list(x = c(1, 2, 2)) df <- new_tibble(df, nrow = 3L, class = "foo_df") local_methods( dplyr_row_slice.foo_df = function(x, i, ...) 
{ abort(class = "dplyr_row_slice_called") } ) expect_error(group_split(df, x), class = "dplyr_row_slice_called") }) dplyr/tests/testthat/test-deprec-combine.R0000644000176200001440000001634714151641776020411 0ustar liggesuserstest_that("combine() is deprecated", { expect_snapshot(combine()) }) test_that("combine handles NULL (#1596, #3365)", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal(combine(list(NULL, 1, 2)), c(1, 2)) expect_equal(combine(list(1, NULL, 2)), c(1, 2)) expect_equal(combine(list(1, 2, NULL)), c(1, 2)) expect_equal(combine(), logical()) expect_equal(combine(list(NULL)), logical()) expect_equal(combine(list(NULL, NULL), list(NULL)), list(NULL, NULL, NULL)) expect_equal(combine(NULL, list(NULL, NULL)), list(NULL, NULL)) }) test_that("combine works with input that used to fail (#1780)", { withr::local_options(lifecycle_verbosity = "quiet") no <- list(alpha = letters[1:3], omega = letters[24:26]) expect_equal(combine(no), unlist(no, use.names = FALSE)) }) test_that("combine works with NA and logical (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first expected_result <- c(NA, TRUE, FALSE, NA, TRUE) works1 <- combine(list(NA, TRUE, FALSE, NA, TRUE)) expect_equal(works1, expected_result) # NA length == 1 expected_result <- c(TRUE, FALSE, NA, TRUE) works1 <- combine(list(TRUE, FALSE, NA, TRUE)) expect_equal(works1, expected_result) # NA length > 1 expected_result <- c(TRUE, FALSE, NA, NA, TRUE) works3 <- combine(list(TRUE, FALSE, c(NA, NA), TRUE)) expect_equal(works3, expected_result) }) test_that("combine works with NA and integers (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") works <- combine(list(1L, 2L, NA, 4L)) expect_equal(works, c(1L, 2L, NA, 4L)) works <- combine(list(1L, 2L, c(NA, NA), 4L)) expect_equal(works, c(1L, 2L, NA, NA, 4L)) }) test_that("combine works with NA and factors (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first fac <- factor(c("a", "c", NA, 
"b"), levels = letters[1:3]) expected_result <- fac[c(3, 1, 3, 2)] works1 <- combine(list(NA, fac[1], NA, fac[2])) expect_equal(works1, expected_result) # NA length == 1 expected_result <- fac works1 <- combine(list(fac[1], fac[2], fac[3], fac[4])) expect_equal(works1, expected_result) works2 <- combine(list(fac[1], fac[2], NA, fac[4])) expect_equal(works2, expected_result) # NA length > 1 expected_result <- fac[c(1, 2, 3, 3, 4)] works3 <- combine(list(fac[1], fac[2], fac[c(3, 3)], fac[4])) expect_equal(works3, expected_result) works4 <- combine(list(fac[1], fac[2], c(NA, NA), fac[4])) expect_equal(works4, expected_result) }) test_that("combine works with NA and double (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first works <- combine(list(NA, 1.5, 2.5, NA, 4.5)) expect_equal(works, c(NA, 1.5, 2.5, NA, 4.5)) # NA length 1 works <- combine(list(1.5, 2.5, NA, 4.5)) expect_equal(works, c(1.5, 2.5, NA, 4.5)) # NA length > 1 works <- combine(list(1.5, 2.5, c(NA, NA), 4.5)) expect_equal(works, c(1.5, 2.5, NA, NA, 4.5)) }) test_that("combine works with NA and characters (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first works <- combine(list(NA, "a", "b", "c", NA, "e")) expect_equal(works, c(NA, "a", "b", "c", NA, "e")) # NA length 1 works <- combine(list("a", "b", "c", NA, "e")) expect_equal(works, c("a", "b", "c", NA, "e")) # NA length > 1 works <- combine(list("a", "b", "c", c(NA, NA), "e")) expect_equal(works, c("a", "b", "c", NA, NA, "e")) }) test_that("combine works with NA and POSIXct (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first works <- combine(list( NA, as.POSIXct("2010-01-01"), as.POSIXct("2010-01-02"), NA, as.POSIXct("2010-01-04") )) expect_equal( works, c(as.POSIXct(c(NA, "2010-01-01", "2010-01-02", NA, "2010-01-04"))), ignore_attr = TRUE ) # NA length 1 works <- combine(list( as.POSIXct("2010-01-01"), as.POSIXct("2010-01-02"), NA, as.POSIXct("2010-01-04") )) expect_equal( works, 
c(as.POSIXct(c("2010-01-01", "2010-01-02", NA, "2010-01-04"))), ignore_attr = TRUE ) # NA length > 1 works <- combine(list( as.POSIXct("2010-01-01"), as.POSIXct("2010-01-02"), c(NA, NA), as.POSIXct("2010-01-04") )) expect_equal( works, c(as.POSIXct(c("2010-01-01", "2010-01-02", NA, NA, "2010-01-04"))), ignore_attr = TRUE ) }) test_that("combine works with NA and Date (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first expected_result <- as.Date("2010-01-01") + c(NA, 1, 2, NA, 4) expect_equal(combine(as.list(expected_result)), expected_result) # NA length == 1 expected_result <- c(as.Date(c("2010-01-01", "2010-01-02", NA, "2010-01-04"))) works1 <- combine(list( as.Date("2010-01-01"), as.Date("2010-01-02"), as.Date(NA), as.Date("2010-01-04") )) expect_equal(works1, expected_result) works2 <- combine(list( as.Date("2010-01-01"), as.Date("2010-01-02"), NA, as.Date("2010-01-04") )) expect_equal(works2, expected_result) # NA length > 1 expected_result <- as.Date("2010-01-01") + c(0, 1, NA, NA, 3) works1 <- combine(split(expected_result, c(1, 2, 3, 3, 4))) expect_equal(works1, expected_result) works2 <- combine(list( as.Date("2010-01-01"), as.Date("2010-01-02"), c(NA, NA), as.Date("2010-01-04") )) expect_equal(works2, expected_result) }) test_that("combine works with NA and complex (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first expected_result <- c(NA, 1 + 2i) works1 <- combine(list(NA, 1 + 2i)) expect_equal(works1, expected_result) # NA length == 1 expected_result <- c(1, 2, NA, 4) + 1i expect_equal(combine(as.list(expected_result)), expected_result) works2 <- combine(list(1 + 1i, 2 + 1i, NA, 4 + 1i)) expect_equal(works2, expected_result) # NA length > 1 expected_result <- c(1, 2, NA, NA, 4) + 1i expect_equal( combine(split(expected_result, c(1, 2, 3, 3, 4))), expected_result ) works3 <- combine(list(1 + 1i, 2 + 1i, c(NA, NA), 4 + 1i)) expect_equal(works3, expected_result) }) test_that("combine works with difftime", { 
withr::local_options(lifecycle_verbosity = "quiet") expect_equal( combine(as.difftime(1, units = "mins"), as.difftime(1, units = "hours")), as.difftime(c(60, 3600), units = "secs") ) expect_equal( combine(as.difftime(1, units = "secs"), as.difftime(1, units = "secs")), as.difftime(c(1, 1), units = "secs") ) expect_equal( combine(as.difftime(1, units = "days"), as.difftime(1, units = "secs")), as.difftime(c(24 * 60 * 60, 1), units = "secs") ) expect_equal( combine(as.difftime(2, units = "weeks"), as.difftime(1, units = "secs")), as.difftime(c(2 * 7 * 24 * 60 * 60, 1), units = "secs") ) expect_equal( combine(as.difftime(2, units = "weeks"), as.difftime(3, units = "weeks")), as.difftime(c(2, 3), units = "weeks") ) }) test_that("combine uses tidy dots (#3407)", { withr::local_options(lifecycle_verbosity = "quiet") chunks <- list(1,2,3) expect_equal(combine(!!!chunks), c(1,2,3)) }) # Errors ------------------------------------------------------------------ test_that("combine() gives meaningful error messages", { withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(combine("a", 1))) (expect_error(combine(factor("a"), 1L))) }) }) dplyr/tests/testthat/test-rbind.R0000644000176200001440000000723114121112104016572 0ustar liggesusersdf_var <- data.frame( l = c(T, F, F), i = c(1, 1, 2), d = Sys.Date() + c(1, 1, 2), f = factor(letters[c(1, 1, 2)]), n = c(1, 1, 2) + 0.5, t = Sys.time() + c(1, 1, 2), c = letters[c(1, 1, 2)], stringsAsFactors = FALSE ) test_that("bind_rows handles complex. 
#933", { df1 <- data.frame(r = c(1 + 1i, 2 - 1i)) df2 <- data.frame(r = c(1 - 1i, 2 + 1i)) df3 <- bind_rows(df1, df2) expect_equal(nrow(df3), 4L) expect_equal(df3$r, c(df1$r, df2$r)) }) test_that("bind_rows is careful about column names encoding #1265", { one <- data.frame(foo = 1:3, bar = 1:3) names(one) <- c("f\u00fc", "bar") two <- data.frame(foo = 1:3, bar = 1:3) names(two) <- c("f\u00fc", "bar") Encoding(names(one)[1]) <- "UTF-8" expect_equal(names(one), names(two)) res <- bind_rows(one, two) expect_equal(ncol(res), 2L) }) test_that("bind_rows handles POSIXct (#1125)", { df1 <- data.frame(date = as.POSIXct(NA)) df2 <- data.frame(date = as.POSIXct("2015-05-05")) res <- bind_rows(df1, df2) expect_equal(nrow(res), 2L) expect_true(is.na(res$date[1])) }) test_that("bind_rows respects ordered factors (#1112)", { l <- c("a", "b", "c", "d") id <- factor(c("a", "c", "d"), levels = l, ordered = TRUE) df <- data.frame(id = rep(id, 2), val = rnorm(6)) res <- bind_rows(df, df) expect_s3_class(res$id, "ordered") expect_equal(levels(df$id), levels(res$id)) }) test_that("bind handles POSIXct of different tz ", { date1 <- structure(-1735660800, tzone = "America/Chicago", class = c("POSIXct", "POSIXt")) date2 <- structure(-1735660800, tzone = "UTC", class = c("POSIXct", "POSIXt")) date3 <- structure(-1735660800, class = c("POSIXct", "POSIXt")) df1 <- data.frame(date = date1) df2 <- data.frame(date = date2) df3 <- data.frame(date = date3) res <- bind_rows(df1, df2) expect_equal(attr(res$date, "tzone"), "America/Chicago") res <- bind_rows(df1, df3) expect_equal(attr(res$date, "tzone"), "America/Chicago") res <- bind_rows(df2, df3) expect_equal(attr(res$date, "tzone"), "UTC") res <- bind_rows(df3, df3) expect_equal(attr(res$date, "tzone"), "") res <- bind_rows(df1, df2, df3) expect_equal(attr(res$date, "tzone"), "America/Chicago") }) test_that("bind_rows() creates a column of identifiers (#1337)", { data1 <- mtcars[c(2, 3), ] data2 <- mtcars[1, ] out <- bind_rows(data1, data2, .id 
= "col") out_list <- bind_rows(list(data1, data2), .id = "col") expect_equal(names(out)[1], "col") expect_equal(out$col, c("1", "1", "2")) expect_equal(out_list$col, c("1", "1", "2")) out_labelled <- bind_rows(one = data1, two = data2, .id = "col") out_list_labelled <- bind_rows(list(one = data1, two = data2), .id = "col") expect_equal(out_labelled$col, c("one", "one", "two")) expect_equal(out_list_labelled$col, c("one", "one", "two")) }) test_that("empty data frame are handled (#1346)", { res <- tibble() %>% bind_rows(tibble(x = "a")) expect_equal(nrow(res), 1L) }) test_that("bind_rows handles POSIXct stored as integer (#1402)", { now <- Sys.time() df1 <- data.frame(time = now) expect_equal(class(bind_rows(df1)$time), c("POSIXct", "POSIXt")) df2 <- data.frame(time = seq(now, length.out = 1, by = 1)) expect_equal(class(bind_rows(df2)$time), c("POSIXct", "POSIXt")) res <- bind_rows(df1, df2) expect_equal(class(res$time), c("POSIXct", "POSIXt")) expect_true(all(res$time == c(df1$time, df2$time))) }) test_that("bind_rows() correctly handles consecutive NULLs (#4296)", { res <- list( a = tibble(expected_id = "a"), b = NULL, c = NULL, d = tibble(expected_id = "d"), c = NULL, e = tibble(expected_id = "e") ) %>% bind_rows(.id = "id") expect_equal(res$id, res$expected_id) }) dplyr/tests/testthat/test-arrange.r0000644000176200001440000001225214151646326017177 0ustar liggesusers# To turn on warnings from tibble::`names<-()` local_options(lifecycle_verbosity = "warning") test_that("empty arrange() returns input", { df <- tibble(x = 1:10, y = 1:10) gf <- group_by(df, x) expect_identical(arrange(df), df) expect_identical(arrange(gf), gf) expect_identical(arrange(df, !!!list()), df) expect_identical(arrange(gf, !!!list()), gf) }) test_that("can sort empty data frame", { df <- tibble(a = numeric(0)) expect_equal(arrange(df, a), df) }) test_that("local arrange sorts missing values to end", { df <- data.frame(x = c(2, 1, NA)) expect_equal(df %>% arrange(x) %>% pull(), c(1, 2, NA)) 
expect_equal(df %>% arrange(desc(x)) %>% pull(), c(2, 1, NA)) }) test_that("arrange() gives meaningful errors", { expect_snapshot({ # duplicated column name (expect_error( tibble(x = 1, x = 1, .name_repair = "minimal") %>% arrange(x) )) # error in mutate() step (expect_error( tibble(x = 1) %>% arrange(y) )) (expect_error( tibble(x = 1) %>% arrange(rep(x, 2)) )) }) }) # column types ---------------------------------------------------------- test_that("arrange handles list columns (#282)", { # no intrinsic ordering df <- tibble(x = 1:3, y = list(3, 2, 1)) expect_equal(arrange(df, y), df) df <- tibble(x = 1:3, y = list(sum, mean, sd)) expect_equal(arrange(df, y), df) }) test_that("arrange handles raw columns (#1803)", { df <- tibble(x = 1:3, y = as.raw(3:1)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange handles matrix columns", { df <- tibble(x = 1:3, y = matrix(6:1, ncol = 2)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange handles data.frame columns (#3153)", { df <- tibble(x = 1:3, y = data.frame(z = 3:1)) expect_equal(arrange(df, y), tibble(x = 3:1, y = data.frame(z = 1:3))) }) test_that("arrange handles complex columns", { df <- tibble(x = 1:3, y = 3:1 + 2i) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange handles S4 classes (#1105)", { TestS4 <- suppressWarnings(setClass("TestS4", contains = "integer")) setMethod('[', 'TestS4', function(x, i, ...){ TestS4(unclass(x)[i, ...]) }) on.exit(removeClass("TestS4")) df <- tibble(x = 1:3, y = TestS4(3:1)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange respects locale (#1280)", { df2 <- tibble(words = c("casa", "\u00e1rbol", "zona", "\u00f3rgano")) res <- df2 %>% arrange(words) expect_equal(res$words, sort(df2$words)) res <- df2 %>% arrange(desc(words)) expect_equal(res$words, sort(df2$words, decreasing = TRUE)) }) # data ---------------------------------------------------------------- test_that("arrange preserves input class", { df1 <- data.frame(x = 1:3, y = 3:1) 
df2 <- tibble(x = 1:3, y = 3:1) df3 <- df1 %>% group_by(x) expect_s3_class(arrange(df1, x), "data.frame", exact = TRUE) expect_s3_class(arrange(df2, x), "tbl_df") expect_s3_class(arrange(df3, x), "grouped_df") }) test_that("grouped arrange ignores group, unless requested with .by_group", { df <- data.frame(g = c(2, 1, 2, 1), x = 4:1) gf <- group_by(df, g) expect_equal(arrange(gf, x), gf[4:1, ,]) expect_equal(arrange(gf, x, .by_group = TRUE), gf[c(4, 2, 3, 1), ,]) }) test_that("arrange updates the grouping structure (#605)", { df <- tibble(g = c(2, 2, 1, 1), x = c(1, 3, 2, 4)) res <- df %>% group_by(g) %>% arrange(x) expect_s3_class(res, "grouped_df") expect_equal(group_rows(res), list_of(c(2L, 4L), c(1L, 3L))) }) test_that("arrange() supports across() (#4679)", { df <- tibble(x = c(1, 3, 2, 1), y = c(4, 3, 2, 1)) expect_identical( df %>% arrange(across()), df %>% arrange(x, y) ) expect_identical( df %>% arrange(across(.fns = desc)), df %>% arrange(desc(x), desc(y)) ) expect_identical( df %>% arrange(across(x)), df %>% arrange(x) ) expect_identical( df %>% arrange(across(y)), df %>% arrange(y) ) }) test_that("arrange() with empty dots still calls dplyr_row_slice()", { tbl <- new_tibble(list(x = 1), nrow = 1L) foo <- structure(tbl, class = c("foo_df", class(tbl))) local_methods( # `foo_df` always loses class when row slicing dplyr_row_slice.foo_df = function(data, i, ...) { out <- NextMethod() new_tibble(out, nrow = nrow(out)) } ) expect_s3_class(arrange(foo), class(tbl), exact = TRUE) expect_s3_class(arrange(foo, x), class(tbl), exact = TRUE) }) test_that("can arrange() with unruly class", { local_methods( `[.dplyr_foobar` = function(x, i, ...) new_dispatched_quux(vec_slice(x, i)), dplyr_row_slice.dplyr_foobar = function(x, i, ...) 
x[i, ] ) df <- foobar(data.frame(x = 1:3)) expect_identical( arrange(df, desc(x)), quux(data.frame(x = 3:1, dispatched = TRUE)) ) }) test_that("arrange() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) stack <<- sys.calls(), arrange(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("desc() inside arrange() checks the number of arguments (#5921)", { expect_snapshot({ df <- data.frame(x = 1, y = 2) (expect_error(arrange(df, desc(x, y)))) }) }) dplyr/tests/testthat/test-empty-groups.R0000644000176200001440000000536714121112104020157 0ustar liggesusersdf <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) %>% group_by(e, f, g, .drop = FALSE) test_that("filter and slice keep zero length groups", { expect_equal(group_size(filter(df, f == 1)), c(2, 0, 0) ) expect_equal(group_size(slice(df, 1)), c(1, 1, 0) ) }) test_that("filtering and slicing retains labels for zero length groups", { expect_equal( ungroup(count(filter(df, f == 1))), tibble( e = 1, f = factor(1:3), g = c(1, 2, NA), n = c(2L, 0L, 0L) ) ) expect_equal( ungroup(count(slice(df, 1))), tibble( e = 1, f = factor(1:3), g = c(1, 2, NA), n = c(1L, 1L, 0L) ) ) }) test_that("mutate keeps zero length groups", { expect_equal( group_size(mutate(df, z = 2)), c(2, 2, 0) ) }) test_that("summarise returns a row for zero length groups", { expect_equal( nrow(summarise(df, z = n())), 3L) }) test_that("arrange keeps zero length groups",{ expect_equal( group_size(arrange(df)), c(2, 2, 0) ) expect_equal( group_size(arrange(df, x)), c(2, 2, 0) ) }) test_that("bind_rows respect the drop attribute of grouped df",{ gg <- bind_rows(df, df) expect_equal(group_size(gg), c(4L,4L,0L)) }) test_that("joins respect zero length groups", { df1 <- tibble(f = factor( c(1,1,2,2), levels = 1:3), x = c(1,2,1,4)) %>% group_by(f) df2 <- tibble(f = factor( c(2,2,3,3), levels = 
1:3), y = c(1,2,3,4)) %>% group_by(f) expect_equal(group_size(left_join( df1, df2, by = "f")), c(2,4)) expect_equal(group_size(right_join( df1, df2, by = "f")), c(4,2)) expect_equal(group_size(full_join( df1, df2, by = "f")), c(2,4,2)) expect_equal(group_size(anti_join( df1, df2, by = "f")), c(2)) expect_equal(group_size(inner_join( df1, df2, by = "f")), c(4)) df1 <- tibble(f = factor( c(1,1,2,2), levels = 1:3), x = c(1,2,1,4)) %>% group_by(f, .drop = FALSE) df2 <- tibble(f = factor( c(2,2,3,3), levels = 1:3), y = c(1,2,3,4)) %>% group_by(f, .drop = FALSE) expect_equal(group_size(left_join( df1, df2, by = "f")), c(2,4,0)) expect_equal(group_size(right_join( df1, df2, by = "f")), c(0,4,2)) expect_equal(group_size(full_join( df1, df2, by = "f")), c(2,4,2)) expect_equal(group_size(anti_join( df1, df2, by = "f")), c(2,0,0)) expect_equal(group_size(inner_join( df1, df2, by = "f")), c(0,4,0)) }) test_that("n_groups respects zero-length groups (#341)", { df <- tibble(x = factor(1:3, levels = 1:4)) %>% group_by(x, .drop = FALSE) expect_equal(n_groups(df), 4) }) test_that("summarise respects zero-length groups (#341)", { df <- tibble(x = factor(rep(1:3, each = 10), levels = 1:4)) out <- df %>% group_by(x, .drop = FALSE) %>% summarise(n = n()) expect_equal(out$n, c(10L, 10L, 10L, 0L)) }) dplyr/tests/testthat/test-group_data.R0000644000176200001440000000627514121112104017630 0ustar liggesusers # group_data -------------------------------------------------------------- test_that("group_data() returns a data frame", { df <- data.frame(x = 1:3) gd <- group_data(df) expect_s3_class(gd, "data.frame", exact = TRUE) expect_equal(gd$.rows, list_of(1:3)) }) test_that("group_data() returns a tibble", { df <- tibble(x = 1:3) gd <- group_data(df) expect_s3_class(gd, "tbl_df") expect_equal(gd, tibble(".rows" := list_of(1:3))) }) test_that("group_data() returns a tibble", { df <- tibble(x = c(1, 1, 2)) gf <- group_by(df, x) gd <- group_data(gf) expect_s3_class(gd, "tbl_df") expect_equal( 
gd, tibble(x = c(1, 2), ".rows" := list_of(1:2, 3L)), ignore_attr = TRUE ) }) test_that("group_data(% group_keys(x), "deprecated") expect_equal(out, tibble(x = 1)) }) # group_indices() --------------------------------------------------------- test_that("no arg group_indices() is deprecated", { df <- tibble(x = 1) expect_warning(out <- summarise(df, id = group_indices()), "deprecated") expect_equal(out, tibble(id = 1)) }) test_that("group_indices(...) is deprecated", { rlang::local_options(lifecycle_verbosity = "error") df <- tibble(x = 1, y = 2) expect_error(df %>% group_indices(x), "deprecated") }) test_that("group_indices(...) still works though", { rlang::local_options(lifecycle_verbosity = "quiet") df <- tibble(x = 1, y = 2) out <- df %>% group_indices(x) expect_equal(out, 1) }) test_that("group_indices() returns expected values", { df <- tibble(x = c("b", "a", "b")) gf <- group_by(df, x) expect_equal(group_indices(df), c(1, 1, 1)) expect_equal(group_indices(gf), c(2, 1, 2)) }) test_that("group_indices() handles 0 rows data frames (#5541)", { df <- new_grouped_df( data.frame(x = integer(), y = integer()), groups = data.frame(x=0, .rows = vctrs::list_of(1:1000)) ) expect_equal(group_indices(df), integer()) }) # group_size -------------------------------------------------------------- test_that("ungrouped data has 1 group, with group size = nrow()", { df <- tibble(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) expect_equal(n_groups(df), 1L) expect_equal(group_size(df), 30) }) test_that("rowwise data has one group for each group", { rw <- rowwise(mtcars) expect_equal(n_groups(rw), 32) expect_equal(group_size(rw), rep(1, 32)) }) test_that("group_size correct for grouped data", { df <- tibble(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) %>% group_by(x) expect_equal(n_groups(df), 3L) expect_equal(group_size(df), rep(10, 3)) }) dplyr/tests/testthat/test-union-all.R0000644000176200001440000000040514121112104017366 0ustar liggesuserstest_that("union all on vectors 
concatenates", { expect_equal(union_all(1:3, 4:6), 1:6) }) test_that("union all on data frames calls bind rows", { df1 <- tibble(x = 1:2) df2 <- tibble(y = 1:2) expect_equal(union_all(df1, df2), bind_rows(df1, df2)) }) dplyr/tests/testthat/test-relocate.R0000644000176200001440000000425414121112104017274 0ustar liggesuserstest_that(".before and .after relocate individual cols", { df <- tibble(x = 1, y = 2) expect_named(relocate(df, x, .after = y), c("y", "x")) expect_named(relocate(df, y, .before = x), c("y", "x")) }) test_that("can move blocks of variables", { df <- tibble(x = 1, a = "a", y = 2, b = "a") expect_named(relocate(df, where(is.character)), c("a", "b", "x", "y")) expect_named(relocate(df, where(is.character), .after = where(is.numeric)), c("x", "y", "a", "b")) }) test_that("don't lose non-contiguous variables", { df <- tibble(a = 1, b = 1, c = 1, d = 1, e = 1) expect_named(relocate(df, b, .after = c(a, c, e)), c("a", "c", "d", "e", "b")) expect_named(relocate(df, e, .before = c(b, d)), c("a", "e", "b", "c", "d")) }) test_that("no .before/.after moves to front", { df <- tibble(x = 1, y = 2) expect_named(relocate(df, y), c("y", "x")) }) test_that("can only supply one of .before and .after", { df <- tibble(x = 1) expect_error(relocate(df, .before = 1, .after = 1), "only one") }) test_that("before and after are defused with context", { local_fn <- identity expect_identical( names(relocate(mtcars, 3, .before = local_fn(5))), names(relocate(mtcars, 3, .before = 5)) ) expect_identical( names(relocate(mtcars, 3, .after = local_fn(5))), names(relocate(mtcars, 3, .after = 5)) ) }) test_that("relocate() respects order specified by ... 
(#5328)", { df <- tibble(a = 1, x = 1, b = 1, z = 1, y = 1) expect_equal( names(relocate(df, x, y, z, .before = x)), c("a", "x", "y", "z", "b") ) expect_equal( names(relocate(df, x, y, z, .after = last_col())), c("a", "b", "x", "y", "z") ) expect_equal( names(relocate(df, x, a, z)), c("x", "a", "z", "b", "y") ) }) test_that("relocate() can rename (#5569)", { df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a") expect_equal( relocate(df, ffff = f), tibble(ffff = "a", a = 1, b = 1, c = 1, d = "a", e = "a") ) expect_equal( relocate(df, ffff = f, .before = c), tibble(a = 1, b = 1, ffff = "a", c = 1, d = "a", e = "a") ) expect_equal( relocate(df, ffff = f, .after = c), tibble(a = 1, b = 1, c = 1, ffff = "a", d = "a", e = "a") ) }) dplyr/tests/testthat/test-all-equal.r0000644000176200001440000001017114151641776017437 0ustar liggesusers# A data frame with all major types df_all <- data.frame( a = c(1, 2.5), b = 1:2, c = c(T, F), d = c("a", "b"), e = factor(c("a", "b")), f = Sys.Date() + 1:2, g = Sys.time() + 1:2, stringsAsFactors = FALSE ) test_that("data frames equal to themselves", { expect_true(all_equal(mtcars, mtcars)) expect_true(all_equal(iris, iris)) expect_true(all_equal(df_all, df_all)) }) test_that("data frames not equal if missing row", { expect_snapshot({ all_equal(mtcars, mtcars[-1, ]) all_equal(iris, iris[-1, ]) all_equal(df_all, df_all[-1, ]) }) }) test_that("data frames not equal if missing col", { expect_snapshot({ all_equal(mtcars, mtcars[, -1]) all_equal(iris, iris[, -1]) all_equal(df_all, df_all[, -1]) }) }) test_that("factors equal only if levels equal", { df1 <- tibble(x = factor(c("a", "b"))) df2 <- tibble(x = factor(c("a", "d"))) expect_snapshot({ all_equal(df1, df2) all_equal(df2, df1) }) }) test_that("factor comparison requires strict equality of levels (#2440)", { df1 <- tibble(x = factor("a")) df2 <- tibble(x = factor("a", levels = c("a", "b"))) expect_true(all_equal(df1, df2, convert = TRUE)) expect_true(all_equal(df2, df1, convert = 
TRUE)) expect_snapshot({ all_equal(df1, df2) all_equal(df2, df1) }) }) test_that("all.equal.data.frame handles data.frames with NULL names", { x <- data.frame(LETTERS[1:3], rnorm(3)) names(x) <- NULL suppressMessages( expect_true(all_equal(x, x)) ) }) test_that("data frame equality test with ignore_row_order=TRUE detects difference in number of rows. #1065", { DF1 <- tibble(a = 1:4, b = letters[1:4]) DF2 <- tibble(a = c(1:4, 4L), b = letters[c(1:4, 4L)]) expect_false(isTRUE(all_equal(DF1, DF2, ignore_row_order = TRUE))) DF1 <- tibble(a = c(1:4, 2L), b = letters[c(1:4, 2L)]) DF2 <- tibble(a = c(1:4, 4L), b = letters[c(1:4, 4L)]) expect_false(isTRUE(all_equal(DF1, DF2, ignore_row_order = TRUE))) }) test_that("all.equal handles NA_character_ correctly. #1095", { d1 <- tibble(x = c(NA_character_)) expect_true(all_equal(d1, d1)) d2 <- tibble(x = c(NA_character_, "foo", "bar")) expect_true(all_equal(d2, d2)) }) test_that("handle Date columns of different types, integer and numeric (#1204)", { a <- data.frame(date = as.Date("2015-06-07")) b <- data.frame(date = structure(as.integer(a$date), class = "Date")) expect_true(all_equal(a, b)) }) test_that("equality test fails when convert is FALSE and types don't match (#1484)", { df1 <- tibble(x = "a") df2 <- tibble(x = factor("a")) expect_true(all_equal(df1, df2, convert = TRUE)) expect_snapshot({ all_equal(df1, df2, convert = FALSE) }) }) test_that("equality handles data frames with 0 rows (#1506)", { df0 <- tibble(x = numeric(0), y = character(0)) expect_equal(df0, df0) }) test_that("equality handles data frames with 0 columns (#1506)", { df0 <- tibble(a = 1:10)[-1] expect_equal(df0, df0) }) test_that("equality handle raw columns", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_true(all_equal(df, df)) }) test_that("equality returns a message for convert = TRUE", { df1 <- tibble(x = 1:3) df2 <- tibble(x = as.character(1:3)) expect_snapshot({ all_equal(df1, df2) all_equal(df1, df2, convert = TRUE) }) }) test_that("numeric and 
integer can be compared if convert = TRUE", { df1 <- tibble(x = 1:3) df2 <- tibble(x = as.numeric(1:3)) expect_true(all_equal(df1, df2, convert = TRUE)) expect_snapshot({ all_equal(df1, df2) }) }) test_that("returns vector for more than one difference (#1819)", { expect_snapshot({ all_equal(tibble(a = 1, b = 2), tibble(a = 1L, b = 2L)) }) }) test_that("ignore column order", { expect_snapshot({ all_equal(tibble(a = 1, b = 2), tibble(b = 2, a = 1), ignore_col_order = FALSE) all_equal(tibble(a = 1, b = 2), tibble(a = 1), ignore_col_order = FALSE) }) }) # Errors ------------------------------------------------------------------ test_that("count() give meaningful errors", { expect_snapshot({ (expect_error(union(tibble(a = 1), tibble(a = "1")))) (expect_error(union(tibble(a = 1, b = 2), tibble(a = "1", b = "2")))) }) }) dplyr/tests/testthat/test-colwise-funs.R0000644000176200001440000000037214121112104020111 0ustar liggesuserstest_that("as_fun_list() uses rlang auto-naming", { nms <- names(as_fun_list(list(min, max), env())) # Just check they are labellised as literals enclosed in brackets to # insulate from upstream changes expect_true(all(grepl("^<", nms))) }) dplyr/tests/testthat/test-top-n.R0000644000176200001440000000154514121112104016533 0ustar liggesuserstest_that("top_n returns n rows", { test_df <- data.frame(x = 1:10, y = 11:20) top_four <- test_df %>% top_n(4, y) expect_equal(dim(top_four), c(4, 2)) }) test_that("top_n() handles missing `wt`", { df <- data.frame(x = c(10, 4, 1, 6, 3, 1, 1)) expect_message( regexp = "Selecting by x", expect_identical(top_n(df, 2)$x, c(10, 6)) ) }) test_that("top_n() handles calls", { expect_identical(top_n(mtcars, 2, -disp), top_n(mtcars, -2, disp)) }) test_that("top_n() quotes n", { expect_snapshot(res1 <- top_n(mtcars, n() * .5)) expect_snapshot(res2 <- top_n(mtcars, 16)) expect_identical(res1, res2) }) test_that("top_frac() is a shorthand for top_n(n()*)", { expect_identical(top_n(mtcars, n() * .5, disp), top_frac(mtcars, 
.5, disp)) expect_snapshot(res1 <- top_n(mtcars, n() * .5)) expect_snapshot(res2 <- top_frac(mtcars, .5)) }) dplyr/tests/testthat/test-funs.R0000644000176200001440000000141414121112104016444 0ustar liggesusers test_that("returns NA if any argument is NA", { expect_equal(between(1, 1, NA), NA) expect_equal(between(1, NA, 1), NA) expect_equal(between(NA, 1, 1), NA) }) test_that("clearly errors that not vectorised", { expect_error(between(1, 1, 1:2), "right") expect_error(between(1, 1:2, 1), "left") }) test_that("compatible with base R", { x <- runif(1e3) expect_equal(between(x, 0.25, 0.5), x >= 0.25 & x <= 0.5) }) test_that("warns when called on S3 object", { expect_warning(between(structure(c(1, 5), class = "foo"), 1, 3), "numeric vector with S3 class") expect_warning(between(factor("x"), 1, 2), "S3 class") }) test_that("unless it's a date or date time", { expect_warning(between(Sys.Date(), 1, 3), NA) expect_warning(between(Sys.time(), 1, 3), NA) }) dplyr/tests/testthat/test-recode.R0000644000176200001440000001201214151641776016757 0ustar liggesuserstest_that("positional substitution works", { expect_equal(recode(1:2, "a", "b"), c("a", "b")) }) test_that("names override positions", { expect_equal(recode(1:2, `2` = "b", `1` = "a"), c("a", "b")) }) test_that("named substitution works", { x1 <- letters[1:3] x2 <- factor(x1) expect_equal(recode(x1, a = "apple", .default = NA_character_), c("apple", NA, NA)) expect_equal(recode(x2, a = "apple", .default = NA_character_), factor(c("apple", NA, NA))) }) test_that("missing values replaced by missing argument", { expect_equal(recode(c(1, NA), "a"), c("a", NA)) expect_equal(recode(c(1, NA), "a", .missing = "b"), c("a", "b")) expect_equal(recode(c(letters[1:3], NA), .missing = "A"), c("a", "b", "c", "A")) }) test_that("unmatched value replaced by default argument", { expect_warning(expect_equal(recode(c(1, 2), "a"), c("a", NA))) expect_equal(recode(c(1, 2), "a", .default = "b"), c("a", "b")) expect_equal(recode(letters[1:3], 
.default = "A"), c("A", "A", "A")) }) test_that("missing and default place nicely together", { expect_equal( recode(c(1, 2, NA), "a", .default = "b", .missing = "c"), c("a", "b", "c") ) }) test_that("can give name x", { expect_equal(recode("x", x = "a"), "a") }) test_that(".default works when not all values are named", { x <- rep(1:3, 3) expect_equal(recode(x, `3` = 10L, .default = x), rep(c(1L, 2L, 10L), 3)) }) test_that(".default is aliased to .x when missing and compatible", { x <- letters[1:3] expect_equal(recode(x, a = "A"), c("A", "b", "c")) n <- 1:3 expect_equal(recode(n, `1` = 10L), c(10L, 2L, 3L)) }) test_that(".default is not aliased to .x when missing and not compatible", { x <- letters[1:3] expect_warning(expect_equal(recode(x, a = 1), c(1L, NA, NA))) n <- 1:3 expect_warning(expect_equal(recode(n, `1` = "a"), c("a", NA, NA))) }) test_that("conversion of unreplaced values to NA gives warning", { expect_warning(recode(1:3, `1` = "a"), "treated as NA") expect_warning(recode_factor(letters[1:3], b = 1, c = 2)) }) test_that(".dot argument works correctly (PR #2110)", { x1 <- letters[1:3] x2 <- 1:3 x3 <- factor(x1) expect_equal( recode(x1, a = "apple", b = "banana", .default = NA_character_), recode(x1, .default = NA_character_, !!!list(a = "apple", b = "banana")) ) expect_equal( recode(x1, a = "apple", b = "banana", .default = NA_character_), recode(x1, a = "apple", .default = NA_character_, !!!list(b = "banana")) ) expect_equal( recode(x2, "1" = 4, "2" = 5, .default = NA_real_), recode(x2, .default = NA_real_, !!!list("1" = 4, "2" = 5)) ) expect_equal( recode(x2, "1" = 4, "2" = 5, .default = NA_real_), recode(x2, "1" = 4, .default = NA_real_, !!!list("2" = 5)) ) expect_equal( recode_factor(x3, a = "apple", b = "banana", .default = NA_character_), recode_factor(x3, .default = NA_character_, !!!list(a = "apple", b = "banana")) ) }) # factor ------------------------------------------------------------------ test_that("default .default works with factors", { 
expect_equal(recode(factor(letters[1:3]), a = "A"), factor(c("A", "b", "c"))) }) test_that("can recode factor to double", { f <- factor(letters[1:3]) expect_equal(recode(f, a = 1, b = 2, c = 3), c(1, 2, 3)) expect_equal(recode(f, a = 1, b = 2), c(1, 2, NA)) expect_equal(recode(f, a = 1, b = 2, .default = 99), c(1, 2, 99)) }) test_that("recode_factor() handles .missing and .default levels", { x <- c(1:3, NA) expect_warning( expect_equal( recode_factor(x, `1` = "z", `2` = "y"), factor(c("z", "y", NA, NA), levels = c("z", "y")) ) ) expect_equal( recode_factor(x, `1` = "z", `2` = "y", .default = "D"), factor(c("z", "y", "D", NA), levels = c("z", "y", "D")) ) expect_equal( recode_factor(x, `1` = "z", `2` = "y", .default = "D", .missing = "M"), factor(c("z", "y", "D", "M"), c("z", "y", "D", "M")) ) }) test_that("recode_factor() handles vector .default", { expected <- factor(c("a", "z", "y"), levels = c("z", "y", "a")) x1 <- letters[1:3] x2 <- factor(x1) expect_equal(recode_factor(x1, b = "z", c = "y"), expected) expect_equal(recode_factor(x2, b = "z", c = "y"), expected) expect_equal(recode_factor(x1, b = "z", c = "y", .default = x1), expected) expect_equal(recode_factor(x2, b = "z", c = "y", .default = x1), expected) }) test_that("can recode factor with redundant levels", { expect_equal( recode(factor(letters[1:4]), d = "c", b = "a"), factor(c("a", "a", "c", "c"), levels = c("a", "c")) ) expect_equal( recode_factor(letters[1:4], d = "c", b = "a"), factor(c("a", "a", "c", "c"), levels = c("c", "a")) ) }) # Errors -------------------------------------------- test_that("recode() gives meaningful error messages", { expect_snapshot({ (expect_error(recode(factor("a"), a = 5, .missing = 10))) (expect_error(recode("a", b = 5, "c"))) (expect_error(recode(factor("a"), b = 5, "c"))) # no replacement (expect_error(recode(1:5))) (expect_error(recode("a"))) (expect_error(recode(factor("a")))) }) }) 
dplyr/tests/testthat/test-select-helpers.R0000644000176200001440000000107114121112104020407 0ustar liggesuserstest_that("group_cols() selects grouping variables", { df <- tibble(x = 1:3, y = 1:3) gf <- group_by(df, x) expect_equal(df %>% select(group_cols()), df[integer()]) expect_message( expect_equal(gf %>% select(group_cols()), gf["x"]), NA ) }) test_that("group_cols(vars) is deprecated", { expect_warning(out <- group_cols("a"), "deprecated") expect_equal(out, integer()) }) test_that("group_cols() finds groups in scoped helpers", { gf <- group_by(tibble(x = 1, y = 2), x) out <- select_at(gf, vars(group_cols())) expect_named(out, "x") }) dplyr/tests/testthat/test-colwise-mutate.R0000644000176200001440000002647014151641776020475 0ustar liggesuserstest_that("can use character vectors or bare functions", { df <- data.frame(x = 1:3) expect_equal(summarise_all(df, "mean"), data.frame(x = 2)) expect_equal(summarise_all(df, mean), data.frame(x = 2)) expect_equal(mutate_all(df, list(x = "mean")), data.frame(x = rep(2, 3))) expect_equal(mutate_all(df, list(x = mean)), data.frame(x = rep(2, 3))) }) test_that("default names are smallest unique set", { df <- data.frame(x = 1:3, y = 1:3) expect_named(summarise_at(df, vars(x:y), list(mean)), c("x", "y")) expect_named(summarise_at(df, vars(x), list(mean = mean, sd = sd)), c("mean", "sd")) expect_named(summarise_at(df, vars(x:y), list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) }) test_that("named arguments force complete names", { df <- data.frame(x = 1:3, y = 1:3) expect_named(summarise_at(df, vars(x:y), list(mean = mean)), c("x_mean", "y_mean")) expect_named(summarise_at(df, vars(x = x), list(mean = mean, sd = sd)), c("x_mean", "x_sd")) }) expect_classes <- function(tbl, expected) { classes <- unname(map_chr(tbl, class)) classes <- paste0(substring(classes, 0, 1), collapse = "") expect_equal(classes, expected) } test_that("can select colwise", { columns <- iris %>% mutate_at(NULL, as.character) 
expect_classes(columns, "nnnnf") columns <- iris %>% mutate_at(vars(starts_with("Petal")), as.character) expect_classes(columns, "nnccf") numeric <- iris %>% mutate_at(c(1, 3), as.character) expect_classes(numeric, "cncnf") character <- iris %>% mutate_at("Species", as.character) expect_classes(character, "nnnnc") }) test_that("can probe colwise", { predicate <- iris %>% mutate_if(is.factor, as.character) expect_classes(predicate, "nnnnc") logical <- iris %>% mutate_if(c(TRUE, FALSE, TRUE, TRUE, FALSE), as.character) expect_classes(logical, "cnccf") }) test_that("non syntactic colnames work", { df <- tibble(`x 1` = 1:3) expect_identical(summarise_at(df, "x 1", sum)[[1]], 6L) expect_identical(summarise_if(df, is.numeric, sum)[[1]], 6L) expect_identical(summarise_all(df, sum)[[1]], 6L) expect_identical(mutate_all(df, `*`, 2)[[1]], (1:3) * 2) }) test_that("empty selection does not select everything (#2009, #1989)", { expect_equal( tibble::remove_rownames(mtcars), tibble::remove_rownames(mutate_if(mtcars, is.factor, as.character)) ) }) test_that("predicate can be quoted", { expected <- mutate_if(mtcars, is_integerish, mean) expect_identical(mutate_if(mtcars, "is_integerish", mean), expected) expect_identical(mutate_if(mtcars, ~ is_integerish(.x), mean), expected) }) test_that("transmute verbs do not retain original variables", { expect_named(transmute_all(tibble(x = 1:3, y = 1:3), list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) expect_named(transmute_if(tibble(x = 1:3, y = 1:3), is_integer, list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) expect_named(transmute_at(tibble(x = 1:3, y = 1:3), vars(x:y), list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) }) test_that("can rename with vars() (#2594)", { expect_identical( mutate_at(tibble(x = 1:3), vars(y = x), mean), tibble(x = 1:3, y = c(2, 2, 2)) ) }) test_that("selection works with grouped data frames (#2624)", { gdf <- group_by(iris, Species) expect_snapshot(out <- 
mutate_if(gdf, is.factor, as.character)) expect_identical(out, gdf) }) test_that("at selection works even if not all ops are named (#2634)", { df <- tibble(x = 1, y = 2) expect_identical(mutate_at(df, vars(z = x, y), list(~. + 1)), tibble(x = 1, y = 3, z = 2)) }) test_that("can use a purrr-style lambda", { expect_identical(summarise_at(mtcars, vars(1:2), ~ mean(.x)), summarise(mtcars, mpg = mean(mpg), cyl = mean(cyl))) }) test_that("mutate and transmute variants does not mutate grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) res <- mutate(tbl, gr2 = sqrt(gr2), x = sqrt(x)) expect_message(expect_identical(mutate_all(tbl, sqrt), res), "ignored") expect_message(expect_identical(transmute_all(tbl, sqrt), res), "ignored") expect_message(expect_identical(mutate_if(tbl, is.integer, sqrt), res), "ignored") expect_message(expect_identical(transmute_if(tbl, is.integer, sqrt), res), "ignored") expect_identical(transmute_at(tbl, vars(-group_cols()), sqrt), res) expect_identical(mutate_at(tbl, vars(-group_cols()), sqrt), res) }) test_that("summarise variants does not summarise grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) res <- summarise(tbl, gr2 = mean(gr2), x = mean(x)) expect_identical(summarise_all(tbl, mean), res) expect_identical(summarise_if(tbl, is.integer, mean), res) }) test_that("summarise_at removes grouping variables (#3613)", { d <- tibble( x = 1:2, y = 3:4, g = 1:2) %>% group_by(g) res <- d %>% group_by(g) %>% summarise_at(-1, mean) expect_equal(names(res), c("g", "y")) }) test_that("group_by_(at,all) handle utf-8 names (#3829)", { local_non_utf8_encoding() name <- get_native_lang_string() tbl <- tibble(a = 1) %>% setNames(name) res <- group_by_all(tbl) %>% groups() expect_equal(res[[1]], sym(name)) res <- group_by_at(tbl, name) %>% groups() expect_equal(res[[1]], sym(name)) }) test_that("*_(all,at) handle utf-8 
names (#2967)", { local_non_utf8_encoding() name <- get_native_lang_string() tbl <- tibble(a = 1) %>% setNames(name) res <- tbl %>% mutate_all(list(as.character)) %>% names() expect_equal(res, name) res <- tbl %>% mutate_at(name, list(as.character)) %>% names() expect_equal(res, name) res <- tbl %>% summarise_all(list(as.character)) %>% names() expect_equal(res, name) res <- tbl %>% summarise_at(name, list(as.character)) %>% names() expect_equal(res, name) res <- select_at(tbl, name) %>% names() expect_equal(res, name) }) test_that("summarise_at with multiple columns AND unnamed functions works (#4119)", { res <- storms %>% summarise_at(vars(wind, pressure), list(mean, median)) expect_equal(ncol(res), 4L) expect_equal(names(res), c("wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2")) res <- storms %>% summarise_at(vars(wind, pressure), list(n = length, mean, median)) expect_equal(ncol(res), 6L) expect_equal(names(res), c("wind_n", "pressure_n", "wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2")) }) test_that("mutate_at with multiple columns AND unnamed functions works (#4119)", { res <- storms %>% mutate_at(vars(wind, pressure), list(mean, median)) expect_equal(ncol(res), ncol(storms) + 4L) expect_equal( names(res), c(names(storms), c("wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2")) ) }) test_that("colwise mutate have .data in scope of rlang lambdas (#4183)", { results <- list( iris %>% mutate_if(is.numeric, ~ . / iris$Petal.Width), iris %>% mutate_if(is.numeric, ~ . / Petal.Width), iris %>% mutate_if(is.numeric, ~ . / .data$Petal.Width), iris %>% mutate_if(is.numeric, list(~ . / iris$Petal.Width )), iris %>% mutate_if(is.numeric, list(~ . / Petal.Width )), iris %>% mutate_if(is.numeric, list(~ . 
/ .data$Petal.Width)), iris %>% mutate_if(is.numeric, ~ .x / iris$Petal.Width), iris %>% mutate_if(is.numeric, ~ .x / Petal.Width), iris %>% mutate_if(is.numeric, ~ .x / .data$Petal.Width), iris %>% mutate_if(is.numeric, list(~ .x / iris$Petal.Width )), iris %>% mutate_if(is.numeric, list(~ .x / Petal.Width )), iris %>% mutate_if(is.numeric, list(~ .x / .data$Petal.Width)) ) for(i in 2:12) { expect_equal(results[[1]], results[[i]]) } }) test_that("can choose the name of vars with multiple funs (#4180)", { expect_identical( mtcars %>% group_by(cyl) %>% summarise_at(vars(DISP = disp), list(mean = mean, median = median)), mtcars %>% group_by(cyl) %>% summarise(DISP_mean = mean(disp), DISP_median = median(disp)) ) }) test_that("summarise_at() unquotes in lambda (#4287)", { df <- tibble::tibble(year = seq(2015, 2050, 5), P = 5.0 + 2.5 * year) year <- 2037 expect_equal( summarise_at(df, vars(-year), ~approx(x = year, y = ., xout = !!year)$y), summarise(df, P = approx(x = year, y = P, xout = !!year)$y) ) }) test_that("mutate_at() unquotes in lambdas (#4199)", { df <- tibble(a = 1:10, b = runif(1:10), c = letters[1:10]) varname <- "a" symname <- rlang::sym(varname) quoname <- enquo(symname) expect_identical( df %>% mutate(b = mean(!!quoname)), df %>% mutate_at(vars(matches("b")), list(~mean(!!quoname))) ) }) test_that("summarise_at() can refer to local variables and columns (#4304)", { # using local here in case someone wants to run the content of the test # as opposed to the test_that() call res <- local({ value <- 10 expect_identical( iris %>% summarise_at("Sepal.Length", ~ sum(. 
/ value)), iris %>% summarise(Sepal.Length = sum(Sepal.Length / value)) ) }) }) test_that("colwise mutate handles formulas with constants (#4374)", { expect_identical( tibble(x = 12) %>% mutate_all(~ 42), tibble(x = 42) ) expect_identical( tibble(x = 12) %>% mutate_at("x", ~ 42), tibble(x = 42) ) }) test_that("colwise mutate handle named chr vectors", { res <- tibble(x = 1:10) %>% mutate_at(c(y = "x"), mean) expect_identical(res, tibble(x = 1:10, y = 5.5)) }) test_that("colwise verbs soft deprecate quosures (#4330)", { expect_warning(mutate_at(mtcars, vars(mpg), quo(mean(.))), "quosure") expect_warning(summarise_at(mtcars, vars(mpg), quo(mean(.))), "quosure") }) test_that("rlang lambda inherit from the data mask (#3843)", { res <- iris %>% mutate_at( vars(starts_with("Petal")), ~ ifelse(Species == "setosa" & . < 1.5, NA, .) ) expected <- iris %>% mutate( Petal.Length = ifelse(Species == "setosa" & Petal.Length < 1.5, NA, Petal.Length), Petal.Width = ifelse(Species == "setosa" & Petal.Width < 1.5, NA, Petal.Width) ) expect_identical(res, expected) res <- iris %>% group_by(Species) %>% mutate_at( vars(starts_with("Petal")), ~ ifelse(Species == "setosa" & . < 1.5, NA, .) 
) expected <- iris %>% group_by(Species) %>% mutate( Petal.Length = ifelse(Species == "setosa" & Petal.Length < 1.5, NA, Petal.Length), Petal.Width = ifelse(Species == "setosa" & Petal.Width < 1.5, NA, Petal.Width) ) expect_identical(res, expected) }) test_that("_if isn't tripped up by columns named 'i' (#5330)", { test_df <- tibble(i = c("a", "b"), j = c(1, 2)) result_df <- test_df %>% mutate_if(is.character, as.factor) expect_equal(result_df$i, as.factor(test_df$i)) expect_equal(result_df$j, test_df$j) }) # Errors -------------------------------------------- test_that("colwise mutate gives meaningful error messages", { expect_snapshot({ # column not found (expect_error( mutate_at(tibble(), "test", ~ 1) )) # not summarising grouping variables tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) tbl <- group_by(tbl, gr1) (expect_error( summarise_at(tbl, vars(gr1), mean) )) # improper additional arguments (expect_error( mutate_all(mtcars, length, 0, 0) )) (expect_error( mutate_all(mtcars, mean, na.rm = TRUE, na.rm = TRUE) )) }) }) dplyr/tests/testthat/test-join-cols.R0000644000176200001440000000657414151641776017433 0ustar liggesuserstest_that("automatically finds common variables", { expect_message(vars <- join_cols(c("x", "y"), c("x", "z"))) expect_named(vars$x$key, "x") expect_named(vars$y$key, "x") }) test_that("key vars are found", { vars <- join_cols(c("x", "y"), c("x", "z"), by = "x") expect_equal(vars$x$key, c(x = 1L)) expect_equal(vars$y$key, c(x = 1L)) vars <- join_cols(c("a", "x", "b"), c("x", "a"), by = "x") expect_equal(vars$x$key, c(x = 2L)) expect_equal(vars$y$key, c(x = 1L)) vars <- join_cols(c("x", "y"), c("a", "x", "z"), by = c("y" = "z")) expect_equal(vars$x$key, c(y = 2L)) expect_equal(vars$y$key, c(y = 3L)) }) test_that("y key matches order and names of x key", { vars <- join_cols(c("x", "y", "z"), c("c", "b", "a"), by = c("x" = "a", "y" = "b")) expect_equal(vars$x$key, c(x = 1L, y = 2L)) expect_equal(vars$y$key, c(x = 3L, y = 
2L)) }) test_that("duplicate column names are given suffixes", { vars <- join_cols(c("x", "y"), c("x", "y"), by = "x") expect_equal(vars$x$out, c("x" = 1, "y.x" = 2)) expect_equal(vars$y$out, c("y.y" = 2)) # including join vars when keep = TRUE vars <- join_cols(c("x", "y"), c("x", "y"), by = "x", keep = TRUE) expect_equal(vars$x$out, c("x.x" = 1, "y.x" = 2)) expect_equal(vars$y$out, c("x.y" = 1, "y.y" = 2)) # suffixes don't create duplicates vars <- join_cols(c("x", "y", "y.x"), c("x", "y"), by = "x") expect_equal(vars$x$out, c("x" = 1, "y.x" = 2, "y.x.x" = 3)) expect_equal(vars$y$out, c("y.y" = 2)) # but not when they're the join vars vars <- join_cols(c("A", "A.x"), c("B", "A.x", "A"), by = "A.x") expect_named(vars$x$out, c("A.x.x", "A.x")) expect_named(vars$y$out, c("B", "A.y")) # or when no suffix is requested vars <- join_cols(c("x", "y"), c("x", "y"), by = "x", suffix = c("", ".y")) expect_equal(vars$x$out, c("x" = 1, "y" = 2)) expect_equal(vars$y$out, c("y.y" = 2)) }) test_that("NA names are preserved", { vars <- join_cols(c("x", NA), c("x", "z"), by = "x") expect_named(vars$x$out, c("x", NA)) vars <- join_cols(c("x", NA), c("x", NA), by = "x") expect_named(vars$x$out, c("x", "NA.x")) expect_named(vars$y$out, "NA.y") }) test_that("by columns omited from y" , { vars <- join_cols(c("x", "y"), c("x", "y"), by = c("x" = "y")) expect_equal(vars$x$out, c("x" = 1, "y" = 2)) expect_equal(vars$y$out, c("x.y" = 1)) # unless specifically requested vars <- join_cols(c("x", "y"), c("x", "y"), by = c("x" = "y"), keep = TRUE) expect_equal(vars$x$out, c("x.x" = 1, "y.x" = 2)) expect_equal(vars$y$out, c("x.y" = 1, "y.y" = 2)) }) test_that("emits useful messages", { expect_snapshot({ # names (expect_error( join_cols(c("x", "y"), c("y", "y")) )) (expect_error( join_cols(c("y", "y"), c("x", "y")) )) # common by xy <- c("x", "y") vars <- join_cols(xy, xy) # by errors (expect_error( join_cols(xy, c("a", "b")) )) (expect_error( join_cols(xy, xy, by = FALSE) )) (expect_error( 
join_cols(xy, xy, by = list(1, 2)) )) (expect_error( join_cols(xy, xy, by = c("x", "x")) )) (expect_error( join_cols(xy, xy, by = c("x", NA)) )) (expect_error( join_cols(xy, xy, by = c("aaa", "bbb")) )) # suffixes (expect_error( join_cols(xy, xy, by = "x", suffix = "x") )) (expect_error( join_cols(xy, xy, by = "x", suffix = c("", NA)) )) }) }) dplyr/tests/testthat/helper-encoding.R0000644000176200001440000000317714121112104017567 0ustar liggesusersget_lang_strings <- function() { lang_strings <- c( de = "Gl\u00fcck", cn = "\u5e78\u798f", ru = "\u0441\u0447\u0430\u0441\u0442\u044c\u0435", ko = "\ud589\ubcf5" ) native_lang_strings <- enc2native(lang_strings) same <- (lang_strings == native_lang_strings) list( same = lang_strings[same], different = lang_strings[!same] ) } get_native_lang_string <- function() { lang_strings <- get_lang_strings() if (length(lang_strings$same) == 0) testthat::skip("No native language string available") lang_strings$same[[1L]] } get_alien_lang_string <- function() { lang_strings <- get_lang_strings() if (length(lang_strings$different) == 0) testthat::skip("No alien language string available") lang_strings$different[[1L]] } try_encoding <- function(enc) { orig_encoding <- Sys.getlocale("LC_CTYPE") on.exit(Sys.setlocale("LC_CTYPE", orig_encoding), add = TRUE) tryCatch({ Sys.setlocale("LC_CTYPE", enc) TRUE }, warning = function(w) FALSE, error = function(e) FALSE ) } non_utf8_encoding <- function(enc = NULL) { if (!l10n_info()$`UTF-8`) { return(Sys.getlocale("LC_CTYPE")) } enc <- enc %||% c( "en_US.ISO8859-1", "en_US.ISO8859-15", "fr_CH.ISO8859-1", "fr_CH.ISO8859-15" ) available <- vapply(enc, try_encoding, logical(1)) if (any(available)) { enc[available][1] } else { NULL } } local_non_utf8_encoding <- function(enc = NULL, env = parent.frame()) { non_utf8 <- non_utf8_encoding(enc) if (is.null(non_utf8)) { skip("Can't set a non-UTF-8 encoding") } else { withr::local_locale(c(LC_CTYPE = non_utf8), .local_envir = env) } } 
dplyr/tests/testthat/test-lead-lag.R0000644000176200001440000000530614151641776017174 0ustar liggesuserstest_that("lead and lag preserve factors", { x <- factor(c("a", "b", "c")) expect_equal(levels(lead(x)), c("a", "b", "c")) expect_equal(levels(lag(x)), c("a", "b", "c")) }) test_that("lead and lag preserves dates and times", { x <- as.Date("2013-01-01") + 1:3 y <- as.POSIXct(x) expect_s3_class(lead(x), "Date") expect_s3_class(lag(x), "Date") expect_s3_class(lead(y), "POSIXct") expect_s3_class(lag(y), "POSIXct") }) test_that("#925 is fixed", { data <- tibble( name = c("Rob", "Pete", "Rob", "John", "Rob", "Pete", "John", "Pete", "John", "Pete", "Rob", "Rob"), time = c(3, 2, 5, 3, 2, 3, 2, 4, 1, 1, 4, 1) ) res <- data %>% group_by(name) %>% mutate(lag_time = lag(time)) expect_equal( res$lag_time[res$name == "Rob"], c(NA, head(data$time[data$name == "Rob"], -1)) ) expect_equal( res$lag_time[res$name == "Pete"], c(NA, head(data$time[data$name == "Pete"], -1)) ) expect_equal( res$lag_time[res$name == "John"], c(NA, head(data$time[data$name == "John"], -1)) ) }) test_that("#937 is fixed", { df <- tibble( name = rep(c("Al", "Jen"), 3), score = rep(c(100, 80, 60), 2) ) res <- df %>% group_by(name) %>% mutate(next.score = lead(score)) expect_equal( res$next.score[res$name == "Al"], c(tail(df$score[df$name == "Al"], -1), NA) ) expect_equal( res$next.score[res$name == "Jen"], c(tail(df$score[df$name == "Jen"], -1), NA) ) }) test_that("lead() and lag() work for matrices (#5028)", { m <- matrix(1:6, ncol = 2) expect_equal(lag(m, 1), matrix(c(NA_integer_, 1L, 2L, NA_integer_, 4L, 5L), ncol = 2)) expect_equal(lag(m, 1, default = NA), matrix(c(NA_integer_, 1L, 2L, NA_integer_, 4L, 5L), ncol= 2)) expect_equal(lead(m, 1), matrix(c(2L, 3L, NA_integer_, 5L, 6L, NA_integer_), ncol = 2)) expect_equal(lead(m, 1, default = NA), matrix(c(2L, 3L, NA_integer_, 5L, 6L, NA_integer_), ncol = 2)) }) test_that("lead() and lag() checks size of default (#5641)", { expect_error(lead(1:10, default = 
integer())) expect_error(lag(1:10, default = integer())) }) # Errors ------------------------------------------------------------------ test_that("lead() / lag() give meaningful errors", { expect_snapshot({ "# complicance of n argument" (expect_error(lead(letters, -1))) (expect_error(lead(letters, "1"))) (expect_error(lag(letters, -1))) (expect_error(lag(letters, "1"))) "# ts" (expect_error(lag(ts(1:10)))) "# incompatible default" (expect_error(lag(c("1", "2", "3"), default = FALSE))) (expect_error(lead(c("1", "2", "3"), default = FALSE))) (expect_error(lag(c("1", "2", "3"), default = character()))) (expect_error(lead(c("1", "2", "3"), default = character()))) }) }) dplyr/tests/testthat/test-order-by.R0000644000176200001440000000024514151641776017246 0ustar liggesuserstest_that("order_by() gives useful error messages", { expect_snapshot({ (expect_error(order_by(mtcars, 10))) (expect_error(order_by(mtcars, cyl))) }) }) dplyr/tests/testthat/test-sample.R0000644000176200001440000000677314151641776017020 0ustar liggesusers# Basic behaviour ------------------------------------------------------------- test_that("sample preserves class", { expect_s3_class(sample_n(mtcars, 1), "data.frame") expect_s3_class(sample_n(as_tibble(mtcars), 1), "tbl_df") expect_s3_class(sample_frac(mtcars, 1), "data.frame") expect_s3_class(sample_frac(as_tibble(mtcars), 1), "tbl_df") }) # Ungrouped -------------------------------------------------------------------- test_that("sample respects weight", { df <- data.frame(x = 1:2, y = c(0, 1)) expect_equal(sample_n(df, 1, weight = y)$x, 2) expect_equal(sample_frac(df, 0.5, weight = y)$x, 2) }) # Grouped ---------------------------------------------------------------------- test_that("sampling grouped tbl samples each group", { sampled <- mtcars %>% group_by(cyl) %>% sample_n(2) expect_s3_class(sampled, "grouped_df") expect_equal(group_vars(sampled), "cyl") expect_equal(nrow(sampled), 6) expect_equal(map_int(group_rows(sampled), length), c(2,2,2)) 
}) test_that("grouped sample respects weight", { df2 <- tibble( x = rep(1:2, 100), y = rep(c(0, 1), 100), g = rep(1:2, each = 100) ) grp <- df2 %>% group_by(g) expect_equal(sample_n(grp, 1, weight = y)$x, c(2, 2)) expect_equal(sample_frac(grp, 0.5, weight = y)$x, rep(2, nrow(df2) / 2)) }) test_that("grouped sample accepts NULL weight from variable (for saeSim)", { df <- tibble( x = rep(1:2, 10), y = rep(c(0, 1), 10), g = rep(1:2, each = 10) ) weight <- NULL expect_no_error(sample_n(df, nrow(df), weight = weight)) expect_no_error(sample_frac(df, weight = weight)) grp <- df %>% group_by(g) expect_no_error(sample_n(grp, nrow(df) / 2, weight = weight)) expect_no_error(sample_frac(grp, weight = weight)) }) test_that("sample_n and sample_frac can call n() (#3413)", { df <- tibble( x = rep(1:2, 10), y = rep(c(0, 1), 10), g = rep(1:2, each = 10) ) gdf <- group_by(df, g) expect_equal(nrow(sample_n(df, n())), nrow(df)) expect_equal(nrow(sample_n(gdf, n())), nrow(gdf)) expect_equal(nrow(sample_n(df, n() - 2L)), nrow(df) - 2) expect_equal(nrow(sample_n(gdf, n() - 2L)), nrow(df) - 4) }) test_that("sample_n and sample_frac handles lazy grouped data frames (#3380)", { df1 <- data.frame(x = 1:10, y = rep(1:2, each=5)) df2 <- data.frame(x = 6:15, z = 1:10) res <- df1 %>% group_by(y) %>% anti_join(df2, by="x") %>% sample_n(1) expect_equal(nrow(res), 1L) res <- df1 %>% group_by(y) %>% anti_join(df2, by="x") %>% sample_frac(0.2) expect_equal(nrow(res), 1L) }) # Errors -------------------------------------------- test_that("sample_*() gives meaningful error messages", { expect_snapshot({ df2 <- tibble( x = rep(1:2, 100), y = rep(c(0, 1), 100), g = rep(1:2, each = 100) ) grp <- df2 %>% group_by(g) # base R error messages (expect_error( sample_n(grp, nrow(df2) / 2, weight = y) )) (expect_error( sample_frac(grp, 1, weight = y) )) # can't sample more values than obs (without replacement) (expect_error( mtcars %>% group_by(cyl) %>% sample_n(10) )) # unknown type (expect_error( 
sample_n(list()) )) (expect_error( sample_frac(list()) )) "# respects weight" df <- data.frame(x = 1:2, y = c(0, 1)) (expect_error( sample_n(df, 2, weight = y) )) (expect_error( sample_frac(df, 2) )) (expect_error( sample_frac(df %>% group_by(y), 2) )) (expect_error( sample_frac(df, 1, weight = y) )) }) }) dplyr/tests/testthat/test-deprec-do.R0000644000176200001440000001357714174551541017374 0ustar liggesusers# Grouped data frames ---------------------------------------------------------- df <- data.frame( g = c(1, 2, 2, 3, 3, 3), x = 1:6, y = 6:1 ) %>% group_by(g) test_that("unnamed results bound together by row", { first <- df %>% do(head(., 1)) expect_equal(nrow(first), 3) expect_equal(first$g, 1:3) expect_equal(first$x, c(1, 2, 4)) }) test_that("named argument become list columns", { out <- df %>% do(nrow = nrow(.), ncol = ncol(.)) expect_equal(out$nrow, list(1, 2, 3)) # includes grouping columns expect_equal(out$ncol, list(3, 3, 3)) }) test_that("multiple outputs can access data (#2998)", { out <- do(tibble(a = 1), g = nrow(.), h = nrow(.)) expect_equal(names(out), c("g", "h")) expect_equal(out$g, list(1L)) expect_equal(out$h, list(1L)) }) test_that("colums in output override columns in input", { out <- df %>% do(data.frame(g = 1)) expect_equal(names(out), "g") expect_equal(out$g, c(1, 1, 1)) }) test_that("empty results preserved (#597)", { blankdf <- function(x) data.frame(blank = numeric(0)) dat <- data.frame(a = 1:2, b = factor(1:2)) expect_equal( dat %>% group_by(b, .drop = FALSE) %>% do(blankdf(.)) %>% ungroup(), tibble(b = factor(integer(), levels = 1:2), blank = numeric()) ) }) test_that("empty inputs give empty outputs (#597)", { out <- data.frame(a = numeric(), b = factor()) %>% group_by(b, .drop = FALSE) %>% do(data.frame()) expect_equal(out, data.frame(b = factor()) %>% group_by(b, .drop = FALSE)) out <- data.frame(a = numeric(), b = character()) %>% group_by(b, .drop = FALSE) %>% do(data.frame()) expect_equal(out, data.frame(b = character()) %>% 
group_by(b, .drop = FALSE)) }) test_that("grouped do evaluates args in correct environment", { a <- 10 f <- function(a) { mtcars %>% group_by(cyl) %>% do(a = a) } expect_equal(f(100)$a, list(100, 100, 100)) }) # Ungrouped data frames -------------------------------------------------------- test_that("ungrouped data frame with unnamed argument returns data frame", { out <- mtcars %>% do(head(.)) expect_s3_class(out, "data.frame") expect_equal(dim(out), c(6, 11)) }) test_that("ungrouped data frame with named argument returns list data frame", { out <- mtcars %>% do(x = 1, y = 2:10) expect_s3_class(out, "tbl_df") expect_equal(out$x, list(1)) expect_equal(out$y, list(2:10)) }) test_that("ungrouped do evaluates args in correct environment", { a <- 10 f <- function(a) { mtcars %>% do(a = a) } expect_equal(f(100)$a, list(100)) }) # Rowwise data frames ---------------------------------------------------------- test_that("can do on rowwise dataframe", { out <- mtcars %>% rowwise() %>% do(x = 1) exp <- tibble(x =rep(list(1), nrow(mtcars))) %>% rowwise() expect_identical(out, exp) }) # Zero row inputs -------------------------------------------------------------- test_that("empty data frames give consistent outputs", { dat <- tibble(x = numeric(0), g = character(0)) grp <- dat %>% group_by(g) emt <- grp %>% filter(FALSE) dat %>% do(data.frame()) %>% vapply(type_sum, character(1)) %>% length() %>% expect_equal(0) dat %>% do(data.frame(y = integer(0))) %>% vapply(type_sum, character(1)) %>% expect_equal(c(y = "int")) dat %>% do(data.frame(.)) %>% vapply(type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr")) dat %>% do(data.frame(., y = integer(0))) %>% vapply(type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr", y = "int")) dat %>% do(y = ncol(.)) %>% vapply(type_sum, character(1)) %>% expect_equal(c(y = "list")) # Grouped data frame should have same col types as ungrouped, with addition # of grouping variable grp %>% do(data.frame()) %>% vapply(type_sum, 
character(1)) %>% expect_equal(c(g = "chr")) grp %>% do(data.frame(y = integer(0))) %>% vapply(type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "int")) grp %>% do(data.frame(.)) %>% vapply(type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr")) grp %>% do(data.frame(., y = integer(0))) %>% vapply(type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr", y = "int")) grp %>% do(y = ncol(.)) %>% vapply(type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "list")) # A empty grouped dataset should have same types as grp emt %>% do(data.frame()) %>% vapply(type_sum, character(1)) %>% expect_equal(c(g = "chr")) emt %>% do(data.frame(y = integer(0))) %>% vapply(type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "int")) emt %>% do(data.frame(.)) %>% vapply(type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr")) emt %>% do(data.frame(., y = integer(0))) %>% vapply(type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr", y = "int")) emt %>% do(y = ncol(.)) %>% vapply(type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "list")) }) test_that("handling of empty data frames in do", { blankdf <- function(x) data.frame(blank = numeric(0)) dat <- data.frame(a = 1:2, b = factor(1:2)) res <- dat %>% group_by(b, .drop = FALSE) %>% do(blankdf(.)) expect_equal(names(res), c("b", "blank")) }) test_that("do() does not retain .drop attribute (#4176)", { res <- iris %>% group_by(Species) %>% do(data.frame(n=1)) expect_null(attr(res, ".drop", exact = TRUE)) }) # Errors -------------------------------------------- test_that("do() gives meaningful error messages", { df <- data.frame( g = c(1, 2, 2, 3, 3, 3), x = 1:6, y = 6:1 ) %>% group_by(g) expect_snapshot({ (expect_error(df %>% do(head, tail))) # unnamed elements must return data frames (expect_error(df %>% ungroup() %>% do(1))) (expect_error(df %>% do(1))) (expect_error(df %>% do("a"))) # can't use both named and unnamed args (expect_error(df %>% do(x = 1, 2))) }) }) 
dplyr/tests/testthat/test-colwise-filter.R0000644000176200001440000000432614151641776020457 0ustar liggesuserstest_that("filter_if()", { expect_identical(nrow(filter_if(mtcars, is_integerish, all_vars(. > 1))), 0L) expect_identical(nrow(filter_if(mtcars, is_integerish, all_vars(. > 0))), 7L) }) test_that("filter_at()", { sepal_large <- filter_at(iris, vars(starts_with("Sepal")), all_vars(. > 4)) sepal_large_expected <- filter(iris, Sepal.Length > 4, Sepal.Width > 4) expect_equal(sepal_large, sepal_large_expected) }) test_that("filter_all()", { expect_identical(filter_all(mtcars, any_vars(. > 200))$disp, mtcars$disp[mtcars$disp > 200]) }) test_that("filter_at can filter by grouping variables (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( filter_at(tbl, vars(gr1), all_vars(. > 1)), filter(tbl, gr1 > 1) ) }) test_that("filter_if and filter_all includes grouping variables (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) res <- filter_all(tbl, all_vars(. > 1)) expect_true(all(res$gr1 > 1)) res <- filter_if(tbl, is.integer, all_vars(. > 1)) expect_true(all(res$gr1 > 1)) }) test_that("can supply functions to scoped filters", { exp <- as.list(mtcars[c(8, 9, 21), ]) out <- mtcars %>% filter_at(c("cyl", "am"), ~ .x == 4 | .x == 0) expect_identical(as.list(out), exp) out <- mtcars %>% filter_at(c("cyl", "am"), function(.x) .x == 4 | .x == 0) expect_identical(as.list(out), exp) }) test_that("colwise filter support .data$. in the quosure versions", { expect_identical( filter_if(iris, is.numeric, any_vars(.data$. > 4)), filter_if(iris, is.numeric, any_vars(. > 4)) ) expect_identical( filter_all(select(iris, -Species), any_vars(.data$. > 4)), filter_all(select(iris, -Species), any_vars(. > 4)) ) expect_identical( filter_at(iris, vars(contains(".")), any_vars(.data$. > 4)), filter_at(iris, vars(contains(".")), any_vars(. 
> 4)) ) }) # Errors ------------------------------------------------------------------ test_that("colwise filter() give meaningful errors", { expect_snapshot({ (expect_error(filter_if(mtcars, is_character, all_vars(. > 0)))) (expect_error(filter_all(mtcars, list(~ . > 0)))) }) }) dplyr/tests/testthat/test-slice.r0000644000176200001440000003717514177154670016676 0ustar liggesuserstest_that("empty slice returns input", { df <- tibble(x = 1:3) expect_equal(slice(df), df) }) test_that("slice handles numeric input (#226)", { g <- mtcars %>% arrange(cyl) %>% group_by(cyl) res <- g %>% slice(1) expect_equal(nrow(res), 3) expect_equal(res, g %>% filter(row_number() == 1L)) expect_equal( mtcars %>% slice(1), mtcars %>% filter(row_number() == 1L) ) }) test_that("slice silently ignores out of range values (#226)", { expect_equal(slice(mtcars, c(2, 100)), slice(mtcars, 2)) g <- group_by(mtcars, cyl) expect_equal(slice(g, c(2, 100)), slice(g, 2)) }) test_that("slice works with negative indices", { res <- slice(mtcars, -(1:2)) exp <- tail(mtcars, -2) expect_equal(res, exp, ignore_attr = TRUE) }) test_that("slice works with grouped data", { g <- mtcars %>% arrange(cyl) %>% group_by(cyl) res <- slice(g, 1:2) exp <- filter(g, row_number() < 3) expect_equal(res, exp) res <- slice(g, -(1:2)) exp <- filter(g, row_number() >= 3) expect_equal(res, exp) g <- group_by(data.frame(x = c(1, 1, 2, 2, 2)), x) expect_equal(group_keys(slice(g, 3, .preserve = TRUE))$x, c(1, 2)) expect_equal(group_keys(slice(g, 3, .preserve = FALSE))$x, 2) }) test_that("slice gives correct rows (#649)", { a <- tibble(value = paste0("row", 1:10)) expect_equal(slice(a, 1:3)$value, paste0("row", 1:3)) expect_equal(slice(a, c(4, 6, 9))$value, paste0("row", c(4, 6, 9))) a <- tibble( value = paste0("row", 1:10), group = rep(1:2, each = 5) ) %>% group_by(group) expect_equal(slice(a, 1:3)$value, paste0("row", c(1:3, 6:8))) expect_equal(slice(a, c(2, 4))$value, paste0("row", c(2, 4, 7, 9))) }) test_that("slice handles NA 
(#1235)", { df <- tibble(x = 1:3) expect_equal(nrow(slice(df, NA_integer_)), 0L) expect_equal(nrow(slice(df, c(1L, NA_integer_))), 1L) expect_equal(nrow(slice(df, c(-1L, NA_integer_))), 2L) df <- tibble(x = 1:4, g = rep(1:2, 2)) %>% group_by(g) expect_equal(nrow(slice(df, c(1, NA))), 2) expect_equal(nrow(slice(df, c(-1, NA))), 2) }) test_that("slice handles logical NA (#3970)", { df <- tibble(x = 1:3) expect_equal(nrow(slice(df, NA)), 0L) }) test_that("slice handles empty data frames (#1219)", { df <- data.frame(x = numeric()) res <- df %>% slice(1:3) expect_equal(nrow(res), 0L) expect_equal(names(res), "x") }) test_that("slice works fine if n > nrow(df) (#1269)", { by_slice <- mtcars %>% arrange(cyl) %>% group_by(cyl) slice_res <- by_slice %>% slice(8) filter_res <- by_slice %>% group_by(cyl) %>% filter(row_number() == 8) expect_equal(slice_res, filter_res) }) test_that("slice strips grouped indices (#1405)", { res <- mtcars %>% group_by(cyl) %>% slice(1) %>% mutate(mpgplus = mpg + 1) expect_equal(nrow(res), 3L) expect_equal(group_rows(res), list_of(1L, 2L, 3L)) }) test_that("slice works with zero-column data frames (#2490)", { expect_equal( tibble(a = 1:3) %>% select(-a) %>% slice(1) %>% nrow(), 1L ) }) test_that("slice correctly computes positive indices from negative indices (#3073)", { x <- tibble(y = 1:10) expect_identical(slice(x, -10:-30), tibble(y = 1:9)) }) test_that("slice handles raw matrices", { df <- tibble(a = 1:4, b = matrix(as.raw(1:8), ncol = 2)) expect_identical( slice(df, 1:2)$b, matrix(as.raw(c(1, 2, 5, 6)), ncol = 2) ) }) test_that("slice on ungrouped data.frame (not tibble) does not enforce tibble", { expect_equal(class(slice(mtcars, 2)), "data.frame") expect_equal(class(slice(mtcars, -2)), "data.frame") expect_equal(class(slice(mtcars, NA)), "data.frame") }) test_that("slice skips 0 (#3313)", { d <- tibble(x = 1:5, y = LETTERS[1:5], g = 1) expect_identical(slice(d, 0), slice(d, integer(0))) expect_identical(slice(d, c(0, 1)), slice(d, 1)) 
expect_identical(slice(d, c(0, 1, 2)), slice(d, c(1, 2))) expect_identical(slice(d, c(-1, 0)), slice(d, -1)) expect_identical(slice(d, c(0, -1)), slice(d, -1)) d <- group_by(d, g) expect_identical(slice(d, 0), slice(d, integer(0))) expect_identical(slice(d, c(0, 1)), slice(d, 1)) expect_identical(slice(d, c(0, 1, 2)), slice(d, c(1, 2))) expect_identical(slice(d, c(-1, 0)), slice(d, -1)) expect_identical(slice(d, c(0, -1)), slice(d, -1)) }) test_that("slice accepts ... (#3804)", { expect_equal(slice(mtcars, 1, 2), slice(mtcars, 1:2)) expect_equal(slice(mtcars, 1, n()), slice(mtcars, c(1, nrow(mtcars)))) g <- mtcars %>% group_by(cyl) expect_equal(slice(g, 1, n()), slice(g, c(1, n()))) }) test_that("slice does not evaluate the expression in empty groups (#1438)", { res <- mtcars %>% group_by(cyl) %>% filter(cyl==6) %>% slice(1:2) expect_equal(nrow(res), 2L) expect_error( res <- mtcars %>% group_by(cyl) %>% filter(cyl==6) %>% sample_n(size=3), NA ) expect_equal(nrow(res), 3L) }) test_that("slice() handles matrix and data frame columns (#3630)", { df <- tibble( x = 1:2, y = matrix(1:4, ncol = 2), z = data.frame(A = 1:2, B = 3:4) ) expect_equal(slice(df, 1), df[1, ]) expect_equal(slice(df, 1), df[1, ]) expect_equal(slice(df, 1), df[1, ]) gdf <- group_by(df, x) expect_equal(slice(gdf, 1), gdf) expect_equal(slice(gdf, 1), gdf) expect_equal(slice(gdf, 1), gdf) gdf <- group_by(df, y) expect_equal(slice(gdf, 1), gdf) expect_equal(slice(gdf, 1), gdf) expect_equal(slice(gdf, 1), gdf) gdf <- group_by(df, z) expect_equal(slice(gdf, 1), gdf) expect_equal(slice(gdf, 1), gdf) expect_equal(slice(gdf, 1), gdf) }) # Slice variants ---------------------------------------------------------- test_that("slice_sample() handles n= and prop=", { df <- data.frame(a = 1) expect_equal( df %>% slice_sample(n = 4, replace = TRUE), df %>% slice(rep(1, 4)) ) expect_equal( df %>% slice_sample(prop = 4, replace = TRUE), df %>% slice(rep(1, 4)) ) expect_snapshot({ (expect_error( df %>% slice_sample(n = 
-1) )) (expect_error( df %>% slice_sample(prop = -1) )) (expect_error( df %>% slice_sample(n = 4, replace = FALSE) )) (expect_error( df %>% slice_sample(prop = 4, replace = FALSE) )) }) }) test_that("functions silently truncate results", { df <- data.frame(x = 1:5) expect_equal(df %>% slice_head(n = 6) %>% nrow(), 5) expect_equal(df %>% slice_tail(n = 6) %>% nrow(), 5) expect_equal(df %>% slice_min(x, n = 6) %>% nrow(), 5) expect_equal(df %>% slice_max(x, n = 6) %>% nrow(), 5) expect_equal(df %>% slice_head(n = -6) %>% nrow(), 0) expect_equal(df %>% slice_tail(n = -6) %>% nrow(), 0) expect_equal(df %>% slice_min(x, n = -6) %>% nrow(), 0) expect_equal(df %>% slice_max(x, n = -6) %>% nrow(), 0) }) test_that("proportion computed correctly", { df <- data.frame(x = 1:10) expect_equal(df %>% slice_head(prop = 0.11) %>% nrow(), 1) expect_equal(df %>% slice_tail(prop = 0.11) %>% nrow(), 1) expect_equal(df %>% slice_sample(prop = 0.11) %>% nrow(), 1) expect_equal(df %>% slice_min(x, prop = 0.11) %>% nrow(), 1) expect_equal(df %>% slice_max(x, prop = 0.11) %>% nrow(), 1) expect_equal(df %>% slice_min(x, prop = 0.11, with_ties = FALSE) %>% nrow(), 1) expect_equal(df %>% slice_max(x, prop = 0.11, with_ties = FALSE) %>% nrow(), 1) }) test_that("min and max return ties by default", { df <- data.frame(x = c(1, 1, 1, 2, 2)) expect_equal(df %>% slice_min(x) %>% nrow(), 3) expect_equal(df %>% slice_max(x) %>% nrow(), 2) expect_equal(df %>% slice_min(x, with_ties = FALSE) %>% nrow(), 1) expect_equal(df %>% slice_max(x, with_ties = FALSE) %>% nrow(), 1) }) test_that("min and max reorder results", { df <- data.frame(id = 1:4, x = c(2, 3, 1, 2)) expect_equal(df %>% slice_min(x, n = 2) %>% pull(id), c(3, 1, 4)) expect_equal(df %>% slice_min(x, n = 2, with_ties = FALSE) %>% pull(id), c(3, 1)) expect_equal(df %>% slice_max(x, n = 2) %>% pull(id), c(2, 1, 4)) expect_equal(df %>% slice_max(x, n = 2, with_ties = FALSE) %>% pull(id), c(2, 1)) }) test_that("min and max ignore NA's (#4826)", { 
df <- data.frame(id = 1:4, x = c(2, NA, 1, 2), y = c(NA, NA, NA, NA)) expect_equal(df %>% slice_min(x, n = 2) %>% pull(id), c(3, 1, 4)) expect_equal(df %>% slice_min(y, n = 2) %>% nrow(), 0) expect_equal(df %>% slice_max(x, n = 2) %>% pull(id), c(1, 4)) expect_equal(df %>% slice_max(y, n = 2) %>% nrow(), 0) }) test_that("arguments to sample are passed along", { df <- data.frame(x = 1:100, wt = c(1, rep(0, 99))) expect_equal(df %>% slice_sample(n = 1, weight_by = wt) %>% pull(x), 1) expect_equal(df %>% slice_sample(n = 2, weight_by = wt, replace = TRUE) %>% pull(x), c(1, 1)) }) test_that("slice() handles matrices", { df <- data.frame(x = 1) expect_identical( slice(df, 1), slice(df, matrix(1)) ) }) test_that("slice() gives meaningfull errors", { df <- data.frame(x = 1:2) gdf <- group_by(df, x) expect_snapshot({ (expect_error( slice(df, matrix(c(1, 2), ncol = 2)) )) (expect_error( slice(gdf, matrix(c(1, 2), ncol = 2)) )) (expect_error( slice(df, "a") )) (expect_error( slice(gdf, "a") )) (expect_error( slice(df, c(1, -1)) )) (expect_error( slice(gdf, c(1, -1)) )) }) }) test_that("slice_*() checks that `n=` is explicitly named", { df <- data.frame(x = 1:10) expect_snapshot({ (expect_error( slice_head(df, 5) )) (expect_error( slice_tail(df, 5) )) (expect_error( slice_min(df, x, 5) )) (expect_error( slice_max(df, x, 5) )) (expect_error( slice_sample(df, 5) )) }) }) test_that("slice_*() not confusing `n` (#6089)", { df <- data.frame(x = 1:10, n = 10:1, g = rep(1:2, each = 5)) expect_error(slice_max(df, order_by = n), NA) expect_error(slice_min(df, order_by = n), NA) expect_error(slice_sample(df, weight_by = n, n = 1L), NA) df <- group_by(df, g) expect_error(slice_max(df, order_by = n), NA) expect_error(slice_min(df, order_by = n), NA) expect_error(slice_sample(df, weight_by = n, n = 1L), NA) }) test_that("slice_*() checks that for empty `...", { df <- data.frame(x = 1:10) expect_snapshot({ (expect_error( slice_head(df, 5, 2) )) (expect_error( slice_tail(df, 5, 2) )) 
(expect_error( slice_min(df, x, 5, 2) )) (expect_error( slice_max(df, x, 5, 2) )) (expect_error( slice_sample(df, 5, 2) )) }) expect_snapshot({ (expect_error( slice_head(df, n = 5, 2) )) (expect_error( slice_tail(df, n = 5, 2) )) (expect_error( slice_min(df, x, n = 5, 2) )) (expect_error( slice_max(df, x, n = 5, 2) )) (expect_error( slice_sample(df, n = 5, 2) )) }) expect_snapshot({ (expect_error( slice_head(df, prop = .5, 2) )) (expect_error( slice_tail(df, prop = .5, 2) )) (expect_error( slice_min(df, x, prop = .5, 2) )) (expect_error( slice_max(df, x, prop = .5, 2) )) (expect_error( slice_sample(df, prop = .5, 2) )) }) }) test_that("slice_*() checks for constant n= and prop=", { df <- data.frame(x = 1:10) expect_snapshot({ (expect_error( slice_head(df, n = n()) )) (expect_error( slice_head(df, prop = n()) )) (expect_error( slice_tail(df, n = n()) )) (expect_error( slice_tail(df, prop = n()) )) (expect_error( slice_min(df, x, n = n()) )) (expect_error( slice_min(df, x, prop = n()) )) (expect_error( slice_max(df, x, n = n()) )) (expect_error( slice_max(df, x, prop = n()) )) (expect_error( slice_sample(df, n = n()) )) (expect_error( slice_sample(df, prop = n()) )) }) }) test_that("slice_min/max() check size of `order_by=` (#5922)", { expect_snapshot({ (expect_error( slice_min(data.frame(x = 1:10), 1:6) )) (expect_error( slice_max(data.frame(x = 1:10), 1:6) )) }) }) test_that("slice_sample() check size of `weight_by=` (#5922)", { expect_snapshot({ (expect_error( slice_sample(data.frame(x = 1:10), n = 2, weight_by = 1:6) )) }) }) test_that("slice_sample() does not error on zero rows (#5729)", { df <- tibble(dummy = character(), weight = numeric(0)) res <- expect_error(slice_sample(df, prop=0.5, weight_by = weight), NA) expect_equal(nrow(res), 0L) }) test_that("slice_head/slice_tail correctly slice ungrouped df when n < 0", { df <- data.frame(x = 1:10) expect_equal( slice_head(df, n = -2), slice_head(df, n = nrow(df) - 2) ) expect_equal( slice_tail(df, n = -2), 
slice_tail(df, n = nrow(df) - 2) ) }) test_that("slice_head,tail() handle n,prop = Inf", { df <- data.frame(x = 1) expect_identical(slice_head(df, n = Inf), df) expect_identical(slice_tail(df, n = Inf), df) expect_identical(slice_head(df, prop = Inf), df) expect_identical(slice_tail(df, prop = Inf), df) expect_identical(slice_head(df, n = -Inf), data.frame(x = numeric())) expect_identical(slice_tail(df, n = -Inf), data.frame(x = numeric())) expect_identical(slice_head(df, prop = -Inf), data.frame(x = numeric())) expect_identical(slice_tail(df, prop = -Inf), data.frame(x = numeric())) }) test_that("slice_head/slice_tail correctly slice grouped df when n < 0", { df <- data.frame(x = 1:10, g = c(rep(1, 8), rep(2, 2))) %>% group_by(g) expect_equal( slice_head(df, n = -3), slice(df, rlang::seq2(1L, n() - 3)) ) expect_equal( n_groups(slice_head(df, n = -3)), 1L ) expect_equal( slice_tail(df, n = -3), slice(df, rlang::seq2(3 + 1, n())) ) expect_equal( n_groups(slice_tail(df, n = -3)), 1L ) }) test_that("Non-integer number of rows computed correctly", { expect_equal(get_slice_size(n = 1.6)(10), 1) expect_equal(get_slice_size(prop = 0.16)(10), 1) expect_equal(get_slice_size(n = -1.6)(10), 9) expect_equal(get_slice_size(prop = -0.16)(10), 9) }) test_that("slice_helpers do call slice() and benefit from dispatch (#6084)", { local_methods( slice.noisy = function(.data, ..., .preserve = FALSE) { warning("noisy") NextMethod() } ) noisy <- function(x) { class(x) <- c("noisy", class(x)) x } df <- tibble(x = 1:10, g = rep(1:2, each = 5)) %>% group_by(g) expect_warning(slice(noisy(df), 1:2), "noisy") expect_warning(slice_sample(noisy(df), n = 2), "noisy") expect_warning(slice_head(noisy(df), n = 2), "noisy") expect_warning(slice_tail(noisy(df), n = 2), "noisy") expect_warning(slice_min(noisy(df), x, n = 2), "noisy") expect_warning(slice_max(noisy(df), x, n = 2), "noisy") expect_warning(sample_n(noisy(df), 2), "noisy") expect_warning(sample_frac(noisy(df), .5), "noisy") }) # Errors 
------------------------------------------------------------------

# NOTE(review): the description below says "rename errors" but every
# expectation exercises slice()/slice_head()/slice_tail() -- it looks like a
# copy-paste leftover. Left unchanged here because snapshot entries are keyed
# by the description; rename it together with the snapshot file.
#
# FIX: `slide_head()` was a typo for `slice_head()`. As written, expect_error()
# passed vacuously on "could not find function", so the `prop = "a"`
# validation was never exercised. The snapshot for this test must be
# re-recorded after this change.
test_that("rename errors with invalid grouped data frame (#640)", {
  expect_snapshot({
    df <- tibble(x = 1:3)

    # User errors are labelled
    (expect_error(slice(mtcars, 1, 1 + "")))
    (expect_error(group_by(mtcars, cyl) %>% slice(1, 1 + "")))

    # Incompatible type
    (expect_error(slice(df, TRUE)))
    (expect_error(slice(df, FALSE)))
    (expect_error(slice(mtcars, 1, 1, "")))
    (expect_error(group_by(mtcars, cyl) %>% slice(1, 1, "")))

    # Mix of positive and negative integers
    (expect_error(mtcars %>% slice(c(-1, 2))))
    (expect_error(mtcars %>% slice(c(2:3, -1))))

    # n and prop are carefully validated
    (expect_error(slice_head(data.frame(), n = 1, prop = 1)))
    (expect_error(slice_tail(data.frame(), n = "a")))
    (expect_error(slice_head(data.frame(), prop = "a")))
    (expect_error(slice_head(data.frame(), n = n())))
    (expect_error(slice_head(data.frame(), prop = n())))
    (expect_error(slice_head(data.frame(), n = NA)))
    (expect_error(slice_head(data.frame(), prop = NA)))
  })
})
dplyr/tests/testthat/test-across.R0000644000176200001440000005606614174551541017014 0ustar liggesusers# across ------------------------------------------------------------------

test_that("across() works on one column data.frame", {
  df <- data.frame(x = 1)
  out <- df %>% mutate(across())
  expect_equal(out, df)
})

test_that("across() does not select grouping variables", {
  df <- data.frame(g = 1, x = 1)
  out <- df %>% group_by(g) %>% summarise(x = across(everything())) %>% pull()
  expect_equal(out, tibble(x = 1))
})

test_that("across() correctly names output columns", {
  gf <- tibble(x = 1, y = 2, z = 3, s = "") %>% group_by(x)

  expect_named(
    summarise(gf, across()),
    c("x", "y", "z", "s")
  )
  expect_named(
    summarise(gf, across(.names = "id_{.col}")),
    c("x", "id_y", "id_z", "id_s")
  )
  expect_named(
    summarise(gf, across(where(is.numeric), mean)),
    c("x", "y", "z")
  )
  expect_named(
    summarise(gf, across(where(is.numeric), mean, .names = "mean_{.col}")),
    c("x", "mean_y", "mean_z")
) expect_named( summarise(gf, across(where(is.numeric), list(mean = mean, sum = sum))), c("x", "y_mean", "y_sum", "z_mean", "z_sum") ) expect_named( summarise(gf, across(where(is.numeric), list(mean = mean, sum))), c("x", "y_mean", "y_2", "z_mean", "z_2") ) expect_named( summarise(gf, across(where(is.numeric), list(mean, sum = sum))), c("x", "y_1", "y_sum", "z_1", "z_sum") ) expect_named( summarise(gf, across(where(is.numeric), list(mean, sum))), c("x", "y_1", "y_2", "z_1", "z_2") ) expect_named( summarise(gf, across(where(is.numeric), list(mean = mean, sum = sum), .names = "{.fn}_{.col}")), c("x", "mean_y", "sum_y", "mean_z", "sum_z") ) }) test_that("across() result locations are aligned with column names (#4967)", { df <- tibble(x = 1:2, y = c("a", "b")) expect <- tibble(x_cls = "integer", x_type = TRUE, y_cls = "character", y_type = FALSE) x <- summarise(df, across(everything(), list(cls = class, type = is.numeric))) expect_identical(x, expect) }) test_that("across() passes ... to functions", { df <- tibble(x = c(1, NA)) expect_equal( summarise(df, across(everything(), mean, na.rm = TRUE)), tibble(x = 1) ) expect_equal( summarise(df, across(everything(), list(mean = mean, median = median), na.rm = TRUE)), tibble(x_mean = 1, x_median = 1) ) }) test_that("across() passes unnamed arguments following .fns as ... (#4965)", { df <- tibble(x = 1) expect_equal(mutate(df, across(x, `+`, 1)), tibble(x = 2)) }) test_that("across() avoids simple argument name collisions with ... 
(#4965)", { df <- tibble(x = c(1, 2)) expect_equal(summarize(df, across(x, tail, n = 1)), tibble(x = 2)) }) test_that("across() works sequentially (#4907)", { df <- tibble(a = 1) expect_equal( mutate(df, x = ncol(across(where(is.numeric))), y = ncol(across(where(is.numeric)))), tibble(a = 1, x = 1L, y = 2L) ) expect_equal( mutate(df, a = "x", y = ncol(across(where(is.numeric)))), tibble(a = "x", y = 0L) ) expect_equal( mutate(df, x = 1, y = ncol(across(where(is.numeric)))), tibble(a = 1, x = 1, y = 2L) ) }) test_that("across() retains original ordering", { df <- tibble(a = 1, b = 2) expect_named(mutate(df, a = 2, x = across())$x, c("a", "b")) }) test_that("across() gives meaningful messages", { expect_snapshot({ # expanding (expect_error( tibble(x = 1) %>% summarise(across(where(is.numeric), 42)) )) (expect_error( tibble(x = 1) %>% summarise(across(y, mean)) )) # computing (expect_error( tibble(x = 1) %>% summarise(res = across(where(is.numeric), 42)) )) (expect_error( tibble(x = 1) %>% summarise(z = across(y, mean)) )) (expect_error( tibble(x = 1) %>% summarise(res = sum(if_any(where(is.numeric), 42))) )) (expect_error( tibble(x = 1) %>% summarise(res = sum(if_all(~mean(.x)))) )) (expect_error( tibble(x = 1) %>% summarise(res = sum(if_any(~mean(.x)))) )) (expect_error(across())) (expect_error(c_across())) # problem while computing error_fn <- function(.) { if (all(. 
> 10)) { rlang::abort("too small", call = call("error_fn")) } else { 42 } } (expect_error( # expanding tibble(x = 1:10, y = 11:20) %>% summarise(across(everything(), error_fn)) )) (expect_error( # expanding tibble(x = 1:10, y = 11:20) %>% mutate(across(everything(), error_fn)) )) (expect_error( # evaluating tibble(x = 1:10, y = 11:20) %>% summarise(force(across(everything(), error_fn))) )) (expect_error( # evaluating tibble(x = 1:10, y = 11:20) %>% mutate(force(across(everything(), error_fn))) )) # name issue (expect_error( tibble(x = 1) %>% summarise(across(everything(), list(f = mean, f = mean))) )) }) }) test_that("monitoring cache - across() can be used twice in the same expression", { df <- tibble(a = 1, b = 2) expect_equal( mutate(df, x = ncol(across(where(is.numeric))) + ncol(across(a))), tibble(a = 1, b = 2, x = 3) ) }) test_that("monitoring cache - across() can be used in separate expressions", { df <- tibble(a = 1, b = 2) expect_equal( mutate(df, x = ncol(across(where(is.numeric))), y = ncol(across(a))), tibble(a = 1, b = 2, x = 2, y = 1) ) }) test_that("monitoring cache - across() usage can depend on the group id", { df <- tibble(g = 1:2, a = 1:2, b = 3:4) df <- group_by(df, g) switcher <- function() { if_else(cur_group_id() == 1L, across(a)$a, across(b)$b) } expect <- df expect$x <- c(1L, 4L) expect_equal( mutate(df, x = switcher()), expect ) }) test_that("monitoring cache - across() internal cache key depends on all inputs", { df <- tibble(g = rep(1:2, each = 2), a = 1:4) df <- group_by(df, g) expect_identical( mutate(df, tibble(x = across(where(is.numeric), mean)$a, y = across(where(is.numeric), max)$a)), mutate(df, x = mean(a), y = max(a)) ) }) test_that("across() rejects non vectors", { expect_error( data.frame(x = 1) %>% summarise(across(everything(), ~sym("foo"))) ) }) test_that("across() uses tidy recycling rules", { expect_equal( data.frame(x = 1, y = 2) %>% summarise(across(everything(), ~rep(42, .))), data.frame(x = rep(42, 2), y = rep(42, 2)) 
) expect_error( data.frame(x = 2, y = 3) %>% summarise(across(everything(), ~rep(42, .))) ) }) test_that("across() returns a data frame with 1 row (#5204)", { df <- tibble(x = 1:42) expect_equal( mutate(df, across(c(), as.factor)), df ) expect_equal( mutate(df, y = across(c(), as.factor))$y, tibble::new_tibble(list(), nrow = 42) ) mutate(df, { res <- across(c(), as.factor) expect_equal(nrow(res), 1L) res }) }) test_that("across(.names=) can use local variables in addition to {col} and {fn}", { res <- local({ prefix <- "MEAN" data.frame(x = 42) %>% summarise(across(everything(), mean, .names = "{prefix}_{.col}")) }) expect_identical(res, data.frame(MEAN_x = 42)) }) test_that("across() uses environment from the current quosure (#5460)", { # If the data frame `y` is selected, causes a subscript conversion # error since it is fractional df <- data.frame(x = 1, y = 2.4) y <- "x" expect_equal(df %>% summarise(across(all_of(y), mean)), data.frame(x = 1)) expect_equal(df %>% mutate(across(all_of(y), mean)), df) expect_equal(df %>% filter(if_all(all_of(y), ~ .x < 2)), df) # Inherited case expect_error(df %>% summarise(local(across(all_of(y), mean)))) expect_equal( df %>% summarise(summarise(cur_data(), across(all_of(y), mean))), df %>% summarise(across(all_of(y), mean)) ) }) test_that("across() sees columns in the recursive case (#5498)", { df <- tibble( vars = list("foo"), data = list(data.frame(foo = 1, bar = 2)) ) out <- df %>% mutate(data = purrr::map2(data, vars, ~ { .x %>% mutate(across(all_of(.y), ~ NA)) })) exp <- tibble( vars = list("foo"), data = list(data.frame(foo = NA, bar = 2)) ) expect_identical(out, exp) out <- df %>% mutate(data = purrr::map2(data, vars, ~ { local({ .y <- "bar" .x %>% mutate(across(all_of(.y), ~ NA)) }) })) exp <- tibble( vars = list("foo"), data = list(data.frame(foo = 1, bar = NA)) ) expect_identical(out, exp) }) test_that("across() works with empty data frames (#5523)", { expect_equal( mutate(tibble(), across()), tibble() ) }) 
test_that("lambdas in mutate() + across() can use columns", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(across(everything(), ~ .x / y)) ) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(+across(everything(), ~ .x / y)) ) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(across(everything(), ~ .x / .data$y)) ) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(+across(everything(), ~ .x / .data$y)) ) }) test_that("lambdas in summarise() + across() can use columns", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(across(everything(), ~ .x / y)) ) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(+across(everything(), ~ .x / y)) ) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(across(everything(), ~ .x / .data$y)) ) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(+across(everything(), ~ .x / .data$y)) ) }) test_that("lambdas in mutate() + across() can use columns in follow up expressions (#5717)", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(a = 2, across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(a = 2, +across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(a = 2, across(c(x, y, z), ~ .x / .data$y)) ) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(a = 2, +across(c(x, y, z), ~ .x / .data$y)) ) }) test_that("lambdas in summarise() + 
across() can use columns in follow up expressions (#5717)", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, +across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, across(c(x, y, z), ~ .x / .data$y)) ) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, +across(c(x, y, z), ~ .x / .data$y)) ) }) test_that("functions defined inline can use columns (#5734)", { df <- data.frame(x = 1, y = 2) expect_equal( df %>% mutate(across('x', function(.x) .x / y)) %>% pull(x), 0.5 ) }) test_that("if_any() and if_all() do not enforce logical", { # We used to coerce to logical using vctrs. Now we use base # semantics because we expand `if_all(x:y)` to `x & y`. d <- data.frame(x = 10, y = 10) expect_equal(filter(d, if_all(x:y, identity)), d) expect_equal(filter(d, if_any(x:y, identity)), d) expect_equal( mutate(d, ok = if_any(x:y, identity)), mutate(d, ok = TRUE) ) expect_equal( mutate(d, ok = if_all(x:y, identity)), mutate(d, ok = TRUE) ) }) test_that("if_any() and if_all() can be used in mutate() (#5709)", { d <- data.frame(x = c(1, 5, 10, 10), y = c(0, 0, 0, 10), z = c(10, 5, 1, 10)) res <- d %>% mutate( any = if_any(x:z, ~ . > 8), all = if_all(x:z, ~ . > 8) ) expect_equal(res$any, c(TRUE, FALSE, TRUE, TRUE)) expect_equal(res$all, c(FALSE, FALSE, FALSE, TRUE)) }) test_that("across() caching not confused when used from if_any() and if_all() (#5782)", { res <- data.frame(x = 1:3) %>% mutate( any = if_any(x, ~ . >= 2) + if_any(x, ~ . >= 3), all = if_all(x, ~ . >= 2) + if_all(x, ~ . 
>= 3) ) expect_equal(res$any, c(0, 1, 2)) expect_equal(res$all, c(0, 1, 2)) }) test_that("if_any() and if_all() respect filter()-like NA handling", { df <- expand.grid( x = c(TRUE, FALSE, NA), y = c(TRUE, FALSE, NA) ) expect_identical( filter(df, x & y), filter(df, if_all(c(x,y), identity)) ) expect_identical( filter(df, x | y), filter(df, if_any(c(x,y), identity)) ) }) test_that("if_any() and if_all() aborts when predicate mistakingly used in .cols= (#5732)", { df <- data.frame(x = 1:10, y = 1:10) expect_snapshot({ # expanded case (expect_error(filter(df, if_any(~ .x > 5)))) (expect_error(filter(df, if_all(~ .x > 5)))) # non expanded case (expect_error(filter(df, !if_any(~ .x > 5)))) (expect_error(filter(df, !if_all(~ .x > 5)))) }) }) test_that("across() correctly reset column", { expect_error(cur_column()) res <- data.frame(x = 1) %>% summarise( a = { expect_error(cur_column()); 2}, across(x, ~{ expect_equal(cur_column(), "x"); 3}, .names = "b"), # top_across() c = { expect_error(cur_column()); 4}, force(across(x, ~{ expect_equal(cur_column(), "x"); 5}, .names = "d")), # across() e = { expect_error(cur_column()); 6} ) expect_equal(res, data.frame(a = 2, b = 3, c = 4, d = 5, e = 6)) expect_error(cur_column()) res <- data.frame(x = 1) %>% mutate( a = { expect_error(cur_column()); 2}, across(x, ~{ expect_equal(cur_column(), "x"); 3}, .names = "b"), # top_across() c = { expect_error(cur_column()); 4}, force(across(x, ~{ expect_equal(cur_column(), "x"); 5}, .names = "d")), # across() e = { expect_error(cur_column()); 6} ) expect_equal(res, data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 6)) expect_error(cur_column()) }) test_that("across() can omit dots", { df <- tibble(x = tibble(foo = 1), y = tibble(foo = 2)) # top res <- mutate(df, across( everything(), list )) expect_equal(res$x[[1]]$foo, 1) expect_equal(res$y[[1]]$foo, 2) # not top res <- mutate(df, force(across( everything(), list ))) expect_equal(res$x[[1]]$foo, 1) expect_equal(res$y[[1]]$foo, 2) }) 
test_that("across() evaluates ... with promise semantics (#5813)", { df <- tibble(x = tibble(foo = 1), y = tibble(foo = 2)) res <- mutate(df, across( everything(), mutate, foo = foo + 1 )) expect_equal(res$x$foo, 2) expect_equal(res$y$foo, 3) # Dots are evaluated only once new_counter <- function() { n <- 0L function() { n <<- n + 1L n } } counter <- new_counter() list_second <- function(...) { list(..2) } res <- mutate(df, across( everything(), list_second, counter() )) expect_equal(res$x[[1]], 1) expect_equal(res$y[[1]], 1) }) test_that("group variables are in scope (#5832)", { f <- function(x, z) x + z gdf <- data.frame(x = 1:2, y = 3:4, g = 1:2) %>% group_by(g) exp <- gdf %>% summarise(x = f(x, z = y)) expect_equal( gdf %>% summarise(across(x, ~ f(.x, z = y))), exp ) expect_equal( gdf %>% summarise(across(x, f, z = y)), exp ) expect_equal( gdf %>% summarise((across(x, ~ f(.x, z = y)))), exp ) expect_equal( gdf %>% summarise((across(x, f, z = y))), exp ) }) test_that("arguments in dots are evaluated once per group", { set.seed(0) out <- data.frame(g = 1:3, var = NA) %>% group_by(g) %>% mutate(across(var, function(x, y) y, rnorm(1))) %>% pull(var) set.seed(0) expect_equal(out, rnorm(3)) }) test_that("can pass quosure through `across()`", { summarise_mean <- function(data, vars) { data %>% summarise(across({{ vars }}, mean)) } gdf <- data.frame(g = c(1, 1, 2), x = 1:3) %>% group_by(g) expect_equal( gdf %>% summarise_mean(where(is.numeric)), summarise(gdf, x = mean(x)) ) }) test_that("across() inlines formulas", { env <- env() f <- ~ toupper(.x) expect_equal( as_across_fn_call(f, quote(foo), env, env), new_quosure(quote(toupper(foo)), f_env(f)) ) f <- ~ list(.x, ., .x) expect_equal( as_across_fn_call(f, quote(foo), env, env), new_quosure(quote(list(foo, foo, foo)), f_env(f)) ) }) test_that("across() uses local formula environment (#5881)", { f <- local({ prefix <- "foo" ~ paste(prefix, .x) }) df <- tibble(x = "x") expect_equal( mutate(df, across(x, f)), tibble(x = 
"foo x") ) expect_equal( mutate(df, across(x, list(f = f))), tibble(x = "x", x_f = "foo x") ) local({ # local() here is not necessary, it's just in case the # code is run directly without the test_that() prefix <- "foo" expect_equal( mutate(df, across(x, ~paste(prefix, .x))), tibble(x = "foo x") ) expect_equal( mutate(df, across(x, list(f = ~paste(prefix, .x)))), tibble(x = "x", x_f = "foo x") ) }) expect_equal( data.frame(x = 1) %>% mutate(across(1, list(f = local(~ . + 1)))), data.frame(x = 1, x_f = 2) ) expect_equal( data.frame(x = 1) %>% mutate(across(1, local({ `_local_var` <- 1 ~ . + `_local_var` }))), data.frame(x = 2) ) }) test_that("unevaluated formulas (currently) fail", { df <- tibble(x = "x") expect_error( mutate(df, across(x, quote(~ paste("foo", .x)))) ) }) test_that("across() can access lexical scope (#5862)", { f_across <- function(data, cols, fn) { data %>% summarise( across({{ cols }}, fn) ) } df <- data.frame(x = 1:10, y = 1:10) expect_equal( f_across(df, c(x, y), mean), summarise(df, across(c(x, y), mean)) ) }) test_that("if_any() and if_all() expansions deal with no inputs or single inputs", { d <- data.frame(x = 1) # No inputs expect_equal( filter(d, if_any(starts_with("c"), ~ FALSE)), filter(d) ) expect_equal( filter(d, if_all(starts_with("c"), ~ FALSE)), filter(d) ) # Single inputs expect_equal( filter(d, if_any(x, ~ FALSE)), filter(d, FALSE) ) expect_equal( filter(d, if_all(x, ~ FALSE)), filter(d, FALSE) ) }) test_that("if_any() and if_all() wrapped deal with no inputs or single inputs", { d <- data.frame(x = 1) # No inputs expect_equal( filter(d, (if_any(starts_with("c"), ~ FALSE))), filter(d) ) expect_equal( filter(d, (if_all(starts_with("c"), ~ FALSE))), filter(d) ) # Single inputs expect_equal( filter(d, (if_any(x, ~ FALSE))), filter(d, FALSE) ) expect_equal( filter(d, (if_all(x, ~ FALSE))), filter(d, FALSE) ) }) test_that("expanded if_any() finds local data", { limit <- 7 df <- data.frame(x = 1:10, y = 10:1) expect_identical( 
filter(df, if_any(everything(), ~ .x > limit)), filter(df, x > limit | y > limit) ) }) test_that("across() can use named selections", { df <- data.frame(x = 1, y = 2) # no fns expect_equal( df %>% summarise(across(c(a = x, b = y))), data.frame(a = 1, b = 2) ) expect_equal( df %>% summarise(across(all_of(c(a = "x", b = "y")))), data.frame(a = 1, b = 2) ) # no fns, non expanded expect_equal( df %>% summarise((across(c(a = x, b = y)))), data.frame(a = 1, b = 2) ) expect_equal( df %>% summarise((across(all_of(c(a = "x", b = "y"))))), data.frame(a = 1, b = 2) ) # one fn expect_equal( df %>% summarise(across(c(a = x, b = y), mean)), data.frame(a = 1, b = 2) ) expect_equal( df %>% summarise(across(all_of(c(a = "x", b = "y")), mean)), data.frame(a = 1, b = 2) ) # one fn - non expanded expect_equal( df %>% summarise((across(c(a = x, b = y), mean))), data.frame(a = 1, b = 2) ) expect_equal( df %>% summarise((across(all_of(c(a = "x", b = "y")), mean))), data.frame(a = 1, b = 2) ) # multiple fns expect_equal( df %>% summarise(across(c(a = x, b = y), list(mean = mean, sum = sum))), data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2) ) expect_equal( df %>% summarise(across(all_of(c(a = "x", b = "y")), list(mean = mean, sum = sum))), data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2) ) # multiple fns - non expanded expect_equal( df %>% summarise((across(c(a = x, b = y), list(mean = mean, sum = sum)))), data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2) ) expect_equal( df %>% summarise((across(all_of(c(a = "x", b = "y")), list(mean = mean, sum = sum)))), data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2) ) }) test_that("expr_subtitute() stops at lambdas (#5896)", { expect_identical( expr_substitute(expr(map(.x, ~mean(.x))), quote(.x), quote(a)), expr(map(a, ~mean(.x))) ) expect_identical( expr_substitute(expr(map(.x, function(.x) mean(.x))), quote(.x), quote(a)), expr(map(a, function(.x) mean(.x))) ) }) test_that("expr_subtitute() keeps at double-sided 
formula (#5894)", { expect_identical( expr_substitute(expr(case_when(.x < 5 ~ 5, TRUE ~ .x)), quote(.x), quote(a)), expr(case_when(a < 5 ~ 5, TRUE ~ a)) ) expect_identical( expr_substitute(expr(case_when(. < 5 ~ 5, TRUE ~ .)), quote(.), quote(a)), expr(case_when(a < 5 ~ 5, TRUE ~ a)) ) }) test_that("across() predicates operate on whole data", { df <- tibble( x = c(1, 1, 2), g = c(1, 1, 2) ) out <- df %>% mutate(across(where(~ n_distinct(.x) > 1), ~ .x + 10)) exp <- tibble( x = c(11, 11, 12), g = c(11, 11, 12) ) expect_equal(out, exp) out <- df %>% group_by(g) %>% mutate(across(where(~ n_distinct(.x) > 1), ~ .x + 10)) exp <- tibble( x = c(11, 11, 12), g = c(1, 1, 2) ) %>% group_by(g) expect_equal(out, exp) }) test_that("expand_across() expands lambdas", { quo <- quo(across(c(cyl, am), ~ identity(.x))) quo <- new_dplyr_quosure( quo, name_given = "", name_auto = "across()", is_named = FALSE, index = 1 ) DataMask$new(mtcars, current_env(), "mutate", call("caller")) expect_equal( map(expand_across(quo), quo_get_expr), exprs( cyl = identity(cyl), am = identity(am) ) ) }) test_that("expand_if_across() expands lambdas", { quo <- quo(if_any(c(cyl, am), ~ . 
> 4)) quo <- new_dplyr_quosure( quo, name_given = "", name_auto = "if_any()", is_named = FALSE, index = 1 ) DataMask$new(mtcars, current_env(), "mutate", call("caller")) expect_equal( map(expand_if_across(quo), quo_squash), alist(`|`(cyl > 4, am > 4)) ) }) # c_across ---------------------------------------------------------------- test_that("selects and combines columns", { df <- data.frame(x = 1:2, y = 3:4) out <- df %>% summarise(z = list(c_across(x:y))) expect_equal(out$z, list(1:4)) }) dplyr/tests/testthat/test-na-if.R0000644000176200001440000000057014151641776016516 0ustar liggesuserstest_that("scalar y replaces all matching x", { x <- c(0, 1, 0) expect_equal(na_if(x, 0), c(NA, 1, NA)) expect_equal(na_if(x, 1), c(0, NA, 0)) }) # Errors ------------------------------------------------------------------ test_that("na_if() gives meaningful errors", { expect_snapshot({ (expect_error(na_if(1:3, 1:2))) (expect_error(na_if(1, 1:2))) }) }) dplyr/tests/testthat/test-deprec-dbi.R0000644000176200001440000000033414151641776017520 0ustar liggesuserstest_that("src_sqlite() gives meaningful error messages", { skip_if_not_installed("dbplyr") withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(src_sqlite(":memory:"))) }) }) dplyr/tests/testthat/test-select.r0000644000176200001440000001344414154653101017033 0ustar liggesuserstest_that("select preserves grouping", { gf <- group_by(tibble(g = 1:3, x = 3:1), g) i <- count_regroups(out <- select(gf, h = g)) expect_equal(i, 0) expect_equal(group_vars(out), "h") }) test_that("grouping variables preserved with a message, unless already selected (#1511, #5841)", { df <- tibble(g = 1:3, x = 3:1) %>% group_by(g) expect_snapshot({ res <- select(df, x) }) expect_named(res, c("g", "x")) df <- tibble(a = 1, b = 2, c = 3) %>% group_by(a) expect_equal(df %>% select(a = b), tibble(a = 2)) df <- tibble(a = 1, b = 2, c = 3) %>% group_by(a, b) expect_snapshot({ expect_equal(df %>% select(a = c), tibble(b = 2, a = 3) 
%>% group_by(b)) expect_equal(df %>% select(b = c), tibble(a = 1, b = 3) %>% group_by(a)) }) }) test_that("non-syntactic grouping variable is preserved (#1138)", { expect_snapshot( df <- tibble(`a b` = 1L) %>% group_by(`a b`) %>% select() ) expect_named(df, "a b") }) test_that("select doesn't fail if some names missing", { df1 <- data.frame(x = 1:10, y = 1:10, z = 1:10) df2 <- setNames(df1, c("x", "y", "")) # df3 <- setNames(df1, c("x", "", "")) expect_equal(select(df1, x), data.frame(x = 1:10)) expect_equal(select(df2, x), data.frame(x = 1:10)) # expect_equal(select(df3, x), data.frame(x = 1:10)) }) # Special cases ------------------------------------------------- test_that("select with no args returns nothing", { empty <- select(mtcars) expect_equal(ncol(empty), 0) expect_equal(nrow(empty), 32) empty <- select(mtcars, !!!list()) expect_equal(ncol(empty), 0) expect_equal(nrow(empty), 32) }) test_that("select excluding all vars returns nothing", { expect_equal(dim(select(mtcars, -(mpg:carb))), c(32, 0)) expect_equal(dim(select(mtcars, starts_with("x"))), c(32, 0)) expect_equal(dim(select(mtcars, -matches("."))), c(32, 0)) }) test_that("negating empty match returns everything", { df <- data.frame(x = 1:3, y = 3:1) expect_equal(select(df, -starts_with("xyz")), df) }) test_that("can select with duplicate columns", { df <- tibble(x = 1, x = 2, y = 1, .name_repair = "minimal") # can extract duplicate cols by position expect_named(df %>% select(1, 3), c("x", "y")) # can select out non-duplicated columns expect_named(df %>% select(y), "y") }) # Select variables ----------------------------------------------- test_that("select can be before group_by (#309)", { df <- data.frame( id = c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5), year = c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013), var1 = rnorm(10) ) dfagg <- df %>% group_by(id, year) %>% select(id, year, var1) %>% summarise(var1 = mean(var1)) expect_equal(names(dfagg), c("id", "year", "var1")) }) test_that("select 
succeeds in presence of raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(select(df, a), df["a"]) expect_identical(select(df, b), df["b"]) expect_identical(select(df, -b), df["a"]) }) test_that("arguments to select() don't match vars_select() arguments", { df <- tibble(a = 1) expect_identical(select(df, var = a), tibble(var = 1)) expect_identical(select(group_by(df, a), var = a), group_by(tibble(var = 1), var)) expect_identical(select(df, exclude = a), tibble(exclude = 1)) expect_identical(select(df, include = a), tibble(include = 1)) expect_identical(select(group_by(df, a), exclude = a), group_by(tibble(exclude = 1), exclude)) expect_identical(select(group_by(df, a), include = a), group_by(tibble(include = 1), include)) }) test_that("can select() with .data pronoun (#2715)", { expect_identical(select(mtcars, .data$cyl), select(mtcars, cyl)) }) test_that("can select() with character vectors", { expect_identical(select(mtcars, "cyl", !!"disp", c("cyl", "am", "drat")), mtcars[c("cyl", "disp", "am", "drat")]) }) test_that("select() treats NULL inputs as empty", { expect_identical(select(mtcars, cyl), select(mtcars, NULL, cyl, NULL)) }) test_that("can select() with strings and character vectors", { vars <- c(foo = "cyl", bar = "am") expect_identical(select(mtcars, !!!vars), select(mtcars, foo = cyl, bar = am)) expect_identical(select(mtcars, !!vars), select(mtcars, foo = cyl, bar = am)) }) test_that("select works on empty names (#3601)", { df <- data.frame(x=1, y=2, z=3) colnames(df) <- c("x","y","") expect_identical(select(df, x)$x, 1) colnames(df) <- c("","y","z") expect_identical(select(df, y)$y, 2) }) test_that("select works on NA names (#3601)", { df <- data.frame(x=1, y=2, z=3) colnames(df) <- c("x","y",NA) expect_identical(select(df, x)$x, 1) colnames(df) <- c(NA,"y","z") expect_identical(select(df, y)$y, 2) }) test_that("select() keeps attributes of raw data frames (#5831)", { df <- data.frame(x = 1) attr(df, "a") <- "b" 
expect_equal(attr(select(df, x), "a"), "b") }) test_that("select() provides informative errors", { expect_snapshot({ (expect_error(select(mtcars, 1 + ""))) }) }) # dplyr_col_select() ------------------------------------------------------ test_that("dplyr_col_select() aborts when `[` implementation is broken", { local_methods( "[.dplyr_test_broken_operator" = function(x, ...) { unclass(x) }, "[.dplyr_test_operator_wrong_size" = function(x, ...) { data.frame() } ) df1 <- new_tibble(list(x = 1), nrow = 1L, class = "dplyr_test_broken_operator") expect_snapshot({ (expect_error( select(df1, 1:2) )) (expect_error( select(df1, 0) )) }) df2 <- new_tibble(list(x = 1), nrow = 1L, class = "dplyr_test_operator_wrong_size") expect_error(select(df2, 1:2)) expect_snapshot({ # from vctrs (expect_error( select(df1, 2) )) # not returning a data frame (expect_error( select(df1, 1) )) # unexpected number of columns (expect_error( select(df2, 1) )) }) }) dplyr/tests/testthat/_snaps/0000755000176200001440000000000014176714175015706 5ustar liggesusersdplyr/tests/testthat/_snaps/summarise.md0000644000176200001440000001372414177154556020246 0ustar liggesusers# summarise() gives meaningful errors Code tibble(x = 1, y = 2) %>% group_by(x, y) %>% summarise() Message `summarise()` has grouped output by 'x'. You can override using the `.groups` argument. Output # A tibble: 1 x 2 # Groups: x [1] x y 1 1 2 Code tibble(x = 1, y = 2) %>% group_by(x, y) %>% summarise(z = c(2, 2)) Message `summarise()` has grouped output by 'x', 'y'. You can override using the `.groups` argument. Output # A tibble: 2 x 3 # Groups: x, y [1] x y z 1 1 2 2 2 1 2 2 Code tibble(x = 1, y = 2) %>% rowwise(x, y) %>% summarise() Message `summarise()` has grouped output by 'x', 'y'. You can override using the `.groups` argument. 
Output # A tibble: 1 x 2 # Groups: x, y [1] x y 1 1 2 Code tibble(x = 1, y = 2) %>% rowwise() %>% summarise() Output # A tibble: 1 x 0 --- Code (expect_error(tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% summarise(a = rlang::env( a = 1)))) Output Error in `summarise()`: ! Problem while computing `a = rlang::env(a = 1)`. x `a` must be a vector, not an environment. Code (expect_error(tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% group_by(x, y) %>% summarise(a = rlang::env(a = 1)))) Output Error in `summarise()`: ! Problem while computing `a = rlang::env(a = 1)`. x `a` must be a vector, not an environment. i The error occurred in group 1: x = 1, y = 1. Code (expect_error(tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% rowwise() %>% summarise(a = lm(y ~ x)))) Output Error in `summarise()`: ! Problem while computing `a = lm(y ~ x)`. x `a` must be a vector, not a `lm` object. i Did you mean: `a = list(lm(y ~ x))` ? i The error occurred in row 1. Code (expect_error(tibble(id = 1:2, a = list(1, "2")) %>% group_by(id) %>% summarise( a = a[[1]]))) Output Error in `summarise()`: ! Problem while computing `a = a[[1]]`. Caused by error: ! `a` must return compatible vectors across groups. i Result type for group 1 (id = 1): . i Result type for group 2 (id = 2): . Code (expect_error(tibble(id = 1:2, a = list(1, "2")) %>% rowwise() %>% summarise(a = a[[ 1]]))) Output Error in `summarise()`: ! Problem while computing `a = a[[1]]`. Caused by error: ! `a` must return compatible vectors across groups. Code (expect_error(tibble(z = 1) %>% summarise(x = 1:3, y = 1:2))) Output Error in `summarise()`: ! Problem while recycling `y = 1:2`. x `y` must be size 3 or 1, not 2. i An earlier column had size 3. Code (expect_error(tibble(z = 1:2) %>% group_by(z) %>% summarise(x = 1:3, y = 1:2))) Output Error in `summarise()`: ! Problem while recycling `y = 1:2`. x `y` must be size 3 or 1, not 2. i An earlier column had size 3. i The error occurred in group 1: z = 1. 
Code (expect_error(tibble(z = c(1, 3)) %>% group_by(z) %>% summarise(x = seq_len(z), y = 1:2))) Output Error in `summarise()`: ! Problem while recycling `y = 1:2`. x `y` must be size 3 or 1, not 2. i An earlier column had size 3. i The error occurred in group 2: z = 3. Code (expect_error(data.frame(x = 1:2, g = 1:2) %>% group_by(g) %>% summarise(x = if ( g == 1) 42))) Output Error in `summarise()`: ! Problem while computing `x = if (g == 1) 42`. x `x` must return compatible vectors across groups. x Can't combine NULL and non NULL results. Code (expect_error(summarise(mtcars, a = mean(not_there)))) Output Error in `summarise()`: ! Problem while computing `a = mean(not_there)`. Caused by error in `mean()`: ! object 'not_there' not found Code (expect_error(summarise(group_by(mtcars, cyl), a = mean(not_there)))) Output Error in `summarise()`: ! Problem while computing `a = mean(not_there)`. i The error occurred in group 1: cyl = 4. Caused by error in `mean()`: ! object 'not_there' not found Code (expect_error(summarise(tibble(a = 1), c = .data$b))) Output Error in `summarise()`: ! Problem while computing `c = .data$b`. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code (expect_error(summarise(group_by(tibble(a = 1:3), a), c = .data$b))) Output Error in `summarise()`: ! Problem while computing `c = .data$b`. i The error occurred in group 1: a = 1. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code (expect_error(tibble(x = 1, x = 1, .name_repair = "minimal") %>% summarise(x))) Output Error in `summarise()`: ! Can't transform a data frame with duplicate names. Code (expect_error(tibble() %>% summarise(stop("{")))) Output Error in `summarise()`: ! Problem while computing `..1 = stop("{")`. Caused by error: ! { Code (expect_error(tibble(a = 1, b = "{value:1, unit:a}") %>% group_by(b) %>% summarise(a = stop("!")))) Output Error in `summarise()`: ! Problem while computing `a = stop("!")`. 
i The error occurred in group 1: b = "{value:1, unit:a}". Caused by error: ! ! dplyr/tests/testthat/_snaps/sample.md0000644000176200001440000000525414177154550017513 0ustar liggesusers# sample_*() gives meaningful error messages Code df2 <- tibble(x = rep(1:2, 100), y = rep(c(0, 1), 100), g = rep(1:2, each = 100)) grp <- df2 %>% group_by(g) (expect_error(sample_n(grp, nrow(df2) / 2, weight = y))) Output Error in `sample_n()`: ! Problem while computing indices. i The error occurred in group 1: g = 1. Caused by error in `sample.int()`: ! too few positive probabilities Code (expect_error(sample_frac(grp, 1, weight = y))) Output Error in `sample_frac()`: ! Problem while computing indices. i The error occurred in group 1: g = 1. Caused by error in `sample.int()`: ! too few positive probabilities Code (expect_error(mtcars %>% group_by(cyl) %>% sample_n(10))) Output Error in `sample_n()`: ! Problem while computing indices. i The error occurred in group 2: cyl = 6. Caused by error: ! `size` must be less than or equal to 7 (size of data). i set `replace = TRUE` to use sampling with replacement. Code (expect_error(sample_n(list()))) Output Error in `sample_n()`: ! `tbl` must be a data frame, not a list. Code (expect_error(sample_frac(list()))) Output Error in `sample_frac()`: ! `tbl` must be a data frame, not a list. Code # # respects weight df <- data.frame(x = 1:2, y = c(0, 1)) (expect_error(sample_n(df, 2, weight = y))) Output Error in `sample_n()`: ! Problem while computing indices. Caused by error in `sample.int()`: ! too few positive probabilities Code (expect_error(sample_frac(df, 2))) Output Error in `sample_frac()`: ! Problem while computing indices. Caused by error: ! `size` of sampled fraction must be less or equal to one. i set `replace = TRUE` to use sampling with replacement. Code (expect_error(sample_frac(df %>% group_by(y), 2))) Output Error in `sample_frac()`: ! Problem while computing indices. i The error occurred in group 1: y = 0. Caused by error: ! 
`size` of sampled fraction must be less or equal to one. i set `replace = TRUE` to use sampling with replacement. Code (expect_error(sample_frac(df, 1, weight = y))) Output Error in `sample_frac()`: ! Problem while computing indices. Caused by error in `sample.int()`: ! too few positive probabilities dplyr/tests/testthat/_snaps/mutate.md0000644000176200001440000001267414177154544017540 0ustar liggesusers# mutate() give meaningful errors Code tbl <- tibble(x = 1:2, y = 1:2) (expect_error(tbl %>% mutate(y = NULL, a = sum(y)))) Output Error in `mutate()`: ! Problem while computing `a = sum(y)`. Caused by error: ! object 'y' not found Code (expect_error(tbl %>% group_by(x) %>% mutate(y = NULL, a = sum(y)))) Output Error in `mutate()`: ! Problem while computing `a = sum(y)`. i The error occurred in group 1: x = 1. Caused by error: ! object 'y' not found Code (expect_error(tibble(x = 1) %>% mutate(y = mean))) Output Error in `mutate()`: ! Problem while computing `y = mean`. x `y` must be a vector, not a function. Code df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) (expect_error(df %>% mutate(out = env(a = 1)))) Output Error in `mutate()`: ! Problem while computing `out = env(a = 1)`. x `out` must be a vector, not an environment. Code (expect_error(df %>% group_by(g) %>% mutate(out = env(a = 1)))) Output Error in `mutate()`: ! Problem while computing `out = env(a = 1)`. x `out` must be a vector, not an environment. i The error occurred in group 1: g = 1. Code (expect_error(df %>% rowwise() %>% mutate(out = rnorm))) Output Error in `mutate()`: ! Problem while computing `out = rnorm`. x `out` must be a vector, not a function. i Did you mean: `out = list(rnorm)` ? i The error occurred in row 1. Code (expect_error(data.frame(x = rep(1:5, each = 3)) %>% group_by(x) %>% mutate( val = ifelse(x < 3, "foo", 2)))) Output Error in `mutate()`: ! Problem while computing `val = ifelse(x < 3, "foo", 2)`. Caused by error: ! `val` must return compatible vectors across groups. 
i Result type for group 1 (x = 1): . i Result type for group 3 (x = 3): . Code (expect_error(tibble(a = 1:3, b = 4:6) %>% group_by(a) %>% mutate(if (a == 1) NULL else "foo"))) Output Error in `mutate()`: ! Problem while computing `..1 = if (a == 1) NULL else "foo"`. x `..1` must return compatible vectors across groups. x Can't combine NULL and non NULL results. Code (expect_error(data.frame(x = c(2, 2, 3, 3)) %>% mutate(int = 1:5))) Output Error in `mutate()`: ! Problem while computing `int = 1:5`. x `int` must be size 4 or 1, not 5. Code (expect_error(data.frame(x = c(2, 2, 3, 3)) %>% group_by(x) %>% mutate(int = 1: 5))) Output Error in `mutate()`: ! Problem while computing `int = 1:5`. x `int` must be size 2 or 1, not 5. i The error occurred in group 1: x = 2. Code (expect_error(data.frame(x = c(2, 3, 3)) %>% group_by(x) %>% mutate(int = 1:5))) Output Error in `mutate()`: ! Problem while computing `int = 1:5`. x `int` must be size 1, not 5. i The error occurred in group 1: x = 2. Code (expect_error(data.frame(x = c(2, 2, 3, 3)) %>% rowwise() %>% mutate(int = 1:5)) ) Output Error in `mutate()`: ! Problem while computing `int = 1:5`. x `int` must be size 1, not 5. i Did you mean: `int = list(1:5)` ? i The error occurred in row 1. Code (expect_error(tibble(y = list(1:3, "a")) %>% rowwise() %>% mutate(y2 = y))) Output Error in `mutate()`: ! Problem while computing `y2 = y`. x `y2` must be size 1, not 3. i Did you mean: `y2 = list(y)` ? i The error occurred in row 1. Code (expect_error(data.frame(x = 1:10) %>% mutate(y = 11:20, y = 1:2))) Output Error in `mutate()`: ! Problem while computing `y = 1:2`. x `y` must be size 10 or 1, not 2. Code (expect_error(tibble(a = 1) %>% mutate(c = .data$b))) Output Error in `mutate()`: ! Problem while computing `c = .data$b`. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code (expect_error(tibble(a = 1:3) %>% group_by(a) %>% mutate(c = .data$b))) Output Error in `mutate()`: ! 
Problem while computing `c = .data$b`. i The error occurred in group 1: a = 1. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code lazy <- (function(x) list(enquo(x))) res <- tbl %>% rowwise() %>% mutate(z = lazy(x), .keep = "unused") (expect_error(eval_tidy(res$z[[1]]))) Output Error: ! Obsolete data mask. x Too late to resolve `x` after the end of `dplyr::mutate()`. i Did you save an object that uses `x` lazily in a column in the `dplyr::mutate()` expression ? Code (expect_error(tibble() %>% mutate(stop("{")))) Output Error in `mutate()`: ! Problem while computing `..1 = stop("{")`. Caused by error: ! { dplyr/tests/testthat/_snaps/filter.md0000644000176200001440000001317214177154535017520 0ustar liggesusers# filter() gives useful error messages Code (expect_error(iris %>% group_by(Species) %>% filter(1:n()))) Output Error in `filter()`: ! Problem while computing `..1 = 1:n()`. x Input `..1` must be a logical vector, not a integer. i The error occurred in group 1: Species = setosa. Code (expect_error(iris %>% filter(1:n()))) Output Error in `filter()`: ! Problem while computing `..1 = 1:n()`. x Input `..1` must be a logical vector, not a integer. Code (expect_error(filter(data.frame(x = 1:2), matrix(c(TRUE, FALSE, TRUE, FALSE), nrow = 2)))) Output Error in `filter()`: ! Problem while computing `..1 = matrix(c(TRUE, FALSE, TRUE, FALSE), nrow = 2)`. x Input `..1` must be a logical vector, not a logical[,2]. Code (expect_error(iris %>% group_by(Species) %>% filter(c(TRUE, FALSE)))) Output Error in `filter()`: ! Problem while computing `..1 = c(TRUE, FALSE)`. x Input `..1` must be of size 50 or 1, not size 2. i The error occurred in group 1: Species = setosa. Code (expect_error(iris %>% rowwise(Species) %>% filter(c(TRUE, FALSE)))) Output Error in `filter()`: ! Problem while computing `..1 = c(TRUE, FALSE)`. x Input `..1` must be of size 1, not size 2. i The error occurred in row 1. 
Code (expect_error(iris %>% filter(c(TRUE, FALSE)))) Output Error in `filter()`: ! Problem while computing `..1 = c(TRUE, FALSE)`. x Input `..1` must be of size 150 or 1, not size 2. Code (expect_error(iris %>% group_by(Species) %>% filter(data.frame(c(TRUE, FALSE)))) ) Output Error in `filter()`: ! Problem while computing `..1 = data.frame(c(TRUE, FALSE))`. x Input `..1` must be of size 50 or 1, not size 2. i The error occurred in group 1: Species = setosa. Code (expect_error(iris %>% rowwise() %>% filter(data.frame(c(TRUE, FALSE))))) Output Error in `filter()`: ! Problem while computing `..1 = data.frame(c(TRUE, FALSE))`. x Input `..1` must be of size 1, not size 2. i The error occurred in row 1. Code (expect_error(iris %>% filter(data.frame(c(TRUE, FALSE))))) Output Error in `filter()`: ! Problem while computing `..1 = data.frame(c(TRUE, FALSE))`. x Input `..1` must be of size 150 or 1, not size 2. Code (expect_error(tibble(x = 1) %>% filter(c(TRUE, TRUE)))) Output Error in `filter()`: ! Problem while computing `..1 = c(TRUE, TRUE)`. x Input `..1` must be of size 1, not size 2. Code (expect_error(iris %>% group_by(Species) %>% filter(data.frame(Sepal.Length > 3, 1:n())))) Output Error in `filter()`: ! Problem while computing `..1 = data.frame(Sepal.Length > 3, 1:n())`. x Input `..1$X1.n..` must be a logical vector, not a integer. i The error occurred in group 1: Species = setosa. Code (expect_error(iris %>% filter(data.frame(Sepal.Length > 3, 1:n())))) Output Error in `filter()`: ! Problem while computing `..1 = data.frame(Sepal.Length > 3, 1:n())`. x Input `..1$X1.n..` must be a logical vector, not a integer. Code (expect_error(mtcars %>% filter(`_x`))) Output Error in `filter()`: ! Problem while computing `..1 = _x`. Caused by error: ! object '_x' not found Code (expect_error(mtcars %>% group_by(cyl) %>% filter(`_x`))) Output Error in `filter()`: ! Problem while computing `..1 = _x`. i The error occurred in group 1: cyl = 4. Caused by error: ! 
object '_x' not found Code (expect_error(filter(mtcars, x = 1))) Output Error in `filter()`: ! We detected a named input. i This usually means that you've used `=` instead of `==`. i Did you mean `x == 1`? Code (expect_error(filter(mtcars, y > 2, z = 3))) Output Error in `filter()`: ! We detected a named input. i This usually means that you've used `=` instead of `==`. i Did you mean `z == 3`? Code (expect_error(filter(mtcars, TRUE, x = 1))) Output Error in `filter()`: ! We detected a named input. i This usually means that you've used `=` instead of `==`. i Did you mean `x == 1`? Code (expect_error(filter(ts(1:10)))) Output Error in `filter()`: ! Incompatible data source. x `.data` is a object, not a data source. i Did you want to use `stats::filter()`? Code (expect_error(tibble() %>% filter(stop("{")))) Output Error in `filter()`: ! Problem while computing `..1 = stop("{")`. Caused by error: ! { Code data.frame(x = 1, y = 1) %>% filter(across(everything(), ~ .x > 0)) Condition Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or `if_all()`. Output x y 1 1 1 Code data.frame(x = 1, y = 1) %>% filter(data.frame(x > 0, y > 0)) Condition Warning: data frame results in `filter()` are deprecated, use `if_any()` or `if_all()`. Output x y 1 1 1 dplyr/tests/testthat/_snaps/join-cols.md0000644000176200001440000000403114177154541020117 0ustar liggesusers# emits useful messages Code (expect_error(join_cols(c("x", "y"), c("y", "y")))) Output Error: ! Input columns in `y` must be unique. x Problem with `y`. Code (expect_error(join_cols(c("y", "y"), c("x", "y")))) Output Error: ! Input columns in `x` must be unique. x Problem with `y`. Code xy <- c("x", "y") vars <- join_cols(xy, xy) Message Joining, by = c("x", "y") Code (expect_error(join_cols(xy, c("a", "b")))) Output Error: ! `by` must be supplied when `x` and `y` have no common variables. i use by = character()` to perform a cross-join. Code (expect_error(join_cols(xy, xy, by = FALSE))) Output Error: ! 
`by` must be a (named) character vector, list, or NULL, not a logical vector. Code (expect_error(join_cols(xy, xy, by = list(1, 2)))) Output Error: ! join columns must be character vectors. Code (expect_error(join_cols(xy, xy, by = c("x", "x")))) Output Error: ! Join columns must be unique. x Problem at position 2. Code (expect_error(join_cols(xy, xy, by = c("x", NA)))) Output Error: ! Join columns must be not NA. x Problem at position 2. Code (expect_error(join_cols(xy, xy, by = c("aaa", "bbb")))) Output Error: ! Join columns must be present in data. x Problem with `aaa` and `bbb`. Code (expect_error(join_cols(xy, xy, by = "x", suffix = "x"))) Output Error: ! `suffix` must be a character vector of length 2. i `suffix` is a character vector of length 1. Code (expect_error(join_cols(xy, xy, by = "x", suffix = c("", NA)))) Output Error: ! `suffix` can't be NA. dplyr/tests/testthat/_snaps/distinct.md0000644000176200001440000000200514177154533020043 0ustar liggesusers# distinct gives a warning when selecting an unknown column (#3140) Code df <- tibble(g = c(1, 2), x = c(1, 2)) (expect_error(df %>% distinct(aa, x))) Output Error in `distinct()`: ! Must use existing variables. x `aa` not found in `.data`. Code (expect_error(df %>% distinct(aa, bb))) Output Error in `distinct()`: ! Must use existing variables. x `aa` not found in `.data`. x `bb` not found in `.data`. Code (expect_error(df %>% distinct(.data$aa))) Output Error in `distinct()`: ! Must use existing variables. x `aa` not found in `.data`. Code (expect_error(df %>% distinct(y = a + 1))) Output Error in `distinct()`: ! Problem adding computed columns. Caused by error in `mutate()`: ! Problem while computing `y = a + 1`. Caused by error: ! object 'a' not found dplyr/tests/testthat/_snaps/all-equal.md0000644000176200001440000000573714177154520020112 0ustar liggesusers# data frames not equal if missing row Code all_equal(mtcars, mtcars[-1, ]) Output [1] "Different number of rows." 
Code all_equal(iris, iris[-1, ]) Output [1] "Different number of rows." Code all_equal(df_all, df_all[-1, ]) Output [1] "Different number of rows." # data frames not equal if missing col Code all_equal(mtcars, mtcars[, -1]) Output Different number of columns: 11 vs 10. Code all_equal(iris, iris[, -1]) Output Different number of columns: 5 vs 4. Code all_equal(df_all, df_all[, -1]) Output Different number of columns: 7 vs 6. # factors equal only if levels equal Code all_equal(df1, df2) Output Different types for column `x`: factor<38051> vs factor. Code all_equal(df2, df1) Output Different types for column `x`: factor vs factor<38051>. # factor comparison requires strict equality of levels (#2440) Code all_equal(df1, df2) Output Different types for column `x`: factor<4d52a> vs factor<38051>. Code all_equal(df2, df1) Output Different types for column `x`: factor<38051> vs factor<4d52a>. # equality test fails when convert is FALSE and types don't match (#1484) Code all_equal(df1, df2, convert = FALSE) Output Different types for column `x`: character vs factor<4d52a>. # equality returns a message for convert = TRUE Code all_equal(df1, df2) Output Different types for column `x`: integer vs character. Code all_equal(df1, df2, convert = TRUE) Output Incompatible types for column `x`: integer vs character. # numeric and integer can be compared if convert = TRUE Code all_equal(df1, df2) Output Different types for column `x`: integer vs double. # returns vector for more than one difference (#1819) Code all_equal(tibble(a = 1, b = 2), tibble(a = 1L, b = 2L)) Output Different types for column `a`: double vs integer. Different types for column `b`: double vs integer. # ignore column order Code all_equal(tibble(a = 1, b = 2), tibble(b = 2, a = 1), ignore_col_order = FALSE) Output Same column names, but different order. Code all_equal(tibble(a = 1, b = 2), tibble(a = 1), ignore_col_order = FALSE) Output Different number of columns: 2 vs 1. 
# count() give meaningful errors Code (expect_error(union(tibble(a = 1), tibble(a = "1")))) Output Error in `union()`: ! `x` and `y` are not compatible. x Incompatible types for column `a`: double vs character. Code (expect_error(union(tibble(a = 1, b = 2), tibble(a = "1", b = "2")))) Output Error in `union()`: ! `x` and `y` are not compatible. x Incompatible types for column `a`: double vs character. x Incompatible types for column `b`: double vs character. dplyr/tests/testthat/_snaps/arrange.md0000644000176200001440000000223514177154521017643 0ustar liggesusers# arrange() gives meaningful errors Code (expect_error(tibble(x = 1, x = 1, .name_repair = "minimal") %>% arrange(x))) Output Error in `arrange()`: ! Problem with the implicit `transmute()` step. Caused by error in `transmute()`: ! Can't transform a data frame with duplicate names. Code (expect_error(tibble(x = 1) %>% arrange(y))) Output Error in `arrange()`: ! Problem with the implicit `transmute()` step. x Problem while computing `..1 = y`. Caused by error: ! object 'y' not found Code (expect_error(tibble(x = 1) %>% arrange(rep(x, 2)))) Output Error in `arrange()`: ! Problem with the implicit `transmute()` step. x Problem while computing `..1 = rep(x, 2)`. x `..1` must be size 1, not 2. # desc() inside arrange() checks the number of arguments (#5921) Code df <- data.frame(x = 1, y = 2) (expect_error(arrange(df, desc(x, y)))) Output Error in `arrange()`: ! `desc()` must be called with exactly one argument. dplyr/tests/testthat/_snaps/deprec-funs.md0000644000176200001440000000173314177154532020443 0ustar liggesusers# funs() is deprecated Code funs(fn = bar) Condition Warning: `funs()` was deprecated in dplyr 0.8.0. Please use a list of either functions or lambdas: # Simple named list: list(mean = mean, median = median) # Auto named with `tibble::lst()`: tibble::lst(mean, median) # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE)) Output $ fn: bar(.) 
# funs() give meaningful error messages Code (expect_error(funs(function(si) { mp[si] }))) Output Error in `funs()`: ! `function(si) { mp[si] }` must be a function name (quoted or unquoted) or an unquoted call, not `function`. Code (expect_error(funs(~ mp[.]))) Output Error in `funs()`: ! `~mp[.]` must be a function name (quoted or unquoted) or an unquoted call, not `~`. dplyr/tests/testthat/_snaps/group-by.md0000644000176200001440000000260014177154540017765 0ustar liggesusers# select(group_by(.)) implicitely adds grouping variables (#170) Code res <- mtcars %>% group_by(vs) %>% select(mpg) Message Adding missing grouping variables: `vs` # group_by works with zero-row data frames (#486) Code x <- select(dfg, a) Message Adding missing grouping variables: `g` # group_by() and ungroup() give meaningful error messages Code df <- tibble(x = 1, y = 2) (expect_error(df %>% group_by(unknown))) Output Error in `group_by()`: ! Must group by variables found in `.data`. x Column `unknown` is not found. Code (expect_error(df %>% ungroup(x))) Output Error in `ungroup()`: ! `...` must be empty. x Problematic argument: * ..1 = x i Did you forget to name an argument? Code (expect_error(df %>% group_by(x, y) %>% ungroup(z))) Output Error in `ungroup()`: ! Can't subset columns that don't exist. x Column `z` doesn't exist. Code (expect_error(df %>% group_by(z = a + 1))) Output Error in `group_by()`: ! Problem adding computed columns. Caused by error in `mutate()`: ! Problem while computing `z = a + 1`. Caused by error: ! object 'a' not found dplyr/tests/testthat/_snaps/sets.md0000644000176200001440000000262214177154551017205 0ustar liggesusers# set operation give useful error message. 
#903 Code alfa <- tibble(land = c("Sverige", "Norway", "Danmark", "Island", "GB"), data = rnorm( length(land))) beta <- tibble(land = c("Norge", "Danmark", "Island", "Storbritannien"), data2 = rnorm( length(land))) gamma <- tibble(land = 1:2, data = 1:2) (expect_error(intersect(alfa, beta))) Output Error in `intersect()`: ! `x` and `y` are not compatible. x Cols in `y` but not `x`: `data2`. x Cols in `x` but not `y`: `data`. Code (expect_error(intersect(alfa, 1))) Output Error in `intersect()`: ! `y` must be a data frame. Code (expect_error(intersect(alfa, gamma))) Output Error in `intersect()`: ! `x` and `y` are not compatible. x Incompatible types for column `land`: character vs integer. Code (expect_error(union(alfa, beta))) Output Error in `union()`: ! `x` and `y` are not compatible. x Cols in `y` but not `x`: `data2`. x Cols in `x` but not `y`: `data`. Code (expect_error(setdiff(alfa, beta))) Output Error in `setdiff()`: ! `x` and `y` are not compatible. x Cols in `y` but not `x`: `data2`. x Cols in `x` but not `y`: `data`. dplyr/tests/testthat/_snaps/deprec-dbi.md0000644000176200001440000000034614177154531020224 0ustar liggesusers# src_sqlite() gives meaningful error messages Code (expect_error(src_sqlite(":memory:"))) Output Error in `src_sqlite()`: ! `path` must already exist, unless `create` = TRUE. dplyr/tests/testthat/_snaps/bind.md0000644000176200001440000000606014177154522017141 0ustar liggesusers# bind_cols() repairs names Code bound <- bind_cols(df, df) Message New names: * a -> a...1 * b -> b...2 * a -> a...3 * b -> b...4 # bind_cols() handles unnamed list with name repair (#3402) Code df <- bind_cols(list(1, 2)) Message New names: * `` -> ...1 * `` -> ...2 # *_bind() give meaningful errors Code df1 <- tibble(x = 1:3) df2 <- tibble(x = 4:6) (expect_error(bind_rows(df1, df2, .id = 5))) Output Error in `bind_rows()`: ! `.id` must be a scalar string, not a double vector of length 1. 
Code ll <- list(1:5, env(a = 1)) (expect_error(bind_rows(ll))) Output Error in `bind_rows()`: ! Argument 1 must have names. Code ll <- list(tibble(a = 1:5), env(a = 1)) (expect_error(bind_rows(ll))) Output Error in `bind_rows()`: ! Argument 2 must be a data frame or a named atomic vector. Code df1 <- tibble(a = factor("a")) df2 <- tibble(a = 1L) df3 <- tibble(a = 1) (expect_error(bind_rows(df1, df2))) Output Error in `bind_rows()`: ! Can't combine `..1$a` > and `..2$a` . Code (expect_error(bind_rows(df1, df3))) Output Error in `bind_rows()`: ! Can't combine `..1$a` > and `..2$a` . Code df1 <- tibble(b = c(1, 2)) df2 <- tibble(b = c(1L, 2L)) df3 <- tibble(b = factor(c("A", "B"))) df4 <- tibble(b = c("C", "D")) (expect_error(bind_rows(df1, df3))) Output Error in `bind_rows()`: ! Can't combine `..1$b` and `..2$b` >. Code (expect_error(bind_rows(df1, df4))) Output Error in `bind_rows()`: ! Can't combine `..1$b` and `..2$b` . Code (expect_error(bind_rows(df2, df3))) Output Error in `bind_rows()`: ! Can't combine `..1$b` and `..2$b` >. Code (expect_error(bind_rows(df2, df4))) Output Error in `bind_rows()`: ! Can't combine `..1$b` and `..2$b` . Code # # unnamed vectors (expect_error(bind_rows(1:2))) Output Error in `bind_rows()`: ! Argument 1 must have names. Code # # incompatible size (expect_error(bind_cols(a = 1:2, mtcars))) Output Error in `bind_cols()`: ! Can't recycle `a` (size 2) to match `..2` (size 32). Code (expect_error(bind_cols(mtcars, a = 1:3))) Output Error in `bind_cols()`: ! Can't recycle `..1` (size 32) to match `a` (size 3). dplyr/tests/testthat/_snaps/if-else.md0000644000176200001440000000200714177154541017547 0ustar liggesusers# if_else() give meaningful errors Code (expect_error(if_else(1:10, 1, 2))) Output Error in `if_else()`: ! `condition` must be a logical vector, not an integer vector. Code (expect_error(if_else(1:3 < 2, 1:2, 1:3))) Output Error in `if_else()`: ! `true` must be length 3 (length of `condition`) or one, not 2. 
Code (expect_error(if_else(1:3 < 2, 1:3, 1:2))) Output Error in `if_else()`: ! `false` must be length 3 (length of `condition`) or one, not 2. Code (expect_error(if_else(1:3 < 2, 1, 1L))) Output Error in `if_else()`: ! `false` must be a double vector, not an integer vector. Code x <- factor("x") y <- ordered("x") (expect_error(if_else(1:3 < 2, x, y))) Output Error in `if_else()`: ! `false` must have class `factor`, not class `ordered/factor`. dplyr/tests/testthat/_snaps/conditions.md0000644000176200001440000001141514177154530020375 0ustar liggesusers# can hide expression in error messages Code mutate(mtcars, invisible(999 + "")) Condition Error in `mutate()`: ! Problem while computing `..1`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator Code summarise(mtcars, invisible(999 + "")) Condition Error in `summarise()`: ! Problem while computing `..1`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator Code filter(mtcars, invisible(999 + "")) Condition Error in `filter()`: ! Problem while computing `..1`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator Code arrange(mtcars, invisible(999 + "")) Condition Error in `arrange()`: ! Problem with the implicit `transmute()` step. x Problem while computing `..1`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator Code select(mtcars, invisible(999 + "")) Condition Error in `select()`: ! non-numeric argument to binary operator Code slice(mtcars, invisible(999 + "")) Condition Error in `slice()`: ! Problem while evaluating `..1`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator Code mutate(mtcars, var = invisible(999 + "")) Condition Error in `mutate()`: ! Problem while computing `var`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator Code summarise(mtcars, var = invisible(999 + "")) Condition Error in `summarise()`: ! Problem while computing `var`. Caused by error in `999 + ""`: ! 
non-numeric argument to binary operator Code filter(mtcars, var = invisible(999 + "")) Condition Error in `filter()`: ! We detected a named input. i This usually means that you've used `=` instead of `==`. i Did you mean `var == invisible(999 + "")`? Code arrange(mtcars, var = invisible(999 + "")) Condition Error in `arrange()`: ! Problem with the implicit `transmute()` step. x Problem while computing `..1`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator Code select(mtcars, var = invisible(999 + "")) Condition Error in `select()`: ! non-numeric argument to binary operator Code slice(mtcars, var = invisible(999 + "")) Condition Error in `slice()`: ! Problem while evaluating `var`. Caused by error in `999 + ""`: ! non-numeric argument to binary operator # can pass verb-level error call Code mutate(mtcars, 1 + "") Condition Error in `foo()`: ! Problem while computing `..1 = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code transmute(mtcars, 1 + "") Condition Error in `foo()`: ! Problem while computing `..1 = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code summarise(mtcars, 1 + "") Condition Error in `foo()`: ! Problem while computing `..1 = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code summarise(group_by(mtcars, cyl), 1 + "") Condition Error in `foo()`: ! Problem while computing `..1 = 1 + ""`. i The error occurred in group 1: cyl = 4. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code filter(mtcars, 1 + "") Condition Error in `foo()`: ! Problem while computing `..1 = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code arrange(mtcars, 1 + "") Condition Error in `foo()`: ! Problem with the implicit `transmute()` step. x Problem while computing `..1 = 1 + ""`. Caused by error in `1 + ""`: ! 
non-numeric argument to binary operator Code select(mtcars, 1 + "") Condition Error in `foo()`: ! non-numeric argument to binary operator Code slice(mtcars, 1 + "") Condition Error in `foo()`: ! Problem while evaluating `..1 = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator # can pass verb-level error call (example case) Code my_verb(mtcars, 1 + "", am) Condition Error in `my_verb()`: ! Problem while computing `.result = (1 + "") * am`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code my_verb(mtcars, cyl, c(am, vs)) Condition Error in `my_verb()`: ! Problem while computing `.result = cyl * c(am, vs)`. x `.result` must be size 32 or 1, not 64. dplyr/tests/testthat/_snaps/colwise-mutate.md0000644000176200001440000000265614177154525021201 0ustar liggesusers# selection works with grouped data frames (#2624) Code out <- mutate_if(gdf, is.factor, as.character) Message `mutate_if()` ignored the following grouping variables: * Column `Species` # colwise mutate gives meaningful error messages Code (expect_error(mutate_at(tibble(), "test", ~1))) Output Error in `mutate_at()`: ! Can't subset columns that don't exist. x Column `test` doesn't exist. Code tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) tbl <- group_by(tbl, gr1) (expect_error(summarise_at(tbl, vars(gr1), mean))) Output Error in `summarise_at()`: ! Can't subset columns that don't exist. x Column `gr1` doesn't exist. Code (expect_error(mutate_all(mtcars, length, 0, 0))) Output Error in `mutate()`: ! Problem while computing `mpg = .Primitive("length")(mpg, 0, 0)`. Caused by error: ! 3 arguments passed to 'length' which requires 1 Code (expect_error(mutate_all(mtcars, mean, na.rm = TRUE, na.rm = TRUE))) Output Error in `mutate()`: ! Problem while computing `mpg = (function (x, ...) ...`. Caused by error in `mean.default()`: ! 
formal argument "na.rm" matched by multiple actual arguments dplyr/tests/testthat/_snaps/count-tally.md0000644000176200001440000000100414177154531020471 0ustar liggesusers# can only explicitly chain together multiple tallies Code df <- data.frame(g = c(1, 1, 2, 2), n = 1:4) df %>% count(g, wt = n) Output g n 1 1 3 2 2 7 Code df %>% count(g, wt = n) %>% count(wt = n) Output n 1 10 Code df %>% count(n) Message Storing counts in `nn`, as `n` already present in input i Use `name = "new_name"` to pick a new name. Output n nn 1 1 1 2 2 1 3 3 1 4 4 1 dplyr/tests/testthat/_snaps/transmute.md0000644000176200001440000000113514177154556020254 0ustar liggesusers# transmute() error messages Code (expect_error(transmute(mtcars, cyl2 = cyl, .keep = "all"))) Output Error in `transmute()`: ! The `.keep` argument is not supported. Code (expect_error(transmute(mtcars, cyl2 = cyl, .before = disp))) Output Error in `transmute()`: ! The `.before` argument is not supported. Code (expect_error(transmute(mtcars, cyl2 = cyl, .after = disp))) Output Error in `transmute()`: ! The `.after` argument is not supported. dplyr/tests/testthat/_snaps/na-if.md0000644000176200001440000000055314177154544017224 0ustar liggesusers# na_if() gives meaningful errors Code (expect_error(na_if(1:3, 1:2))) Output Error in `na_if()`: ! `y` must be length 3 (same as `x`) or one, not 2. Code (expect_error(na_if(1, 1:2))) Output Error in `na_if()`: ! `y` must be length 1 (same as `x`), not 2. dplyr/tests/testthat/_snaps/rowwise.md0000644000176200001440000000461314177154547017735 0ustar liggesusers# rowwise has decent print method Code rf Output # A tibble: 5 x 1 # Rowwise: x x 1 1 2 2 3 3 4 4 5 5 # validate_rowwise_df() gives useful errors Code (expect_error(validate_rowwise_df(df1))) Output Error in `validate_rowwise_df()`: ! The `.rows` column must be a list of size 1, one-based integer vectors with the right value. Code (expect_error(validate_rowwise_df(df2))) Output Error in `validate_rowwise_df()`: ! 
The last column of the `groups` attribute must be called `.rows`. Code (expect_error(validate_rowwise_df(df3))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_rowwise_df(df4))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_rowwise_df(df7))) Output Error in `validate_rowwise_df()`: ! The `.rows` column must be a list of size 1, one-based integer vectors with the right value. Code (expect_error(attr(df8, "groups")$.rows <- 1:8)) Output Error: ! Assigned data `1:8` must be compatible with existing data. x Existing data has 10 rows. x Assigned data has 8 rows. i Only vectors of size 1 are recycled. Code (expect_error(validate_rowwise_df(df10))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_rowwise_df(df11))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(new_rowwise_df(tibble(x = 1:10), tibble(".rows" := list(1:5, -1L)))) ) Output Error in `new_rowwise_df()`: ! `group_data` must be a tibble without a `.rows` column. Code (expect_error(new_rowwise_df(tibble(x = 1:10), 1:10))) Output Error in `new_rowwise_df()`: ! `group_data` must be a tibble without a `.rows` column. dplyr/tests/testthat/_snaps/join-rows.md0000644000176200001440000000054214177154541020154 0ustar liggesusers# join_rows() gives meaningful error message on incompatible types Code (expect_error(join_rows(data.frame(x = 1), data.frame(x = factor("a"))))) Output Error: ! Can't join on `x$x` x `y$x` because of incompatible types. i `x$x` is of type >. i `y$x` is of type >>. 
dplyr/tests/testthat/_snaps/rows.md0000644000176200001440000000442514177154547017231 0ustar liggesusers# rows_delete() Code res <- rows_delete(data, tibble(a = 2:3, b = "b"), by = "a") Message Ignoring extra columns: b # rows_*() errors Code data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) (expect_error(rows_insert(data, tibble(a = 3, b = "z")))) Message Matching, by = "a" Output Error in `rows_insert()`: ! Attempting to insert duplicate rows. Code (expect_error(rows_insert(data[c(1, 1), ], tibble(a = 3)))) Message Matching, by = "a" Output Error in `rows_insert()`: ! `x` key values must be unique. Code (expect_error(rows_insert(data, tibble(a = 4, b = "z"), by = "e"))) Output Error in `rows_insert()`: ! All `by` columns must exist in `x`. Code (expect_error(rows_insert(data, tibble(d = 4)))) Message Matching, by = "d" Output Error in `rows_insert()`: ! All columns in `y` must exist in `x`. Code (expect_error(rows_update(data, tibble(a = 2:3, b = "z"), by = c("a", "b")))) Output Error in `rows_update()`: ! Attempting to update missing rows. Code (expect_error(rows_patch(data, tibble(a = 2:3, b = "z"), by = c("a", "b")))) Output Error in `rows_patch()`: ! Can't patch missing row. Code (expect_error(rows_delete(data, tibble(a = 2:4)))) Message Matching, by = "a" Output Error in `rows_delete()`: ! Can't delete missing row. Code (expect_error(rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b")))) Output Error in `rows_delete()`: ! Can't delete missing row. Code rows_delete(data, tibble(a = 2:3)) Message Matching, by = "a" Output # A tibble: 1 x 3 a b c 1 1 a 0.5 Code rows_delete(data, tibble(a = 2:3, b = "b")) Message Matching, by = "a" Ignoring extra columns: b Output # A tibble: 1 x 3 a b c 1 1 a 0.5 dplyr/tests/testthat/_snaps/colwise-filter.md0000644000176200001440000000073514177154524021162 0ustar liggesusers# colwise filter() give meaningful errors Code (expect_error(filter_if(mtcars, is_character, all_vars(. 
> 0)))) Output Error in `filter_if()`: ! `.predicate` must match at least one column. Code (expect_error(filter_all(mtcars, list(~ . > 0)))) Output Error in `filter_all()`: ! `.vars_predicate` must be a function or a call to `all_vars()` or `any_vars()`, not a list. dplyr/tests/testthat/_snaps/deprec-src-local.md0000644000176200001440000000131114177154533021340 0ustar liggesusers# src_df() is deprecated / errors Code (expect_error(src_df("base", new.env()))) Output Error in `src_local()`: ! Exactly one of `pkg` and `env` must be non-NULL, not 2. Code (expect_error(src_df())) Output Error in `src_local()`: ! Exactly one of `pkg` and `env` must be non-NULL, not 0. Code env <- new.env(parent = emptyenv()) env$x <- 1 src_env <- src_df(env = env) (expect_error(copy_to(src_env, tibble(x = 1), name = "x"))) Output Error in `copy_to()`: ! Object with `name` = `x` must not already exist, unless `overwrite` = TRUE. dplyr/tests/testthat/_snaps/lead-lag.md0000644000176200001440000000345314177154542017700 0ustar liggesusers# lead() / lag() give meaningful errors Code # # complicance of n argument (expect_error(lead(letters, -1))) Output Error in `lead()`: ! `n` must be a positive integer, not a double vector of length 1. Code (expect_error(lead(letters, "1"))) Output Error in `lead()`: ! `n` must be a positive integer, not a character vector of length 1. Code (expect_error(lag(letters, -1))) Output Error in `lag()`: ! `n` must be a positive integer, not a double vector of length 1. Code (expect_error(lag(letters, "1"))) Output Error in `lag()`: ! `n` must be a positive integer, not a character vector of length 1. Code # # ts (expect_error(lag(ts(1:10)))) Output Error in `lag()`: ! `x` must be a vector, not a ts object, do you want `stats::lag()`? Code # # incompatible default (expect_error(lag(c("1", "2", "3"), default = FALSE))) Output Error in `lag()`: ! Can't combine `default` and `x` . Code (expect_error(lead(c("1", "2", "3"), default = FALSE))) Output Error in `lead()`: ! 
Can't combine `default` and `x` . Code (expect_error(lag(c("1", "2", "3"), default = character()))) Output Error in `lag()`: ! `default` must be size 1, not size 0 Code (expect_error(lead(c("1", "2", "3"), default = character()))) Output Error in `lead()`: ! `default` must be size 1, not size 0 dplyr/tests/testthat/_snaps/coalesce.md0000644000176200001440000000067714177154523020014 0ustar liggesusers# coalesce() gives meaningful error messages Code (expect_error(coalesce(1:2, 1:3))) Output Error in `coalesce()`: ! Can't recycle `..1` (size 2) to match `..2` (size 3). Code (expect_error(coalesce(1:2, letters[1:2]))) Output Error in `coalesce()`: ! Can't combine `..1` and `..2` . dplyr/tests/testthat/_snaps/slice.md0000644000176200001440000003464514177154554017343 0ustar liggesusers# slice_sample() handles n= and prop= Code (expect_error(df %>% slice_sample(n = -1))) Output Error in `slice_sample()`: ! `n` must be positive. Code (expect_error(df %>% slice_sample(prop = -1))) Output Error in `slice_sample()`: ! `prop` must be positive. Code (expect_error(df %>% slice_sample(n = 4, replace = FALSE))) Output Error in `slice_sample()`: ! Problem while computing indices. Caused by error in `sample.int()`: ! cannot take a sample larger than the population when 'replace = FALSE' Code (expect_error(df %>% slice_sample(prop = 4, replace = FALSE))) Output Error in `slice_sample()`: ! Problem while computing indices. Caused by error in `sample.int()`: ! cannot take a sample larger than the population when 'replace = FALSE' # slice() gives meaningfull errors Code (expect_error(slice(df, matrix(c(1, 2), ncol = 2)))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Can't convert to . Cannot decrease dimensions. Code (expect_error(slice(gdf, matrix(c(1, 2), ncol = 2)))) Output Error in `slice()`: ! Problem while computing indices. i The error occurred in group 1: x = 1. Caused by error: ! Can't convert to . Cannot decrease dimensions. 
Code (expect_error(slice(df, "a"))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Invalid result of type . i Indices must be positive or negative integers. Code (expect_error(slice(gdf, "a"))) Output Error in `slice()`: ! Problem while computing indices. i The error occurred in group 1: x = 1. Caused by error: ! Invalid result of type . i Indices must be positive or negative integers. Code (expect_error(slice(df, c(1, -1)))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Indices must be all positive or all negative. i Got 1 positives, 1 negatives. Code (expect_error(slice(gdf, c(1, -1)))) Output Error in `slice()`: ! Problem while computing indices. i The error occurred in group 1: x = 1. Caused by error: ! Indices must be all positive or all negative. i Got 1 positives, 1 negatives. # slice_*() checks that `n=` is explicitly named Code (expect_error(slice_head(df, 5))) Output Error in `slice_head()`: ! `n` must be explicitly named. i Did you mean `slice_head(n = 5)`? Code (expect_error(slice_tail(df, 5))) Output Error in `slice_tail()`: ! `n` must be explicitly named. i Did you mean `slice_tail(n = 5)`? Code (expect_error(slice_min(df, x, 5))) Output Error in `slice_min()`: ! `n` must be explicitly named. i Did you mean `slice_min(n = 5)`? Code (expect_error(slice_max(df, x, 5))) Output Error in `slice_max()`: ! `n` must be explicitly named. i Did you mean `slice_max(n = 5)`? Code (expect_error(slice_sample(df, 5))) Output Error in `slice_sample()`: ! `n` must be explicitly named. i Did you mean `slice_sample(n = 5)`? # slice_*() checks that for empty `... Code (expect_error(slice_head(df, 5, 2))) Output Error in `slice_head()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code (expect_error(slice_tail(df, 5, 2))) Output Error in `slice_tail()`: ! `...` must be empty. 
x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code (expect_error(slice_min(df, x, 5, 2))) Output Error in `slice_min()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code (expect_error(slice_max(df, x, 5, 2))) Output Error in `slice_max()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code (expect_error(slice_sample(df, 5, 2))) Output Error in `slice_sample()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? --- Code (expect_error(slice_head(df, n = 5, 2))) Output Error in `slice_head()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_tail(df, n = 5, 2))) Output Error in `slice_tail()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_min(df, x, n = 5, 2))) Output Error in `slice_min()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_max(df, x, n = 5, 2))) Output Error in `slice_max()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_sample(df, n = 5, 2))) Output Error in `slice_sample()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? --- Code (expect_error(slice_head(df, prop = 0.5, 2))) Output Error in `slice_head()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_tail(df, prop = 0.5, 2))) Output Error in `slice_tail()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_min(df, x, prop = 0.5, 2))) Output Error in `slice_min()`: ! `...` must be empty. 
x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_max(df, x, prop = 0.5, 2))) Output Error in `slice_max()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? Code (expect_error(slice_sample(df, prop = 0.5, 2))) Output Error in `slice_sample()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? # slice_*() checks for constant n= and prop= Code (expect_error(slice_head(df, n = n()))) Output Error in `slice_head()`: ! `n` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_head(df, prop = n()))) Output Error in `slice_head()`: ! `prop` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_tail(df, n = n()))) Output Error in `slice_tail()`: ! `n` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_tail(df, prop = n()))) Output Error in `slice_tail()`: ! `prop` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_min(df, x, n = n()))) Output Error in `slice_min()`: ! `n` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_min(df, x, prop = n()))) Output Error in `slice_min()`: ! `prop` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_max(df, x, n = n()))) Output Error in `slice_max()`: ! `n` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_max(df, x, prop = n()))) Output Error in `slice_max()`: ! `prop` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_sample(df, n = n()))) Output Error in `slice_sample()`: ! `n` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. 
Code (expect_error(slice_sample(df, prop = n()))) Output Error in `slice_sample()`: ! `prop` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. # slice_min/max() check size of `order_by=` (#5922) Code (expect_error(slice_min(data.frame(x = 1:10), 1:6))) Output Error in `slice_min()`: ! Problem while computing indices. Caused by error: ! `order_by` must have size 10, not size 6. Code (expect_error(slice_max(data.frame(x = 1:10), 1:6))) Output Error in `slice_max()`: ! Problem while computing indices. Caused by error: ! `order_by` must have size 10, not size 6. # slice_sample() check size of `weight_by=` (#5922) Code (expect_error(slice_sample(data.frame(x = 1:10), n = 2, weight_by = 1:6))) Output Error in `slice_sample()`: ! Problem while computing indices. Caused by error: ! `weight_by` must have size 10, not size 6. # rename errors with invalid grouped data frame (#640) Code df <- tibble(x = 1:3) (expect_error(slice(mtcars, 1, 1 + ""))) Output Error in `slice()`: ! Problem while evaluating `..2 = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code (expect_error(group_by(mtcars, cyl) %>% slice(1, 1 + ""))) Output Error in `slice()`: ! Problem while evaluating `..2 = 1 + ""`. i The error occurred in group 1: cyl = 4. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code (expect_error(slice(df, TRUE))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Invalid result of type . i Indices must be positive or negative integers. Code (expect_error(slice(df, FALSE))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Invalid result of type . i Indices must be positive or negative integers. Code (expect_error(slice(mtcars, 1, 1, ""))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Can't combine `..1` and `..3` . 
Code (expect_error(group_by(mtcars, cyl) %>% slice(1, 1, ""))) Output Error in `slice()`: ! Problem while computing indices. i The error occurred in group 1: cyl = 4. Caused by error: ! Can't combine `..1` and `..3` . Code (expect_error(mtcars %>% slice(c(-1, 2)))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Indices must be all positive or all negative. i Got 1 positives, 1 negatives. Code (expect_error(mtcars %>% slice(c(2:3, -1)))) Output Error in `slice()`: ! Problem while computing indices. Caused by error: ! Indices must be all positive or all negative. i Got 2 positives, 1 negatives. Code (expect_error(slice_head(data.frame(), n = 1, prop = 1))) Output Error in `slice_head()`: ! Must supply `n` or `prop`, but not both. Code (expect_error(slice_tail(data.frame(), n = "a"))) Output Error in `slice_tail()`: ! `n` must be a single number. Code (expect_error(slide_head(data.frame(), prop = "a"))) Output Code (expect_error(slice_head(data.frame(), n = n()))) Output Error in `slice_head()`: ! `n` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_head(data.frame(), prop = n()))) Output Error in `slice_head()`: ! `prop` must be a constant. Caused by error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(slice_head(data.frame(), n = NA))) Output Error in `slice_head()`: ! `n` must be a single number. Code (expect_error(slice_head(data.frame(), prop = NA))) Output Error in `slice_head()`: ! `prop` must be a single number. dplyr/tests/testthat/_snaps/order-by.md0000644000176200001440000000067014177154544017755 0ustar liggesusers# order_by() gives useful error messages Code (expect_error(order_by(mtcars, 10))) Output Error in `order_by()`: ! `call` must be a function call, not a double vector. Code (expect_error(order_by(mtcars, cyl))) Output Error in `order_by()`: ! `call` must be a function call, not a symbol. i Did you mean `arrange(mtcars, cyl)`? 
dplyr/tests/testthat/_snaps/top-n.md0000644000176200001440000000062514177154556017272 0ustar liggesusers# top_n() quotes n Code res1 <- top_n(mtcars, n() * 0.5) Message Selecting by carb --- Code res2 <- top_n(mtcars, 16) Message Selecting by carb # top_frac() is a shorthand for top_n(n()*) Code res1 <- top_n(mtcars, n() * 0.5) Message Selecting by carb --- Code res2 <- top_frac(mtcars, 0.5) Message Selecting by carb dplyr/tests/testthat/_snaps/recode.md0000644000176200001440000000172014177154546017472 0ustar liggesusers# recode() gives meaningful error messages Code (expect_error(recode(factor("a"), a = 5, .missing = 10))) Output Error in `recode()`: ! `.missing` is not supported for factors. Code (expect_error(recode("a", b = 5, "c"))) Output Error in `recode()`: ! Argument 3 must be named. Code (expect_error(recode(factor("a"), b = 5, "c"))) Output Error in `recode()`: ! Argument 3 must be named. Code (expect_error(recode(1:5))) Output Error in `recode()`: ! No replacements provided. Code (expect_error(recode("a"))) Output Error in `recode()`: ! No replacements provided. Code (expect_error(recode(factor("a")))) Output Error in `recode()`: ! No replacements provided. dplyr/tests/testthat/_snaps/deprec-do.md0000644000176200001440000000155114177154532020070 0ustar liggesusers# do() gives meaningful error messages Code (expect_error(df %>% do(head, tail))) Output Error in `do()`: ! Can only supply one unnamed argument, not 2. Code (expect_error(df %>% ungroup() %>% do(1))) Output Error in `do()`: ! Result must be a data frame, not numeric. Code (expect_error(df %>% do(1))) Output Error in `do()`: ! Results 1, 2, 3 must be data frames, not numeric. Code (expect_error(df %>% do("a"))) Output Error in `do()`: ! Results 1, 2, 3 must be data frames, not character. Code (expect_error(df %>% do(x = 1, 2))) Output Error in `do()`: ! Arguments must either be all named or all unnamed. 
dplyr/tests/testthat/_snaps/nth-value.md0000644000176200001440000000030214177154544020125 0ustar liggesusers# nth() gives meaningful error message (#5466) Code (expect_error(nth(1:10, "x"))) Output Error in `nth()`: ! `n` must be a single integer. dplyr/tests/testthat/_snaps/select.md0000644000176200001440000000442214177154551017506 0ustar liggesusers# grouping variables preserved with a message, unless already selected (#1511, #5841) Code res <- select(df, x) Message Adding missing grouping variables: `g` --- Code expect_equal(df %>% select(a = c), tibble(b = 2, a = 3) %>% group_by(b)) Message Adding missing grouping variables: `b` Code expect_equal(df %>% select(b = c), tibble(a = 1, b = 3) %>% group_by(a)) Message Adding missing grouping variables: `a` # non-syntactic grouping variable is preserved (#1138) Code df <- tibble(`a b` = 1L) %>% group_by(`a b`) %>% select() Message Adding missing grouping variables: `a b` # select() provides informative errors Code (expect_error(select(mtcars, 1 + ""))) Output Error in `select()`: ! non-numeric argument to binary operator # dplyr_col_select() aborts when `[` implementation is broken Code (expect_error(select(df1, 1:2))) Output Error in `select()`: ! Can't subset columns that don't exist. x Location 2 doesn't exist. i There are only 1 column. Code (expect_error(select(df1, 0))) Output Error in `select()`: ! Can't reconstruct data frame. x The `[` method for class must return a data frame. i It returned a . --- Code (expect_error(select(df1, 2))) Output Error in `select()`: ! Can't subset columns that don't exist. x Location 2 doesn't exist. i There are only 1 column. Code (expect_error(select(df1, 1))) Output Error in `select()`: ! Can't reconstruct data frame. x The `[` method for class must return a data frame. i It returned a . Code (expect_error(select(df2, 1))) Output Error in `select()`: ! Can't reconstruct data frame. x The `[` method for class must return a data frame with 1 column. 
i It returned a of 0 columns. dplyr/tests/testthat/_snaps/grouped-df.md0000644000176200001440000000631214177154540020261 0ustar liggesusers# validate_grouped_df() gives useful errors Code (expect_error(validate_grouped_df(df1))) Output Error in `validate_grouped_df()`: ! The `.rows` column must be list of one-based integer vectors. Code (expect_error(group_data(df1))) Output Error in `group_data()`: ! `.data` must be a valid object. Caused by error in `validate_grouped_df()`: ! The `.rows` column must be list of one-based integer vectors. Code (expect_error(validate_grouped_df(df2))) Output Error in `validate_grouped_df()`: ! The last column of the `groups` attribute must be called `.rows`. Code (expect_error(validate_grouped_df(df2))) Output Error in `validate_grouped_df()`: ! The last column of the `groups` attribute must be called `.rows`. Code (expect_error(validate_grouped_df(df3))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_grouped_df(df4))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_grouped_df(df5))) Output Error in `validate_grouped_df()`: ! Corrupt `grouped_df` using old (< 0.8.0) format. i Strip off old grouping with `ungroup()`. Code (expect_error(validate_grouped_df(df6, check_bounds = TRUE))) Output Error in `validate_grouped_df()`: ! out of bounds indices. Code (expect_error(validate_grouped_df(df7, check_bounds = TRUE))) Output Error in `validate_grouped_df()`: ! out of bounds indices. Code (expect_error(validate_grouped_df(df8, check_bounds = TRUE))) Output Error in `validate_grouped_df()`: ! out of bounds indices. Code (expect_error(validate_grouped_df(df10))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_grouped_df(df11))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. 
Code (expect_error(new_grouped_df(tibble(x = 1:10), tibble(other = list(1:2))))) Output Error in `new_grouped_df()`: ! The last column of `groups` must be called ".rows". Code (expect_error(new_grouped_df(10))) Output Error in `new_grouped_df()`: ! `x` must be a data frame. # helper gives meaningful error messages Code (expect_error(grouped_df(data.frame(x = 1), "y", FALSE))) Output Error in `compute_groups()`: ! `vars` missing from `data`: `y`. Code (expect_error(grouped_df(data.frame(x = 1), 1))) Output Error in `grouped_df()`: ! `vars` must be a character vector. dplyr/tests/testthat/_snaps/window.md0000644000176200001440000000033114177154556017536 0ustar liggesusers# order_by() give meaningful errors Code (expect_error(order_by(NULL, 1L))) Output Error in `order_by()`: ! `call` must be a function call, not an integer vector. dplyr/tests/testthat/_snaps/colwise-select.md0000644000176200001440000000333514177154526021155 0ustar liggesusers# colwise select() / rename() give meaningful errors Code df <- tibble(x = 0L, y = 0.5, z = 1) (expect_error(df %>% rename_all())) Output Error in `rename_all()`: ! `.funs` must specify a renaming function. Code (expect_error(df %>% rename_if(is_integerish))) Output Error in `rename_if()`: ! `.funs` must specify a renaming function. Code (expect_error(df %>% rename_at(vars(x:y)))) Output Error in `rename_at()`: ! `.funs` must specify a renaming function. Code (expect_error(df %>% rename_all(list(tolower, toupper)))) Output Error in `rename_all()`: ! `.funs` must contain one renaming function, not 2. Code (expect_error(df %>% select_all(list(tolower, toupper)))) Output Error in `select_all()`: ! `.funs` must contain one renaming function, not 2. Code (expect_error(df %>% select_if(function(.x) 1))) Output Error in `select_if()`: ! `.p` is invalid. x `.p` should return a single logical. i `.p` returns a for column `x`. Code (expect_error(df %>% select_if(function(.x) c(TRUE, TRUE)))) Output Error in `select_if()`: ! 
`.p` is invalid. x `.p` should return a single logical. i `.p` returns a size 2 for column `x`. Code (expect_error(data.frame() %>% select_all(.funs = 42))) Output Error in `select_all()`: ! `.funs` must be a one sided formula, a function, or a function name. dplyr/tests/testthat/_snaps/context.md0000644000176200001440000000213114177154530017703 0ustar liggesusers# give useful error messages when not applicable Code (expect_error(n())) Output Error in `n()`: ! Must be used inside dplyr verbs. Code (expect_error(cur_data())) Output Error in `cur_data()`: ! Must be used inside dplyr verbs. Code (expect_error(cur_data_all())) Output Error in `cur_data_all()`: ! Must be used inside dplyr verbs. Code (expect_error(cur_column())) Output Error in `cur_column()`: ! Must be used inside `across()`. Code (expect_error(cur_group())) Output Error in `cur_group()`: ! Must be used inside dplyr verbs. Code (expect_error(cur_group_id())) Output Error in `cur_group_id()`: ! Must be used inside dplyr verbs. Code (expect_error(cur_group_rows())) Output Error in `cur_group_rows()`: ! Must be used inside dplyr verbs. dplyr/tests/testthat/_snaps/deprec-combine.md0000644000176200001440000000120214177154531021072 0ustar liggesusers# combine() is deprecated Code combine() Condition Warning: `combine()` was deprecated in dplyr 1.0.0. Please use `vctrs::vec_c()` instead. Output logical(0) # combine() gives meaningful error messages Code (expect_error(combine("a", 1))) Output Error in `combine()`: ! Can't combine `..1` and `..2` . Code (expect_error(combine(factor("a"), 1L))) Output Error in `combine()`: ! Can't combine `..1` > and `..2` . dplyr/tests/testthat/_snaps/case-when.md0000644000176200001440000000232314177154523020076 0ustar liggesusers# case_when() give meaningful errors Code (expect_error(case_when(c(TRUE, FALSE) ~ 1:3, c(FALSE, TRUE) ~ 1:2))) Output Error in `case_when()`: ! `c(TRUE, FALSE) ~ 1:3` must be length 2 or one, not 3. 
Code (expect_error(case_when(c(TRUE, FALSE) ~ 1, c(FALSE, TRUE, FALSE) ~ 2, c(FALSE, TRUE, FALSE, NA) ~ 3))) Output Error in `case_when()`: ! `c(FALSE, TRUE, FALSE) ~ 2`, `c(FALSE, TRUE, FALSE, NA) ~ 3` must be length 2 or one, not 3, 4. Code (expect_error(case_when(50 ~ 1:3))) Output Error in `case_when()`: ! LHS of case 1 (`50`) must be a logical vector, not a double vector. Code (expect_error(case_when(paste(50)))) Output Error in `case_when()`: ! Case 1 (`paste(50)`) must be a two-sided formula, not a character vector. Code (expect_error(case_when())) Output Error in `case_when()`: ! No cases provided. Code (expect_error(case_when(~ 1:2))) Output Error in `case_when()`: ! Formulas must be two-sided. dplyr/tests/testthat/_snaps/across.md0000644000176200001440000001570114177154520017517 0ustar liggesusers# across() gives meaningful messages Code (expect_error(tibble(x = 1) %>% summarise(across(where(is.numeric), 42)))) Output Error in `summarise()`: ! Problem while computing `..1 = across(where(is.numeric), 42)`. Caused by error in `across()`: ! `.fns` must be NULL, a function, a formula, or a list of functions/formulas. Code (expect_error(tibble(x = 1) %>% summarise(across(y, mean)))) Output Error in `summarise()`: ! Problem while computing `..1 = across(y, mean)`. Caused by error in `across()`: ! Can't subset columns that don't exist. x Column `y` doesn't exist. Code (expect_error(tibble(x = 1) %>% summarise(res = across(where(is.numeric), 42)))) Output Error in `summarise()`: ! Problem while computing `res = across(where(is.numeric), 42)`. Caused by error in `across()`: ! `.fns` must be NULL, a function, a formula, or a list of functions/formulas. Code (expect_error(tibble(x = 1) %>% summarise(z = across(y, mean)))) Output Error in `summarise()`: ! Problem while computing `z = across(y, mean)`. Caused by error in `across()`: ! Can't subset columns that don't exist. x Column `y` doesn't exist. 
Code (expect_error(tibble(x = 1) %>% summarise(res = sum(if_any(where(is.numeric), 42))))) Output Error in `summarise()`: ! Problem while computing `res = sum(if_any(where(is.numeric), 42))`. Caused by error in `if_any()`: ! `.fns` must be NULL, a function, a formula, or a list of functions/formulas. Code (expect_error(tibble(x = 1) %>% summarise(res = sum(if_all(~ mean(.x)))))) Output Error in `summarise()`: ! Problem while computing `res = sum(if_all(~mean(.x)))`. Caused by error in `if_all()`: ! Must supply a column selection. i You most likely meant: `if_all(everything(), ~mean(.x))`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(tibble(x = 1) %>% summarise(res = sum(if_any(~ mean(.x)))))) Output Error in `summarise()`: ! Problem while computing `res = sum(if_any(~mean(.x)))`. Caused by error in `if_any()`: ! Must supply a column selection. i You most likely meant: `if_any(everything(), ~mean(.x))`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(across())) Output Error in `across()`: ! Must be used inside dplyr verbs. Code (expect_error(c_across())) Output Error in `c_across()`: ! Must be used inside dplyr verbs. Code error_fn <- (function(.) { if (all(. > 10)) { rlang::abort("too small", call = call("error_fn")) } else { 42 } }) (expect_error(tibble(x = 1:10, y = 11:20) %>% summarise(across(everything(), error_fn)))) Output Error in `summarise()`: ! Problem while computing `..1 = across(everything(), error_fn)`. Caused by error in `across()`: ! Problem while computing column `y`. Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1:10, y = 11:20) %>% mutate(across(everything(), error_fn)))) Output Error in `mutate()`: ! Problem while computing `..1 = across(everything(), error_fn)`. Caused by error in `across()`: ! Problem while computing column `y`. 
Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1:10, y = 11:20) %>% summarise(force(across(everything(), error_fn))))) Output Error in `summarise()`: ! Problem while computing `..1 = force(across(everything(), error_fn))`. Caused by error in `across()`: ! Problem while computing column `y`. Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1:10, y = 11:20) %>% mutate(force(across(everything(), error_fn))))) Output Error in `mutate()`: ! Problem while computing `..1 = force(across(everything(), error_fn))`. Caused by error in `across()`: ! Problem while computing column `y`. Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1) %>% summarise(across(everything(), list(f = mean, f = mean))))) Output Error in `summarise()`: ! Problem while computing `..1 = across(everything(), list(f = mean, f = mean))`. Caused by error in `across()`: ! Names must be unique. x These names are duplicated: * "x_f" at locations 1 and 2. # if_any() and if_all() aborts when predicate mistakingly used in .cols= (#5732) Code (expect_error(filter(df, if_any(~ .x > 5)))) Output Error in `filter()`: ! Problem while expanding `..1 = if_any(~.x > 5)`. Caused by error in `if_any()`: ! Must supply a column selection. i You most likely meant: `if_any(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(filter(df, if_all(~ .x > 5)))) Output Error in `filter()`: ! Problem while expanding `..1 = if_all(~.x > 5)`. Caused by error in `if_all()`: ! Must supply a column selection. i You most likely meant: `if_all(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(filter(df, !if_any(~ .x > 5)))) Output Error in `filter()`: ! Problem while computing `..1 = !if_any(~.x > 5)`. Caused by error in `if_any()`: ! 
Must supply a column selection. i You most likely meant: `if_any(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(filter(df, !if_all(~ .x > 5)))) Output Error in `filter()`: ! Problem while computing `..1 = !if_all(~.x > 5)`. Caused by error in `if_all()`: ! Must supply a column selection. i You most likely meant: `if_all(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. dplyr/tests/testthat/_snaps/group_map.md0000644000176200001440000000175114177154536020225 0ustar liggesusers# group_map() give meaningful errors Code (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~ data.frame(cyl = 19))) ) Output Error in `group_modify()`: ! The returned data frame cannot contain the original grouping variables: cyl. Code (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~10))) Output Error in `group_modify()`: ! The result of `.f` must be a data frame. Code (expect_error(iris %>% group_by(Species) %>% group_modify(head1))) Output Error in `group_modify()`: ! `.f` must accept at least two arguments. i You can use `...` to absorb unused components. Code (expect_error(iris %>% group_by(Species) %>% group_map(head1))) Output Error in `group_map()`: ! `.f` must accept at least two arguments. i You can use `...` to absorb unused components. dplyr/tests/testthat/_snaps/colwise.md0000644000176200001440000000123214177154526017672 0ustar liggesusers# colwise utils gives meaningful error messages Code (expect_error(tbl_at_vars(iris, raw(3)))) Output Error: ! `.vars` must be a character/numeric vector or a `vars()` object, not a raw vector. Code (expect_error(tbl_if_vars(iris, list(identity, force), environment()))) Output Error: ! `.predicate` must have length 1, not 2. 
Code .funs <- as_fun_list(list(identity, force), caller_env()) (expect_error(tbl_if_vars(iris, .funs, environment()))) Output Error: ! `.predicate` must have length 1, not 2. dplyr/tests/testthat/test-rowwise.r0000644000176200001440000000706114156331606017256 0ustar liggesuserstest_that("rowwise status preserved by major verbs", { rf <- rowwise(tibble(x = 1:5, y = 5:1), "x") out <- arrange(rf, y) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- filter(rf, x < 3) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- mutate(rf, x = x + 1) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- rename(rf, X = x) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "X") out <- select(rf, "x") expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- slice(rf, c(1, 1)) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") # Except for summarise out <- summarise(rf, z = mean(x, y)) expect_s3_class(out, "grouped_df") expect_equal(group_vars(out), "x") }) test_that("rowwise nature preserved by subsetting ops", { rf <- rowwise(tibble(x = 1:5, y = 1:5), "x") out <- rf[1] expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out[, "z"] <- 5:1 expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") names(out) <- toupper(names(out)) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "X") }) test_that("except when it should be removed", { rf <- rowwise(tibble(x = 1:5, y = 1:5), "x") expect_equal(out <- rf[, 1, drop = TRUE], rf$x) }) test_that("rowwise has decent print method", { rf <- rowwise(tibble(x = 1:5), "x") expect_snapshot(rf) }) test_that("rowwise captures group_vars", { df <- group_by(tibble(g = 1:2, x = 1:2), g) rw <- rowwise(df) expect_equal(group_vars(rw), "g") # but can't regroup expect_error(rowwise(df, x), "Can't re-group") }) test_that("can re-rowwise", { rf1 <- rowwise(tibble(x = 1:5, y = 1:5), "x") 
rf2 <- rowwise(rf1, y) expect_equal(group_vars(rf2), "y") }) test_that("new_rowwise_df() does not require `group_data=`", { df <- new_rowwise_df(data.frame(x = 1:2)) expect_s3_class(df, "rowwise_df") expect_equal(attr(df, "groups"), tibble(".rows" := vctrs::list_of(1L, 2L))) }) test_that("new_rowwise_df() can add class and attributes (#5918)", { df <- new_rowwise_df(tibble(x = 1:4), tibble(), class = "custom_rowwise_df", a = "b") expect_s3_class(df, "custom_rowwise_df") expect_equal(attr(df, "a"), "b") }) test_that("validate_rowwise_df() gives useful errors", { df1 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g) groups <- attr(df1, "groups") groups[[2]] <- 4:1 attr(df1, "groups") <- groups df2 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g) groups <- attr(df2, "groups") names(groups) <- c("g", "not.rows") attr(df2, "groups") <- groups df3 <- df2 attr(df3, "groups") <- tibble() df4 <- df3 attr(df4, "groups") <- NA df7 <- rowwise(tibble(x = 1:10)) attr(df7, "groups")$.rows <- 11:20 df8 <- rowwise(tibble(x = 1:10)) df10 <- df7 attr(df10, "groups") <- tibble() df11 <- df7 attr(df11, "groups") <- NULL expect_snapshot({ (expect_error(validate_rowwise_df(df1))) (expect_error(validate_rowwise_df(df2))) (expect_error(validate_rowwise_df(df3))) (expect_error(validate_rowwise_df(df4))) (expect_error(validate_rowwise_df(df7))) (expect_error(attr(df8, "groups")$.rows <- 1:8)) (expect_error(validate_rowwise_df(df10))) (expect_error(validate_rowwise_df(df11))) (expect_error( new_rowwise_df( tibble(x = 1:10), tibble(".rows" := list(1:5, -1L)) ) )) (expect_error( new_rowwise_df( tibble(x = 1:10), 1:10 ) )) }) }) dplyr/tests/testthat/test-groups-with.R0000644000176200001440000000116214121112104017761 0ustar liggesuserstest_that("restores original class", { df <- data.frame(x = 1:2) gf <- group_by(df, x) expect_s3_class(with_groups(df, x, mutate), "data.frame", exact = TRUE) expect_s3_class(with_groups(gf, x, mutate), "grouped_df") }) test_that(".groups = NULL 
ungroups", { gf <- group_by(tibble(x = 1:2), x) out <- gf %>% with_groups(NULL, mutate, y = mean(x)) expect_equal(out$y, c(1.5, 1.5)) }) test_that(".groups is defused with context", { local_fn <- identity expect_identical( with_groups(mtcars, local_fn(2), mutate, disp = disp / sd(disp)), with_groups(mtcars, 2, mutate, disp = disp / sd(disp)) ) }) dplyr/tests/testthat/test-group-by.r0000644000176200001440000004450714176714175017341 0ustar liggesusersdf <- data.frame(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) test_that("group_by() with .add = TRUE adds groups", { add_groups1 <- function(tbl) group_by(tbl, x, y, .add = TRUE) add_groups2 <- function(tbl) group_by(group_by(tbl, x, .add = TRUE), y, .add = TRUE) expect_equal(group_vars(add_groups1(df)), c("x", "y")) expect_equal(group_vars(add_groups2(df)), c("x", "y")) }) test_that("group_by(, ) computes the expressions on the ungrouped data frame (#5938)", { df <- data.frame( x = 1:4, g = rep(1:2, each = 2) ) count <- 0 out <- df %>% group_by(g) %>% group_by(big = { count <<- count + 1; x > mean(x) }) expect_equal(out$big, c(FALSE, FALSE, TRUE, TRUE)) expect_equal(count, 1L) expect_equal(group_vars(out), c("big")) count <- 0 out <- df %>% group_by(g) %>% group_by(big = { count <<- count + 1; x > mean(x) }, .add = TRUE) expect_equal(out$big, c(FALSE, FALSE, TRUE, TRUE)) expect_equal(count, 1L) expect_equal(group_vars(out), c("g", "big")) count <- 0 out <- df %>% group_by(g) %>% mutate(big = { count <<- count + 1; x > mean(x)}) %>% group_by(big) expect_equal(out$big, c(FALSE, TRUE, FALSE, TRUE)) expect_equal(count, 2L) expect_equal(group_vars(out), c("big")) count <- 0 out <- df %>% group_by(g) %>% mutate(big = { count <<- count + 1; x > mean(x)}) %>% group_by(big, .add = TRUE) expect_equal(out$big, c(FALSE, TRUE, FALSE, TRUE)) expect_equal(count, 2L) expect_equal(group_vars(out), c("g", "big")) }) test_that("add = TRUE is deprecated", { rlang::local_options(lifecycle_verbosity = "warning") df <- tibble(x = 1, y = 
2) expect_warning( out <- df %>% group_by(x) %>% group_by(y, add = TRUE), "deprecated" ) expect_equal(group_vars(out), c("x", "y")) }) test_that("joins preserve grouping", { g <- group_by(df, x) expect_equal(group_vars(inner_join(g, g, by = c("x", "y"))), "x") expect_equal(group_vars(left_join(g, g, by = c("x", "y"))), "x") expect_equal(group_vars(semi_join(g, g, by = c("x", "y"))), "x") expect_equal(group_vars(anti_join(g, g, by = c("x", "y"))), "x") }) test_that("constructors drops groups", { df <- data.frame(x = 1:3) %>% group_by(x) expect_equal(group_vars(as_tibble(df)), character()) }) test_that("grouping by constant adds column (#410)", { grouped <- group_by(mtcars, "cyl") %>% summarise(foo = n()) expect_equal(names(grouped), c('"cyl"', "foo")) expect_equal(nrow(grouped), 1L) }) test_that(".dots is soft deprecated", { rlang::local_options(lifecycle_verbosity = "warning") df <- tibble(x = 1, y = 1) expect_warning(gf <- group_by(df, .dots = "x"), "deprecated") }) # Test full range of variable types -------------------------------------------- test_that("local group_by preserves variable types", { df_var <- tibble( l = c(T, F), i = 1:2, d = Sys.Date() + 1:2, f = factor(letters[1:2]), num = 1:2 + 0.5, t = Sys.time() + 1:2, c = letters[1:2] ) attr(df_var$t, "tzone") <- "" for (var in names(df_var)) { expected <- tibble(!!var := sort(unique(df_var[[var]])), n = 1L) summarised <- df_var %>% group_by(!!sym(var)) %>% summarise(n = n()) expect_equal(summarised, expected) } }) test_that("mutate does not lose variables (#144)", { df <- tibble(a = rep(1:4, 2), b = rep(1:4, each = 2), x = runif(8)) by_ab <- group_by(df, a, b) by_a <- summarise(by_ab, x = sum(x), .groups = "drop_last") by_a_quartile <- group_by(by_a, quartile = ntile(x, 4)) expect_equal(names(by_a_quartile), c("a", "b", "x", "quartile")) }) test_that("group_by uses shallow copy", { m1 <- group_by(mtcars, cyl) expect_equal(group_vars(mtcars), character()) expect_equal( lobstr::obj_addrs(mtcars), 
lobstr::obj_addrs(m1) ) }) test_that("group_by orders by groups. #242", { df <- data.frame(a = sample(1:10, 3000, replace = TRUE)) %>% group_by(a) expect_equal(group_data(df)$a, 1:10) df <- data.frame(a = sample(letters[1:10], 3000, replace = TRUE), stringsAsFactors = FALSE) %>% group_by(a) expect_equal(group_data(df)$a, letters[1:10]) df <- data.frame(a = sample(sqrt(1:10), 3000, replace = TRUE)) %>% group_by(a) expect_equal(group_data(df)$a, sqrt(1:10)) }) test_that("Can group_by() a POSIXlt", { skip_if_not_installed("tibble", "2.99.99") df <- tibble(x = 1:5, times = as.POSIXlt(seq.Date(Sys.Date(), length.out = 5, by = "day"))) g <- group_by(df, times) expect_equal(nrow(group_data(g)), 5L) }) test_that("group_by() handles list as grouping variables", { df <- tibble(x = 1:3, y = list(1:2, 1:3, 1:2)) gdata <- group_data(group_by(df, y)) expect_equal(nrow(gdata), 2L) expect_equal(gdata$y, list(1:2, 1:3)) expect_equal(gdata$.rows, list_of(c(1L, 3L), 2L)) }) test_that("select(group_by(.)) implicitely adds grouping variables (#170)", { expect_snapshot( res <- mtcars %>% group_by(vs) %>% select(mpg) ) expect_equal(names(res), c("vs", "mpg")) }) test_that("group_by only creates one group for NA (#401)", { x <- as.numeric(c(NA, NA, NA, 10:1, 10:1)) w <- c(20, 30, 40, 1:10, 1:10) * 10 n_distinct(x) # 11 OK res <- data.frame(x = x, w = w) %>% group_by(x) %>% summarise(n = n()) expect_equal(nrow(res), 11L) }) test_that("there can be 0 groups (#486)", { data <- tibble(a = numeric(0), g = character(0)) %>% group_by(g) expect_equal(length(data$a), 0L) expect_equal(length(data$g), 0L) expect_equal(map_int(group_rows(data), length), integer(0)) }) test_that("group_by works with zero-row data frames (#486)", { df <- data.frame(a = numeric(0), b = numeric(0), g = character(0)) dfg <- group_by(df, g, .drop = FALSE) expect_equal(dim(dfg), c(0, 3)) expect_equal(group_vars(dfg), "g") expect_equal(group_size(dfg), integer(0)) x <- summarise(dfg, n = n()) expect_equal(dim(x), c(0, 2)) 
expect_equal(group_vars(x), character()) x <- mutate(dfg, c = b + 1) expect_equal(dim(x), c(0, 4)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) x <- filter(dfg, a == 100) expect_equal(dim(x), c(0, 3)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) x <- arrange(dfg, a, g) expect_equal(dim(x), c(0, 3)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) expect_snapshot( x <- select(dfg, a) # Only select 'a' column; should result in 'g' and 'a' ) expect_equal(dim(x), c(0, 2)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) }) test_that("[ on grouped_df preserves grouping if subset includes grouping vars", { df <- tibble(x = 1:5, ` ` = 6:10) by_x <- df %>% group_by(x) expect_equal(by_x %>% groups(), by_x %>% `[`(1:2) %>% groups()) # non-syntactic name by_ns <- df %>% group_by(` `) expect_equal(by_ns %>% groups(), by_ns %>% `[`(1:2) %>% groups()) }) test_that("[ on grouped_df drops grouping if subset doesn't include grouping vars", { by_cyl <- mtcars %>% group_by(cyl) no_cyl <- by_cyl %>% `[`(c(1, 3)) expect_equal(group_vars(no_cyl), character()) expect_s3_class(no_cyl, "tbl_df") }) test_that("group_by works after arrange (#959)", { df <- tibble(Log = c(1, 2, 1, 2, 1, 2), Time = c(10, 1, 3, 0, 15, 11)) res <- df %>% arrange(Time) %>% group_by(Log) %>% mutate(Diff = Time - lag(Time)) expect_true(all(is.na(res$Diff[c(1, 3)]))) expect_equal(res$Diff[c(2, 4, 5, 6)], c(1, 7, 10, 5)) }) test_that("group_by keeps attributes", { d <- data.frame(x = structure(1:10, foo = "bar")) gd <- group_by(d) expect_equal(attr(gd$x, "foo"), "bar") }) test_that("ungroup.rowwise_df gives a tbl_df (#936)", { res <- mtcars %>% rowwise() %>% ungroup() %>% class() expect_equal(res, c("tbl_df", "tbl", "data.frame")) }) test_that(paste0("group_by handles encodings for native strings (#1507)"), { local_non_utf8_encoding() special <- get_native_lang_string() df <- data.frame(x = 1:3, Eng = 2:4) for 
(names_converter in c(enc2native, enc2utf8)) { for (dots_converter in c(enc2native, enc2utf8)) { names(df) <- names_converter(c(special, "Eng")) res <- group_by(df, !!!syms(dots_converter(special))) expect_equal(names(res), names(df)) expect_equal(group_vars(res), special) } } for (names_converter in c(enc2native, enc2utf8)) { names(df) <- names_converter(c(special, "Eng")) res <- group_by(df, !!!special) expect_equal(names(res), c(names(df), deparse(special))) expect_equal(groups(res), list(as.name(enc2native(deparse(special))))) } }) test_that("group_by handles raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(ungroup(group_by(df, a)), df) expect_identical(ungroup(group_by(df, b)), df) }) test_that("rowwise handles raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_s3_class(rowwise(df), "rowwise_df") }) test_that("group_by() names pronouns correctly (#2686)", { expect_named(group_by(tibble(x = 1), .data$x), "x") expect_named(group_by(tibble(x = 1), .data[["x"]]), "x") }) test_that("group_by() does not affect input data (#3028)", { x <- data.frame(old1 = c(1, 2, 3), old2 = c(4, 5, 6)) %>% group_by(old1) y <- x %>% select(new1 = old1, new2 = old2) expect_identical(groups(x), syms(quote(old1))) }) test_that("group_by() does not mutate for nothing when using the .data pronoun (#2752, #3533)", { expect_identical( iris %>% group_by(Species) %>% group_by(.data$Species), iris %>% group_by(Species) ) expect_identical( iris %>% group_by(Species) %>% group_by(.data[["Species"]]), iris %>% group_by(Species) ) df <- tibble(x = 1:5) attr(df, "y") <- 1 expect_equal( df %>% group_by(.data$x) %>% attr("y"), 1 ) expect_equal( df %>% group_by(.data[["x"]]) %>% attr("y"), 1 ) }) test_that("tbl_sum gets the right number of groups", { res <- data.frame(x=c(1,1,2,2)) %>% group_by(x) %>% tbl_sum() expect_equal(res, c("A tibble" = "4 x 1", "Groups" = "x [2]")) }) test_that("group_by ignores empty quosures (3780)", { empty <- quo() 
expect_equal(group_by(mtcars, cyl), group_by(mtcars, cyl, !!empty)) }) # Zero groups --------------------------------------------------- test_that("mutate handles grouped tibble with 0 groups (#3935)", { df <- tibble(x=integer()) %>% group_by(x) res <- mutate(df, y = mean(x), z = +mean(x), n = n()) expect_equal(names(res), c("x", "y", "z", "n")) expect_equal(nrow(res), 0L) expect_equal(res$y, double()) expect_equal(res$z, double()) expect_equal(res$n, integer()) }) test_that("summarise handles grouped tibble with 0 groups (#3935)", { df <- tibble(x=integer()) %>% group_by(x) res <- summarise(df, y = mean(x), z = +mean(x), n = n()) expect_equal(names(res), c("x", "y", "z", "n")) expect_equal(nrow(res), 0L) expect_equal(res$y, double()) expect_equal(res$n, integer()) expect_equal(res$z, double()) }) test_that("filter handles grouped tibble with 0 groups (#3935)", { df <- tibble(x=integer()) %>% group_by(x) res <- filter(df, x > 3L) expect_identical(df, res) }) test_that("select handles grouped tibble with 0 groups (#3935)", { df <- tibble(x=integer()) %>% group_by(x) res <- select(df, x) expect_identical(df, res) }) test_that("arrange handles grouped tibble with 0 groups (#3935)", { df <- tibble(x=integer()) %>% group_by(x) res <- arrange(df, x) expect_identical(df, res) }) test_that("group_by() with empty spec produces a grouped data frame with 0 grouping variables", { gdata <- group_data(group_by(iris)) expect_equal(names(gdata), ".rows") expect_equal(gdata$.rows, list_of(1:nrow(iris))) gdata <- group_data(group_by(iris, !!!list())) expect_equal(names(gdata), ".rows") expect_equal(gdata$.rows, list_of(1:nrow(iris))) }) # .drop = TRUE --------------------------------------------------- test_that("group_by(.drop = TRUE) drops empty groups (4061)", { res <- iris %>% filter(Species == "setosa") %>% group_by(Species, .drop = TRUE) expect_identical( group_data(res), structure( tibble(Species = factor("setosa", levels = levels(iris$Species)), .rows := list_of(1:50)), 
.drop = TRUE ) ) expect_true(group_by_drop_default(res)) }) test_that("grouped data frames remember their .drop (#4061)", { res <- iris %>% filter(Species == "setosa") %>% group_by(Species, .drop = TRUE) res2 <- res %>% filter(Sepal.Length > 5) expect_true(group_by_drop_default(res2)) res3 <- res %>% filter(Sepal.Length > 5, .preserve = FALSE) expect_true(group_by_drop_default(res3)) res4 <- res3 %>% group_by(Species) expect_true(group_by_drop_default(res4)) expect_equal(nrow(group_data(res4)), 1L) }) test_that("grouped data frames remember their .drop = FALSE (#4337)", { res <- iris %>% filter(Species == "setosa") %>% group_by(Species, .drop = FALSE) expect_false(group_by_drop_default(res)) res2 <- res %>% group_by(Species) expect_false(group_by_drop_default(res2)) }) test_that("group_by(.drop = FALSE) preserve ordered factors (#5455)", { df <- tibble(x = ordered("x")) drop <- df %>% group_by(x) %>% group_data() nodrop <- df %>% group_by(x, .drop = FALSE) %>% group_data() expect_equal(is.ordered(drop$x), is.ordered(nodrop$x)) expect_true(is.ordered(nodrop$x)) }) test_that("summarise maintains the .drop attribute (#4061)", { df <- tibble( f1 = factor("a", levels = c("a", "b", "c")), f2 = factor("d", levels = c("d", "e", "f", "g")), x = 42 ) res <- df %>% group_by(f1, f2, .drop = TRUE) expect_equal(n_groups(res), 1L) res2 <- summarise(res, x = sum(x), .groups = "drop_last") expect_equal(n_groups(res2), 1L) expect_true(group_by_drop_default(res2)) }) test_that("joins maintain the .drop attribute (#4061)", { df1 <- group_by(tibble( f1 = factor(c("a", "b"), levels = c("a", "b", "c")), x = 42:43 ), f1, .drop = TRUE) df2 <- group_by(tibble( f1 = factor(c("a"), levels = c("a", "b", "c")), y = 1 ), f1, .drop = TRUE) res <- left_join(df1, df2, by = "f1") expect_equal(n_groups(res), 2L) df2 <- group_by(tibble( f1 = factor(c("a", "c"), levels = c("a", "b", "c")), y = 1:2 ), f1, .drop = TRUE) res <- full_join(df1, df2, by = "f1") expect_equal(n_groups(res), 3L) }) 
# Tail of test-group_by: .drop propagation, auto-splicing, and error messages.
# Fixed typo in the first test description: "origonal" -> "original".
test_that("group_by(add = TRUE) sets .drop if the original data was .drop", {
  d <- tibble(
    f1 = factor("b", levels = c("a", "b", "c")),
    f2 = factor("g", levels = c("e", "f", "g")),
    x = 48
  )
  res <- group_by(group_by(d, f1, .drop = TRUE), f2, .add = TRUE)
  expect_equal(n_groups(res), 1L)
  expect_true(group_by_drop_default(res))
})

test_that("group_by_drop_default() is forgiving about corrupt grouped df (#4306)", {
  # A grouped_df class stamped on without proper groups metadata should still
  # report the default drop behaviour rather than erroring.
  df <- tibble(x = 1:2, y = 1:2) %>%
    structure(class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
  expect_true(group_by_drop_default(df))
})

test_that("group_by() puts NA groups last in STRSXP (#4227)", {
  res <- tibble(x = c("apple", NA, "banana"), y = 1:3) %>%
    group_by(x) %>%
    group_data()
  # NA sorts after the non-missing group keys.
  expect_identical(res$x, c("apple", "banana", NA_character_))
  expect_identical(res$.rows, list_of(1L, 3L, 2L))
})

test_that("group_by() does not create arbitrary NA groups for factors when drop = TRUE (#4460)", {
  # expect_warning(..., NA) asserts that NO warning is raised.
  res <- expect_warning(group_data(group_by(iris, Species)[0, ]), NA)
  expect_equal(nrow(res), 0L)

  res <- expect_warning(group_data(group_by(iris[0, ], Species)), NA)
  expect_equal(nrow(res), 0L)
})

test_that("group_by() can handle auto splicing in the mutate() step", {
  expect_identical(
    iris %>% group_by(Species),
    iris %>% group_by(data.frame(Species = Species))
  )
  expect_identical(
    iris %>% group_by(Species),
    iris %>% group_by(across(Species))
  )
  expect_identical(
    iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width),
    iris %>% group_by(across(starts_with("Sepal"), round))
  )

  # Empty selections leave the data ungrouped.
  df <- tibble(x = c(1, 2), y = c(1, 2))
  expect_identical(df %>% group_by(across(character())), df)
  expect_identical(df %>% group_by(across(NULL)), df)
  expect_identical(df %>% group_by(x) %>% group_by(across(character())), df)
  expect_identical(df %>% group_by(x) %>% group_by(across(NULL)), df)
})

test_that("group_by() can combine usual spec and auto-splicing-mutate() step", {
  expect_identical(
    iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width, Species),
    iris %>% group_by(across(starts_with("Sepal"), round), Species)
  )
  expect_identical(
    iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Species, Sepal.Length, Sepal.Width),
    iris %>% group_by(Species, across(starts_with("Sepal"), round))
  )
})

# mutate() semantics

test_that("group_by() has mutate() semantics (#4984)", {
  expect_equal(
    tibble(a = 1, b = 2) %>% group_by(c = a * b, d = c + 1),
    tibble(a = 1, b = 2) %>% mutate(c = a * b, d = c + 1) %>% group_by(c, d)
  )
})

test_that("implicit mutate() operates on ungrouped data (#5598)", {
  vars <- tibble(x = c(1, 2), y = c(3, 4), z = c(5, 6)) %>%
    dplyr::group_by(y) %>%
    dplyr::group_by(across(any_of(c("y", "z")))) %>%
    dplyr::group_vars()
  expect_equal(vars, c("y", "z"))
})

test_that("grouped_df() does not break row.names (#5745)", {
  groups <- compute_groups(data.frame(x = 1:10), "x")
  # type = 0 returns the compact row-names representation c(NA, -n).
  expect_equal(.row_names_info(groups, type = 0), c(NA, -10L))
})

test_that("group_by() keeps attributes unrelated to the grouping (#5760)", {
  d <- data.frame(x = 453, y = 642)
  attr(d, "foo") <- "bar"

  d2 <- group_by(d, x)
  expect_equal(attr(d2, "foo"), "bar")

  d3 <- group_by(d2, y, .add = TRUE)
  expect_equal(attr(d2, "foo"), "bar")

  d4 <- group_by(d2, y2 = y * 2, .add = TRUE)
  expect_equal(attr(d2, "foo"), "bar")
})

test_that("group_by() works with quosures (tidyverse/lubridate#959)", {
  ignore <- function(...) NA

  f <- function(var) {
    tibble(x = 1) %>% group_by(g = ignore({{ var }}))
  }
  g <- function(var) {
    # This used to fail with the extra argument
    tibble(x = 1) %>% group_by(g = ignore({{ var }}, 1))
  }

  expect_equal(f(), tibble(x = 1, g = NA) %>% group_by(g))
  expect_equal(g(), tibble(x = 1, g = NA) %>% group_by(g))
})

test_that("group_by() propagates caller env", {
  expect_caller_env(group_by(mtcars, sig_caller_env()))
})

# Errors ------------------------------------------------------------------

test_that("group_by() and ungroup() give meaningful error messages", {
  expect_snapshot({
    df <- tibble(x = 1, y = 2)
    (expect_error(df %>% group_by(unknown)))
    (expect_error(df %>% ungroup(x)))
    (expect_error(df %>% group_by(x, y) %>% ungroup(z)))
    (expect_error(df %>% group_by(z = a + 1)))
  })
})
dplyr/tests/testthat/test-group_nest.R0000644000176200001440000000366414121112104017665 0ustar liggesusers
test_that("group_nest() works", {
  grouped <- group_by(starwars, species, homeworld)
  gdata <- group_data(grouped)

  res <- group_nest(starwars, species, homeworld)
  expect_type(pull(res), "list")
  # The nested list column's ptype excludes the grouping variables.
  expect_equal(attr(pull(res), "ptype"), vec_slice(select(starwars, -species, -homeworld), 0L))
  expect_true(all_equal(
    as_tibble(select(res, -last_col())),
    as_tibble(select(gdata, -last_col()))
  ))

  nested <- bind_rows(!!!res$data)
  expect_equal(names(nested), setdiff(names(starwars), c("species", "homeworld")))
})

test_that("group_nest() can keep the grouping variables", {
  grouped <- group_by(starwars, species, homeworld)
  gdata <- group_data(grouped)

  res <- group_nest(starwars, species, homeworld, keep = TRUE)
  nested <- bind_rows(!!!res$data)
  expect_equal(names(nested), names(starwars))
})

test_that("group_nest() works on grouped data frames", {
  grouped <- group_by(starwars, species, homeworld)
  gdata <- group_data(grouped)

  res <- group_nest(grouped)
  expect_type(pull(res), "list")
  expect_true(all_equal(
    select(res, -last_col()),
    select(gdata, -last_col())
  ))
  expect_equal(names(bind_rows(!!!res$data)), setdiff(names(starwars), c("species", "homeworld")))

  res <- group_nest(grouped, keep = TRUE)
  expect_type(pull(res), "list")
  expect_equal(attr(pull(res), "ptype"), vec_slice(starwars, 0L))
  expect_true(all_equal(select(res, -last_col()), select(gdata, -last_col())))
  expect_equal(names(bind_rows(!!!res$data)), names(starwars))
})

test_that("group_nest.grouped_df() warns about ...", {
  expect_warning(group_nest(group_by(mtcars, cyl), cyl))
})

test_that("group_nest() works if no grouping column", {
  res <- group_nest(iris)
  expect_equal(res$data, list(iris))
  expect_equal(names(res), "data")
})

test_that("group_nest() respects .drop", {
  # With .drop = TRUE, unused factor levels do not produce empty groups.
  nested <- tibble(f = factor("b", levels = c("a", "b", "c")), x = 1, y = 2) %>%
    group_nest(f, .drop = TRUE)
  expect_equal(nrow(nested), 1L)
})
dplyr/tests/testthat/test-distinct.R0000644000176200001440000001110014151641776017335 0ustar liggesusers
test_that("distinct equivalent to local unique when keep_all is TRUE", {
  df <- data.frame(
    x = c(1, 1, 1, 1),
    y = c(1, 1, 2, 2),
    z = c(1, 2, 1, 2)
  )
  expect_equal(distinct(df), unique(df))
})

test_that("distinct for single column works as expected (#1937)", {
  df <- tibble(
    x = c(1, 1, 1, 1),
    y = c(1, 1, 2, 2),
    z = c(1, 2, 1, 2)
  )
  expect_equal(distinct(df, x, .keep_all = FALSE), unique(df["x"]))
  expect_equal(distinct(df, y, .keep_all = FALSE), unique(df["y"]))
})

test_that("distinct works for 0-sized columns (#1437)", {
  df <- tibble(x = 1:10) %>% select(-x)
  ddf <- distinct(df)
  expect_equal(ncol(ddf), 0L)
})

test_that("if no variables specified, uses all", {
  df <- tibble(x = c(1, 1), y = c(2, 2))
  expect_equal(distinct(df), tibble(x = 1, y = 2))
})

test_that("distinct keeps only specified cols", {
  df <- tibble(x = c(1, 1, 1), y = c(1, 1, 1))
  expect_equal(df %>% distinct(x), tibble(x = 1))
})

test_that("unless .keep_all = TRUE", {
  df <- tibble(x = c(1, 1, 1), y = 3:1)
  expect_equal(df %>% distinct(x), tibble(x = 1))
  # .keep_all retains the first row's values for the other columns.
  expect_equal(df %>% distinct(x, .keep_all = TRUE), tibble(x = 1, y = 3L))
})
test_that("distinct doesn't duplicate columns", { df <- tibble(a = 1:3, b = 4:6) expect_named(df %>% distinct(a, a), "a") expect_named(df %>% group_by(a) %>% distinct(a), "a") }) test_that("grouped distinct always includes group cols", { df <- tibble(g = c(1, 2), x = c(1, 2)) out <- df %>% group_by(g) %>% distinct(x) expect_named(out, c("g", "x")) }) test_that("empty grouped distinct equivalent to empty ungrouped", { df <- tibble(g = c(1, 2), x = c(1, 2)) df1 <- df %>% distinct() %>% group_by(g) df2 <- df %>% group_by(g) %>% distinct() expect_equal(df1, df2) }) test_that("distinct on a new, mutated variable is equivalent to mutate followed by distinct", { df <- tibble(g = c(1, 2), x = c(1, 2)) df1 <- df %>% distinct(aa = g * 2) df2 <- df %>% mutate(aa = g * 2) %>% distinct(aa) expect_equal(df1, df2) }) test_that("distinct on a new, copied variable is equivalent to mutate followed by distinct (#3234)", { df <- tibble(g = c(1, 2), x = c(1, 2)) df1 <- df %>% distinct(aa = g) df2 <- df %>% mutate(aa = g) %>% distinct(aa) expect_equal(df1, df2) }) test_that("distinct on a dataframe or tibble with columns of type list throws an error", { df <- tibble( a = c("1", "1", "2", "2", "3", "3"), b = list("A") ) df2 <- data.frame(x = 1:5, y = I(list(1:3, 2:4, 3:5, 4:6, 5:7))) expect_identical(df2 %>% distinct(), df2) expect_identical(df %>% distinct(), df %>% slice(c(1, 3, 5))) }) test_that("distinct handles 0 columns edge case (#2954)", { d <- select(data.frame(x= c(1, 1)), one_of(character(0))) res <- distinct(d) expect_equal(nrow(res), 1L) expect_equal(nrow(distinct(tibble())), 0L) }) test_that("distinct preserves order of the input variables (#3195)",{ d <- data.frame(x = 1:2, y = 3:4) expect_equal(names(distinct(d, y, x)), c("x", "y")) }) test_that("distinct() understands both NA variants (#4516)", { df <- data.frame(col_a = c(1, NA, NA)) df$col_a <- df$col_a+0 df$col_a[2] <- NA_real_ expect_equal(nrow(distinct(df)), 2L) df_1 <- data.frame(col_a = c(1, NA)) df_2 <- 
data.frame(col_a = c(1, NA)) df_1$col_a <- df_1$col_a+0 df_2$col_a <- df_2$col_a+0 df_1$col_a[2] <- NA expect_equal(nrow(setdiff(df_1, df_2)), 0L) }) test_that("distinct() handles auto splicing", { expect_equal( iris %>% distinct(Species), iris %>% distinct(data.frame(Species=Species)) ) expect_equal( iris %>% distinct(Species), iris %>% distinct(across(Species)) ) expect_equal( iris %>% mutate(across(starts_with("Sepal"), round)) %>% distinct(Sepal.Length, Sepal.Width), iris %>% distinct(across(starts_with("Sepal"), round)) ) }) test_that("distinct preserves grouping", { gf <- group_by(tibble(x = c(1, 1, 2, 2), y = x), x) i <- count_regroups(out <- distinct(gf)) expect_equal(i, 0) expect_equal(group_vars(out), "x") i <- count_regroups(out <- distinct(gf, x = x + 2)) expect_equal(i, 1) expect_equal(group_vars(out), "x") }) test_that("distinct() propagates caller env", { expect_caller_env(distinct(mtcars, sig_caller_env())) }) # Errors ------------------------------------------------------------------ test_that("distinct gives a warning when selecting an unknown column (#3140)", { expect_snapshot({ df <- tibble(g = c(1, 2), x = c(1, 2)) (expect_error(df %>% distinct(aa, x))) (expect_error(df %>% distinct(aa, bb))) (expect_error(df %>% distinct(.data$aa))) (expect_error(df %>% distinct(y = a + 1))) }) }) dplyr/tests/testthat/test-deprec-src-local.r0000644000176200001440000000157414151641776020710 0ustar liggesuserstest_that("src_tbls() includes all tbls (#4326)", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( src_tbls(src_df(env = env(. = iris))), "." 
) }) test_that("src_local only overwrites if overwrite = TRUE", { withr::local_options(lifecycle_verbosity = "quiet") env <- new.env(parent = emptyenv()) env$x <- 1 src_env <- src_df(env = env) df <- tibble(x = 1) copy_to(src_env, df, name = "x", overwrite = TRUE) expect_equal(env$x, df) }) test_that("src_df() is deprecated / errors", { withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ # src_local errs with pkg/env (expect_error(src_df("base", new.env()))) (expect_error(src_df())) env <- new.env(parent = emptyenv()) env$x <- 1 src_env <- src_df(env = env) (expect_error( copy_to(src_env, tibble(x = 1), name = "x") )) }) }) dplyr/tests/testthat/test-utils.R0000644000176200001440000000050014164534554016654 0ustar liggesuserstest_that("quo_is_variable_reference handles .data",{ expect_true(quo_is_variable_reference(quo(x))) expect_true(quo_is_variable_reference(quo(.data$x))) expect_true(quo_is_variable_reference(quo(.data[["x"]]))) quo <- new_quosure(quote(.data[[identity("x")]])) expect_false(quo_is_variable_reference(quo)) }) dplyr/tests/testthat/test-rename.R0000644000176200001440000000361414151641776016775 0ustar liggesuserstest_that("rename() handles data pronoun", { expect_identical(rename(tibble(x = 1), y = .data$x), tibble(y = 1)) }) test_that("arguments to rename() don't match vars_rename() arguments (#2861)", { df <- tibble(a = 1) expect_identical(rename(df, var = a), tibble(var = 1)) expect_identical(rename(group_by(df, a), var = a), group_by(tibble(var = 1), var)) expect_identical(rename(df, strict = a), tibble(strict = 1)) expect_identical(rename(group_by(df, a), strict = a), group_by(tibble(strict = 1), strict)) }) test_that("rename() to UTF-8 column names", { skip_if_not(l10n_info()$"UTF-8") df <- tibble(a = 1) %>% rename("\u5e78" := a) expect_equal(colnames(df), "\u5e78") }) test_that("can rename() with strings and character vectors", { vars <- c(foo = "cyl", bar = "am") expect_identical(rename(mtcars, !!!vars), rename(mtcars, foo = 
cyl, bar = am)) expect_identical(rename(mtcars, !!vars), rename(mtcars, foo = cyl, bar = am)) }) test_that("rename preserves grouping", { gf <- group_by(tibble(g = 1:3, x = 3:1), g) i <- count_regroups(out <- rename(gf, h = g)) expect_equal(i, 0) expect_equal(group_vars(out), "h") }) test_that("can rename with duplicate columns", { df <- tibble(x = 1, x = 2, y = 1, .name_repair = "minimal") expect_named(df %>% rename(x2 = 2), c("x", "x2", "y")) }) # rename_with ------------------------------------------------------------- test_that("can select columns", { df <- tibble(x = 1, y = 2) expect_named(df %>% rename_with(toupper, 1), c("X", "y")) df <- tibble(x = 1, y = 2) expect_named(df %>% rename_with(toupper, x), c("X", "y")) }) test_that("passes ... along", { df <- tibble(x = 1, y = 2) expect_named(df %>% rename_with(gsub, 1, pattern = "x", replacement = "X"), c("X", "y")) }) test_that("can't create duplicated names", { df <- tibble(x = 1, y = 2) expect_error(df %>% rename_with(~ "X"), class = "vctrs_error_names") }) dplyr/tests/testthat/test-summarise.r0000644000176200001440000002450214151641776017572 0ustar liggesuserstest_that("can use freshly create variables (#138)", { df <- tibble(x = 1:10) out <- summarise(df, y = mean(x), z = y + 1) expect_equal(out$y, 5.5) expect_equal(out$z, 6.5) }) test_that("inputs are recycled", { expect_equal( tibble() %>% summarise(x = 1, y = 1:3, z = 1), tibble(x = 1, y = 1:3, z = 1) ) gf <- group_by(tibble(a = 1:2), a) expect_equal( gf %>% summarise(x = 1, y = 1:3, z = 1), tibble(a = rep(1:2, each = 3), x = 1, y = c(1:3, 1:3), z = 1) %>% group_by(a) ) expect_equal( gf %>% summarise(x = seq_len(a), y = 1), tibble(a = c(1L, 2L, 2L), x = c(1L, 1L, 2L), y = 1) %>% group_by(a) ) }) test_that("works with empty data frames", { # 0 rows df <- tibble(x = integer()) expect_equal(summarise(df), tibble(.rows = 1)) expect_equal(summarise(df, n = n(), sum = sum(x)), tibble(n = 0, sum = 0)) # 0 cols df <- tibble(.rows = 10) 
expect_equal(summarise(df), tibble(.rows = 1)) expect_equal(summarise(df, n = n()), tibble(n = 10)) }) test_that("works with grouped empty data frames", { df <- tibble(x = integer()) expect_equal( df %>% group_by(x) %>% summarise(y = 1L), tibble(x = integer(), y = integer()) ) expect_equal( df %>% rowwise(x) %>% summarise(y = 1L), group_by(tibble(x = integer(), y = integer()), x) ) }) test_that("no expressions yields grouping data", { df <- tibble(x = 1:2, y = 1:2) gf <- group_by(df, x) expect_equal(summarise(df), tibble(.rows = 1)) expect_equal(summarise(gf), tibble(x = 1:2)) expect_equal(summarise(df, !!!list()), tibble(.rows = 1)) expect_equal(summarise(gf, !!!list()), tibble(x = 1:2)) }) test_that("preserved class, but not attributes", { df <- structure( data.frame(x = 1:10, g1 = rep(1:2, each = 5), g2 = rep(1:5, 2)), meta = "this is important" ) out <- df %>% summarise(n = n()) expect_s3_class(out, "data.frame", exact = TRUE) expect_equal(attr(out, "res"), NULL) out <- df %>% group_by(g1) %>% summarise(n = n()) # expect_s3_class(out, "data.frame", exact = TRUE) expect_equal(attr(out, "res"), NULL) }) test_that("works with unquoted values", { df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) expect_equal(summarise(df, out = !!1), tibble(out = 1)) expect_equal(summarise(df, out = !!quo(1)), tibble(out = 1)) expect_equal(summarise(df, out = !!(1:2)), tibble(out = 1:2)) }) test_that("formulas are evaluated in the right environment (#3019)", { out <- mtcars %>% summarise(fn = list(rlang::as_function(~ list(~foo, environment())))) out <- out$fn[[1]]() expect_identical(environment(out[[1]]), out[[2]]) }) test_that("data frame results with 0 columns are ignored (#5084)", { df1 <- tibble(x = 1:2) expect_equal(df1 %>% group_by(x) %>% summarise(data.frame()), df1) expect_equal(df1 %>% group_by(x) %>% summarise(data.frame(), y = 65), mutate(df1, y = 65)) expect_equal(df1 %>% group_by(x) %>% summarise(y = 65, data.frame()), mutate(df1, y = 65)) df2 <- tibble(x = 1:2, y = 3:4) 
expect_equal(df2 %>% group_by(x) %>% summarise(data.frame()), df1) expect_equal(df2 %>% group_by(x) %>% summarise(data.frame(), z = 98), mutate(df1, z = 98)) expect_equal(df2 %>% group_by(x) %>% summarise(z = 98, data.frame()), mutate(df1, z = 98)) }) # grouping ---------------------------------------------------------------- test_that("peels off a single layer of grouping", { df <- tibble(x = rep(1:4, each = 4), y = rep(1:2, each = 8), z = runif(16)) gf <- df %>% group_by(x, y) expect_equal(group_vars(summarise(gf)), "x") expect_equal(group_vars(summarise(summarise(gf))), character()) }) test_that("correctly reconstructs groups", { d <- tibble(x = 1:4, g1 = rep(1:2, 2), g2 = 1:4) %>% group_by(g1, g2) %>% summarise(x = x + 1) expect_equal(group_rows(d), list_of(1:2, 3:4)) }) test_that("can modify grouping variables", { df <- tibble(a = c(1, 2, 1, 2), b = c(1, 1, 2, 2)) gf <- group_by(df, a, b) i <- count_regroups(out <- summarise(gf, a = a + 1)) expect_equal(i, 1) expect_equal(out$a, c(2, 2, 3, 3)) }) # vector types ---------------------------------------------------------- test_that("summarise allows names (#2675)", { data <- tibble(a = 1:3) %>% summarise(b = c("1" = a[[1]])) expect_equal(names(data$b), "1") data <- tibble(a = 1:3) %>% rowwise() %>% summarise(b = setNames(nm = a)) expect_equal(names(data$b), c("1", "2", "3")) data <- tibble(a = c(1, 1, 2)) %>% group_by(a) %>% summarise(b = setNames(nm = a[[1]])) expect_equal(names(data$b), c("1", "2")) res <- data.frame(x = c(1:3), y = letters[1:3]) %>% group_by(y) %>% summarise( a = length(x), b = quantile(x, 0.5) ) expect_equal(res$b, c("50%" = 1, "50%" = 2, "50%" = 3)) }) test_that("summarise handles list output columns (#832)", { df <- tibble(x = 1:10, g = rep(1:2, each = 5)) res <- df %>% group_by(g) %>% summarise(y = list(x)) expect_equal(res$y[[1]], 1:5) # preserving names d <- tibble(x = rep(1:3, 1:3), y = 1:6, names = letters[1:6]) res <- d %>% group_by(x) %>% summarise(y = list(setNames(y, names))) 
expect_equal(names(res$y[[1]]), letters[[1]]) }) test_that("summarise coerces types across groups", { gf <- group_by(tibble(g = 1:2), g) out <- summarise(gf, x = if (g == 1) NA else "x") expect_type(out$x, "character") out <- summarise(gf, x = if (g == 1L) NA else 2.5) expect_type(out$x, "double") }) test_that("unnamed tibbles are unpacked (#2326)", { df <- tibble(x = 1:2) out <- summarise(df, tibble(y = x * 2, z = 3)) expect_equal(out$y, c(2L, 4L)) expect_equal(out$z, c(3L, 3L)) }) test_that("named tibbles are packed (#2326)", { df <- tibble(x = 1:2) out <- summarise(df, df = tibble(y = x * 2, z = 3)) expect_equal(out$df, tibble(y = c(2L, 4L), z = c(3L, 3L))) }) test_that("summarise(.groups=)", { expect_message(eval_bare( expr(data.frame(x = 1, y = 2) %>% group_by(x, y) %>% summarise()), env(global_env()) )) expect_message(eval_bare( expr(data.frame(x = 1, y = 2) %>% rowwise(x, y) %>% summarise()), env(global_env()) )) df <- data.frame(x = 1, y = 2) expect_equal(df %>% summarise(z = 3, .groups= "rowwise"), rowwise(data.frame(z = 3))) gf <- df %>% group_by(x, y) expect_equal(gf %>% summarise() %>% group_vars(), "x") expect_equal(gf %>% summarise(.groups = "drop_last") %>% group_vars(), "x") expect_equal(gf %>% summarise(.groups = "drop") %>% group_vars(), character()) expect_equal(gf %>% summarise(.groups = "keep") %>% group_vars(), c("x", "y")) rf <- df %>% rowwise(x, y) expect_equal(rf %>% summarise(.groups = "drop") %>% group_vars(), character()) expect_equal(rf %>% summarise(.groups = "keep") %>% group_vars(), c("x", "y")) }) test_that("summarise() casts data frame results to common type (#5646)", { df <- data.frame(x = 1:2, g = 1:2) %>% group_by(g) res <- df %>% summarise(if (g == 1) data.frame(y = 1) else data.frame(y = 1, z = 2), .groups = "drop") expect_equal(res$z, c(NA, 2)) }) test_that("summarise() silently skips when all results are NULL (#5708)", { df <- data.frame(x = 1:2, g = 1:2) %>% group_by(g) expect_equal(summarise(df, x = NULL), summarise(df)) 
expect_error(summarise(df, x = if(g == 1) 42)) }) test_that("summarise() propagates caller env", { expect_caller_env(summarise(mtcars, sig_caller_env())) expect_caller_env(summarise(group_by(mtcars, cyl), sig_caller_env())) }) # errors ------------------------------------------------------------------- test_that("summarise() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) stack <<- sys.calls(), summarise(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("summarise() gives meaningful errors", { eval(envir = global_env(), expr({ expect_snapshot({ # Messages about .groups= tibble(x = 1, y = 2) %>% group_by(x, y) %>% summarise() tibble(x = 1, y = 2) %>% group_by(x, y) %>% summarise(z = c(2,2)) tibble(x = 1, y = 2) %>% rowwise(x, y) %>% summarise() tibble(x = 1, y = 2) %>% rowwise() %>% summarise() }) })) eval(envir = global_env(), expr({ expect_snapshot({ # unsupported type (expect_error( tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% summarise(a = rlang::env(a = 1)) )) (expect_error( tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% group_by(x, y) %>% summarise(a = rlang::env(a = 1)) )) (expect_error( tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% rowwise() %>% summarise(a = lm(y ~ x)) )) # mixed types (expect_error( tibble(id = 1:2, a = list(1, "2")) %>% group_by(id) %>% summarise(a = a[[1]]) )) (expect_error( tibble(id = 1:2, a = list(1, "2")) %>% rowwise() %>% summarise(a = a[[1]]) )) # incompatible size (expect_error( tibble(z = 1) %>% summarise(x = 1:3, y = 1:2) )) (expect_error( tibble(z = 1:2) %>% group_by(z) %>% summarise(x = 1:3, y = 1:2) )) (expect_error( tibble(z = c(1, 3)) %>% group_by(z) %>% summarise(x = seq_len(z), y = 1:2) )) # NULL and no NULL (expect_error( data.frame(x = 1:2, g = 1:2) %>% group_by(g) %>% summarise(x = if(g == 1) 42) )) # Missing variable (expect_error(summarise(mtcars, a = mean(not_there)))) 
(expect_error(summarise(group_by(mtcars, cyl), a = mean(not_there)))) # .data pronoun (expect_error(summarise(tibble(a = 1), c = .data$b))) (expect_error(summarise(group_by(tibble(a = 1:3), a), c = .data$b))) # Duplicate column names (expect_error( tibble(x = 1, x = 1, .name_repair = "minimal") %>% summarise(x) )) # Not glue()ing (expect_error(tibble() %>% summarise(stop("{")))) (expect_error( tibble(a = 1, b="{value:1, unit:a}") %>% group_by(b) %>% summarise(a = stop("!")) )) }) })) }) dplyr/tests/testthat/test-transmute.R0000644000176200001440000000610014151641776017541 0ustar liggesuserstest_that("non-syntactic grouping variable is preserved (#1138)", { df <- tibble(`a b` = 1L) %>% group_by(`a b`) %>% transmute() expect_named(df, "a b") }) test_that("transmute preserves grouping", { gf <- group_by(tibble(x = 1:2, y = 2), x) i <- count_regroups(out <- transmute(gf, x = 1)) expect_equal(i, 1L) expect_equal(group_vars(out), "x") expect_equal(nrow(group_data(out)), 1) i <- count_regroups(out <- transmute(gf, z = 1)) expect_equal(i, 0) expect_equal(group_data(out), group_data(gf)) }) # Empty transmutes ------------------------------------------------- test_that("transmute with no args returns grouping vars", { df <- tibble(x = 1, y = 2) gf <- group_by(df, x) expect_equal(df %>% transmute(), df[integer()]) expect_equal(gf %>% transmute(), gf[1L]) }) # transmute variables ----------------------------------------------- test_that("transmute succeeds in presence of raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(transmute(df, a), df["a"]) expect_identical(transmute(df, b), df["b"]) }) test_that("arguments to transmute() don't match vars_transmute() arguments", { df <- tibble(a = 1) expect_identical(transmute(df, var = a), tibble(var = 1)) expect_identical(transmute(df, exclude = a), tibble(exclude = 1)) expect_identical(transmute(df, include = a), tibble(include = 1)) }) test_that("arguments to rename() don't match vars_rename() arguments 
(#2861)", { df <- tibble(a = 1) expect_identical(rename(df, var = a), tibble(var = 1)) expect_identical(rename(group_by(df, a), var = a), group_by(tibble(var = 1), var)) expect_identical(rename(df, strict = a), tibble(strict = 1)) expect_identical(rename(group_by(df, a), strict = a), group_by(tibble(strict = 1), strict)) }) test_that("can transmute() with .data pronoun (#2715)", { expect_identical(transmute(mtcars, .data$cyl), transmute(mtcars, cyl)) }) test_that("transmute() does not warn when a variable is removed with = NULL (#4609)", { df <- data.frame(x=1) expect_warning(transmute(df, y =x+1, z=y*2, y = NULL), NA) }) test_that("transmute() can handle auto splicing", { expect_equal( iris %>% transmute(tibble(Sepal.Length, Sepal.Width)), iris %>% select(Sepal.Length, Sepal.Width) ) }) test_that("transmute() retains ordering supplied in `...`, even for pre-existing columns (#6086)", { df <- tibble(x = 1:3, y = 4:6) out <- transmute(df, x, z = x + 1, y) expect_named(out, c("x", "z", "y")) }) test_that("transmute() retains ordering supplied in `...`, even for group columns (#6086)", { df <- tibble(x = 1:3, g1 = 1:3, g2 = 1:3, y = 4:6) df <- group_by(df, g1, g2) out <- transmute(df, x, z = x + 1, y, g1) # - Untouched group variables are first # - Following by ordering supplied through `...` expect_named(out, c("g2", "x", "z", "y", "g1")) }) test_that("transmute() error messages", { expect_snapshot({ (expect_error(transmute(mtcars, cyl2 = cyl, .keep = 'all'))) (expect_error(transmute(mtcars, cyl2 = cyl, .before = disp))) (expect_error(transmute(mtcars, cyl2 = cyl, .after = disp))) }) }) dplyr/tests/testthat/test-case-when.R0000644000176200001440000000750614174551640017377 0ustar liggesuserstest_that("matches values in order", { x <- 1:3 expect_equal( case_when( x <= 1 ~ 1, x <= 2 ~ 2, x <= 3 ~ 3 ), c(1, 2, 3) ) }) test_that("unmatched gets missing value", { x <- 1:3 expect_equal( case_when( x <= 1 ~ 1, x <= 2 ~ 2 ), c(1, 2, NA) ) }) test_that("missing values can be 
replaced (#1999)", { x <- c(1:3, NA) expect_equal( case_when( x <= 1 ~ 1, x <= 2 ~ 2, is.na(x) ~ 0 ), c(1, 2, NA, 0) ) }) test_that("NA conditions (#2927)", { expect_equal( case_when( c(TRUE, FALSE, NA) ~ 1:3, TRUE ~ 4L ), c(1L, 4L, 4L) ) }) test_that("atomic conditions (#2909)", { expect_equal( case_when( TRUE ~ 1:3, FALSE ~ 4:6 ), 1:3 ) expect_equal( case_when( NA ~ 1:3, TRUE ~ 4:6 ), 4:6 ) }) test_that("zero-length conditions and values (#3041)", { expect_equal( case_when( TRUE ~ integer(), FALSE ~ integer() ), integer() ) expect_equal( case_when( logical() ~ 1, logical() ~ 2 ), numeric() ) }) test_that("case_when can be used in anonymous functions (#3422)", { res <- tibble(a = 1:3) %>% mutate(b = (function(x) case_when(x < 2 ~ TRUE, TRUE ~ FALSE))(a)) %>% pull() expect_equal(res, c(TRUE, FALSE, FALSE)) }) test_that("case_when() can be used inside mutate()", { out <- mtcars[1:4, ] %>% mutate(out = case_when( cyl == 4 ~ 1, .data[["am"]] == 1 ~ 2, TRUE ~ 0 )) %>% pull() expect_identical(out, c(2, 2, 1, 0)) }) test_that("can pass quosures to case_when()", { fs <- local({ x <- 3:1 quos( x < 2 ~ TRUE, TRUE ~ FALSE ) }) expect_identical(case_when(!!!fs), c(FALSE, FALSE, TRUE)) }) test_that("can pass nested quosures to case_when()", { fs <- local({ foo <- mtcars$cyl[1:4] quos( !!quo(foo) == 4 ~ 1, TRUE ~ 0 ) }) expect_identical(case_when(!!!fs), c(0, 0, 1, 0)) }) test_that("can pass unevaluated formulas to case_when()", { x <- 6:8 fs <- exprs( x == 7L ~ TRUE, TRUE ~ FALSE ) expect_identical(case_when(!!!fs), c(FALSE, TRUE, FALSE)) out <- local({ x <- 7:9 case_when(!!!fs) }) expect_identical(out, c(TRUE, FALSE, FALSE)) }) test_that("unevaluated formulas can refer to data mask", { fs <- exprs( cyl == 4 ~ 1, am == 1 ~ 2, TRUE ~ 0 ) out <- mtcars[1:4, ] %>% mutate(out = case_when(!!!fs)) %>% pull() expect_identical(out, c(2, 2, 1, 0)) }) test_that("unevaluated formulas can contain quosures", { quo <- local({ n <- 4 quo(n) }) fs <- exprs( cyl == !!quo ~ 1, am == 1 ~ 2, TRUE 
~ 0 ) out <- mtcars[1:4, ] %>% mutate(out = case_when(!!!fs)) %>% pull() expect_identical(out, c(2, 2, 1, 0)) }) test_that("NULL inputs are compacted", { x <- 1:3 bool <- FALSE out <- case_when( x == 2 ~ TRUE, if (bool) x == 3 ~ NA, TRUE ~ FALSE ) expect_identical(out, c(FALSE, TRUE, FALSE)) bool <- TRUE out <- case_when( x == 2 ~ TRUE, if (bool) x == 3 ~ NA, TRUE ~ FALSE ) expect_identical(out, c(FALSE, TRUE, NA)) }) # Errors ------------------------------------------------------------------ test_that("case_when() give meaningful errors", { expect_snapshot({ (expect_error( case_when( c(TRUE, FALSE) ~ 1:3, c(FALSE, TRUE) ~ 1:2 ) )) (expect_error( case_when( c(TRUE, FALSE) ~ 1, c(FALSE, TRUE, FALSE) ~ 2, c(FALSE, TRUE, FALSE, NA) ~ 3 ) )) (expect_error( case_when(50 ~ 1:3) )) (expect_error( case_when(paste(50)) )) (expect_error( case_when() )) (expect_error( case_when(~1:2) )) }) }) dplyr/tests/testthat/helper-s3.R0000644000176200001440000000232314121112104016316 0ustar liggesuserslocal_methods <- function(..., .frame = caller_env()) { local_bindings(..., .env = global_env(), .frame = .frame) } local_foo_df <- function(frame = caller_env()) { local_methods(.frame = frame, group_by.foo_df = function(.data, ...) { out <- NextMethod() if (missing(...)) { class(out) <- c("foo_df", class(out)) } else { class(out) <- c("grouped_foo_df", class(out)) } out }, ungroup.grouped_foo_df = function(x, ...) { out <- NextMethod() class(out) <- c("foo_df", class(out)) out } ) } new_ctor <- function(base_class) { function(x = list(), ..., class = NULL) { if (inherits(x, "tbl_df")) { tibble::new_tibble(x, class = c(class, base_class), nrow = nrow(x)) } else if (is.data.frame(x)) { structure(x, class = c(class, base_class, "data.frame"), ...) } else { structure(x, class = c(class, base_class), ...) 
} } } foobar <- new_ctor("dplyr_foobar") foobaz <- new_ctor("dplyr_foobaz") quux <- new_ctor("dplyr_quux") # For testing reconstructing methods that break invariants by adding # new columns new_dispatched_quux <- function(x) { out <- quux(x) out$dispatched <- rep(TRUE, nrow(out)) out } dplyr/tests/testthat/test-context.R0000644000176200001440000000475514151641776017221 0ustar liggesuserstest_that("cur_group() works", { df <- tibble(g = 1, x = 1) gf <- group_by(df, g) expect_equal( df %>% summarise(key = list(cur_group())) %>% pull(key), list(tibble(.rows = 1L)) ) expect_equal( gf %>% summarise(key = list(cur_group())) %>% pull(key), list(tibble(g = 1)) ) }) test_that("cur_group_idx() gives unique id", { df <- tibble(x = c("b", "a", "b")) gf <- group_by(df, x) expect_equal( summarise(gf, id = cur_group_id()), tibble(x = c("a", "b"), id = 1:2) ) expect_equal( mutate(gf, id = cur_group_id()), group_by(tibble(x = df$x, id = c(2, 1, 2)), x) ) }) test_that("cur_data() gives current data without groups, cur_data_all() includes groups", { df <- tibble(x = c("b", "a", "b"), y = 1:3) gf <- group_by(df, x) expect_equal( df %>% summarise(x = list(cur_data())) %>% pull(), list(df) ) expect_equal( gf %>% summarise(x = list(cur_data())) %>% pull(), list(tibble(y = 2L), tibble(y = c(1L, 3L))) ) expect_equal( gf %>% summarise(x = list(cur_data_all())) %>% pull(), list(tibble(x = "a", y = 2L), tibble(x = "b", y = c(1L, 3L))) ) }) test_that("cur_data()/cur_data_all() keeps list columns as lists in rowwise_df (#5901)", { df <- tibble(x = list(tibble(a = 1), tibble(a = 2))) %>% rowwise() expect_true( all(summarise(df, test = vec_is_list(cur_data()$x))$test) ) expect_true( all(summarise(df, test = vec_is_list(cur_data_all()$x))$test) ) }) test_that("cur_group_rows() retrieves row position in original data", { df <- tibble(x = c("b", "a", "b"), y = 1:3) gf <- group_by(df, x) expect_equal( df %>% summarise(x = list(cur_group_rows())) %>% pull(), list(1:3) ) expect_equal( gf %>% 
summarise(x = list(cur_group_rows())) %>% pull(), list(2L, c(1L, 3L)) ) }) test_that("cur_data() and cur_data_all() work sequentially", { df <- tibble(a = 1) expect_equal( mutate(df, x = ncol(cur_data()), y = ncol(cur_data())), tibble(a = 1, x = 1, y = 2) ) gf <- tibble(a = 1, b = 2) %>% group_by(a) expect_equal( mutate(gf, x = ncol(cur_data_all()), y = ncol(cur_data_all())), group_by(tibble(a = 1, b = 2, x = 2, y = 3), a) ) }) test_that("give useful error messages when not applicable", { expect_snapshot({ (expect_error(n())) (expect_error(cur_data())) (expect_error(cur_data_all())) (expect_error(cur_column())) (expect_error(cur_group())) (expect_error(cur_group_id())) (expect_error(cur_group_rows())) }) }) dplyr/tests/testthat/test-if-else.R0000644000176200001440000000173114174551640017043 0ustar liggesuserstest_that("scalar true and false are vectorised", { x <- c(TRUE, TRUE, FALSE, FALSE) expect_equal(if_else(x, 1, 2), c(1, 1, 2, 2)) }) test_that("vector true and false are ok", { x <- c(-1, 0, 1) expect_equal(if_else(x < 0, x, 0), c(-1, 0, 0)) expect_equal(if_else(x > 0, x, 0), c(0, 0, 1)) }) test_that("missing values are missing", { expect_equal(if_else(c(TRUE, NA, FALSE), -1, 1), c(-1, NA, 1)) }) test_that("works with lists", { x <- list(1, 2, 3) expect_equal( if_else(c(TRUE, TRUE, FALSE), x, list(NULL)), list(1, 2, NULL) ) }) # Errors ------------------------------------------------------------------ test_that("if_else() give meaningful errors", { expect_snapshot({ (expect_error(if_else(1:10, 1, 2))) (expect_error(if_else(1:3 < 2, 1:2, 1:3))) (expect_error(if_else(1:3 < 2, 1:3, 1:2))) (expect_error(if_else(1:3 < 2, 1, 1L))) x <- factor("x") y <- ordered("x") (expect_error(if_else(1:3 < 2, x, y))) }) }) dplyr/tests/testthat/test-conditions.R0000644000176200001440000000264114157105672017672 0ustar liggesuserstest_that("can hide expression in error messages", { err <- catch_cnd(mutate(mtcars, invisible(999 + "")), "error") expect_false(grepl("999", 
cnd_header(err))) expect_snapshot(error = TRUE, { mutate(mtcars, invisible(999 + "")) summarise(mtcars, invisible(999 + "")) filter(mtcars, invisible(999 + "")) arrange(mtcars, invisible(999 + "")) select(mtcars, invisible(999 + "")) slice(mtcars, invisible(999 + "")) mutate(mtcars, var = invisible(999 + "")) summarise(mtcars, var = invisible(999 + "")) filter(mtcars, var = invisible(999 + "")) # Named arg error arrange(mtcars, var = invisible(999 + "")) # Suboptimal select(mtcars, var = invisible(999 + "")) slice(mtcars, var = invisible(999 + "")) }) }) test_that("can pass verb-level error call", { dplyr_local_error_call(call("foo")) expect_snapshot(error = TRUE, { mutate(mtcars, 1 + "") transmute(mtcars, 1 + "") summarise(mtcars, 1 + "") summarise(group_by(mtcars, cyl), 1 + "") filter(mtcars, 1 + "") arrange(mtcars, 1 + "") select(mtcars, 1 + "") slice(mtcars, 1 + "") }) }) test_that("can pass verb-level error call (example case)", { my_verb <- function(data, var1, var2) { dplyr_local_error_call() pull(transmute(data, .result = {{ var1 }} * {{ var2 }})) } expect_snapshot(error = TRUE, { my_verb(mtcars, 1 + "", am) my_verb(mtcars, cyl, c(am, vs)) }) }) dplyr/tests/testthat/helper-dplyr.R0000644000176200001440000000065214121112104017126 0ustar liggesusersexpect_no_error <- function(object, ...) { expect_error({{ object }}, NA, ...) } expect_no_warning <- function(object, ...) { expect_warning({{ object }}, NA, ...) 
} sig_caller_env <- function() { signal( "", "dplyr:::test_caller_env", out = peek_mask()$get_caller_env() ) } expect_caller_env <- function(expr) { env <- catch_cnd(expr, "dplyr:::test_caller_env")$out expect_equal(env, caller_env()) } dplyr/tests/testthat/test-grouped-df.r0000644000176200001440000001236614151641776017626 0ustar liggesuserstest_that("new_grouped_df can create alternative grouping structures (#3837)", { tbl <- new_grouped_df( tibble(x = rnorm(10)), groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) ) res <- summarise(tbl, x = mean(x)) expect_equal(nrow(res), 5L) }) test_that("new_grouped_df does not have rownames (#4173)", { tbl <- new_grouped_df( tibble(x = rnorm(10)), groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) ) expect_false(tibble::has_rownames(tbl)) }) test_that("[ method can remove grouping vars", { df <- tibble(x = 1, y = 2, z = 3) gf <- group_by(df, x, y) expect_equal(gf, gf) expect_equal(gf[1], group_by(df[1], x)) expect_equal(gf[3], df[3]) }) test_that("[ method reuses group_data() if possible", { df <- tibble(x = 1, y = 2, z = 3) gf <- group_by(df, x, y) expect_true(rlang::is_reference(group_data(gf), group_data(gf[1:2]))) expect_true(rlang::is_reference(group_data(gf), group_data(gf[, 1:2]))) }) test_that("[ supports drop=TRUE (#3714)", { df <- tibble(x = 1, y = 2) gf <- group_by(df, x) expect_type(gf[, "y", drop = TRUE], "double") expect_s3_class(gf[, c("x", "y"), drop = TRUE], "tbl_df") }) test_that("$<-, [[<-, and [<- update grouping data if needed", { df <- tibble(x = 1, y = 2) gf <- group_by(df, x) # value has to be past the ellipsis in $<-() expect_equal(group_data(`$<-`(gf, "x", value = 2))$x, 2) expect_equal(group_data(`$<-`(gf, "y", value = 2))$x, 1) expect_equal(group_data({gf2 <- gf; gf2[[1]] <- 3; gf2})$x, 3) expect_equal(group_data(`[<-`(gf, 1, "x", value = 4))$x, 4) }) test_that("can remove grouping cols with subset assignment", { df <- 
tibble(x = 1, y = 2) gf1 <- gf2 <- gf3 <- group_by(df, x, y) gf1$x <- NULL gf2[["x"]] <- NULL gf3[, "x"] <- NULL expect_named(group_data(gf1), c("y", ".rows")) expect_named(group_data(gf2), c("y", ".rows")) expect_named(group_data(gf3), c("y", ".rows")) }) test_that("names<- updates grouping data", { df <- tibble(x = 1, y = 2, z = 3) gf <- group_by(df, x, y) names(gf) <- c("z1", "z2", "z3") expect_named(group_data(gf), c("z1", "z2", ".rows")) names(gf)[1] <- c("Z1") expect_named(group_data(gf), c("Z1", "z2", ".rows")) }) test_that("names<- doesn't modify group data if not necessary", { df <- tibble(x = 1, y = 2) gf1 <- gf2 <- group_by(df, x) expect_true(rlang::is_reference(group_data(gf1), group_data(gf2))) names(gf1) <- c("x", "Y") expect_true(rlang::is_reference(group_data(gf1), group_data(gf2))) }) test_that("group order is maintained in grouped-df methods (#5040)", { gdf <- group_by(mtcars, cyl, am, vs) x <- gdf[0,] expect_identical(group_vars(x), group_vars(gdf)) x <- gdf x$am <- 1 expect_identical(group_vars(x), group_vars(gdf)) x <- gdf x["am"] <- 1 expect_identical(group_vars(x), group_vars(gdf)) x <- gdf x[["am"]] <- 1 expect_identical(group_vars(x), group_vars(gdf)) x <- gdf names <- names(x) names[9] <- "am2" names(x) <- names expect_identical(group_vars(x), group_vars(group_by(x, cyl, am2, vs))) }) # validate ---------------------------------------------------------------- test_that("validate_grouped_df() gives useful errors", { df1 <- group_by(tibble(x = 1:4, g = rep(1:2, each = 2)), g) groups <- attr(df1, "groups") groups[[2]] <- 1:2 attr(df1, "groups") <- groups df2 <- group_by(tibble(x = 1:4, g = rep(1:2, each = 2)), g) groups <- attr(df2, "groups") names(groups) <- c("g", "not.rows") attr(df2, "groups") <- groups df3 <- df2 attr(df3, "groups") <- tibble() df4 <- df3 attr(df4, "groups") <- NA df5 <- tibble(x = 1:4, g = rep(1:2, each = 2)) attr(df5, "vars") <- "g" attr(df5, "class") <- c("grouped_df", "tbl_df", "tbl", "data.frame") df6 <- 
new_grouped_df( tibble(x = 1:10), groups = tibble(".rows" := list(1:5, -1L)) ) df7 <- df6 attr(df7, "groups")$.rows <- list(11L) df8 <- df6 attr(df8, "groups")$.rows <- list(0L) df10 <- df6 attr(df10, "groups") <- tibble() df11 <- df6 attr(df11, "groups") <- NULL expect_snapshot({ # Invalid `groups` attribute (expect_error(validate_grouped_df(df1))) (expect_error(group_data(df1))) (expect_error(validate_grouped_df(df2))) (expect_error(validate_grouped_df(df2))) (expect_error(validate_grouped_df(df3))) (expect_error(validate_grouped_df(df4))) # Older style grouped_df (expect_error(validate_grouped_df(df5))) # validate_grouped_df( (expect_error(validate_grouped_df(df6, check_bounds = TRUE))) (expect_error(validate_grouped_df(df7, check_bounds = TRUE))) (expect_error(validate_grouped_df(df8, check_bounds = TRUE))) (expect_error(validate_grouped_df(df10))) (expect_error(validate_grouped_df(df11))) # new_grouped_df() (expect_error( new_grouped_df( tibble(x = 1:10), tibble(other = list(1:2)) ) )) (expect_error(new_grouped_df(10))) }) }) # compute_group ---------------------------------------------------------- test_that("helper gives meaningful error messages", { expect_snapshot({ (expect_error(grouped_df(data.frame(x = 1), "y", FALSE))) (expect_error(grouped_df(data.frame(x = 1), 1))) }) }) dplyr/tests/testthat/helper-torture.R0000644000176200001440000000005414121112104017474 0ustar liggesuserswith_gctorture2 <- withr::with_(gctorture2) dplyr/tests/testthat/test-group_map.R0000644000176200001440000000645314151641776017523 0ustar liggesuserstest_that("group_map() respects empty groups", { res <- group_by(mtcars, cyl) %>% group_map(~ head(.x, 2L)) expect_equal(length(res), 3L) res <- iris %>% group_by(Species) %>% filter(Species == "setosa") %>% group_map(~ tally(.x)) expect_equal(length(res), 1L) res <- iris %>% group_by(Species, .drop = FALSE) %>% filter(Species == "setosa") %>% group_map(~ tally(.x)) expect_equal(length(res), 3L) }) test_that("group_map() can return 
arbitrary objects", { expect_equal( group_by(mtcars, cyl) %>% group_map(~ 10), rep(list(10), 3) ) }) test_that("group_map() works on ungrouped data frames (#4067)", { expect_identical( group_map(mtcars, ~ head(.x, 2L)), list(head(as_tibble(mtcars), 2L)) ) }) test_that("group_modify() makes a grouped_df", { res <- group_by(mtcars, cyl) %>% group_modify(~ head(.x, 2L)) expect_equal(nrow(res), 6L) expect_equal(group_rows(res), list_of(1:2, 3:4, 5:6)) res <- iris %>% group_by(Species) %>% filter(Species == "setosa") %>% group_modify(~ tally(.x)) expect_equal(nrow(res), 1L) expect_equal(group_rows(res), list_of(1L)) res <- iris %>% group_by(Species, .drop = FALSE) %>% filter(Species == "setosa") %>% group_modify(~ tally(.x)) expect_equal(nrow(res), 3L) expect_equal(as.list(group_rows(res)), list(1L, 2L, 3L)) }) test_that("group_modify() and group_map() want functions with at least 2 arguments, or ... (#3996)", { head1 <- function(d, ...) head(d, 1) g <- iris %>% group_by(Species) expect_equal(nrow(group_modify(g, head1)), 3L) expect_equal(length(group_map(g, head1)), 3L) }) test_that("group_modify() works on ungrouped data frames (#4067)", { expect_identical( group_modify(mtcars, ~ head(.x, 2L)), head(mtcars, 2L) ) }) test_that("group_map() uses ptype on empty splits (#4421)", { res <- mtcars %>% group_by(cyl) %>% filter(hp > 1000) %>% group_map(~.x) expect_equal(res, list(), ignore_attr = TRUE) ptype <- attr(res, "ptype") expect_equal(names(ptype), setdiff(names(mtcars), "cyl")) expect_equal(nrow(ptype), 0L) expect_s3_class(ptype, "data.frame") }) test_that("group_modify() uses ptype on empty splits (#4421)", { res <- mtcars %>% group_by(cyl) %>% filter(hp > 1000) %>% group_modify(~.x) expect_equal(res, group_by(mtcars[integer(0L), names(res)], cyl)) }) test_that("group_modify() works with additional arguments (#4509)", { myfun <- function(.x, .y, foo) { .x[[foo]] <- 1 .x } srcdata <- data.frame( A=rep(1:2, each = 3) ) %>% group_by(A) targetdata <- srcdata 
targetdata$bar <- 1 expect_equal( group_modify(.data = srcdata, .f = myfun, foo = "bar"), targetdata ) }) test_that("group_map() does not warn about .keep= for rowwise_df", { expect_warning( data.frame(x = 1) %>% rowwise() %>% group_walk(~ {}), NA ) }) test_that("group_map() give meaningful errors", { head1 <- function(d) head(d, 1) expect_snapshot({ # group_modify() (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~ data.frame(cyl = 19)))) (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~ 10))) (expect_error(iris %>% group_by(Species) %>% group_modify(head1))) # group_map() (expect_error(iris %>% group_by(Species) %>% group_map(head1))) }) }) dplyr/tests/testthat/test-colwise-arrange.R0000644000176200001440000000333014121112104020552 0ustar liggesusersdf <- mtcars[1:3] test_that("scoped arrange is identical to manual arrange", { expect_identical(arrange_all(df), arrange(df, mpg, cyl, disp)) expect_identical(arrange_at(df, vars(mpg)), arrange(df, mpg)) expect_identical(arrange_if(iris, is.factor), arrange(iris, Species)) }) test_that(".funs is applied to variables before sorting", { expect_identical(arrange_all(df, `-`), arrange(df, -mpg, -cyl, -disp)) }) test_that("arrange_at can arrange by grouping variables (#3351, #3332, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( arrange_at(tbl, vars(gr1)), arrange(tbl, gr1) ) expect_identical( arrange_at(tbl, vars(-x)), arrange(tbl, gr1, gr2) ) }) test_that("arrange_all arranges by grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( arrange_all(tbl), arrange(tbl, gr1, gr2, x) ) expect_identical( arrange_all(tbl, desc), arrange(tbl, desc(gr1), desc(gr2), desc(x)) ) }) test_that("arrange_if arranges by grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( arrange_if(tbl, 
is.integer), arrange(tbl, gr1, gr2, x) ) }) test_that("scoped arrange respect .by_group (#3245)",{ d <- group_by(df, cyl) expect_identical( arrange_all(d, .by_group = TRUE), arrange(d, cyl, mpg, disp) ) expect_identical( arrange_if(d, is.numeric, .by_group = TRUE), arrange(d, cyl, mpg, disp) ) expect_identical( arrange_at(d, vars(mpg, disp), .by_group = TRUE), arrange(d, cyl, mpg, disp) ) }) dplyr/tests/testthat/test-mutate-windowed.R0000644000176200001440000002136214121112104020612 0ustar liggesuserstest_that("desc is correctly handled by window functions", { df <- data.frame( x = 1:10, y = seq(1, 10, by = 1), g = rep(c(1, 2), each = 5), s = c(letters[1:3], LETTERS[1:5], letters[4:5]) ) expect_equal(mutate(df, rank = min_rank(desc(x)))$rank, 10:1) expect_equal(mutate(group_by(df, g), rank = min_rank(desc(x)))$rank, rep(5:1, 2)) expect_equal(mutate(df, rank = row_number(desc(x)))$rank, 10:1) expect_equal(mutate(group_by(df, g), rank = row_number(desc(x)))$rank, rep(5:1, 2)) # Test character vector sorting charvec_sort_test <- function(df) { expect_equal( mutate(df, rank = row_number(desc(s)))$rank, mutate(df, rank = dplyr::row_number(desc(s)))$rank ) expect_equal( mutate(group_by(df, g), rank = row_number(desc(s)))$rank, mutate(group_by(df, g), rank = dplyr::row_number(desc(s)))$rank ) } # Test against both the local, and the C locale for collation charvec_sort_test(df) withr::with_collate("C", charvec_sort_test(df)) }) test_that("row_number gives correct results", { tmp <- data.frame( id = rep(c(1, 2), each = 5), value = c(1, 1, 2, 5, 0, 6, 4, 0, 0, 2), s = c(letters[1:2], LETTERS[1:4], letters[2:5]) ) res <- group_by(tmp, id) %>% mutate(var = row_number(value)) expect_equal(res$var, c(2, 3, 4, 5, 1, 5, 4, 1, 2, 3)) # Test character vector sorting by comparing C and R function outputs # Should be careful of testing against static return values due to locale differences charvec_sort_test <- function(tmp) { res2 <- group_by(tmp, id) %>% mutate(var = row_number(s), 
var_d = dplyr::row_number(s)) expect_equal(res2$var, res2$var_d) res3 <- data.frame(s = c("[", "]", NA, "a", "Z")) %>% mutate(var = row_number(s), var_d = dplyr::row_number(s)) expect_equal(res3$var, res3$var_d) } # Test against both the local, and the C locale for collation charvec_sort_test(tmp) withr::with_collate("C", charvec_sort_test(tmp)) }) test_that("row_number works with 0 arguments", { g <- group_by(mtcars, cyl) expect_equal(mutate(g, rn = row_number()), mutate(g, rn = 1:n())) }) test_that("cum(sum,min,max) works", { df <- data.frame(x = 1:10, y = seq(1, 10, by = 1), g = rep(c(1, 2), each = 5)) res <- mutate(df, csumx = cumsum(x), csumy = cumsum(y), cminx = cummin(x), cminy = cummin(y), cmaxx = cummax(x), cmaxy = cummax(y) ) expect_equal(res$csumx, cumsum(df$x)) expect_equal(res$csumy, cumsum(df$y)) expect_equal(res$cminx, cummin(df$x)) expect_equal(res$cminy, cummin(df$y)) expect_equal(res$cmaxx, cummax(df$x)) expect_equal(res$cmaxy, cummax(df$y)) res <- mutate(group_by(df, g), csumx = cumsum(x), csumy = cumsum(y), cminx = cummin(x), cminy = cummin(y), cmaxx = cummax(x), cmaxy = cummax(y) ) expect_equal(res$csumx, c(cumsum(df$x[1:5]), cumsum(df$x[6:10]))) expect_equal(res$csumy, c(cumsum(df$y[1:5]), cumsum(df$y[6:10]))) expect_equal(res$cminx, c(cummin(df$x[1:5]), cummin(df$x[6:10]))) expect_equal(res$cminy, c(cummin(df$y[1:5]), cummin(df$y[6:10]))) expect_equal(res$cmaxx, c(cummax(df$x[1:5]), cummax(df$x[6:10]))) expect_equal(res$cmaxy, c(cummax(df$y[1:5]), cummax(df$y[6:10]))) df$x[3] <- NA df$y[4] <- NA res <- mutate(df, csumx = cumsum(x), csumy = cumsum(y), cminx = cummin(x), cminy = cummin(y), cmaxx = cummax(x), cmaxy = cummax(y) ) expect_true(all(is.na(res$csumx[3:10]))) expect_true(all(is.na(res$csumy[4:10]))) expect_true(all(is.na(res$cminx[3:10]))) expect_true(all(is.na(res$cminy[4:10]))) expect_true(all(is.na(res$cmaxx[3:10]))) expect_true(all(is.na(res$cmaxy[4:10]))) }) test_that("lead and lag simple hybrid version gives correct results 
(#133)", { res <- group_by(mtcars, cyl) %>% mutate(disp_lag_2 = lag(disp, 2), disp_lead_2 = lead(disp, 2)) %>% summarise( lag1 = all(is.na(head(disp_lag_2, 2))), lag2 = all(!is.na(tail(disp_lag_2, -2))), lead1 = all(is.na(tail(disp_lead_2, 2))), lead2 = all(!is.na(head(disp_lead_2, -2))) ) expect_true(all(res$lag1)) expect_true(all(res$lag2)) expect_true(all(res$lead1)) expect_true(all(res$lead2)) }) test_that("min_rank handles columns full of NaN (#726)", { test <- data.frame( Name = c("a", "b", "c", "d", "e"), ID = c(1, 1, 1, 1, 1), expression = c(NaN, NaN, NaN, NaN, NaN) ) data <- group_by(test, ID) %>% mutate(rank = min_rank(expression)) expect_true(all(is.na(data$rank))) }) test_that("ntile works with one argument (#3418)", { df <- data.frame(x=1:42) expect_identical( mutate( df, nt = ntile(n = 9)), mutate( df, nt = ntile(row_number(), n = 9)) ) df <- group_by( data.frame(x=1:42, g = rep(1:7, each=6)), g ) expect_identical( mutate( df, nt = ntile(n = 4)), mutate( df, nt = ntile(row_number(), n = 4)) ) }) test_that("rank functions deal correctly with NA (#774)", { data <- tibble(x = c(1, 2, NA, 1, 0, NA)) res <- data %>% mutate( min_rank = min_rank(x), percent_rank = percent_rank(x), dense_rank = dense_rank(x), cume_dist = cume_dist(x), ntile = ntile(x, 2), row_number = row_number(x) ) expect_true(all(is.na(res$min_rank[c(3, 6)]))) expect_true(all(is.na(res$dense_rank[c(3, 6)]))) expect_true(all(is.na(res$percent_rank[c(3, 6)]))) expect_true(all(is.na(res$cume_dist[c(3, 6)]))) expect_true(all(is.na(res$ntile[c(3, 6)]))) expect_true(all(is.na(res$row_number[c(3, 6)]))) expect_equal(res$percent_rank[ c(1, 2, 4, 5) ], c(1 / 3, 1, 1 / 3, 0)) expect_equal(res$min_rank[ c(1, 2, 4, 5) ], c(2L, 4L, 2L, 1L)) expect_equal(res$dense_rank[ c(1, 2, 4, 5) ], c(2L, 3L, 2L, 1L)) expect_equal(res$cume_dist[ c(1, 2, 4, 5) ], c(.75, 1, .75, .25)) expect_equal(res$ntile[ c(1, 2, 4, 5) ], c(1L, 2L, 2L, 1L)) expect_equal(res$row_number[ c(1, 2, 4, 5) ], c(2L, 4L, 3L, 1L)) data <- 
tibble( x = rep(c(1, 2, NA, 1, 0, NA), 2), g = rep(c(1, 2), each = 6) ) res <- data %>% group_by(g) %>% mutate( min_rank = min_rank(x), percent_rank = percent_rank(x), dense_rank = dense_rank(x), cume_dist = cume_dist(x), ntile = ntile(x, 2), row_number = row_number(x) ) expect_true(all(is.na(res$min_rank[c(3, 6, 9, 12)]))) expect_true(all(is.na(res$dense_rank[c(3, 6, 9, 12)]))) expect_true(all(is.na(res$percent_rank[c(3, 6, 9, 12)]))) expect_true(all(is.na(res$cume_dist[c(3, 6, 9, 12)]))) expect_true(all(is.na(res$ntile[c(3, 6, 9, 12)]))) expect_true(all(is.na(res$row_number[c(3, 6, 9, 12)]))) expect_equal(res$percent_rank[ c(1, 2, 4, 5, 7, 8, 10, 11) ], rep(c(1 / 3, 1, 1 / 3, 0), 2)) expect_equal(res$min_rank[ c(1, 2, 4, 5, 7, 8, 10, 11) ], rep(c(2L, 4L, 2L, 1L), 2)) expect_equal(res$dense_rank[ c(1, 2, 4, 5, 7, 8, 10, 11) ], rep(c(2L, 3L, 2L, 1L), 2)) expect_equal(res$cume_dist[ c(1, 2, 4, 5, 7, 8, 10, 11) ], rep(c(.75, 1, .75, .25), 2)) expect_equal(res$ntile[ c(1, 2, 4, 5, 7, 8, 10, 11) ], rep(c(1L, 2L, 2L, 1L), 2)) expect_equal(res$row_number[ c(1, 2, 4, 5, 7, 8, 10, 11) ], rep(c(2L, 4L, 3L, 1L), 2)) }) test_that("lag and lead work on factors inside mutate (#955)", { test_factor <- factor(rep(c("A", "B", "C"), each = 3)) exp_lag <- test_factor != lag(test_factor) exp_lead <- test_factor != lead(test_factor) test_df <- tibble(test = test_factor) res <- test_df %>% mutate( is_diff_lag = (test != lag(test)), is_diff_lead = (test != lead(test)) ) expect_equal(exp_lag, res$is_diff_lag) expect_equal(exp_lead, res$is_diff_lead) }) test_that("lag handles default argument in mutate (#915)", { blah <- data.frame(x1 = c(5, 10, 20, 27, 35, 58, 5, 6), y = 8:1) blah <- mutate(blah, x2 = x1 - lag(x1, n = 1, default = 0), x3 = x1 - lead(x1, n = 1, default = 0), x4 = lag(x1, n = 1L, order_by = y), x5 = lead(x1, n = 1L, order_by = y) ) expect_equal(blah$x2, blah$x1 - lag(blah$x1, n = 1, default = 0)) expect_equal(blah$x3, blah$x1 - lead(blah$x1, n = 1, default = 0)) 
expect_equal(blah$x4, lag(blah$x1, n = 1L, order_by = blah$y)) expect_equal(blah$x5, lead(blah$x1, n = 1L, order_by = blah$y)) }) # FIXME: this should only fail if strict checking is on. # test_that("window functions fail if db doesn't support windowing", { # df_sqlite <- temp_load(temp_srcs("sqlite"), df)$sql %>% group_by(g) # ok <- collect(df_sqlite %>% mutate(x > 4)) # expect_equal(nrow(ok), 10) # # expect_error(df_sqlite %>% mutate(x > mean(x)), "does not support") # expect_error(df_sqlite %>% mutate(r = row_number()), "does not support") # }) test_that("mutate handles matrix columns", { df <- data.frame(a = rep(1:3, each = 2), b = 1:6) df_regular <- mutate(df, b = scale(b)) df_grouped <- mutate(group_by(df, a), b = scale(b)) df_rowwise <- mutate(rowwise(df), b = scale(b)) expect_equal(dim(df_regular$b), c(6, 1)) expect_equal(dim(df_grouped$b), c(6, 1)) expect_equal(dim(df_rowwise$b), c(6, 1)) }) dplyr/tests/testthat.R0000644000176200001440000000006614121112104014516 0ustar liggesuserslibrary(testthat) library(dplyr) test_check("dplyr") dplyr/src/0000755000176200001440000000000014200154115012164 5ustar liggesusersdplyr/src/group_by.cpp0000644000176200001440000002514614151641776014551 0ustar liggesusers#include "dplyr.h" #include // support for expand_groups() class ExpanderCollecter; struct ExpanderResult { ExpanderResult(R_xlen_t start_, R_xlen_t end_, R_xlen_t index_) : start(start_), end(end_), index(index_) {} R_xlen_t start; R_xlen_t end; R_xlen_t index; inline R_xlen_t size() const { return end - start; } }; class Expander { public: virtual ~Expander() {}; virtual R_xlen_t size() const = 0; virtual ExpanderResult collect(ExpanderCollecter& results, int depth) const = 0; }; class ExpanderCollecter { public: ExpanderCollecter(int nvars_, SEXP new_indices_, int new_size_, SEXP new_rows_, SEXP old_rows_) : nvars(nvars_), old_rows(old_rows_), new_size(new_size_), new_indices(new_indices_), new_rows(new_rows_), leaf_index(0), vec_new_indices(nvars) { 
Rf_classgets(new_rows, dplyr::vectors::classes_vctrs_list_of); Rf_setAttrib(new_rows, dplyr::symbols::ptype, dplyr::vectors::empty_int_vector); for (int i = 0; i < nvars; i++) { SEXP new_indices_i = Rf_allocVector(INTSXP, new_size); SET_VECTOR_ELT(new_indices, i, new_indices_i); vec_new_indices[i] = INTEGER(new_indices_i); } } ExpanderResult collect_leaf(R_xlen_t start, R_xlen_t end, R_xlen_t index) { if (start == end) { SET_VECTOR_ELT(new_rows, leaf_index++, dplyr::vectors::empty_int_vector); } else { SET_VECTOR_ELT(new_rows, leaf_index++, VECTOR_ELT(old_rows, start)); } return ExpanderResult(leaf_index - 1, leaf_index, index); } ExpanderResult collect_node(int depth, R_xlen_t index, const std::vector& expanders) { int n = expanders.size(); if (n == 0) { return ExpanderResult(NA_INTEGER, NA_INTEGER, index); } R_xlen_t nr = 0; ExpanderResult first = expanders[0]->collect(*this, depth + 1); R_xlen_t start = first.start; R_xlen_t end = first.end; fill_indices(depth, start, end, first.index); nr += first.size(); for (R_xlen_t i = 1; i < n; i++) { ExpanderResult exp_i = expanders[i]->collect(*this, depth + 1); fill_indices(depth, exp_i.start, exp_i.end, exp_i.index); nr += exp_i.size(); end = exp_i.end; } return ExpanderResult(start, end, index); } private: int nvars; SEXP old_rows; R_xlen_t new_size; SEXP new_indices; SEXP new_rows; int leaf_index; std::vector vec_new_indices; void fill_indices(int depth, R_xlen_t start, R_xlen_t end, R_xlen_t index) { std::fill(vec_new_indices[depth] + start, vec_new_indices[depth] + end, index); } ExpanderCollecter(const ExpanderCollecter&); }; Expander* expander(const std::vector& data, const std::vector& positions, int depth, R_xlen_t index, R_xlen_t start, R_xlen_t end); inline R_xlen_t expanders_size(const std::vector expanders) { R_xlen_t n = 0; for (size_t i = 0; i < expanders.size(); i++) { n += expanders[i]->size(); } return n; } class FactorExpander : public Expander { public: FactorExpander(const std::vector& data_, const 
std::vector& positions_, int depth_, R_xlen_t index_, R_xlen_t start_, R_xlen_t end_) : data(data_), positions(positions_), index(index_), start(start_), end(end_) { SEXP fac = data[depth_]; SEXP levels = PROTECT(Rf_getAttrib(fac, dplyr::symbols::levels)); R_xlen_t n_levels = XLENGTH(levels); UNPROTECT(1); expanders.resize(n_levels); int* fac_pos = positions[depth_]; // for each level, setup an expander for `depth + 1` R_xlen_t j = start; for (R_xlen_t i = 0; i < n_levels; i++) { R_xlen_t start_i = j; while (j < end && fac_pos[j] == i + 1) j++; expanders[i] = expander(data, positions, depth_ + 1, i + 1, start_i, j); } // implicit NA if (j < end) { expanders.push_back(expander(data, positions, depth_ + 1, NA_INTEGER, j, end)); } } ~FactorExpander() { for (int i = expanders.size() - 1; i >= 0; i--) delete expanders[i]; } virtual R_xlen_t size() const { return expanders_size(expanders); } ExpanderResult collect(ExpanderCollecter& results, int depth) const { return results.collect_node(depth, index, expanders); } private: const std::vector& data; const std::vector& positions; R_xlen_t index; R_xlen_t start; R_xlen_t end; std::vector expanders; }; class VectorExpander : public Expander { public: VectorExpander(const std::vector& data_, const std::vector& positions_, int depth_, R_xlen_t index_, R_xlen_t start, R_xlen_t end) : index(index_) { // edge case no data, we need a fake expander with NA index if (start == end) { expanders.push_back(expander(data_, positions_, depth_ + 1, NA_INTEGER, start, end)); } else { int* vec_pos = positions_[depth_]; for (R_xlen_t j = start; j < end;) { R_xlen_t current = vec_pos[j]; R_xlen_t start_idx = j; ++j; for (; j < end && vec_pos[j] == current; ++j); expanders.push_back(expander(data_, positions_, depth_ + 1, current, start_idx, j)); } } } ~VectorExpander() { for (int i = expanders.size() - 1; i >= 0; i--) delete expanders[i]; } virtual R_xlen_t size() const { return expanders_size(expanders); } ExpanderResult 
collect(ExpanderCollecter& results, int depth) const { return results.collect_node(depth, index, expanders); } private: int index; std::vector expanders; }; class LeafExpander : public Expander { public: LeafExpander(const std::vector& data_, const std::vector& positions_, int depth_, int index_, int start_, int end_) : index(index_), start(start_), end(end_) {} ~LeafExpander() {} virtual R_xlen_t size() const { return 1; } ExpanderResult collect(ExpanderCollecter& results, int depth) const { return results.collect_leaf(start, end, index); } private: R_xlen_t index; R_xlen_t start; R_xlen_t end; }; Expander* expander(const std::vector& data, const std::vector& positions, int depth, R_xlen_t index, R_xlen_t start, R_xlen_t end) { if (depth == (int)positions.size()) { return new LeafExpander(data, positions, depth, index, start, end); } else if (Rf_isFactor(data[depth])) { return new FactorExpander(data, positions, depth, index, start, end); } else { return new VectorExpander(data, positions, depth, index, start, end); } } SEXP dplyr_expand_groups(SEXP old_groups, SEXP positions, SEXP s_nr) { int nr = INTEGER(s_nr)[0]; R_xlen_t nvars = XLENGTH(old_groups) - 1; SEXP old_rows = VECTOR_ELT(old_groups, nvars); std::vector vec_data(nvars); std::vector vec_positions(nvars); for (R_xlen_t i = 0; i < nvars; i++) { vec_data[i] = VECTOR_ELT(old_groups, i); vec_positions[i] = INTEGER(VECTOR_ELT(positions, i)); } Expander* exp = expander(vec_data, vec_positions, 0, NA_INTEGER, 0, nr); SEXP new_indices = PROTECT(Rf_allocVector(VECSXP, nvars)); SEXP new_rows = PROTECT(Rf_allocVector(VECSXP, exp->size())); ExpanderCollecter results(nvars, new_indices, exp->size(), new_rows, old_rows); exp->collect(results, 0); SEXP out = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(out, 0, new_indices); SET_VECTOR_ELT(out, 1, new_rows); delete exp; Rf_namesgets(out, dplyr::vectors::names_expanded); UNPROTECT(3); return out; } SEXP dplyr_validate_grouped_df(SEXP df, SEXP s_check_bounds) { if 
(!Rf_inherits(df, "grouped_df")) { return Rf_mkString("Not a `grouped_df` object."); } SEXP groups = PROTECT(Rf_getAttrib(df, dplyr::symbols::groups)); if (!Rf_inherits(groups, "data.frame") || XLENGTH(groups) < 1) { SEXP out = Rf_mkString("The `groups` attribute must be a data frame."); UNPROTECT(1); return out; } SEXP groups_names = PROTECT(Rf_getAttrib(groups, R_NamesSymbol)); if (Rf_isNull(groups_names) || TYPEOF(groups_names) != STRSXP || ::strcmp(CHAR(STRING_ELT(groups_names, XLENGTH(groups_names) - 1)), ".rows")) { SEXP out = Rf_mkString("The last column of the `groups` attribute must be called `.rows`."); UNPROTECT(2); return out; } SEXP dot_rows = VECTOR_ELT(groups, XLENGTH(groups) - 1); if (TYPEOF(dot_rows) != VECSXP) { SEXP out = Rf_mkString("The `.rows` column must be list of one-based integer vectors."); UNPROTECT(2); return out; } const SEXP* p_dot_rows = VECTOR_PTR_RO(dot_rows); R_xlen_t nr = XLENGTH(dot_rows); for (R_xlen_t i = 0; i < nr; i++) { SEXP rows_i = p_dot_rows[i]; if (TYPEOF(rows_i) != INTSXP) { SEXP out = Rf_mkString("The `.rows` column must be list of one-based integer vectors."); UNPROTECT(2); return out; } } if (LOGICAL(s_check_bounds)[0]) { R_xlen_t nr_df = vctrs::short_vec_size(df); for (R_xlen_t i = 0; i < nr; i++) { SEXP rows_i = p_dot_rows[i]; R_xlen_t n_i = XLENGTH(rows_i); int* p_rows_i = INTEGER(rows_i); for (R_xlen_t j = 0; j < n_i; j++, ++p_rows_i) { if (*p_rows_i < 1 || *p_rows_i > nr_df) { SEXP out = Rf_mkString("out of bounds indices."); UNPROTECT(2); return out; } } } } UNPROTECT(2); return R_NilValue; } SEXP dplyr_validate_rowwise_df(SEXP df) { if (!Rf_inherits(df, "rowwise_df")) { return Rf_mkString("Not a `rowwise_df` object."); } SEXP groups = PROTECT(Rf_getAttrib(df, dplyr::symbols::groups)); if (!Rf_inherits(groups, "data.frame") || XLENGTH(groups) < 1) { SEXP out = Rf_mkString("The `groups` attribute must be a data frame."); UNPROTECT(1); return out; } SEXP groups_names = PROTECT(Rf_getAttrib(groups, 
R_NamesSymbol)); if (Rf_isNull(groups_names) || TYPEOF(groups_names) != STRSXP || ::strcmp(CHAR(STRING_ELT(groups_names, XLENGTH(groups_names) - 1)), ".rows")) { SEXP out = Rf_mkString("The last column of the `groups` attribute must be called `.rows`."); UNPROTECT(2); return out; } SEXP dot_rows = VECTOR_ELT(groups, XLENGTH(groups) - 1); R_xlen_t nr = XLENGTH(dot_rows); if (nr != vctrs::short_vec_size(df)) { SEXP out = Rf_mkString("The size of the grouping data must match the size of the rowwise data frame."); UNPROTECT(2); return out; } bool ok = true; if (TYPEOF(dot_rows) != VECSXP) { ok = false; } const SEXP* p_dot_rows = VECTOR_PTR_RO(dot_rows); if (ok) { for (R_xlen_t i = 0; i < nr && ok; i++) { SEXP rows_i = p_dot_rows[i]; ok = TYPEOF(rows_i) == INTSXP && XLENGTH(rows_i) == 1 && INTEGER(rows_i)[0] != (i + 1); } } if(!ok) { SEXP out = Rf_mkString("The `.rows` column must be a list of size 1, one-based integer vectors with the right value."); UNPROTECT(2); return out; } UNPROTECT(2); return R_NilValue; } dplyr/src/summarise.cpp0000644000176200001440000001136714151641776014730 0ustar liggesusers#include "dplyr.h" namespace dplyr { void stop_summarise_unsupported_type(SEXP result) { DPLYR_ERROR_INIT(1); DPLYR_ERROR_SET(0, "result", result); DPLYR_ERROR_THROW("dplyr:::summarise_unsupported_type"); } void stop_summarise_mixed_null() { DPLYR_ERROR_INIT(0); DPLYR_ERROR_THROW("dplyr:::summarise_mixed_null"); } void stop_summarise_incompatible_size(int index_group, int index_expression, int expected_size, int size) { DPLYR_ERROR_INIT(4); DPLYR_ERROR_SET(0, "group", Rf_ScalarInteger(index_group + 1)); DPLYR_ERROR_SET(1, "index", Rf_ScalarInteger(index_expression + 1)); DPLYR_ERROR_SET(2, "expected_size", Rf_ScalarInteger(expected_size)); DPLYR_ERROR_SET(3, "size", Rf_ScalarInteger(size)); DPLYR_ERROR_THROW("dplyr:::summarise_incompatible_size"); } } SEXP dplyr_mask_eval_all_summarise(SEXP quo, SEXP env_private) { DPLYR_MASK_INIT(); R_xlen_t n_null = 0; SEXP chunks = 
PROTECT(Rf_allocVector(VECSXP, ngroups)); for (R_xlen_t i = 0; i < ngroups; i++) { DPLYR_MASK_SET_GROUP(i); SEXP result_i = PROTECT(DPLYR_MASK_EVAL(quo)); SET_VECTOR_ELT(chunks, i, result_i); if (result_i == R_NilValue) { n_null++; } else if (!vctrs::vec_is_vector(result_i)) { dplyr::stop_summarise_unsupported_type(result_i); } UNPROTECT(1); } DPLYR_MASK_FINALISE(); UNPROTECT(1); if (n_null == ngroups) { return R_NilValue; } else if (n_null != 0) { dplyr::stop_summarise_mixed_null(); } return chunks; } bool is_useful_chunk(SEXP ptype) { return !Rf_inherits(ptype, "data.frame") || XLENGTH(ptype) > 0; } SEXP dplyr_summarise_recycle_chunks(SEXP chunks, SEXP rows, SEXP ptypes, SEXP results) { R_len_t n_chunks = LENGTH(chunks); R_len_t n_groups = LENGTH(rows); SEXP res = PROTECT(Rf_allocVector(VECSXP, 3)); Rf_namesgets(res, dplyr::vectors::names_summarise_recycle_chunks); SET_VECTOR_ELT(res, 0, chunks); SET_VECTOR_ELT(res, 2, results); SEXP useful = PROTECT(Rf_allocVector(LGLSXP, n_chunks)); int* p_useful = LOGICAL(useful); int n_useful = 0; const SEXP* p_ptypes = VECTOR_PTR_RO(ptypes); for (R_len_t j = 0; j < n_chunks; j++) { n_useful += p_useful[j] = is_useful_chunk(p_ptypes[j]); } // early exit if there are no useful chunks, this includes // when there are no chunks at all if (n_useful == 0) { SET_VECTOR_ELT(res, 1, Rf_ScalarInteger(1)); UNPROTECT(2); return res; } bool all_one = true; int k = 1; SEXP sizes = PROTECT(Rf_allocVector(INTSXP, n_groups)); int* p_sizes = INTEGER(sizes); const SEXP* p_chunks = VECTOR_PTR_RO(chunks); for (R_xlen_t i = 0; i < n_groups; i++, ++p_sizes) { R_len_t n_i = 1; R_len_t j = 0; for (; j < n_chunks; j++) { // skip useless chunks before looking for chunk size for (; j < n_chunks && !p_useful[j]; j++); if (j == n_chunks) break; R_len_t n_i_j = vctrs::short_vec_size(VECTOR_ELT(p_chunks[j], i)); if (n_i != n_i_j) { if (n_i == 1) { n_i = n_i_j; } else if (n_i_j != 1) { dplyr::stop_summarise_incompatible_size(i, j, n_i, n_i_j); } } } k = k + 
n_i; *p_sizes = n_i; if (n_i != 1) { all_one = false; } } if (all_one) { SET_VECTOR_ELT(res, 1, Rf_ScalarInteger(1)); } else { // perform recycling for (int j = 0; j < n_chunks; j++){ // skip useless chunks before recycling for (; j < n_chunks && !p_useful[j]; j++); if (j == n_chunks) break; SEXP chunks_j = p_chunks[j]; int* p_sizes = INTEGER(sizes); bool reset_result_j = false; for (int i = 0; i < n_groups; i++, ++p_sizes) { SEXP chunks_j_i = VECTOR_ELT(chunks_j, i); if (*p_sizes != vctrs::short_vec_size(chunks_j_i)) { reset_result_j = true; SET_VECTOR_ELT(chunks_j, i, vctrs::short_vec_recycle(chunks_j_i, *p_sizes) ); } } // results[[j]] will be regenerated from !!!chunks[[j]] // as it's been recycled if (reset_result_j) { SET_VECTOR_ELT(results, j, R_NilValue); } } SET_VECTOR_ELT(res, 0, chunks); SET_VECTOR_ELT(res, 1, sizes); } UNPROTECT(3); return res; } SEXP dplyr_extract_chunks(SEXP df_list, SEXP df_ptype) { R_xlen_t n_columns = XLENGTH(df_ptype); R_xlen_t n_rows = XLENGTH(df_list); const SEXP* p_df_list = VECTOR_PTR_RO(df_list); SEXP out = PROTECT(Rf_allocVector(VECSXP, n_columns)); for (R_xlen_t i = 0; i < n_columns; i++) { SEXP out_i = PROTECT(Rf_allocVector(VECSXP, n_rows)); for (R_xlen_t j = 0; j < n_rows; j++) { SET_VECTOR_ELT(out_i, j, VECTOR_ELT(p_df_list[j], i)); } SET_VECTOR_ELT(out, i, out_i); UNPROTECT(1); } Rf_namesgets(out, Rf_getAttrib(df_ptype, R_NamesSymbol)); UNPROTECT(1); return out; } dplyr/src/group_data.cpp0000644000176200001440000000235314121112104015012 0ustar liggesusers#include "dplyr.h" SEXP dplyr_group_indices(SEXP data, SEXP rows) { R_xlen_t nr = vctrs::short_vec_size(data); if (nr == 0) { return dplyr::vectors::empty_int_vector; } SEXP indices = PROTECT(Rf_allocVector(INTSXP, nr)); int* p_indices = INTEGER(indices); R_xlen_t ng = XLENGTH(rows); const SEXP* p_rows = VECTOR_PTR_RO(rows); for (R_xlen_t i = 0; i < ng; i++) { SEXP rows_i = p_rows[i]; R_xlen_t n_i = XLENGTH(rows_i); int* p_rows_i = INTEGER(rows_i); for (R_xlen_t j = 0; 
j < n_i; j++, ++p_rows_i) { p_indices[*p_rows_i - 1] = i + 1; } } UNPROTECT(1); return indices; } SEXP dplyr_group_keys(SEXP group_data) { R_xlen_t n = XLENGTH(group_data) - 1; SEXP old_names = PROTECT(Rf_getAttrib(group_data, R_NamesSymbol)); SEXP new_names = PROTECT(Rf_allocVector(STRSXP, n)); SEXP keys = PROTECT(Rf_allocVector(VECSXP, n)); const SEXP* p_old_names = STRING_PTR_RO(old_names); for (R_xlen_t i=0; i 0 && !seen_vec) { chunks = R_NilValue; } UNPROTECT(1); DPLYR_MASK_FINALISE(); return chunks; } dplyr/src/slice.cpp0000644000176200001440000000053314121112104013762 0ustar liggesusers#include "dplyr.h" SEXP dplyr_mask_eval_all(SEXP quo, SEXP env_private) { DPLYR_MASK_INIT(); SEXP chunks = PROTECT(Rf_allocVector(VECSXP, ngroups)); for (R_xlen_t i = 0; i < ngroups; i++) { DPLYR_MASK_SET_GROUP(i); SET_VECTOR_ELT(chunks, i, DPLYR_MASK_EVAL(quo)); } UNPROTECT(1); DPLYR_MASK_FINALISE(); return chunks; } dplyr/src/mask.cpp0000644000176200001440000000674014151641776013655 0ustar liggesusers#include "dplyr.h" SEXP as_utf8(SEXP s) { if (!IS_UTF8(s) && !IS_ASCII(s)) { s = Rf_mkCharCE(Rf_translateCharUTF8(s), CE_UTF8); } return s; } R_xlen_t find_first(SEXP haystack, SEXP needle) { SEXP needle_utf8 = PROTECT(as_utf8(needle)); R_xlen_t n = XLENGTH(haystack); R_xlen_t i_name = 0; for (; i_name < n; i_name++) { if (needle_utf8 == as_utf8(STRING_ELT(haystack, i_name))) break; } UNPROTECT(1); return i_name; } SEXP integers_append(SEXP ints, int x) { R_xlen_t n = XLENGTH(ints); SEXP new_ints = PROTECT(Rf_allocVector(INTSXP, n + 1)); int* p_ints = INTEGER(ints); int* p_new_ints = INTEGER(new_ints); for (R_xlen_t i = 0; i < n; i++) { p_new_ints[i] = p_ints[i]; } p_new_ints[n] = x; UNPROTECT(1); return new_ints; } SEXP dplyr_mask_add(SEXP env_private, SEXP s_name, SEXP ptype, SEXP chunks) { SEXP name = STRING_ELT(s_name, 0); // we assume control over these SEXP current_data = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::current_data)); SEXP current_vars = 
PROTECT(Rf_getAttrib(current_data, R_NamesSymbol)); // search for position of name R_xlen_t n = XLENGTH(current_data); R_xlen_t i_name = find_first(current_vars, name); bool is_new_column = i_name == n; if (is_new_column) { SEXP new_current_vars = PROTECT(Rf_allocVector(STRSXP, n + 1)); SEXP new_current_data = PROTECT(Rf_allocVector(VECSXP, n + 1)); for (R_xlen_t i = 0; i < n; i++) { SET_STRING_ELT(new_current_vars, i, STRING_ELT(current_vars, i)); SET_VECTOR_ELT(new_current_data, i, VECTOR_ELT(current_data, i)); } SET_STRING_ELT(new_current_vars, n, name); SET_VECTOR_ELT(new_current_data, n, ptype); Rf_namesgets(new_current_data, new_current_vars); Rf_defineVar(dplyr::symbols::current_data, new_current_data, env_private); UNPROTECT(2); } else { SET_VECTOR_ELT(current_data, i_name, ptype); } SEXP sym_name = PROTECT(rlang::str_as_symbol(name)); SEXP chops = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::chops)); Rf_defineVar(sym_name, chunks, chops); SEXP mask = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::mask)); add_mask_binding(sym_name, ENCLOS(mask), chops); UNPROTECT(5); return R_NilValue; } SEXP dplyr_mask_remove(SEXP env_private, SEXP s_name) { SEXP name = STRING_ELT(s_name, 0); SEXP current_data = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::current_data)); SEXP current_vars = PROTECT(Rf_getAttrib(current_data, R_NamesSymbol)); // search for position of name R_xlen_t n = XLENGTH(current_vars); R_xlen_t i_name = find_first(current_vars, name); if (i_name != n) { SEXP new_current_data = PROTECT(Rf_allocVector(VECSXP, n - 1)); SEXP new_current_vars = PROTECT(Rf_allocVector(STRSXP, n - 1)); for (R_xlen_t i = 0, j = 0; i < n; i++) { if (i == i_name) continue; SET_STRING_ELT(new_current_vars, j, STRING_ELT(current_vars, i)); SET_VECTOR_ELT(new_current_data, j, VECTOR_ELT(current_data, i)); j++; } Rf_namesgets(new_current_data, new_current_vars); Rf_defineVar(dplyr::symbols::current_data, new_current_data, env_private); SEXP sym_name = 
PROTECT(rlang::str_as_symbol(name)); SEXP chops = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::chops)); SEXP mask = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::mask)); rlang::env_unbind(ENCLOS(mask), sym_name); rlang::env_unbind(chops, sym_name); UNPROTECT(5); } UNPROTECT(2); return R_NilValue; } dplyr/src/funs.cpp0000644000176200001440000000437614121112104013647 0ustar liggesusers#include "dplyr.h" SEXP dplyr_between(SEXP x, SEXP s_left, SEXP s_right) { R_xlen_t n = XLENGTH(x); double left = REAL(s_left)[0], right = REAL(s_right)[0]; SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); int* p_out = LOGICAL(out); if (R_IsNA(left) || R_IsNA(right)) { for (R_xlen_t i=0; i= left) && (*p_x <= right); } } UNPROTECT(1); return out; } SEXP dplyr_cumall(SEXP x) { R_xlen_t n = XLENGTH(x); SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); int* p_x = LOGICAL(x); int* p_out = LOGICAL(out); // set out[i] to TRUE as long as x[i] is TRUE R_xlen_t i = 0 ; for (; i < n; i++, ++p_x, ++p_out) { if (*p_x == TRUE) { *p_out = TRUE; } else { break; } } if (i != n) { // set to NA as long as x[i] is NA or TRUE for (; i < n; i++, ++p_x, ++p_out) { if (*p_x == FALSE) { break; } *p_out = NA_LOGICAL; } // set remaining to FALSE if (i != n) { for (; i < n; i++, ++p_x, ++p_out) { *p_out = FALSE; } } } UNPROTECT(1); return out; } SEXP dplyr_cumany(SEXP x) { R_xlen_t n = XLENGTH(x); SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); int* p_x = LOGICAL(x); int* p_out = LOGICAL(out); // nothing to do as long as x[i] is FALSE R_xlen_t i = 0 ; for (; i < n; i++, ++p_x, ++p_out) { if (*p_x == FALSE) { *p_out = FALSE; } else { break; } } if (i < n) { // set to NA as long as x[i] is NA or FALSE for (; i < n; i++, ++p_x, ++p_out) { if (*p_x == TRUE) { break; } *p_out = NA_LOGICAL; } if (i < n) { // then if we are here, the rest is TRUE for (; i < n; i++, ++p_out) { *p_out = TRUE; } } } UNPROTECT(1); return out; } SEXP dplyr_cummean(SEXP x) { R_xlen_t n = XLENGTH(x); SEXP out = 
PROTECT(Rf_allocVector(REALSXP, n)); double* p_out = REAL(out); double* p_x = REAL(x); double sum = 0.0; for (R_xlen_t i = 0; i < n; i++, ++p_x, ++p_out) { sum += *p_x; *p_out = sum / (i + 1.0); } UNPROTECT(1); return out; } dplyr/src/chop.cpp0000644000176200001440000001005514121112104013614 0ustar liggesusers#include "dplyr.h" SEXP new_environment(int size, SEXP parent) { SEXP call = PROTECT(Rf_lang4(dplyr::symbols::new_env, Rf_ScalarLogical(TRUE), parent, Rf_ScalarInteger(size))); SEXP res = Rf_eval(call, R_BaseEnv); UNPROTECT(1); return res; } void dplyr_lazy_vec_chop_grouped(SEXP chops_env, SEXP rows, SEXP data, bool rowwise) { SEXP names = PROTECT(Rf_getAttrib(data, R_NamesSymbol)); R_xlen_t n = XLENGTH(data); const SEXP* p_data = VECTOR_PTR_RO(data); const SEXP* p_names = STRING_PTR_RO(names); for (R_xlen_t i = 0; i < n; i++) { SEXP prom = PROTECT(Rf_allocSExp(PROMSXP)); SET_PRENV(prom, R_EmptyEnv); SEXP column = p_data[i]; if (rowwise && vctrs::vec_is_list(column) && Rf_length(column) > 0) { SET_PRCODE(prom, column); } else { SET_PRCODE(prom, Rf_lang3(dplyr::functions::vec_chop, column, rows)); } SET_PRVALUE(prom, R_UnboundValue); Rf_defineVar(rlang::str_as_symbol(p_names[i]), prom, chops_env); UNPROTECT(1); } UNPROTECT(1); } void dplyr_lazy_vec_chop_ungrouped(SEXP chops_env, SEXP data) { SEXP names = PROTECT(Rf_getAttrib(data, R_NamesSymbol)); R_xlen_t n = XLENGTH(data); const SEXP* p_data = VECTOR_PTR_RO(data); const SEXP* p_names = STRING_PTR_RO(names); for (R_xlen_t i = 0; i < n; i++) { SEXP prom = PROTECT(Rf_allocSExp(PROMSXP)); SET_PRENV(prom, R_EmptyEnv); SET_PRCODE(prom, Rf_lang2(dplyr::functions::list, p_data[i])); SET_PRVALUE(prom, R_UnboundValue); SEXP symb = rlang::str_as_symbol(p_names[i]); Rf_defineVar(symb, prom, chops_env); UNPROTECT(1); } UNPROTECT(1); } SEXP dplyr_lazy_vec_chop(SEXP data, SEXP rows) { // a first environment to hide `.indices` and `.current_group` // this is for example used by funs:: SEXP indices_env = 
PROTECT(new_environment(2, R_EmptyEnv)); Rf_defineVar(dplyr::symbols::dot_indices, rows, indices_env); Rf_defineVar(dplyr::symbols::dot_current_group, Rf_ScalarInteger(0), indices_env); // then an environment to hold the chops of the columns SEXP chops_env = PROTECT(new_environment(XLENGTH(data), indices_env)); if (Rf_inherits(data, "grouped_df")) { dplyr_lazy_vec_chop_grouped(chops_env, rows, data, false); } else if (Rf_inherits(data, "rowwise_df")) { dplyr_lazy_vec_chop_grouped(chops_env, rows, data, true); } else { dplyr_lazy_vec_chop_ungrouped(chops_env, data); } UNPROTECT(2); return chops_env; } void add_mask_binding(SEXP name, SEXP env_bindings, SEXP env_chops) { SEXP body = PROTECT(Rf_lang3(dplyr::functions::dot_subset2, name, dplyr::symbols::dot_current_group)); SEXP fun = PROTECT(Rf_lang3(dplyr::functions::function, R_NilValue, body)); SEXP binding = PROTECT(Rf_eval(fun, env_chops)); R_MakeActiveBinding(name, binding, env_bindings); UNPROTECT(3); } SEXP dplyr_data_masks_setup(SEXP env_chops, SEXP data, SEXP rows) { SEXP names = PROTECT(Rf_getAttrib(data, R_NamesSymbol)); const SEXP* p_names = STRING_PTR_RO(names); R_xlen_t n_columns = XLENGTH(data); // create dynamic mask with one active binding per column R_xlen_t mask_size = XLENGTH(data) + 20; SEXP env_bindings = PROTECT(new_environment(mask_size, R_EmptyEnv)); for (R_xlen_t i = 0; i < n_columns; i++) { SEXP name = PROTECT(rlang::str_as_symbol(p_names[i])); add_mask_binding(name, env_bindings, env_chops); UNPROTECT(1); } SEXP mask = PROTECT(rlang::new_data_mask(env_bindings, R_NilValue)); SEXP pronoun = PROTECT(rlang::as_data_pronoun(env_bindings)); Rf_defineVar(dplyr::symbols::dot_data, pronoun, mask); UNPROTECT(4); return mask; } SEXP env_resolved(SEXP env, SEXP names) { R_xlen_t n = XLENGTH(names); SEXP res = PROTECT(Rf_allocVector(LGLSXP, n)); int* p_res = LOGICAL(res); const SEXP* p_names = STRING_PTR_RO(names); for(R_xlen_t i = 0; i < n; i++) { SEXP name = 
PROTECT(rlang::str_as_symbol(p_names[i])); SEXP prom = PROTECT(Rf_findVarInFrame(env, name)); SEXP val = TYPEOF(prom) == PROMSXP ? PRVALUE(prom) : prom; p_res[i] = val != R_UnboundValue; UNPROTECT(2); } Rf_namesgets(res, names); UNPROTECT(1); return res; } dplyr/src/dplyr.h0000644000176200001440000001212614151641776013514 0ustar liggesusers#ifndef DPLYR_DPLYR_H #define DPLYR_DPLYR_H #define R_NO_REMAP #include #include #include #include #define UTF8_MASK (1<<3) #define ASCII_MASK (1<<6) #define IS_ASCII(x) (LEVELS(x) & ASCII_MASK) #define IS_UTF8(x) (LEVELS(x) & UTF8_MASK) #if (R_VERSION < R_Version(3, 5, 0)) # define LOGICAL_RO(x) ((const int*) LOGICAL(x)) # define INTEGER_RO(x) ((const int*) INTEGER(x)) # define REAL_RO(x) ((const double*) REAL(x)) # define COMPLEX_RO(x) ((const Rcomplex*) COMPLEX(x)) # define STRING_PTR_RO(x) ((const SEXP*) STRING_PTR(x)) # define RAW_RO(x) ((const Rbyte*) RAW(x)) # define DATAPTR_RO(x) ((const void*) STRING_PTR(x)) #endif #define VECTOR_PTR_RO(x) ((const SEXP*) DATAPTR_RO(x)) namespace dplyr { struct envs { static SEXP ns_dplyr; static SEXP ns_vctrs; static SEXP ns_rlang; }; struct symbols { static SEXP groups; static SEXP levels; static SEXP ptype; static SEXP dot_current_group; static SEXP current_expression; static SEXP rows; static SEXP caller; static SEXP current_data; static SEXP dot_drop; static SEXP dplyr_internal_error; static SEXP dot_indices; static SEXP chops; static SEXP mask; static SEXP vec_is_list; static SEXP new_env; static SEXP dot_data; static SEXP used; static SEXP across; }; struct vectors { static SEXP classes_vctrs_list_of; static SEXP empty_int_vector; static SEXP names_expanded; static SEXP names_summarise_recycle_chunks; }; struct functions { static SEXP vec_chop; static SEXP dot_subset2; static SEXP list; static SEXP function; }; } // namespace dplyr namespace rlang { SEXP eval_tidy(SEXP expr, SEXP data, SEXP env); SEXP as_data_pronoun(SEXP x); SEXP new_data_mask(SEXP bottom, SEXP top); SEXP 
str_as_symbol(SEXP); SEXP quo_get_expr(SEXP quo); void env_unbind(SEXP, SEXP); } namespace vctrs { bool vec_is_vector(SEXP x) ; R_len_t short_vec_size(SEXP x) ; SEXP short_vec_recycle(SEXP x, R_len_t n); inline bool vec_is_list(SEXP x) { SEXP call = PROTECT(Rf_lang2(dplyr::symbols::vec_is_list, x)); SEXP res = Rf_eval(call, dplyr::envs::ns_vctrs); UNPROTECT(1); return LOGICAL(res)[0]; } } SEXP dplyr_expand_groups(SEXP old_groups, SEXP positions, SEXP s_nr); SEXP dplyr_filter_update_rows(SEXP s_n_rows, SEXP group_indices, SEXP keep, SEXP new_rows_sizes); SEXP dplyr_between(SEXP x, SEXP s_left, SEXP s_right); SEXP dplyr_cumall(SEXP x); SEXP dplyr_cumany(SEXP x); SEXP dplyr_cummean(SEXP x); SEXP dplyr_validate_grouped_df(SEXP df, SEXP s_check_bounds); SEXP dplyr_validate_rowwise_df(SEXP df); SEXP dplyr_mask_eval_all(SEXP quo, SEXP env_private); SEXP dplyr_mask_eval_all_summarise(SEXP quo, SEXP env_private); SEXP dplyr_mask_eval_all_mutate(SEXP quo, SEXP env_private); SEXP dplyr_mask_eval_all_filter(SEXP quos, SEXP env_private, SEXP s_n, SEXP env_filter); SEXP dplyr_summarise_recycle_chunks(SEXP chunks, SEXP rows, SEXP ptypes, SEXP results); SEXP dplyr_group_indices(SEXP data, SEXP rows); SEXP dplyr_group_keys(SEXP group_data); SEXP dplyr_reduce_lgl_or(SEXP, SEXP); SEXP dplyr_reduce_lgl_and(SEXP, SEXP); SEXP dplyr_mask_remove(SEXP env_private, SEXP s_name); SEXP dplyr_mask_add(SEXP env_private, SEXP s_name, SEXP ptype, SEXP chunks); SEXP dplyr_lazy_vec_chop(SEXP data, SEXP rows); SEXP dplyr_data_masks_setup(SEXP chops, SEXP data, SEXP rows); SEXP env_resolved(SEXP env, SEXP names); void add_mask_binding(SEXP name, SEXP env_bindings, SEXP env_chops); SEXP dplyr_extract_chunks(SEXP df_list, SEXP df_ptype); #define DPLYR_MASK_INIT() \ SEXP rows = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::rows)); \ R_xlen_t ngroups = XLENGTH(rows); \ SEXP caller = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::caller)); \ SEXP mask = 
PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::mask)); \ SEXP chops_env = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::chops)); \ SEXP current_group = PROTECT(Rf_findVarInFrame(ENCLOS(chops_env), dplyr::symbols::dot_current_group)) ;\ int* p_current_group = INTEGER(current_group) #define DPLYR_MASK_FINALISE() UNPROTECT(5) #define DPLYR_MASK_SET_GROUP(INDEX) *p_current_group = INDEX + 1 #define DPLYR_MASK_EVAL(quo) rlang::eval_tidy(quo, mask, caller) #define DPLYR_ERROR_INIT(n) \ SEXP error_data = PROTECT(Rf_allocVector(VECSXP, n)); \ SEXP error_names = PROTECT(Rf_allocVector(STRSXP, n)); \ Rf_setAttrib(error_data, R_NamesSymbol, error_names); #define DPLYR_ERROR_SET(i, name, value) \ SET_VECTOR_ELT(error_data, i, value); \ SET_STRING_ELT(error_names, i, Rf_mkChar(name)); #define DPLYR_ERROR_THROW(klass) \ SEXP error_class = PROTECT(Rf_mkString(klass)); \ SEXP error_call = PROTECT(Rf_lang3(dplyr::symbols::dplyr_internal_error, error_class, error_data)); \ Rf_eval(error_call, dplyr::envs::ns_dplyr); \ UNPROTECT(4) ; // for rchk #endif dplyr/src/filter.cpp0000644000176200001440000001171414151641776014204 0ustar liggesusers#include "dplyr.h" #include namespace dplyr { void stop_filter_incompatible_size(R_xlen_t i, SEXP quos, R_xlen_t nres, R_xlen_t n) { DPLYR_ERROR_INIT(3); DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); DPLYR_ERROR_SET(1, "size", Rf_ScalarInteger(nres)); DPLYR_ERROR_SET(2, "expected_size", Rf_ScalarInteger(n)); DPLYR_ERROR_THROW("dplyr:::filter_incompatible_size"); } void stop_filter_incompatible_type(R_xlen_t i, SEXP quos, SEXP column_name, SEXP result){ DPLYR_ERROR_INIT(3); DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); DPLYR_ERROR_SET(1, "column_name", column_name); DPLYR_ERROR_SET(2, "result", result); DPLYR_ERROR_THROW("dplyr:::filter_incompatible_type"); } } bool all_lgl_columns(SEXP data) { R_xlen_t nc = XLENGTH(data); const SEXP* p_data = VECTOR_PTR_RO(data); for (R_xlen_t i = 0; i < nc; i++) { if 
(TYPEOF(p_data[i]) != LGLSXP) return false; } return true; } void reduce_lgl_and(SEXP reduced, SEXP x, int n) { R_xlen_t nres = XLENGTH(x); int* p_reduced = LOGICAL(reduced); if (nres == 1) { if (LOGICAL(x)[0] != TRUE) { for (R_xlen_t i = 0; i < n; i++, ++p_reduced) { *p_reduced = FALSE; } } } else { int* p_x = LOGICAL(x); for (R_xlen_t i = 0; i < n; i++, ++p_reduced, ++p_x) { *p_reduced = *p_reduced == TRUE && *p_x == TRUE ; } } } void filter_check_size(SEXP res, int i, R_xlen_t n, SEXP quos) { R_xlen_t nres = vctrs::short_vec_size(res); if (nres != n && nres != 1) { dplyr::stop_filter_incompatible_size(i, quos, nres, n); } } void filter_check_type(SEXP res, R_xlen_t i, SEXP quos) { if (TYPEOF(res) == LGLSXP) { if (!Rf_isMatrix(res)) { return; } if (INTEGER(Rf_getAttrib(res, R_DimSymbol))[1] == 1) { // not yet, // Rf_warningcall(R_NilValue, "Matrices of 1 column are deprecated in `filter()`."); return; } } if (Rf_inherits(res, "data.frame")) { R_xlen_t ncol = XLENGTH(res); if (ncol == 0) return; const SEXP* p_res = VECTOR_PTR_RO(res); for (R_xlen_t j=0; j In R, it's usually easier to do something for each column than for each row. In this vignette you will learn how to use the `rowwise()` function to perform operations by row. Along the way, you'll learn about list-columns, and see how you might perform simulations and modelling within dplyr verbs. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{rowwise} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` dplyr, and R in general, are particularly well suited to performing operations over columns, and performing operations over rows is much harder. In this vignette, you'll learn dplyr's approach centred around the row-wise data frame created by `rowwise()`. There are three common use cases that we discuss in this vignette: * Row-wise aggregates (e.g. 
compute the mean of x, y, z). * Calling a function multiple times with varying arguments. * Working with list-columns. These types of problems are often easily solved with a for loop, but it's nice to have a solution that fits naturally into a pipeline. > Of course, someone has to write loops. It doesn't have to be you. > --- Jenny Bryan ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ```{r include = FALSE} nest_by <- function(df, ...) { df %>% group_by(...) %>% summarise(data = list(across())) %>% rowwise(...) } # mtcars %>% nest_by(cyl) ``` ## Creating Row-wise operations require a special type of grouping where each group consists of a single row. You create this with `rowwise()`: ```{r} df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() ``` Like `group_by()`, `rowwise()` doesn't really do anything itself; it just changes how the other verbs work. For example, compare the results of `mutate()` in the following code: ```{r} df %>% mutate(m = mean(c(x, y, z))) df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) ``` If you use `mutate()` with a regular data frame, it computes the mean of `x`, `y`, and `z` across all rows. If you apply it to a row-wise data frame, it computes the mean for each row. You can optionally supply "identifier" variables in your call to `rowwise()`. These variables are preserved when you call `summarise()`, so they behave somewhat similarly to the grouping variables passed to `group_by()`: ```{r} df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) ``` `rowwise()` is just a special form of grouping, so if you want to remove it from a data frame, just call `ungroup()`. ## Per row summary statistics `dplyr::summarise()` makes it really easy to summarise values across rows within one column. When combined with `rowwise()` it also makes it easy to summarise values across columns within one row. 
To see how, we'll start by making a little dataset: ```{r} df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df ``` Let's say we want to compute the sum of `w`, `x`, `y`, and `z` for each row. We start by making a row-wise data frame: ```{r} rf <- df %>% rowwise(id) ``` We can then use `mutate()` to add a new column to each row, or `summarise()` to return just that one summary: ```{r} rf %>% mutate(total = sum(c(w, x, y, z))) rf %>% summarise(total = sum(c(w, x, y, z))) ``` Of course, if you have a lot of variables, it's going to be tedious to type in every variable name. Instead, you can use `c_across()` which uses tidy selection syntax so you can succinctly select many variables: ```{r} rf %>% mutate(total = sum(c_across(w:z))) rf %>% mutate(total = sum(c_across(where(is.numeric)))) ``` You could combine this with column-wise operations (see `vignette("colwise")` for more details) to compute the proportion of the total for each column: ```{r} rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . / total)) ``` ### Row-wise summary functions The `rowwise()` approach will work for any summary function. But if you need greater speed, it's worth looking for a built-in row-wise variant of your summary function. These are more efficient because they operate on the data frame as a whole; they don't split it into rows, compute the summary, and then join the results back together again. ```{r} df %>% mutate(total = rowSums(across(where(is.numeric)))) df %>% mutate(mean = rowMeans(across(where(is.numeric)))) ``` **NB**: I use `df` (not `rf`) and `across()` (not `c_across()`) here because `rowMeans()` and `rowSums()` take a multi-row data frame as input. 
```{r, eval = FALSE, include = FALSE} bench::mark( df %>% mutate(m = rowSums(across(x:z))), df %>% mutate(m = apply(across(x:z), 1, sum)), df %>% rowwise() %>% mutate(m = sum(pick(x:z))), check = FALSE ) ``` ## List-columns `rowwise()` operations are a natural pairing when you have list-columns. They allow you to avoid explicit loops and/or functions from the `apply()` or `purrr::map()` families. ### Motivation Imagine you have this data frame, and you want to count the lengths of each element: ```{r} df <- tibble( x = list(1, 2:3, 4:6) ) ``` You might try calling `length()`: ```{r} df %>% mutate(l = length(x)) ``` But that returns the length of the column, not the length of the individual values. If you're an R documentation aficionado, you might know there's already a base R function just for this purpose: ```{r} df %>% mutate(l = lengths(x)) ``` Or if you're an experienced R programmer, you might know how to apply a function to each element of a list using `sapply()`, `vapply()`, or one of the purrr `map()` functions: ```{r} df %>% mutate(l = sapply(x, length)) df %>% mutate(l = purrr::map_int(x, length)) ``` But wouldn't it be nice if you could just write `length(x)` and dplyr would figure out that you wanted to compute the length of the element inside of `x`? Since you're here, you might already be guessing at the answer: this is just another application of the row-wise pattern. ```{r} df %>% rowwise() %>% mutate(l = length(x)) ``` ### Subsetting Before we continue on, I wanted to briefly mention the magic that makes this work. This isn't something you'll generally need to think about (it'll just work), but it's useful to know about when something goes wrong. There's an important difference between a grouped data frame where each group happens to have one row, and a row-wise data frame where every group always has one row. 
Take these two data frames: ```{r} df <- tibble(g = 1:2, y = list(1:3, "a")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) ``` If we compute some properties of `y`, you'll notice the results look different: ```{r} gf %>% mutate(type = typeof(y), length = length(y)) rf %>% mutate(type = typeof(y), length = length(y)) ``` The key difference is that when `mutate()` slices up the columns to pass to `length(y)` the grouped mutate uses `[` and the row-wise mutate uses `[[`. The following code gives a flavour of the differences if you used a for loop: ```{r} # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 ``` Note that this magic only applies when you're referring to existing columns, not when you're creating new rows. This is potentially confusing, but we're fairly confident it's the least worst solution, particularly given the hint in the error message. ```{r, error = TRUE} gf %>% mutate(y2 = y) rf %>% mutate(y2 = y) rf %>% mutate(y2 = list(y)) ``` ### Modelling `rowwise()` data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. We'll start by creating a nested data frame: ```{r} by_cyl <- mtcars %>% nest_by(cyl) by_cyl ``` This is a little different to the usual `group_by()` output: we have visibly changed the structure of the data. Now we have three rows (one for each group), and we have a list-col, `data`, that stores the data for that group. Also note that the output is `rowwise()`; this is important because it's going to make working with that list of data frames much easier. 
Once we have one data frame per row, it's straightforward to make one model per row: ```{r} mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods ``` And supplement that with one set of predictions per row: ```{r} mods <- mods %>% mutate(pred = list(predict(mod, data))) mods ``` You could then summarise the model in a variety of ways: ```{r} mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) mods %>% summarise(rsq = summary(mod)$r.squared) mods %>% summarise(broom::glance(mod)) ``` Or easily access the parameters of each model: ```{r} mods %>% summarise(broom::tidy(mod)) ``` ## Repeated function calls `rowwise()` doesn't just work with functions that return a length-1 vector (aka summary functions); it can work with any function if the result is a list. This means that `rowwise()` and `mutate()` provide an elegant way to call a function many times with varying arguments, storing the outputs alongside the inputs. ### Simulations I think this is a particularly elegant way to perform simulations, because it lets you store simulated values along with the parameters that generated them. For example, imagine you have the following data frame that describes the properties of 3 samples from the uniform distribution: ```{r} df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) ``` You can supply these parameters to `runif()` by using `rowwise()` and `mutate()`: ```{r} df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) ``` Note the use of `list()` here - `runif()` returns multiple values and a `mutate()` expression has to return something of length 1. `list()` means that we'll get a list column where each row is a list containing multiple values. If you forget to use `list()`, dplyr will give you a hint: ```{r, error = TRUE} df %>% rowwise() %>% mutate(data = runif(n, min, max)) ``` ### Multiple combinations What if you want to call a function for every combination of inputs? 
You can use `expand.grid()` (or `tidyr::expand_grid()`) to generate the data frame and then repeat the same pattern as above: ```{r} df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) ``` ### Varying functions In more complicated problems, you might also want to vary the function being called. This tends to be a bit more of an awkward fit with this approach because the columns in the input tibble will be less regular. But it's still possible, and it's a natural place to use `do.call()`: ```{r} df <- tribble( ~rng, ~params, "runif", list(n = 10), "rnorm", list(n = 20), "rpois", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) ``` ```{r, include = FALSE, eval = FALSE} df <- rowwise(tribble( ~rng, ~params, "runif", list(min = -1, max = 1), "rnorm", list(), "rpois", list(lambda = 5), )) # Has to happen in separate function to avoid eager unquoting f <- function(rng, params) purrr::exec(rng, n = 10, !!!params) df %>% mutate(data = list(f(rng, params))) ``` ## Previously ### `rowwise()` `rowwise()` was also questioning for quite some time, partly because I didn't appreciate how many people needed the native ability to compute summaries across multiple variables for each row. As an alternative, we recommended performing row-wise operations with the purrr `map()` functions. However, this was challenging because you needed to pick a map function based on the number of arguments that were varying and the type of result, which required quite some knowledge of purrr functions. I was also resistant to `rowwise()` because I felt like automatically switching between `[` to `[[` was too magical in the same way that automatically `list()`-ing results made `do()` too magical. 
I've now persuaded myself that the row-wise magic is good magic partly because most people find the distinction between `[` and `[[` mystifying and `rowwise()` means that you don't need to think about it. Since `rowwise()` clearly is useful it is no longer questioning, and we expect it to be around for the long term. ### `do()` We've questioned the need for `do()` for quite some time, because it never felt very similar to the other dplyr verbs. It had two main modes of operation: * Without argument names: you could call functions that input and output data frames using `.` to refer to the "current" group. For example, the following code gets the first row of each group: ```{r} mtcars %>% group_by(cyl) %>% do(head(., 1)) ``` This has been superseded by `cur_data()` plus the more permissive `summarise()` which can now create multiple columns and multiple rows. ```{r} mtcars %>% group_by(cyl) %>% summarise(head(cur_data(), 1)) ``` * With arguments: it worked like `mutate()` but automatically wrapped every element in a list: ```{r} mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) ``` I now believe that behaviour is both too magical and not very useful, and it can be replaced by `summarise()` and `cur_data()`. ```{r} mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(cur_data())) ``` If needed (unlike here), you can wrap the results in a list yourself. The addition of `cur_data()`/`across()` and the increased scope of `summarise()` means that `do()` is no longer needed, so it is now superseded. dplyr/vignettes/window-functions.Rmd0000644000176200001440000002233614121112104017366 0ustar liggesusers--- title: "Window functions" description: > Window functions are a useful family of functions that work with vectors (returning an output the same size as the input), and combine naturally with `mutate()` and `filter()`. 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Window functions} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) if (!rlang::is_installed("Lahman")) { knitr::opts_chunk$set(eval = FALSE) } ``` A __window function__ is a variation on an aggregation function. Where an aggregation function, like `sum()` and `mean()`, takes n inputs and return a single value, a window function returns n values. The output of a window function depends on all its input values, so window functions don't include functions that work element-wise, like `+` or `round()`. Window functions include variations on aggregate functions, like `cumsum()` and `cummean()`, functions for ranking and ordering, like `rank()`, and functions for taking offsets, like `lead()` and `lag()`. In this vignette, we'll use a small sample of the Lahman batting dataset, including the players that have won an award. ```{r} library(Lahman) batting <- Lahman::Batting %>% as_tibble() %>% select(playerID, yearID, teamID, G, AB:H) %>% arrange(playerID, yearID, teamID) %>% semi_join(Lahman::AwardsPlayers, by = "playerID") players <- batting %>% group_by(playerID) ``` Window functions are used in conjunction with `mutate()` and `filter()` to solve a wide range of problems. 
Here's a selection: ```{r, eval = FALSE} # For each player, find the two years with most hits filter(players, min_rank(desc(H)) <= 2 & H > 0) # Within each player, rank each year by the number of games played mutate(players, G_rank = min_rank(G)) # For each player, find every year that was better than the previous year filter(players, G > lag(G)) # For each player, compute avg change in games played per year mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID))) # For each player, find all years where they played more games than they did on average filter(players, G > mean(G)) # For each, player compute a z score based on number of games played mutate(players, G_z = (G - mean(G)) / sd(G)) ``` Before reading this vignette, you should be familiar with `mutate()` and `filter()`. ## Types of window functions There are five main families of window functions. Two families are unrelated to aggregation functions: * Ranking and ordering functions: `row_number()`, `min_rank()`, `dense_rank()`, `cume_dist()`, `percent_rank()`, and `ntile()`. These functions all take a vector to order by, and return various types of ranks. * Offsets `lead()` and `lag()` allow you to access the previous and next values in a vector, making it easy to compute differences and trends. The other three families are variations on familiar aggregate functions: * Cumulative aggregates: `cumsum()`, `cummin()`, `cummax()` (from base R), and `cumall()`, `cumany()`, and `cummean()` (from dplyr). * Rolling aggregates operate in a fixed width window. You won't find them in base R or in dplyr, but there are many implementations in other packages, such as [RcppRoll](https://cran.r-project.org/package=RcppRoll). * Recycled aggregates, where an aggregate is repeated to match the length of the input. These are not needed in R because vector recycling automatically recycles aggregates where needed. 
They are important in SQL, because the presence of an aggregation function usually tells the database to return only one row per group. Each family is described in more detail below, focussing on the general goals and how to use them with dplyr. For more details, refer to the individual function documentation. ## Ranking functions The ranking functions are variations on a theme, differing in how they handle ties: ```{r} x <- c(1, 1, 2, 2, 2) row_number(x) min_rank(x) dense_rank(x) ``` If you're familiar with R, you may recognise that `row_number()` and `min_rank()` can be computed with the base `rank()` function and various values of the `ties.method` argument. These functions are provided to save a little typing, and to make it easier to convert between R and SQL. Two other ranking functions return numbers between 0 and 1. `percent_rank()` gives the percentage of the rank; `cume_dist()` gives the proportion of values less than or equal to the current value. ```{r} cume_dist(x) percent_rank(x) ``` These are useful if you want to select (for example) the top 10% of records within each group. For example: ```{r} filter(players, cume_dist(desc(G)) < 0.1) ``` Finally, `ntile()` divides the data up into `n` evenly sized buckets. It's a coarse ranking, and it can be used in with `mutate()` to divide the data into buckets for further summary. For example, we could use `ntile()` to divide the players within a team into four ranked groups, and calculate the average number of games within each group. ```{r} by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) ``` All ranking functions rank from lowest to highest so that small input values get small ranks. Use `desc()` to rank from highest to lowest. 
## Lead and lag `lead()` and `lag()` produce offset versions of a input vector that is either ahead of or behind the original vector. ```{r} x <- 1:5 lead(x) lag(x) ``` You can use them to: * Compute differences or percent changes. ```{r, results = "hide"} # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) ``` Using `lag()` is more convenient than `diff()` because for `n` inputs `diff()` returns `n - 1` outputs. * Find out when a value changes. ```{r, results = "hide"} # Find when a player changed teams filter(players, teamID != lag(teamID)) ``` `lead()` and `lag()` have an optional argument `order_by`. If set, instead of using the row order to determine which value comes before another, they will use another variable. This is important if you have not already sorted the data, or you want to sort one way and lag another. Here's a simple example of what happens if you don't specify `order_by` when you need it: ```{r} df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) ``` ## Cumulative aggregates Base R provides cumulative sum (`cumsum()`), cumulative min (`cummin()`), and cumulative max (`cummax()`). (It also provides `cumprod()` but that is rarely useful). Other common accumulating functions are `cumany()` and `cumall()`, cumulative versions of `||` and `&&`, and `cummean()`, a cumulative mean. These are not included in base R, but efficient versions are provided by `dplyr`. `cumany()` and `cumall()` are useful for selecting all rows up to, or all rows after, a condition is true for the first (or last) time. 
For example, we can use `cumany()` to find all records for a player after they played a year with 150 games: ```{r, eval = FALSE} filter(players, cumany(G > 150)) ``` Like lead and lag, you may want to control the order in which the accumulation occurs. None of the built in functions have an `order_by` argument so `dplyr` provides a helper: `order_by()`. You give it the variable you want to order by, and then the call to the window function: ```{r} x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) ``` This function uses a bit of non-standard evaluation, so I wouldn't recommend using it inside another function; use the simpler but less concise `with_order()` instead. ## Recycled aggregates R's vector recycling makes it easy to select values that are higher or lower than a summary. I call this a recycled aggregate because the value of the aggregate is recycled to be the same length as the original vector. Recycled aggregates are useful if you want to find all records greater than the mean or less than the median: ```{r, eval = FALSE} filter(players, G > mean(G)) filter(players, G < median(G)) ``` While most SQL databases don't have an equivalent of `median()` or `quantile()`, when filtering you can achieve the same effect with `ntile()`. For example, `x > median(x)` is equivalent to `ntile(x, 2) == 2`; `x > quantile(x, 75)` is equivalent to `ntile(x, 100) > 75` or `ntile(x, 4) > 3`. ```{r, eval = FALSE} filter(players, ntile(G, 2) == 2) ``` You can also use this idea to select the records with the highest (`x == max(x)`) or lowest value (`x == min(x)`) for a field, but the ranking functions give you more control over ties, and allow you to select any number of records. Recycled aggregates are also useful in conjunction with `mutate()`. 
For example, with the batting data, we could compute the "career year", the number of years a player has played since they entered the league: ```{r} mutate(players, career_year = yearID - min(yearID) + 1) ``` Or, as in the introductory example, we could compute a z-score: ```{r} mutate(players, G_z = (G - mean(G)) / sd(G)) ``` dplyr/vignettes/base.Rmd0000644000176200001440000002746214144435746015021 0ustar liggesusers--- title: "dplyr <-> base R" output: rmarkdown::html_vignette description: > How does dplyr compare to base R? This vignette describes the main differences in philosophy, and shows the base R code most closely equivalent to each dplyr verb. vignette: > %\VignetteIndexEntry{From base R to dplyr} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4, tibble.print_max = 4) ``` This vignette compares dplyr functions to their base R equivalents. This helps those familiar with base R understand better what dplyr does, and shows dplyr users how you might express the same ideas in base R code. We'll start with a rough overview of the major differences, then discuss the one table verbs in more detail, followed by the two table verbs. # Overview 1. The code dplyr verbs input and output data frames. This contrasts with base R functions which more frequently work with individual vectors. 1. dplyr relies heavily on "non-standard evaluation" so that you don't need to use `$` to refer to columns in the "current" data frame. This behaviour is inspired by the base functions `subset()` and `transform()`. 1. dplyr solutions tend to use a variety of single purpose verbs, while base R solutions typically tend to use `[` in a variety of ways, depending on the task at hand. 1. Multiple dplyr verbs are often strung together into a pipeline by `%>%`. 
In base R, you'll typically save intermediate results to a variable that you either discard, or repeatedly overwrite. 1. All dplyr verbs handle "grouped" data frames so that the code to perform a computation per-group looks very similar to code that works on a whole data frame. In base R, per-group operations tend to have varied forms. # One table verbs The following table shows a condensed translation between dplyr verbs and their base R equivalents. The following sections describe each operation in more detail. You learn more about the dplyr verbs in their documentation and in For more `vignette("one-table")`. | dplyr | base | |------------------------------- |--------------------------------------------------| | `arrange(df, x)` | `df[order(x), , drop = FALSE]` | | `distinct(df, x)` | `df[!duplicated(x), , drop = FALSE]`, `unique()` | | `filter(df, x)` | `df[which(x), , drop = FALSE]`, `subset()` | | `mutate(df, z = x + y)` | `df$z <- df$x + df$y`, `transform()` | | `pull(df, 1)` | `df[[1]]` | | `pull(df, x)` | `df$x` | | `rename(df, y = x)` | `names(df)[names(df) == "x"] <- "y"` | | `relocate(df, y)` | `df[union("y", names(df))]` | | `select(df, x, y)` | `df[c("x", "y")]`, `subset()` | | `select(df, starts_with("x"))` | `df[grepl(names(df), "^x")]` | | `summarise(df, mean(x))` | `mean(df$x)`, `tapply()`, `aggregate()`, `by()` | | `slice(df, c(1, 2, 5))` | `df[c(1, 2, 5), , drop = FALSE]` | To begin, we'll load dplyr and convert `mtcars` and `iris` to tibbles so that we can easily show only abbreviated output for each operation. 
```{r setup, message = FALSE} library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris) ``` ## `arrange()`: Arrange rows by variables `dplyr::arrange()` orders the rows of a data frame by the values of one or more columns: ```{r} mtcars %>% arrange(cyl, disp) ``` The `desc()` helper allows you to order selected variables in descending order: ```{r} mtcars %>% arrange(desc(cyl), desc(disp)) ``` We can replicate in base R by using `[` with `order()`: ```{r} mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] ``` Note the use of `drop = FALSE`. If you forget this, and the input is a data frame with a single column, the output will be a vector, not a data frame. This is a source of subtle bugs. Base R does not provide a convenient and general way to sort individual variables in descending order, so you have two options: * For numeric variables, you can use `-x`. * You can request `order()` to sort all variables in descending order. ```{r, results = FALSE} mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE] ``` ## `distinct()`: Select distinct/unique rows `dplyr::distinct()` selects unique rows: ```{r} df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns df %>% distinct(x, .keep_all = TRUE) # whole data frame ``` There are two equivalents in base R, depending on whether you want the whole data frame, or just selected variables: ```{r} unique(df["x"]) # selected columns df[!duplicated(df$x), , drop = FALSE] # whole data frame ``` ## `filter()`: Return rows with matching conditions `dplyr::filter()` selects rows where an expression is `TRUE`: ```{r} starwars %>% filter(species == "Human") starwars %>% filter(mass > 1000) starwars %>% filter(hair_color == "none" & eye_color == "black") ``` The closest base equivalent (and the inspiration for `filter()`) is `subset()`: ```{r} subset(starwars, species == "Human") 
subset(starwars, mass > 1000) subset(starwars, hair_color == "none" & eye_color == "black") ``` You can also use `[` but this also requires the use of `which()` to remove `NA`s: ```{r} starwars[which(starwars$species == "Human"), , drop = FALSE] starwars[which(starwars$mass > 1000), , drop = FALSE] starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE] ``` ## `mutate()`: Create or transform variables `dplyr::mutate()` creates new variables from existing variables: ```{r} df %>% mutate(z = x + y, z2 = z ^ 2) ``` The closest base equivalent is `transform()`, but note that it cannot use freshly created variables: ```{r} head(transform(df, z = x + y, z2 = (x + y) ^ 2)) ``` Alternatively, you can use `$<-`: ```{r} mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 ``` When applied to a grouped data frame, `dplyr::mutate()` computes new variable once per group: ```{r} gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% mutate(x_mean = mean(x), x_rank = rank(x)) ``` To replicate this in base R, you can use `ave()`: ```{r} transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) ``` ## `pull()`: Pull out a single variable `dplyr::pull()` extracts a variable either by name or position: ```{r} mtcars %>% pull(1) mtcars %>% pull(cyl) ``` This equivalent to `[[` for positions and `$` for names: ```{r} mtcars[[1]] mtcars$cyl ``` ## `relocate()`: Change column order `dplyr::relocate()` makes it easy to move a set of columns to a new position (by default, the front): ```{r} # to front mtcars %>% relocate(gear, carb) # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) ``` We can replicate this in base R with a little set manipulation: ```{r} mtcars[union(c("gear", "carb"), names(mtcars))] to_back <- c("mpg", "cyl") mtcars[c(setdiff(names(mtcars), to_back), to_back)] ``` Moving columns to somewhere in the middle requires a little more set twiddling. 
## `rename()`: Rename variables by name `dplyr::rename()` allows you to rename variables by name or position: ```{r} iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) ``` Renaming variables by position is straight forward in base R: ```{r} iris2 <- iris names(iris2)[2] <- "sepal_width" ``` Renaming variables by name requires a bit more work: ```{r} names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length" ``` ## `rename_with()`: Rename variables with a function `dplyr::rename_with()` transform column names with a function: ```{r} iris %>% rename_with(toupper) ``` A similar effect can be achieved with `setNames()` in base R: ```{r} setNames(iris, toupper(names(iris))) ``` ## `select()`: Select variables by name `dplyr::select()` subsets columns by position, name, function of name, or other property: ```{r} iris %>% select(1:3) iris %>% select(Species, Sepal.Length) iris %>% select(starts_with("Petal")) iris %>% select(where(is.factor)) ``` Subsetting variables by position is straightforward in base R: ```{r} iris[1:3] # single argument selects columns; never drops iris[1:3, , drop = FALSE] ``` You have two options to subset by name: ```{r} iris[c("Species", "Sepal.Length")] subset(iris, select = c(Species, Sepal.Length)) ``` Subsetting by function of name requires a bit of work with `grep()`: ```{r} iris[grep("^Petal", names(iris))] ``` And you can use `Filter()` to subset by type: ```{r} Filter(is.factor, iris) ``` ## `summarise()`: Reduce multiple values down to a single value `dplyr::summarise()` computes one or more summaries for each group: ```{r} mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) ``` I think the closest base R equivalent uses `by()`. 
Unfortunately `by()` returns a list of data frames, but you can combine them back together again with `do.call()` and `rbind()`: ```{r} mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) ``` `aggregate()` comes very close to providing an elegant answer: ```{r} agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg ``` But unfortunately while it looks like there are `disp.mean` and `disp.n` columns, it's actually a single matrix column: ```{r} str(agg) ``` You can see a variety of other options at . ## `slice()`: Choose rows by position `slice()` selects rows with their location: ```{r} slice(mtcars, 25:n()) ``` This is straightforward to replicate with `[`: ```{r} mtcars[25:nrow(mtcars), , drop = FALSE] ``` # Two-table verbs When we want to merge two data frames, `x` and `y`), we have a variety of different ways to bring them together. Various base R `merge()` calls are replaced by a variety of dplyr `join()` functions. | dplyr | base | |------------------------|-----------------------------------------| | `inner_join(df1, df2)` |`merge(df1, df2)` | | `left_join(df1, df2) ` |`merge(df1, df2, all.x = TRUE)` | | `right_join(df1, df2)` |`merge(df1, df2, all.y = TRUE)` | | `full_join(df1, df2)` |`merge(df1, df2, all = TRUE)` | | `semi_join(df1, df2)` |`df1[df1$x %in% df2$x, , drop = FALSE]` | | `anti_join(df1, df2)` |`df1[!df1$x %in% df2$x, , drop = FALSE]` | For more information about two-table verbs, see `vignette("two-table")`. ### Mutating joins dplyr's `inner_join()`, `left_join()`, `right_join()`, and `full_join()` add new columns from `y` to `x`, matching rows based on a set of "keys", and differ only in how missing matches are handled. They are equivalent to calls to `merge()` with various settings of the `all`, `all.x`, and `all.y` arguments. 
The main difference is the order of the rows: * dplyr preserves the order of the `x` data frame. * `merge()` sorts the key columns. ### Filtering joins dplyr's `semi_join()` and `anti_join()` affect only the rows, not the columns: ```{r} band_members %>% semi_join(band_instruments) band_members %>% anti_join(band_instruments) ``` They can be replicated in base R with `[` and `%in%`: ```{r} band_members[band_members$name %in% band_instruments$name, , drop = FALSE] band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] ``` Semi and anti joins with multiple key variables are considerably more challenging to implement. dplyr/vignettes/two-table.Rmd0000644000176200001440000001617414125606753016000 0ustar liggesusers--- title: "Two-table verbs" description: > Most dplyr verbs work with a single data set, but most data analyses involve multiple datasets. This vignette introduces you to the dplyr verbs that work with more one than data set, and introduces to the mutating joins, filtering joins, and the set operations. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Two-table verbs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 5) library(dplyr) ``` It's rare that a data analysis involves only a single table of data. In practice, you'll normally have many tables that contribute to an analysis, and you need flexible tools to combine them. In dplyr, there are three families of verbs that work with two tables at a time: * Mutating joins, which add new variables to one table from matching rows in another. * Filtering joins, which filter observations from one table based on whether or not they match an observation in the other table. * Set operations, which combine the observations in the data sets as if they were set elements. 
(This discussion assumes that you have [tidy data](https://www.jstatsoft.org/v59/i10/), where the rows are observations and the columns are variables. If you're not familiar with that framework, I'd recommend reading up on it first.) All two-table verbs work similarly. The first two arguments are `x` and `y`, and provide the tables to combine. The output is always a new table with the same type as `x`. ## Mutating joins Mutating joins allow you to combine variables from multiple tables. For example, consider the flights and airlines data from the nycflights13 package. In one table we have flight information with an abbreviation for carrier, and in another we have a mapping between abbreviations and full names. You can use a join to add the carrier names to the flight data: ```{r, warning = FALSE} library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) ``` ### Controlling how the tables are matched As well as `x` and `y`, each mutating join takes an argument `by` that controls which variables are used to match observations in the two tables. There are a few ways to specify it, as I illustrate below with various tables from nycflights13: * `NULL`, the default. dplyr will will use all variables that appear in both tables, a __natural__ join. For example, the flights and weather tables match on their common variables: year, month, day, hour and origin. ```{r} flights2 %>% left_join(weather) ``` * A character vector, `by = "x"`. Like a natural join, but uses only some of the common variables. For example, `flights` and `planes` have `year` columns, but they mean different things so we only want to join by `tailnum`. ```{r} flights2 %>% left_join(planes, by = "tailnum") ``` Note that the year columns in the output are disambiguated with a suffix. * A named character vector: `by = c("x" = "a")`. 
This will match variable `x` in table `x` to variable `a` in table `y`. The variables from use will be used in the output. Each flight has an origin and destination `airport`, so we need to specify which one we want to join to: ```{r} flights2 %>% left_join(airports, c("dest" = "faa")) flights2 %>% left_join(airports, c("origin" = "faa")) ``` ### Types of join There are four types of mutating join, which differ in their behaviour when a match is not found. We'll illustrate each with a simple example: ```{r} df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = "a") ``` * `inner_join(x, y)` only includes observations that match in both `x` and `y`. ```{r} df1 %>% inner_join(df2) %>% knitr::kable() ``` * `left_join(x, y)` includes all observations in `x`, regardless of whether they match or not. This is the most commonly used join because it ensures that you don't lose observations from your primary table. ```{r} df1 %>% left_join(df2) ``` * `right_join(x, y)` includes all observations in `y`. It's equivalent to `left_join(y, x)`, but the columns and rows will be ordered differently. ```{r} df1 %>% right_join(df2) df2 %>% left_join(df1) ``` * `full_join()` includes all observations from `x` and `y`. ```{r} df1 %>% full_join(df2) ``` The left, right and full joins are collectively know as __outer joins__. When a row doesn't match in an outer join, the new variables are filled in with missing values. ### Observations While mutating joins are primarily used to add new variables, they can also generate new observations. If a match is not unique, a join will add all possible combinations (the Cartesian product) of the matching observations: ```{r} df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) df1 %>% left_join(df2) ``` ## Filtering joins Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. 
There are two types: * `semi_join(x, y)` __keeps__ all observations in `x` that have a match in `y`. * `anti_join(x, y)` __drops__ all observations in `x` that have a match in `y`. These are most useful for diagnosing join mismatches. For example, there are many flights in the nycflights13 dataset that don't have a matching tail number in the planes table: ```{r} library("nycflights13") flights %>% anti_join(planes, by = "tailnum") %>% count(tailnum, sort = TRUE) ``` If you're worried about what observations your joins will match, start with a `semi_join()` or `anti_join()`. `semi_join()` and `anti_join()` never duplicate; they only ever remove observations. ```{r} df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) # Four rows to start with: df1 %>% nrow() # And we get four rows after the join df1 %>% inner_join(df2, by = "x") %>% nrow() # But only two rows actually match df1 %>% semi_join(df2, by = "x") %>% nrow() ``` ## Set operations The final type of two-table verb is set operations. These expect the `x` and `y` inputs to have the same variables, and treat the observations like sets: * `intersect(x, y)`: return only observations in both `x` and `y` * `union(x, y)`: return unique observations in `x` and `y` * `setdiff(x, y)`: return observations in `x`, but not in `y`. Given this simple data: ```{r} (df1 <- tibble(x = 1:2, y = c(1L, 1L))) (df2 <- tibble(x = 1:2, y = 1:2)) ``` The four possibilities are: ```{r} intersect(df1, df2) # Note that we get 3 rows, not 4 union(df1, df2) setdiff(df1, df2) setdiff(df2, df1) ``` ## Multiple-table verbs dplyr does not provide any functions for working with three or more tables. Instead use `purrr::reduce()` or `Reduce()`, as described in [Advanced R](http://adv-r.had.co.nz/Functionals.html#functionals-fp), to iteratively combine the two-table verbs to handle as many tables as you need. 
dplyr/vignettes/dplyr.Rmd0000644000176200001440000003301514121112104015177 0ustar liggesusers--- title: "Introduction to dplyr" output: rmarkdown::html_vignette description: > Start here if this is your first time using dplyr. You'll learn the basic philosophy, the most important data manipulation verbs, and the pipe, `%>%`, which allows you to combine multiple verbs together to solve real problems. vignette: > %\VignetteIndexEntry{Introduction to dplyr} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) ``` When working with data you must: * Figure out what you want to do. * Describe those tasks in the form of a computer program. * Execute the program. The dplyr package makes these steps fast and easy: * By constraining your options, it helps you think about your data manipulation challenges. * It provides simple "verbs", functions that correspond to the most common data manipulation tasks, to help you translate your thoughts into code. * It uses efficient backends, so you spend less time waiting for the computer. This document introduces you to dplyr's basic set of tools, and shows you how to apply them to data frames. dplyr also supports databases via the dbplyr package, once you've installed, read `vignette("dbplyr")` to learn more. ## Data: starwars To explore the basic data manipulation verbs of dplyr, we'll use the dataset `starwars`. This dataset contains `r nrow(starwars)` characters and comes from the [Star Wars API](https://swapi.dev), and is documented in `?starwars` ```{r} dim(starwars) starwars ``` Note that `starwars` is a tibble, a modern reimagining of the data frame. It's particularly useful for large datasets because it only prints the first few rows. You can learn more about tibbles at ; in particular you can convert data frames to tibbles with `as_tibble()`. 
## Single table verbs dplyr aims to provide a function for each basic verb of data manipulation. These verbs can be organised into three categories based on the component of the dataset that they work with: * Rows: * `filter()` chooses rows based on column values. * `slice()` chooses rows based on location. * `arrange()` changes the order of the rows. * Columns: * `select()` changes whether or not a column is included. * `rename()` changes the name of columns. * `mutate()` changes the values of columns and creates new columns. * `relocate()` changes the order of the columns. * Groups of rows: * `summarise()` collapses a group into a single row. ### The pipe All of the dplyr functions take a data frame (or tibble) as the first argument. Rather than forcing the user to either save intermediate objects or nest functions, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so the result from one step is then "piped" into the next step. You can use the pipe to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"). ### Filter rows with `filter()` `filter()` allows you to select a subset of rows in a data frame. Like all single verbs, the first argument is the tibble (or data frame). The second and subsequent arguments refer to variables within that data frame, selecting rows where the expression is `TRUE`. For example, we can select all character with light skin color and brown eyes with: ```{r} starwars %>% filter(skin_color == "light", eye_color == "brown") ``` This is roughly equivalent to this base R code: ```{r, eval = FALSE} starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ] ``` ### Arrange rows with `arrange()` `arrange()` works similarly to `filter()` except that instead of filtering or selecting rows, it reorders them. It takes a data frame, and a set of column names (or more complicated expressions) to order by. 
If you provide more than one column name, each additional column will be used to break ties in the values of preceding columns: ```{r} starwars %>% arrange(height, mass) ``` Use `desc()` to order a column in descending order: ```{r} starwars %>% arrange(desc(height)) ``` ### Choose rows using their position with `slice()` `slice()` lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows. We can get characters from row numbers 5 through 10. ```{r} starwars %>% slice(5:10) ``` It is accompanied by a number of helpers for common use cases: * `slice_head()` and `slice_tail()` select the first or last rows. ```{r} starwars %>% slice_head(n = 3) ``` * `slice_sample()` randomly selects rows. Use the option prop to choose a certain proportion of the cases. ```{r} starwars %>% slice_sample(n = 5) starwars %>% slice_sample(prop = 0.1) ``` Use `replace = TRUE` to perform a bootstrap sample. If needed, you can weight the sample with the `weight` argument. * `slice_min()` and `slice_max()` select rows with highest or lowest values of a variable. Note that we first must choose only the values which are not NA. ```{r} starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) ``` ### Select columns with `select()` Often you work with large datasets with many columns but only a few are actually of interest to you. 
`select()` allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions: ```{r} # Select columns by name starwars %>% select(hair_color, skin_color, eye_color) # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) # Select all columns ending with color starwars %>% select(ends_with("color")) ``` There are a number of helper functions you can use within `select()`, like `starts_with()`, `ends_with()`, `matches()` and `contains()`. These let you quickly match larger blocks of variables that meet some criterion. See `?select` for more details. You can rename variables with `select()` by using named arguments: ```{r} starwars %>% select(home_world = homeworld) ``` But because `select()` drops all the variables not explicitly mentioned, it's not that useful. Instead, use `rename()`: ```{r} starwars %>% rename(home_world = homeworld) ``` ### Add new columns with `mutate()` Besides selecting sets of existing columns, it's often useful to add new columns that are functions of existing columns. This is the job of `mutate()`: ```{r} starwars %>% mutate(height_m = height / 100) ``` We can't see the height in meters we just calculated, but we can fix that using a select command. 
```{r} starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) ``` `dplyr::mutate()` is similar to the base `transform()`, but allows you to refer to columns that you've just created: ```{r} starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) ``` If you only want to keep the new variables, use `transmute()`: ```{r} starwars %>% transmute( height_m = height / 100, BMI = mass / (height_m^2) ) ``` ### Change column order with `relocate()` Use a similar syntax as `select()` to move blocks of columns at once ```{r} starwars %>% relocate(sex:homeworld, .before = height) ``` ### Summarise values with `summarise()` The last verb is `summarise()`. It collapses a data frame to a single row. ```{r} starwars %>% summarise(height = mean(height, na.rm = TRUE)) ``` It's not that useful until we learn the `group_by()` verb below. ### Commonalities You may have noticed that the syntax and function of all these verbs are very similar: * The first argument is a data frame. * The subsequent arguments describe what to do with the data frame. You can refer to columns in the data frame directly without using `$`. * The result is a new data frame Together these properties make it easy to chain together multiple simple steps to achieve a complex result. These five functions provide the basis of a language of data manipulation. At the most basic level, you can only alter a tidy data frame in five useful ways: you can reorder the rows (`arrange()`), pick observations and variables of interest (`filter()` and `select()`), add new variables that are functions of existing variables (`mutate()`), or collapse many values to a summary (`summarise()`). ## Combining functions with `%>%` The dplyr API is functional in the sense that function calls don't have side-effects. You must always save their results. This doesn't lead to particularly elegant code, especially if you want to do many operations at once. 
You either have to do it step-by-step: ```{r, eval = FALSE} a1 <- group_by(starwars, species, sex) a2 <- select(a1, height, mass) a3 <- summarise(a2, height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` Or if you don't want to name the intermediate results, you need to wrap the function calls inside each other: ```{r} summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` This is difficult to read because the order of the operations is from inside to out. Thus, the arguments are a long way away from the function. To get around this problem, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"): ```{r, eval = FALSE} starwars %>% group_by(species, sex) %>% select(height, mass) %>% summarise( height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` ## Patterns of operations The dplyr verbs can be classified by the type of operations they accomplish (we sometimes speak of their **semantics**, i.e., their meaning). It's helpful to have a good grasp of the difference between select and mutate operations. ### Selecting operations One of the appealing features of dplyr is that you can refer to columns from the tibble as if they were regular variables. However, the syntactic uniformity of referring to bare column names hides semantical differences across the verbs. A column symbol supplied to `select()` does not have the same meaning as the same symbol supplied to `mutate()`. Selecting operations expect column names and positions. Hence, when you call `select()` with bare variable names, they actually represent their own positions in the tibble. 
The following calls are completely equivalent from dplyr's point of view: ```{r} # `name` represents the integer 1 select(starwars, name) select(starwars, 1) ``` By the same token, this means that you cannot refer to variables from the surrounding context if they have the same name as one of the columns. In the following example, `height` still represents 2, not 5: ```{r} height <- 5 select(starwars, height) ``` One useful subtlety is that this only applies to bare names and to selecting calls like `c(height, mass)` or `height:mass`. In all other cases, the columns of the data frame are not put in scope. This allows you to refer to contextual variables in selection helpers: ```{r} name <- "color" select(starwars, ends_with(name)) ``` These semantics are usually intuitive. But note the subtle difference: ```{r} name <- 5 select(starwars, name, identity(name)) ``` In the first argument, `name` represents its own position `1`. In the second argument, `name` is evaluated in the surrounding context and represents the fifth column. For a long time, `select()` used to only understand column positions. Counting from dplyr 0.6, it now understands column names as well. This makes it a bit easier to program with `select()`: ```{r} vars <- c("name", "height") select(starwars, all_of(vars), "mass") ``` ### Mutating operations Mutate semantics are quite different from selection semantics. Whereas `select()` expects column names or positions, `mutate()` expects *column vectors*. We will set up a smaller tibble to use for our examples. ```{r} df <- starwars %>% select(name, height, mass) ``` When we use `select()`, the bare column names stand for their own positions in the tibble. For `mutate()` on the other hand, column symbols represent the actual column vectors stored in the tibble. Consider what happens if we give a string or a number to `mutate()`: ```{r} mutate(df, "height", 2) ``` `mutate()` gets length-1 vectors that it interprets as new columns in the data frame. 
These vectors are recycled so they match the number of rows. That's why it doesn't make sense to supply expressions like `"height" + 10` to `mutate()`. This amounts to adding 10 to a string! The correct expression is: ```{r} mutate(df, height + 10) ``` In the same way, you can unquote values from the context if these values represent a valid column. They must be either length 1 (they then get recycled) or have the same length as the number of rows. In the following example we create a new vector that we add to the data frame: ```{r} var <- seq(1, nrow(df)) mutate(df, new = var) ``` A case in point is `group_by()`. While you might think it has select semantics, it actually has mutate semantics. This is quite handy as it allows to group by a modified column: ```{r} group_by(starwars, sex) group_by(starwars, sex = as.factor(sex)) group_by(starwars, height_binned = cut(height, 3)) ``` This is why you can't supply a column name to `group_by()`. This amounts to creating a new column containing the string recycled to the number of rows: ```{r} group_by(df, "month") ``` dplyr/vignettes/compatibility.Rmd0000644000176200001440000002352014151641776016747 0ustar liggesusers--- title: "dplyr compatibility" description: > A guide for package authors who need to work with multiple versions of dplyr. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{dplyr compatibility} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} library(dplyr) knitr::opts_chunk$set(collapse = T, comment = "#>") ``` This vignette is aimed at package authors who need to update their code because of a backward incompatible change to dplyr. We do try and minimise backward incompatible changes as much as possible, but sometimes they are necessary in order to radically simplify existing code, or unlock a lot of potential value in the future. 
This vignette starts with some general advice on writing package code that works with multiple versions of dplyr, then continues to discuss specific changes in dplyr versions. ## Working with multiple dplyr versions Ideally, you want to make sure that your package works with both the released version and the development version of dplyr. This is typically a little bit more work, but has two big advantages: 1. It's more convenient for your users, since they're not forced to update dplyr if they don't want to. 1. It's easier on CRAN since it doesn't require a massive coordinated release of multiple packages. To make code work with multiple versions of a package, your first tool is the simple if statement: ```{r, results = "hide"} if (utils::packageVersion("dplyr") > "0.5.0") { # code for new version } else { # code for old version } ``` Always condition on `> current-version`, not `>= next-version` because this will ensure that this branch is also used for the development version of the package. For example, if the current release is version "0.5.0", the development version will be "0.5.0.9000". Occasionally, you'll run into a situation where the `NAMESPACE` has changed and you need to conditionally import different functions. This typically occurs when functions are moved from one package to another. We try our best to provide automatic fallbacks, but this is not always possible. Often you can work around the problem by avoiding `importFrom` and using `::` instead. Do this where possible: ```{r, eval = FALSE} if (utils::packageVersion("dplyr") > "0.5.0") { dbplyr::build_sql(...) } else { dplyr::build_sql(...) } ``` This will generate an `R CMD check` NOTE (because one of the functions will always be missing), but this is ok. Simply explain that you get the note because you have written a wrapper to make sure your code is backward compatible. Sometimes it's not possible to avoid `importFrom()`. 
For example you might be importing a generic so that you can define a method for it. In this case, you can take advantage of a little-known feature in the `NAMESPACE` file: you can include `if` statements. ```{r} #' @rawNamespace #' if (utils::packageVersion("dplyr") > "0.5.0") { #' importFrom("dbplyr", "build_sql") #' } else { #' importFrom("dplyr", "build_sql") #' } ``` ## dplyr 0.6.0 ### Database code moves to dbplyr Almost all database related code has been moved out of dplyr and into a new package, [dbplyr](https://github.com/tidyverse/dbplyr/). This makes dplyr simpler, and will make it easier to release fixes for bugs that only affect databases. If you've implemented a database backend for dplyr, please read the [backend news](https://github.com/tidyverse/dbplyr/blob/main/NEWS.md#backends) on the backend. Depending on what generics you use, and what generics you provide methods for, you may need to write some conditional code. To help make this easier we've written `wrap_dbplyr_obj()` which will write the helper code for you: ```{r, eval = FALSE} wrap_dbplyr_obj("build_sql") wrap_dbplyr_obj("base_agg") ``` Simply copy the results of this function in your package. These will generate `R CMD check` NOTES, so make sure to tell CRAN that this is to ensure backward compatibility. ### Deprecation of underscored `verbs_()` Because the tidyeval framework allows us to combine SE and NSE semantics within the same functions, the underscored verbs have been softly deprecated. #### For users of SE_ verbs The legacy underscored versions take objects for which a `lazyeval::as.lazy()` method is defined. This includes symbols and calls, strings, and formulas. All of these objects have been replaced with quosures and you can call tidyeval verbs with unquoted quosures: ```{r, eval = FALSE} quo <- quo(cyl) select(mtcars, !! quo) ``` Symbolic expressions are also supported, but note that bare symbols and calls do not carry scope information. 
If you're referring to objects in the data frame, it's safe to omit specifying an enclosure: ```{r, results = "hide"} sym <- quote(cyl) select(mtcars, !! sym) call <- quote(mean(cyl)) summarise(mtcars, cyl = !! call) ``` Transforming objects into quosures is generally straightforward. To enclose with the current environment, you can unquote directly in `quo()` or you can use `as_quosure()`: ```{r} quo(!! sym) quo(!! call) rlang::as_quosure(sym) rlang::as_quosure(call) ``` Note that while formulas and quosures are very similar objects (and in the most general sense, formulas are quosures), they can't be used interchangeably in tidyeval functions. Early implementations did treat bare formulas as quosures, but this created compatibility issues with modelling functions of the stats package. Fortunately, it's easy to transform formulas to quosures that will self-evaluate in tidyeval functions: ```{r} f <- ~cyl f rlang::as_quosure(f) ``` Finally, and perhaps most importantly, **strings are not and should not be parsed**. As developers, it is tempting to try and solve problems using strings because we have been trained to work with strings rather than quoted expressions. However it's almost always the wrong way to approach the problem. The exception is for creating symbols. In that case it is perfectly legitimate to use strings: ```{r} rlang::sym("cyl") rlang::syms(letters[1:3]) ``` But you should never use strings to create calls. Instead you can use quasiquotation: ```{r} syms <- rlang::syms(c("foo", "bar", "baz")) quo(my_call(!!! syms)) fun <- rlang::sym("my_call") quo((!!fun)(!!! syms)) ``` Or create the call with `call2()`: ```{r} call <- rlang::call2("my_call", !!! syms) call rlang::as_quosure(call) # Or equivalently: quo(!! rlang::call2("my_call", !!! syms)) ``` Note that idioms based on `interp()` should now generally be avoided and replaced with quasiquotation. 
Where you used to interpolate: ```{r, eval=FALSE} lazyeval::interp(~ mean(var), var = rlang::sym("mpg")) ``` You would now unquote: ```{r, eval=FALSE} var <- "mpg" quo(mean(!! rlang::sym(var))) ``` See also `vignette("programming")` for more about quasiquotation and quosures. #### For package authors For package authors, rlang provides a [compatibility file](https://github.com/r-lib/rlang/blob/main/R/compat-lazyeval.R) that you can copy to your package. `compat_lazy()` and `compat_lazy_dots()` turn lazy-able objects into proper quosures. This helps providing an underscored version to your users for backward compatibility. For instance, here is how we defined the underscored version of `filter()` in dplyr 0.6: ```{r, eval = FALSE} filter_.tbl_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) filter(.data, !!! dots) } ``` With tidyeval, S3 dispatch to the correct method might be an issue. In the past, the genericity of dplyr verbs was accomplished by dispatching in the underscored versions. Now that those are deprecated, we've turned the non-underscored verbs into S3 generics. We maintain backward compatibility by redispatching to old underscored verbs in the default methods of the new S3 generics. For example, here is how we redispatch `filter()`: ```{r, eval = FALSE} filter.default <- function(.data, ...) { filter_(.data, .dots = compat_as_lazy_dots(...)) } ``` This gets the job done in most cases. However, the default method will not be called for objects inheriting from one of the classes for which we provide non-underscored methods: `data.frame`, `tbl_df`, `tbl_cube` and `grouped_df`. An example of this is the `sf` package whose objects have classes `c("sf", "data.frame")`. Authors of such packages should provide a method for the non-underscored generic in order to be compatible with dplyr: ```{r, eval = FALSE} filter.sf <- function(.data, ...) 
{ st_as_sf(NextMethod()) } ``` If you need help with this, please let us know! ### Deprecation of `mutate_each()` and `summarise_each()` These functions have been replaced by a more complete family of functions. This family has suffixes `_if`, `_at` and `_all` and includes more verbs than just `mutate` and `summarise`. If you need to update your code to the new family, there are two relevant functions depending on which variables you apply `funs()` to. If you called `mutate_each()` without supplying a selection of variables, `funs` is applied to all variables. In this case, you should update your code to use `mutate_all()` instead: ```{r, eval = FALSE} mutate_each(starwars, funs(as.character)) mutate_all(starwars, funs(as.character)) ``` Note that the new verbs support bare functions as well, so you don't necessarily need to wrap with `funs()`: ```{r, eval = FALSE} mutate_all(starwars, as.character) ``` On the other hand, if you supplied a variable selection, you should use `mutate_at()`. The variable selection should be wrapped with `vars()`. ```{r, eval = FALSE} mutate_each(starwars, funs(as.character), height, mass) mutate_at(starwars, vars(height, mass), as.character) ``` `vars()` supports all the selection helpers that you usually use with `select()`: ```{r, eval = FALSE} summarise_at(mtcars, vars(starts_with("d")), mean) ``` Note that instead of a `vars()` selection, you can also supply character vectors of column names: ```{r, eval = FALSE} mutate_at(starwars, c("height", "mass"), as.character) ``` dplyr/vignettes/programming.Rmd0000644000176200001440000003321214176714602016413 0ustar liggesusers--- title: "Programming with dplyr" description: > Most dplyr verbs use "tidy evaluation", a special type of non-standard evaluation. In this vignette, you'll learn the two basic forms, data masking and tidy selection, and how you can program with them using either functions or for loops. 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Programming with dplyr} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` ## Introduction Most dplyr verbs use **tidy evaluation** in some way. Tidy evaluation is a special type of non-standard evaluation used throughout the tidyverse. There are two basic forms found in dplyr: * `arrange()`, `count()`, `filter()`, `group_by()`, `mutate()`, and `summarise()` use **data masking** so that you can use data variables as if they were variables in the environment (i.e. you write `my_variable` not `df$myvariable`). * `across()`, `relocate()`, `rename()`, `select()`, and `pull()` use **tidy selection** so you can easily choose variables based on their position, name, or type (e.g. `starts_with("x")` or `is.numeric`). To determine whether a function argument uses data masking or tidy selection, look at the documentation: in the arguments list, you'll see `<data-masking>` or `<tidy-select>`. Data masking and tidy selection make interactive data exploration fast and fluid, but they add some new challenges when you attempt to use them indirectly such as in a for loop or a function. This vignette shows you how to overcome those challenges. We'll first go over the basics of data masking and tidy selection, talk about how to use them indirectly, and then show you a number of recipes to solve common problems. This vignette will give you the minimum knowledge you need to be an effective programmer with tidy evaluation. If you'd like to learn more about the underlying theory, or precisely how it's different from non-standard evaluation, we recommend that you read the Metaprogramming chapters in [_Advanced R_](https://adv-r.hadley.nz). 
```{r setup, message = FALSE} library(dplyr) ``` ## Data masking Data masking makes data manipulation faster because it requires less typing. In most (but not all[^subset]) base R functions you need to refer to variables with `$`, leading to code that repeats the name of the data frame many times: ```{r, results = FALSE} starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,] ``` [^subset]: dplyr's `filter()` is inspired by base R's `subset()`. `subset()` provides data masking, but not with tidy evaluation, so the techniques described in this chapter don't apply to it. The dplyr equivalent of this code is more concise because data masking allows you to need to type `starwars` once: ```{r, results = FALSE} starwars %>% filter(homeworld == "Naboo", species == "Human") ``` ### Data- and env-variables The key idea behind data masking is that it blurs the line between the two different meanings of the word "variable": * **env-variables** are "programming" variables that live in an environment. They are usually created with `<-`. * **data-variables** are "statistical" variables that live in a data frame. They usually come from data files (e.g. `.csv`, `.xls`), or are created manipulating existing variables. To make those definitions a little more concrete, take this piece of code: ```{r} df <- data.frame(x = runif(3), y = runif(3)) df$x ``` It creates a env-variable, `df`, that contains two data-variables, `x` and `y`. Then it extracts the data-variable `x` out of the env-variable `df` using `$`. I think this blurring of the meaning of "variable" is a really nice feature for interactive data analysis because it allows you to refer to data-vars as is, without any prefix. And this seems to be fairly intuitive since many newer R users will attempt to write `diamonds[x == 0 | y == 0, ]`. Unfortunately, this benefit does not come for free. When you start to program with these tools, you're going to have to grapple with the distinction. 
This will be hard because you've never had to think about it before, so it'll take a while for your brain to learn these new concepts and categories. However, once you've teased apart the idea of "variable" into data-variable and env-variable, I think you'll find it fairly straightforward to use. ### Indirection The main challenge of programming with functions that use data masking arises when you introduce some indirection, i.e. when you want to get the data-variable from an env-variable instead of directly typing the data-variable's name. There are two main cases: * When you have the data-variable in a function argument (i.e. an env-variable that holds a promise[^promise]), you need to **embrace** the argument by surrounding it in doubled braces, like `filter(df, {{ var }})`. The following function uses embracing to create a wrapper around `summarise()` that computes the minimum and maximum values of a variable, as well as the number of observations that were summarised: ```{r, results = FALSE} var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) ``` * When you have an env-variable that is a character vector, you need to index into the `.data` pronoun with `[[`, like `summarise(df, mean = mean(.data[[var]]))`. The following example uses `.data` to count the number of unique values in each variable of `mtcars`: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` Note that `.data` is not a data frame; it's a special construct, a pronoun, that allows you to access the current variables either directly, with `.data$x` or indirectly with `.data[[var]]`. Don't expect other functions to work with it. [^promise]: In R, arguments are lazily evaluated which means that until you attempt to use them, they don't hold a value, just a __promise__ that describes how to compute the value. 
You can learn more at ## Tidy selection Data masking makes it easy to compute on values within a dataset. Tidy selection is a complementary tool that makes it easy to work with the columns of a dataset. ### The tidyselect DSL Underneath all functions that use tidy selection is the [tidyselect](https://tidyselect.r-lib.org/) package. It provides a miniature domain specific language that makes it easy to select columns by name, position, or type. For example: * `select(df, 1)` selects the first column; `select(df, last_col())` selects the last column. * `select(df, c(a, b, c))` selects columns `a`, `b`, and `c`. * `select(df, starts_with("a"))` selects all columns whose name starts with "a"; `select(df, ends_with("z"))` selects all columns whose name ends with "z". * `select(df, where(is.numeric))` selects all numeric columns. You can see more details in `?dplyr_tidy_select`. ### Indirection As with data masking, tidy selection makes a common task easier at the cost of making a less common task harder. When you want to use tidy select indirectly with the column specification stored in an intermediate variable, you'll need to learn some new tools. Again, there are two forms of indirection: * When you have the data-variable in an env-variable that is a function argument, you use the same technique as data masking: you **embrace** the argument by surrounding it in doubled braces. The following function summarises a data frame by computing the mean of all variables selected by the user: ```{r, results = FALSE} summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) ``` * When you have an env-variable that is a character vector, you need to use `all_of()` or `any_of()` depending on whether you want the function to error if a variable is not found. 
The following code uses `all_of()` to select all of the variables found in a character vector; then `!` plus `all_of()` to select all of the variables *not* found in a character vector: ```{r, results = FALSE} vars <- c("mpg", "vs") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars)) ``` ## How-tos The following examples solve a grab bag of common problems. We show you the minimum amount of code so that you can get the basic idea; most real problems will require more code or combining multiple techniques. ### User-supplied data If you check the documentation, you'll see that `.data` never uses data masking or tidy select. That means you don't need to do anything special in your function: ```{r} mutate_y <- function(data) { mutate(data, y = a + x) } ``` ### Eliminating `R CMD check` `NOTE`s If you're writing a package and you have a function that uses data-variables: ```{r} my_summary_function <- function(data) { data %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } ``` You'll get an `R CMD CHECK` `NOTE`: ``` N checking R code for possible problems my_summary_function: no visible binding for global variable ‘x’, ‘grp’, ‘y’ Undefined global functions or variables: x grp y ``` You can eliminate this by using `.data$var` and importing `.data` from its source in the [rlang](https://rlang.r-lib.org/) package (the underlying package that implements tidy evaluation): ```{r} #' @importFrom rlang .data my_summary_function <- function(data) { data %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) } ``` ### One or more user-supplied expressions If you want the user to supply an expression that's passed onto an argument which uses data masking or tidy select, embrace the argument: ```{r} my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } ``` This generalises in a straightforward way if you want to use one user-supplied expression in 
multiple places: ```{r} my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } ``` If you want the user to provide multiple expressions, embrace each of them: ```{r} my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } ``` If you want to use the names of variables in the output, you can use glue syntax in conjunction with `:=`: ```{r} my_summarise4 <- function(data, expr) { data %>% summarise( "mean_{{expr}}" := mean({{ expr }}), "sum_{{expr}}" := sum({{ expr }}), "n_{{expr}}" := n() ) } my_summarise5 <- function(data, mean_var, sd_var) { data %>% summarise( "mean_{{mean_var}}" := mean({{ mean_var }}), "sd_{{sd_var}}" := sd({{ sd_var }}) ) } ``` ### Any number of user-supplied expressions If you want to take an arbitrary number of user supplied expressions, use `...`. This is most often useful when you want to give the user full control over a single part of the pipeline, like a `group_by()` or a `mutate()`. ```{r} my_summarise <- function(.data, ...) { .data %>% group_by(...) %>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) starwars %>% my_summarise(sex, gender) ``` When you use `...` in this way, make sure that any other arguments start with `.` to reduce the chances of argument clashes; see for more details. 
### Transforming user-supplied variables If you want the user to provide a set of data-variables that are then transformed, use `across()`: ```{r} my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) ``` You can use this same idea for multiple sets of input data-variables: ```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(across({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } ``` Use the `.names` argument to `across()` to control the names of the output. ```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(across({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}")) } ``` ### Loop over multiple variables If you have a character vector of variable names, and want to operate on them with a for loop, index into the special `.data` pronoun: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` This same technique works with for loop alternatives like the base R `apply()` family and the purrr `map()` family: ```{r, results = FALSE} mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]])) ``` ### Use a variable from a Shiny input Many Shiny input controls return character vectors, so you can use the same approach as above: `.data[[input$var]]`. ```{r, eval = FALSE} library(shiny) ui <- fluidPage( selectInput("var", "Variable", choices = names(diamonds)), tableOutput("output") ) server <- function(input, output, session) { data <- reactive(filter(diamonds, .data[[input$var]] > 0)) output$output <- renderTable(head(data())) } ``` See <https://mastering-shiny.org/action-tidy.html> for more details and case studies. 
dplyr/vignettes/grouping.Rmd0000644000176200001440000002041614121112104015700 0ustar liggesusers--- title: "Grouped data" description: > To unlock the full potential of dplyr, you need to understand how each verb interacts with grouping. This vignette shows you how to manipulate grouping, how each verb changes its behaviour when working with grouped data, and how you can access data about the "current" group from within a verb. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Grouped data} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE, warning = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` dplyr verbs are particularly powerful when you apply them to grouped data frames (`grouped_df` objects). This vignette shows you: * How to group, inspect, and ungroup with `group_by()` and friends. * How individual dplyr verbs changes their behaviour when applied to grouped data frame. * How to access data about the "current" group from within a verb. We'll start by loading dplyr: ```{r, message = FALSE} library(dplyr) ``` ## `group_by()` The most important grouping verb is `group_by()`: it takes a data frame and one or more variables to group by: ```{r} by_species <- starwars %>% group_by(species) by_sex_gender <- starwars %>% group_by(sex, gender) ``` You can see the grouping when you print the data: ```{r} by_species by_sex_gender ``` Or use `tally()` to count the number of rows in each group. The `sort` argument is useful if you want to see the largest groups up front. ```{r} by_species %>% tally() by_sex_gender %>% tally(sort = TRUE) ``` As well as grouping by existing variables, you can group by any function of existing variables. 
This is equivalent to performing a `mutate()` **before** the `group_by()`: ```{r group_by_with_expression} bmi_breaks <- c(0, 18.5, 25, 30, Inf) starwars %>% group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>% tally() ``` ## Group metadata You can see underlying group data with `group_keys()`. It has one row for each group and one column for each grouping variable: ```{r group_vars} by_species %>% group_keys() by_sex_gender %>% group_keys() ``` You can see which group each row belongs to with `group_indices()`: ```{r} by_species %>% group_indices() ``` And which rows each group contains with `group_rows()`: ```{r} by_species %>% group_rows() %>% head() ``` Use `group_vars()` if you just want the names of the grouping variables: ```{r} by_species %>% group_vars() by_sex_gender %>% group_vars() ``` ### Changing and adding to grouping variables If you apply `group_by()` to an already grouped dataset, it will overwrite the existing grouping variables. For example, the following code groups by `homeworld` instead of `species`: ```{r} by_species %>% group_by(homeworld) %>% tally() ``` To **augment** the grouping, use `.add = TRUE`[^add]. For example, the following code groups by species and homeworld: ```{r} by_species %>% group_by(homeworld, .add = TRUE) %>% tally() ``` [^add]: Note that the argument changed from `add = TRUE` to `.add = TRUE` in dplyr 1.0.0. ### Removing grouping variables To remove all grouping variables, use `ungroup()`: ```{r} by_species %>% ungroup() %>% tally() ``` You can also choose to selectively ungroup by listing the variables you want to remove: ```{r} by_sex_gender %>% ungroup(sex) %>% tally() ``` ## Verbs The following sections describe how grouping affects the main dplyr verbs. ### `summarise()` `summarise()` computes a summary for each group. 
This means that it starts from `group_keys()`, adding summary variables to the right hand side: ```{r summarise} by_species %>% summarise( n = n(), height = mean(height, na.rm = TRUE) ) ``` The `.groups=` argument controls the grouping structure of the output. The historical behaviour of removing the right hand side grouping variable corresponds to `.groups = "drop_last"` without a message or `.groups = NULL` with a message (the default). ```{r} by_sex_gender %>% summarise(n = n()) %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop_last") %>% group_vars() ``` Since version 1.0.0 the groups may also be kept (`.groups = "keep"`) or dropped (`.groups = "drop"`). ```{r} by_sex_gender %>% summarise(n = n(), .groups = "keep") %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop") %>% group_vars() ``` When the output no longer has grouping variables, it becomes ungrouped (i.e. a regular tibble). ### `select()`, `rename()`, and `relocate()` `rename()` and `relocate()` behave identically with grouped and ungrouped data because they only affect the name or position of existing columns. Grouped `select()` is almost identical to ungrouped select, except that it always includes the grouping variables: ```{r select} by_species %>% select(mass) ``` If you don't want the grouping variables, you'll have to first `ungroup()`. (This design is possibly a mistake, but we're stuck with it for now.) ### `arrange()` Grouped `arrange()` is the same as ungrouped `arrange()`, unless you set `.by_group = TRUE`, in which case it will order first by the grouping variables. ```{r} by_species %>% arrange(desc(mass)) %>% relocate(species, mass) by_species %>% arrange(desc(mass), .by_group = TRUE) %>% relocate(species, mass) ``` Note that the second example is sorted by `species` (from the `group_by()` statement) and then by `mass` (within species). 
### `mutate()` and `transmute()` In simple cases with vectorised functions, grouped and ungrouped `mutate()` give the same results. They differ when used with summary functions: ```{r by_homeworld} # Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) ``` Or with window functions like `min_rank()`: ```{r} # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) ``` ### `filter()` A grouped `filter()` effectively does a `mutate()` to generate a logical variable, and then only keeps the rows where the variable is `TRUE`. This means that grouped filters can be used with summary functions. For example, we can find the tallest character of each species: ```{r filter} by_species %>% select(name, species, height) %>% filter(height == max(height)) ``` You can also use `filter()` to remove entire groups. For example, the following code eliminates all groups that only have a single member: ```{r filter_group} by_species %>% filter(n() != 1) %>% tally() ``` ### `slice()` and friends `slice()` and friends (`slice_head()`, `slice_tail()`, `slice_sample()`, `slice_min()` and `slice_max()`) select rows within a group. For example, we can select the first observation within each species: ```{r slice} by_species %>% relocate(species) %>% slice(1) ``` Similarly, we can use `slice_min()` to select the smallest `n` values of a variable: ```{r slice_min} by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) ``` ## Computing on grouping information Inside dplyr verbs, you can access various properties of the "current" group using a family of functions with the `cur_` prefix. 
These functions are rarely needed for everyday usage of dplyr, but can be useful because they allow you to break free from some of the typical constraints of dplyr verbs. ### `cur_data()` `cur_data()` returns the current group, excluding grouping variables. It's useful to feed to functions that take a whole data frame. For example, the following code fits a linear model of `mass ~ height` to each species: ```{r cur_data} by_species %>% filter(n() > 1) %>% mutate(mod = list(lm(mass ~ height, data = cur_data()))) ``` ### `cur_group()` and `cur_group_id()` `cur_group_id()` gives a unique numeric identifier for the current group. This is sometimes useful if you want to index into an external data structure. ```{r cur_group_id} by_species %>% arrange(species) %>% select(name, species, homeworld) %>% mutate(id = cur_group_id()) ``` dplyr/vignettes/colwise.Rmd0000644000176200001440000002763214144435746015543 0ustar liggesusers--- title: "Column-wise operations" description: > Learn how to easily repeat the same operation across multiple columns using `across()`. 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{colwise} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` It's often useful to perform the same operation on multiple columns, but copying and pasting is both tedious and error prone: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d)) ``` (If you're trying to compute `mean(a, b, c, d)` for each row, instead see `vignette("rowwise")`) This vignette will introduce you to the `across()` function, which lets you rewrite the previous code more succinctly: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(across(a:d, mean)) ``` We'll start by discussing the basic usage of `across()`, particularly as it applies to `summarise()`, and show how to use it with multiple functions. We'll then show a few uses with other verbs. We'll finish off with a bit of history, showing why we prefer `across()` to our last approach (the `_if()`, `_at()` and `_all()` functions) and how to translate your old code to the new syntax. ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ## Basic usage `across()` has two primary arguments: * The first argument, `.cols`, selects the columns you want to operate on. It uses tidy selection (like `select()`) so you can pick variables by position, name, and type. * The second argument, `.fns`, is a function or list of functions to apply to each column. This can also be a purrr style formula (or list of formulas) like `~ .x / 2`. (This argument is optional, and you can omit it if you just want to get the underlying data; you'll see that technique used in `vignette("rowwise")`.) Here are a couple of examples of `across()` in conjunction with its favourite verb, `summarise()`. But you can use `across()` with any dplyr verb, as you'll see a little later. 
```{r} starwars %>% summarise(across(where(is.character), n_distinct)) starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) ``` Because `across()` is usually used in combination with `summarise()` and `mutate()`, it doesn't select grouping variables in order to avoid accidentally modifying them: ```{r} df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) ``` ### Multiple functions You can transform each variable with more than one function by supplying a named list of functions or lambda functions in the second argument: ```{r} min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) starwars %>% summarise(across(c(height, mass, birth_year), min_max)) ``` Control how the names are created with the `.names` argument which takes a [glue](https://glue.tidyverse.org/) spec: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}")) ``` If you'd prefer all summaries with the same function to be grouped together, you'll have to expand the calls yourself: ```{r} starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ``` (One day this might become an argument to `across()` but we're not yet sure how it would work.) We cannot however use `where(is.numeric)` in that last case because the second `across()` would pick up the variables that were newly created ("min_height", "min_mass" and "min_birth_year"). 
We can work around this by combining both calls to `across()` into a single expression that returns a tibble: ```{r} starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ) ``` Alternatively we could reorganize results with `relocate()`: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% relocate(starts_with("min")) ``` ### Current column If you need to, you can access the name of the "current" column inside by calling `cur_column()`. This can be useful if you want to perform some sort of context dependent transformation that's already encoded in a vector: ```{r} df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) ``` ### Gotchas Be careful when combining numeric summaries with `where(is.numeric)`: ```{r} df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) ``` Here `n` becomes `NA` because `n` is numeric, so the `across()` computes its standard deviation, and the standard deviation of 3 (a constant) is `NA`. 
You probably want to compute `n()` last to avoid this problem: ```{r} df %>% summarise(across(where(is.numeric), sd), n = n()) ``` Alternatively, you could explicitly exclude `n` from the columns to operate on: ```{r} df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) ``` Another approach is to combine both the call to `n()` and `across()` in a single expression that returns a tibble: ```{r} df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) ``` ### Other verbs So far we've focused on the use of `across()` with `summarise()`, but it works with any other dplyr verb that uses data masking: * Rescale all numeric variables to range 0-1: ```{r} rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) ``` For some verbs, like `group_by()`, `count()` and `distinct()`, you can omit the summary functions: * Find all distinct ```{r} starwars %>% distinct(across(contains("color"))) ``` * Count all combinations of variables with a given pattern: ```{r} starwars %>% count(across(contains("color")), sort = TRUE) ``` `across()` doesn't work with `select()` or `rename()` because they already use tidy select syntax; if you want to transform column names with a function, you can use `rename_with()`. ### filter() We cannot directly use `across()` in `filter()` because we need an extra step to combine the results. 
To that end, `filter()` has two special purpose companion functions: * `if_any()` keeps the rows where the predicate is true for *at least one* selected column: ```{r} starwars %>% filter(if_any(everything(), ~ !is.na(.x))) ``` * `if_all()` keeps the rows where the predicate is true for *all* selected columns: ```{r} starwars %>% filter(if_all(everything(), ~ !is.na(.x))) ``` * Find all rows where no variable has missing values: ```{r} starwars %>% filter(across(everything(), ~ !is.na(.x))) ``` ## `_if`, `_at`, `_all` Prior versions of dplyr allowed you to apply a function to multiple columns in a different way: using functions with `_if`, `_at`, and `_all()` suffixes. These functions solved a pressing need and are used by many people, but are now superseded. That means that they'll stay around, but won't receive any new features and will only get critical bug fixes. ### Why do we like `across()`? Why did we decide to move away from these functions in favour of `across()`? 1. `across()` makes it possible to express useful summaries that were previously impossible: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise( across(where(is.numeric), mean), across(where(is.factor), nlevels), n = n(), ) ``` 1. `across()` reduces the number of functions that dplyr needs to provide. This makes dplyr easier for you to use (because there are fewer functions to remember) and easier for us to implement new verbs (since we only need to implement one function, not four). 1. `across()` unifies `_if` and `_at` semantics so that you can select by position, name, and type, and you can now create compound selections that were previously impossible. For example, you can now transform all numeric columns whose name begins with "x": `across(where(is.numeric) & starts_with("x"))`. 1. `across()` doesn't need to use `vars()`. The `_at()` functions are the only place in dplyr where you have to manually quote variable names, which makes them a little weird and hence harder to remember. 
### Why did it take so long to discover `across()`? It's disappointing that we didn't discover `across()` earlier, and instead worked through several false starts (first not realising that it was a common problem, then with the `_each()` functions, and most recently with the `_if()`/`_at()`/`_all()` functions). But `across()` couldn't work without three recent discoveries: * You can have a column of a data frame that is itself a data frame. This is something provided by base R, but it's not very well documented, and it took a while to see that it was useful, not just a theoretical curiosity. * We can use data frames to allow summary functions to return multiple columns. * We can use the absence of an outer name as a convention that you want to unpack a data frame column into individual columns. ### How do you convert existing code? Fortunately, it's generally straightforward to translate your existing code to use `across()`: * Strip the `_if()`, `_at()` and `_all()` suffix off the function. * Call `across()`. The first argument will be: 1. For `_if()`, the old second argument wrapped in `where()`. 1. For `_at()`, the old second argument, with the call to `vars()` removed. 1. For `_all()`, `everything()`. The subsequent arguments can be copied as is. For example: ```{r, results = FALSE} df %>% mutate_if(is.numeric, mean, na.rm = TRUE) # -> df %>% mutate(across(where(is.numeric), mean, na.rm = TRUE)) df %>% mutate_at(vars(c(x, starts_with("y"))), mean) # -> df %>% mutate(across(c(x, starts_with("y")), mean)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) ``` There are a few exceptions to this rule: * `rename_*()` and `select_*()` follow a different pattern. They already have select semantics, so are generally used in a different way that doesn't have a direct equivalent with `across()`; use the new `rename_with()` instead. * Previously, `filter_*()` were paired with the `all_vars()` and `any_vars()` helpers. 
The new helpers `if_any()` and `if_all()` can be used inside `filter()` to keep rows for which the predicate is true for at least one, or all selected columns: ```{r} df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1)) # Find all rows where EVERY numeric variable is greater than zero df %>% filter(if_all(where(is.numeric), ~ .x > 0)) # Find all rows where ANY numeric variable is greater than zero df %>% filter(if_any(where(is.numeric), ~ .x > 0)) ``` * When used in a `mutate()`, all transformations performed by an `across()` are applied at once. This is different to the behaviour of `mutate_if()`, `mutate_at()`, and `mutate_all()`, which apply the transformations one at a time. We expect that you'll generally find the new behaviour less surprising: ```{r} df <- tibble(x = 2, y = 4, z = 8) df %>% mutate_all(~ .x / y) df %>% mutate(across(everything(), ~ .x / y)) ``` dplyr/R/0000755000176200001440000000000014177154670011621 5ustar liggesusersdplyr/R/nth-value.R0000644000176200001440000000443614121112104013625 0ustar liggesusers#' Extract the first, last or nth value from a vector #' #' These are straightforward wrappers around \code{\link{[[}}. The main #' advantage is that you can provide an optional secondary vector that defines #' the ordering, and provide a default value to use when the input is shorter #' than expected. #' #' @param x A vector #' @param n For `nth()`, a single integer specifying the position. #' Negative integers index from the end (i.e. `-1L` will return the #' last value in the vector). #' #' If a double is supplied, it will be silently truncated. #' @param order_by An optional vector used to determine the order #' @param default A default value to use if the position does not exist in #' the input. This is guessed by default for base vectors, where a #' missing value of the appropriate type is returned, and for lists, where #' a `NULL` is return. #' #' For more complicated objects, you'll need to supply this value. 
#' Make sure it is the same type as `x`. #' @return A single value. `[[` is used to do the subsetting. #' @export #' @examples #' x <- 1:10 #' y <- 10:1 #' #' first(x) #' last(y) #' #' nth(x, 1) #' nth(x, 5) #' nth(x, -2) #' nth(x, 11) #' #' last(x) #' # Second argument provides optional ordering #' last(x, y) #' #' # These functions always return a single value #' first(integer()) nth <- function(x, n, order_by = NULL, default = default_missing(x)) { if (length(n) != 1 || !is.numeric(n)) { abort("`n` must be a single integer.") } n <- trunc(n) if (n == 0 || n > length(x) || n < -length(x)) { return(default) } # Negative values index from RHS if (n < 0) { n <- length(x) + n + 1 } if (is.null(order_by)) { x[[n]] } else { x[[ order(order_by)[[n]] ]] } } #' @export #' @rdname nth first <- function(x, order_by = NULL, default = default_missing(x)) { nth(x, 1L, order_by = order_by, default = default) } #' @export #' @rdname nth last <- function(x, order_by = NULL, default = default_missing(x)) { nth(x, -1L, order_by = order_by, default = default) } default_missing <- function(x) { UseMethod("default_missing") } #' @export default_missing.default <- function(x) { if (!is.object(x) && is.list(x)) { NULL } else { x[NA_real_] } } #' @export default_missing.data.frame <- function(x) { rep(NA, nrow(x)) } dplyr/R/rows.R0000644000176200001440000001764514151641776012753 0ustar liggesusers#' Manipulate individual rows #' #' @description #' `r lifecycle::badge("experimental")` #' #' These functions provide a framework for modifying rows in a table using #' a second table of data. The two tables are matched `by` a set of key #' variables whose values must uniquely identify each row. The functions are #' inspired by SQL's `INSERT`, `UPDATE`, and `DELETE`, and can optionally #' modify `in_place` for selected backends. #' #' * `rows_insert()` adds new rows (like `INSERT`); key values in `y` must #' not occur in `x`. 
#' * `rows_update()` modifies existing rows (like `UPDATE`); key values in #' `y` must occur in `x`. #' * `rows_patch()` works like `rows_update()` but only overwrites `NA` values. #' * `rows_upsert()` inserts or updates depending on whether or not the #' key value in `y` already exists in `x`. #' * `rows_delete()` deletes rows (like `DELETE`); key values in `y` must #' exist in `x`. #' #' @inheritParams left_join #' @param x,y A pair of data frames or data frame extensions (e.g. a tibble). #' `y` must have the same columns of `x` or a subset. #' @param by An unnamed character vector giving the key columns. The key #' values must uniquely identify each row (i.e. each combination of key #' values occurs at most once), and the key columns must exist in both `x` #' and `y`. #' #' By default, we use the first column in `y`, since the first column is #' a reasonable place to put an identifier variable. #' @param in_place Should `x` be modified in place? This argument is only #' relevant for mutable backends (e.g. databases, data.tables). #' #' When `TRUE`, a modified version of `x` is returned invisibly; #' when `FALSE`, a new object representing the resulting changes is returned. #' @returns #' An object of the same type as `x`. The order of the rows and columns of `x` #' is preserved as much as possible. The output has the following properties: #' #' * `rows_update()` preserves rows as is; `rows_insert()` and `rows_upsert()` #' return all existing rows and potentially new rows; `rows_delete()` returns #' a subset of the rows. #' * Columns are not added, removed, or relocated, though the data may be updated. #' * Groups are taken from `x`. #' * Data frame attributes are taken from `x`. #' #' If `in_place = TRUE`, the result will be returned invisibly. 
#' @name rows #' @examples #' data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) #' data #' #' # Insert #' rows_insert(data, tibble(a = 4, b = "z")) #' try(rows_insert(data, tibble(a = 3, b = "z"))) #' #' # Update #' rows_update(data, tibble(a = 2:3, b = "z")) #' rows_update(data, tibble(b = "z", a = 2:3), by = "a") #' #' # Variants: patch and upsert #' rows_patch(data, tibble(a = 2:3, b = "z")) #' rows_upsert(data, tibble(a = 2:4, b = "z")) #' #' # Delete and truncate #' rows_delete(data, tibble(a = 2:3)) #' rows_delete(data, tibble(a = 2:3, b = "b")) #' try(rows_delete(data, tibble(a = 2:3, b = "b"), by = c("a", "b"))) NULL #' @rdname rows #' @export rows_insert <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { lifecycle::signal_stage("experimental", "rows_insert()") UseMethod("rows_insert") } #' @export rows_insert.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { check_dots_empty() key <- rows_check_key(by, x, y) y <- auto_copy(x, y, copy = copy) rows_df_in_place(in_place) rows_check_key_df(x, key, df_name = "x") rows_check_key_df(y, key, df_name = "y") idx <- vctrs::vec_match(y[key], x[key]) bad <- which(!is.na(idx)) if (has_length(bad)) { abort("Attempting to insert duplicate rows.") } rows_bind(x, y) } #' @rdname rows #' @export rows_update <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { lifecycle::signal_stage("experimental", "rows_update()") UseMethod("rows_update", x) } #' @export rows_update.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { check_dots_empty() key <- rows_check_key(by, x, y) y <- auto_copy(x, y, copy = copy) rows_df_in_place(in_place) rows_check_key_df(x, key, df_name = "x") rows_check_key_df(y, key, df_name = "y") idx <- vctrs::vec_match(y[key], x[key]) bad <- which(is.na(idx)) if (has_length(bad)) { abort("Attempting to update missing rows.") } x[idx, names(y)] <- y x } #' @rdname rows #' @export rows_patch <- function(x, y, by = 
NULL, ..., copy = FALSE, in_place = FALSE) { lifecycle::signal_stage("experimental", "rows_patch()") UseMethod("rows_patch", x) } #' @export rows_patch.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { check_dots_empty() key <- rows_check_key(by, x, y) y <- auto_copy(x, y, copy = copy) rows_df_in_place(in_place) rows_check_key_df(x, key, df_name = "x") rows_check_key_df(y, key, df_name = "y") idx <- vctrs::vec_match(y[key], x[key]) bad <- which(is.na(idx)) if (has_length(bad)) { abort("Can't patch missing row.") } new_data <- map2(x[idx, names(y)], y, coalesce) x[idx, names(y)] <- new_data x } #' @rdname rows #' @export rows_upsert <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { lifecycle::signal_stage("experimental", "rows_upsert()") UseMethod("rows_upsert", x) } #' @export rows_upsert.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { check_dots_empty() key <- rows_check_key(by, x, y) y <- auto_copy(x, y, copy = copy) rows_df_in_place(in_place) rows_check_key_df(x, key, df_name = "x") rows_check_key_df(y, key, df_name = "y") idx <- vctrs::vec_match(y[key], x[key]) new <- is.na(idx) idx_existing <- idx[!new] idx_new <- idx[new] x[idx_existing, names(y)] <- vec_slice(y, !new) rows_bind(x, vec_slice(y, new)) } #' @rdname rows #' @export rows_delete <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { lifecycle::signal_stage("experimental", "rows_delete()") UseMethod("rows_delete", x) } #' @export rows_delete.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { check_dots_empty() key <- rows_check_key(by, x, y) y <- auto_copy(x, y, copy = copy) rows_df_in_place(in_place) rows_check_key_df(x, key, df_name = "x") rows_check_key_df(y, key, df_name = "y") extra_cols <- setdiff(names(y), key) if (has_length(extra_cols)) { bullets <- glue("Ignoring extra columns: ", commas(tick_if_needed(extra_cols))) inform(bullets, class = 
c("dplyr_message_delete_extra_cols", "dplyr_message")) } idx <- vctrs::vec_match(y[key], x[key]) bad <- which(is.na(idx)) if (has_length(bad)) { abort("Can't delete missing row.") } dplyr_row_slice(x, -idx) } # helpers ----------------------------------------------------------------- rows_check_key <- function(by, x, y, error_call = caller_env()) { if (is.null(by)) { by <- names(y)[[1]] msg <- glue("Matching, by = \"{by}\"") inform(msg, class = c("dplyr_message_matching_by", "dplyr_message")) } if (!is.character(by) || length(by) == 0) { abort("`by` must be a character vector.", call = error_call) } # is_named(by) checks all(names2(by) != ""), we need any(...) if (any(names2(by) != "")) { abort("`by` must be unnamed.", call = error_call) } bad <- setdiff(colnames(y), colnames(x)) if (has_length(bad)) { abort("All columns in `y` must exist in `x`.", call = error_call) } by } rows_check_key_df <- function(df, by, df_name, error_call = caller_env()) { y_miss <- setdiff(by, colnames(df)) if (length(y_miss) > 0) { msg <- glue("All `by` columns must exist in `{df_name}`.") abort(msg, call = error_call) } if (vctrs::vec_duplicate_any(df[by])) { msg <- glue("`{df_name}` key values must be unique.") abort(msg, call = error_call) } } rows_df_in_place <- function(in_place, error_call = caller_env()) { if (is_true(in_place)) { msg <- "Data frames only support `in_place = FALSE`." abort(msg, call = error_call) } } rows_bind <- function(x, y) { dplyr_reconstruct(vctrs::vec_rbind(x, y), x) } dplyr/R/colwise.R0000644000176200001440000002456114151641776013421 0ustar liggesusers#' Operate on a selection of variables #' #' @description #' `r lifecycle::badge("superseded")` #' #' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of #' [across()] in an existing verb. See `vignette("colwise")` for details. #' #' The variants suffixed with `_if`, `_at` or `_all` apply an #' expression (sometimes several) to all variables within a specified #' subset. 
#' This subset can contain all variables (`_all` variants), a
#' [vars()] selection (`_at` variants), or variables selected with a
#' predicate (`_if` variants).
#'
#' The verbs with scoped variants are:
#'
#' * [mutate()], [transmute()] and [summarise()]. See [summarise_all()].
#' * [filter()]. See [filter_all()].
#' * [group_by()]. See [group_by_all()].
#' * [rename()] and [select()]. See [select_all()].
#' * [arrange()]. See [arrange_all()]
#'
#' There are three kinds of scoped variants. They differ in the scope
#' of the variable selection on which operations are applied:
#'
#' * Verbs suffixed with `_all()` apply an operation on all variables.
#'
#' * Verbs suffixed with `_at()` apply an operation on a subset of
#'   variables specified with the quoting function [vars()]. This
#'   quoting function accepts [tidyselect::vars_select()] helpers like
#'   [starts_with()]. Instead of a [vars()] selection, you can also
#'   supply an [integerish][rlang::is_integerish] vector of column
#'   positions or a character vector of column names.
#'
#' * Verbs suffixed with `_if()` apply an operation on the subset of
#'   variables for which a predicate function returns `TRUE`. Instead
#'   of a predicate function, you can also supply a logical vector.
#'
#' @param .tbl A `tbl` object.
#' @param .funs A function `fun`, a quosure style lambda `~ fun(.)` or a list of either form.
#'
#' @param .vars A list of columns generated by [vars()],
#'   a character vector of column names, a numeric vector of column
#'   positions, or `NULL`.
#' @param .predicate A predicate function to be applied to the columns
#'   or a logical vector. The variables for which `.predicate` is or
#'   returns `TRUE` are selected. This argument is passed to
#'   [rlang::as_function()] and thus supports quosure-style lambda
#'   functions and strings representing function names.
#' @param ... Additional arguments for the function calls in
#'   `.funs`. These are evaluated only once, with [tidy
#'   dots][rlang::tidy-dots] support.
#'
#' @section Grouping variables:
#'
#' Most of these operations also apply on the grouping variables when
#' they are part of the selection. This includes:
#'
#' * [arrange_all()], [arrange_at()], and [arrange_if()]
#' * [distinct_all()], [distinct_at()], and [distinct_if()]
#' * [filter_all()], [filter_at()], and [filter_if()]
#' * [group_by_all()], [group_by_at()], and [group_by_if()]
#' * [select_all()], [select_at()], and [select_if()]
#'
#' This is not the case for summarising and mutating variants where
#' operations are *not* applied on grouping variables. The behaviour
#' depends on whether the selection is **implicit** (`all` and `if`
#' selections) or **explicit** (`at` selections). Grouping variables
#' covered by explicit selections (with [summarise_at()],
#' [mutate_at()], and [transmute_at()]) are always an error. For
#' implicit selections, the grouping variables are always ignored. In
#' this case, the level of verbosity depends on the kind of operation:
#'
#' * Summarising operations ([summarise_all()] and [summarise_if()])
#'   ignore grouping variables silently because it is obvious that
#'   operations are not applied on grouping variables.
#'
#' * On the other hand it isn't as obvious in the case of mutating
#'   operations ([mutate_all()], [mutate_if()], [transmute_all()], and
#'   [transmute_if()]). For this reason, they issue a message
#'   indicating which grouping variables are ignored.
#'
#' @name scoped
NULL

#' Select variables
#'
#' @description
#' `vars()` was only needed for the scoped verbs, which have been superseded
#' by the use of [across()] in an existing verb. See `vignette("colwise")` for
#' details.
#'
#' This helper is intended to provide equivalent semantics to
#' [select()]. It is used for instance in scoped summarising and
#' mutating verbs ([mutate_at()] and [summarise_at()]).
#'
#' Note that verbs accepting a `vars()` specification also accept a
#' numeric vector of positions or a character vector of column names.
#'
#' @param ... <[`tidy-select`][dplyr_tidy_select]> Variables to include/exclude
#'   in mutate/summarise. You can use same specifications as in [select()].
#'   If missing, defaults to all non-grouping variables.
#' @seealso [all_vars()] and [any_vars()] for other quoting
#'   functions that you can use with scoped verbs.
#' @export
vars <- function(...) {
  # Capture the selection as a list of quosures, to be resolved later
  # against the tbl by the scoped verbs.
  quos(...)
}

#' Apply predicate to all variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' `all_vars()` and `any_vars()` were only needed for the scoped verbs, which
#' have been superseded by the use of [across()] in an existing verb. See
#' `vignette("colwise")` for details.
#'
#' These quoting functions signal to scoped filtering verbs
#' (e.g. [filter_if()] or [filter_all()]) that a predicate expression
#' should be applied to all relevant variables. The `all_vars()`
#' variant takes the intersection of the predicate expressions with
#' `&` while the `any_vars()` variant takes the union with `|`.
#'
#' @param expr <[`data-masking`][dplyr_data_masking]> An expression that
#'   returns a logical vector, using `.` to refer to the "current" variable.
#' @seealso [vars()] for other quoting functions that you
#'   can use with scoped verbs.
#' @export
all_vars <- function(expr) {
  lifecycle::signal_stage("superseded", "all_vars()")
  # A quosure subclassed so scoped verbs can dispatch on the combination mode.
  structure(enquo(expr), class = c("all_vars", "quosure", "formula"))
}

#' @rdname all_vars
#' @export
any_vars <- function(expr) {
  lifecycle::signal_stage("superseded", "any_vars()")
  structure(enquo(expr), class = c("any_vars", "quosure", "formula"))
}

#' @export
print.all_vars <- function(x, ...) {
  # NOTE(review): this string looks truncated by text extraction (upstream
  # prints a descriptive `<...>` tag here) -- verify against dplyr sources.
  cat("\n")
  NextMethod()
}

#' @export
print.any_vars <- function(x, ...)
{
  # NOTE(review): this string looks truncated by text extraction (upstream
  # prints a descriptive `<...>` tag here) -- verify against dplyr sources.
  cat("\n")
  NextMethod()
}

# Resolve a `.vars` specification (NULL, positions, names, or a `vars()`
# quosure list) to a character vector of column names.
# Requires tbl_vars() method
tbl_at_vars <- function(tbl, vars, .include_group_vars = FALSE, error_call = caller_env()) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(tbl)
  }

  if (is_null(vars)) {
    character()
  } else if (is_integerish(vars)) {
    # Positional selection.
    tibble_vars[vars]
  } else if (is_quosures(vars) || is_character(vars)) {
    out <- fix_call(tidyselect::vars_select(tibble_vars, !!!vars), call = error_call)
    # Keep names only when the user supplied any, so unnamed selections
    # round-trip as plain character vectors.
    if (!any(have_name(vars))) {
      names(out) <- NULL
    }
    out
  } else {
    msg <- glue("`.vars` must be a character/numeric vector or a `vars()` object, not {friendly_type_of(vars)}.")
    abort(msg, call = error_call)
  }
}

# Same as tbl_at_vars() but returns symbols, preserving selection names.
tbl_at_syms <- function(tbl, vars, .include_group_vars = FALSE, error_call = caller_env()) {
  vars <- tbl_at_vars(tbl, vars, .include_group_vars = .include_group_vars, error_call = error_call)
  set_names(syms(vars), names(vars))
}

# Resolve a `.predicate` (logical vector, function, lambda, or fun_list of
# length 1) to the character vector of selected column names.
# Requires tbl_vars(), `[[`() and length() methods
tbl_if_vars <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE, error_call = caller_env()) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(.tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(.tbl)
  }

  if (is_logical(.p)) {
    # A logical vector selects columns directly; it must line up with the
    # number of candidate columns.
    if (length(.p) != length(tibble_vars)) {
      bullets <- c(
        "`.p` is invalid.",
        x = "`.p` should have the same size as the number of variables in the tibble.",
        i = glue("`.p` is size {length(.p)}."),
        i = glue(
          "The tibble has {length(tibble_vars)} columns, {including} the grouping variables.",
          # FIX: "non including" -> "not including" (grammar in user-facing message)
          including = if (.include_group_vars) "including" else "not including"
        )
      )
      abort(bullets, call = error_call)
    }
    return(syms(tibble_vars[.p]))
  }

  # For lazy backends this may materialize a sample; see tbl_ptype().
  .tbl <- tbl_ptype(.tbl)
  if (is_fun_list(.p) || is_list(.p)) {
    if (length(.p) != 1) {
      msg <- glue("`.predicate` must have length 1, not {length(.p)}.")
      abort(msg, call = error_call)
    }
    .p <- .p[[1]]
  }
  if (is_quosure(.p)) {
    .p <- quo_as_function(.p)
  } else {
    .p <- as_function(.p, .env)
  }

  n <- length(tibble_vars)
  selected <- new_logical(n)
  for (i in seq_len(n)) {
    # `.env$i` forces the loop index to be looked up in the calling
    # environment rather than the data mask used by pull(), so a column
    # literally named "i" cannot shadow it -- presumably deliberate; verify.
    column <- pull(.tbl, tibble_vars[[.env$i]])
    cond <- eval_tidy(.p(column, ...))
    if (!is.logical(cond) || length(cond) != 1) {
      bullets <- c(
        "`.p` is invalid.",
        x = "`.p` should return a single logical.",
        i = if (is.logical(cond)) {
          glue("`.p` returns a size {length(cond)} for column `{tibble_vars[[i]]}`.")
        } else {
          glue("`.p` returns a <{vec_ptype_full(cond)}> for column `{tibble_vars[[i]]}`.")
        }
      )
      abort(bullets, call = error_call)
    }
    selected[[i]] <- isTRUE(cond)
  }
  tibble_vars[selected]
}

# Same as tbl_if_vars() but returns symbols.
tbl_if_syms <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE, error_call = caller_env()) {
  syms(tbl_if_vars(.tbl, .p, .env, ..., .include_group_vars = .include_group_vars, error_call = error_call))
}

#' Return a prototype of a tbl
#'
#' Used in `_if` functions to enable type-based selection even when the data
#' is lazily generated. Should either return the complete tibble, or if that
#' can not be computed quickly, a 0-row tibble where the columns are of
#' the correct type.
#'
#' @export
#' @keywords internal
tbl_ptype <- function(.data) {
  UseMethod("tbl_ptype")
}

#' @export
tbl_ptype.default <- function(.data) {
  if (inherits(.data, "tbl_lazy")) {
    # TODO: remove once moved to dplyr
    inform("Applying predicate on the first 100 rows")
    collect(.data, n = 100)
  } else {
    .data
  }
}

# The lambda must inherit from:
# - Execution environment (bound arguments with purrr lambda syntax)
# - Lexical environment (local variables)
# - Data mask (other columns)
#
# So we need:
# - Inheritance from closure -> lexical
# - A maskable quosure
as_inlined_function <- function(f, env, ...) {
  # Process unquote operator at inlining time
  f <- expr_interp(f)

  # Transform to a purrr-like lambda
  fn <- as_function(f, env = env)
  body(fn) <- expr({
    # Force all arguments
    base::pairlist(...)

    # Transform the lambda body into a maskable quosure inheriting
    # from the execution environment
    `_quo` <- rlang::quo(!!body(fn))

    # Evaluate the quosure in the mask
    rlang::eval_bare(`_quo`, base::parent.frame())
  })

  structure(fn, class = "inline_colwise_function", formula = f)
}

# ---- tar member: dplyr/R/src_dbi.R ----
#' @export
tbl.DBIConnection <- function(src, from, ...) {
  check_dbplyr()
  tbl(dbplyr::src_dbi(src, auto_disconnect = FALSE), from = from, ...)
}

#' @export
copy_to.DBIConnection <- function(dest, df, name = deparse(substitute(df)),
                                  overwrite = FALSE, ...) {
  check_dbplyr()
  copy_to(
    dbplyr::src_dbi(dest, auto_disconnect = FALSE),
    df = df, name = name, overwrite = overwrite, ...
  )
}

# ---- tar member: dplyr/R/group_split.R ----
#' Split data frame by groups
#'
#' @description
#' `r lifecycle::badge("experimental")`
#' [group_split()] works like [base::split()] but
#' - it uses the grouping structure from [group_by()] and therefore is subject to the data mask
#' - it does not name the elements of the list based on the grouping as this typically
#'   loses information and is confusing.
#'
#' [group_keys()] explains the grouping structure, by returning a data frame that has one row
#' per group and one column per grouping variable.
#'
#' @section Grouped data frames:
#'
#' The primary use case for [group_split()] is with already grouped data frames,
#' typically a result of [group_by()]. In this case [group_split()] only uses
#' the first argument, the grouped tibble, and warns when `...` is used.
#'
#' Because some of these groups may be empty, it is best paired with [group_keys()]
#' which identifies the representatives of each grouping variable for the group.
#'
#' @section Ungrouped data frames:
#'
#' When used on ungrouped data frames, [group_split()] and [group_keys()] forwards the `...` to
#' [group_by()] before the split, therefore the `...` are subject to the data mask.
#'
#' Using these functions on an ungrouped data frame only makes sense if you need only one or the
#' other, because otherwise the grouping algorithm is performed each time.
#'
#' @section Rowwise data frames:
#'
#' [group_split()] returns a list of one-row tibbles; the `...` are ignored
#' and warned against.
#'
#' @param .tbl A tbl
#' @param ... Grouping specification, forwarded to [group_by()]
#' @param .keep Should the grouping columns be kept
#' @return
#' - [group_split()] returns a list of tibbles. Each tibble contains the rows of `.tbl` for the associated group and
#'   all the columns, including the grouping variables.
#'
#' - [group_keys()] returns a tibble with one row per group, and one column per grouping variable
#' @family grouping functions
#' @export
#' @examples
#' # ----- use case 1 : on an already grouped tibble
#' ir <- iris %>%
#'   group_by(Species)
#'
#' group_split(ir)
#' group_keys(ir)
#'
#' # this can be useful if the grouped data has been altered before the split
#' ir <- iris %>%
#'   group_by(Species) %>%
#'   filter(Sepal.Length > mean(Sepal.Length))
#'
#' group_split(ir)
#' group_keys(ir)
#'
#' # ----- use case 2: using a group_by() grouping specification
#'
#' # both group_split() and group_keys() have to perform the grouping
#' # so it only makes sense to do this if you only need one or the other
#' iris %>%
#'   group_split(Species)
#'
#' iris %>%
#'   group_keys(Species)
group_split <- function(.tbl, ..., .keep = TRUE) {
  lifecycle::signal_stage("experimental", "group_split()")
  UseMethod("group_split")
}

#' @export
group_split.data.frame <- function(.tbl, ..., .keep = TRUE, keep = deprecated()) {
  # `keep` was renamed to `.keep` in 1.0.0; honour the old spelling with a
  # deprecation warning.
  if (!missing(keep)) {
    lifecycle::deprecate_warn("1.0.0", "group_split(keep = )", "group_split(.keep = )")
    .keep <- keep
  }
  data <- group_by(.tbl, ...)
  group_split_impl(data, .keep = .keep)
}

#' @export
group_split.rowwise_df <- function(.tbl, ..., .keep = TRUE, keep = deprecated()) {
  # FIX: make the "any dots supplied?" test explicit rather than relying on
  # integer truthiness.
  if (dots_n(...) > 0) {
    warn("... is ignored in group_split(), please use as_tibble() %>% group_split(...)")
  }
  if (!missing(keep)) {
    lifecycle::deprecate_warn("1.0.0", "group_split(keep = )", "group_split(.keep = )")
    .keep <- keep
  }
  if (!missing(.keep)) {
    warn(".keep is ignored in group_split()")
  }
  # Rowwise frames have no grouping columns to drop: always keep everything.
  group_split_impl(.tbl, .keep = TRUE)
}

#' @export
group_split.grouped_df <- function(.tbl, ..., .keep = TRUE, keep = deprecated()) {
  if (!missing(keep)) {
    lifecycle::deprecate_warn("1.0.0", "group_split(keep = )", "group_split(.keep = )")
    .keep <- keep
  }
  if (dots_n(...) > 0) {
    warn("... is ignored in group_split(), please use group_by(..., .add = TRUE) %>% group_split()")
  }
  group_split_impl(.tbl, .keep = .keep)
}

# Shared implementation: slice the ungrouped data by the group row indices,
# optionally dropping the grouping columns first.
group_split_impl <- function(data, .keep) {
  out <- ungroup(data)
  indices <- group_rows(data)

  if (!isTRUE(.keep)) {
    remove <- group_vars(data)
    .keep <- names(out)
    .keep <- setdiff(.keep, remove)
    out <- out[.keep]
  }

  dplyr_chop(out, indices)
}

# Chop `data` into a list_of data frames, one element per index vector.
dplyr_chop <- function(data, indices) {
  out <- map(indices, dplyr_row_slice, data = data)
  out <- new_list_of(out, ptype = vec_ptype(data))
  out
}

# ---- tar member: dplyr/R/group_trim.R ----
#' Trim grouping structure
#'
#' @description
#' `r lifecycle::badge("experimental")`
#' Drop unused levels of all factors that are used as grouping variables,
#' then recalculates the grouping structure.
#'
#' `group_trim()` is particularly useful after a [filter()] that is intended
#' to select a subset of groups.
#'
#' @param .tbl A [grouped data frame][grouped_df()]
#' @param .drop See [group_by()]
#' @return A [grouped data frame][grouped_df()]
#' @export
#' @family grouping functions
#' @examples
#' iris %>%
#'   group_by(Species) %>%
#'   filter(Species == "setosa", .preserve = TRUE) %>%
#'   group_trim()
group_trim <- function(.tbl, .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("experimental", "group_trim()")
  UseMethod("group_trim")
}

#' @export
group_trim.data.frame <- function(.tbl, .drop = group_by_drop_default(.tbl)) {
  # Nothing to trim on an ungrouped data frame.
  .tbl
}

#' @export
group_trim.grouped_df <- function(.tbl, .drop = group_by_drop_default(.tbl)) {
  vars <- group_vars(.tbl)
  ungrouped <- ungroup(.tbl)

  # names of the factors that should be droplevels()'d
  fgroups <- names(select_if(select_at(ungrouped, vars), is.factor))

  # drop levels
  dropped <- mutate_at(ungrouped, fgroups, droplevels)

  # regroup
  group_by_at(dropped, vars, .drop = .drop)
}

# ---- tar member: dplyr/R/join-rows.R ----
# Compute, for a join of type `type`, the row locations to take from `x`
# and `y`, plus the extra unmatched `y` rows for right/full joins.
join_rows <- function(x_key, y_key,
                      type = c("inner", "left", "right", "full"),
                      na_equal = TRUE,
                      error_call = caller_env()) {
  type <- arg_match(type)

  # Find matching rows in y for each row in x
  y_split <- vec_group_loc(y_key)
  # FIX: assign the result of tryCatch() instead of performing the
  # assignment inside its argument; the handler never returns (it aborts),
  # so this is equivalent and clearer.
  matches <- tryCatch(
    vec_match(x_key, y_split$key, na_equal = na_equal),
    vctrs_error_incompatible_type = function(cnd) {
      # Strip the "x$" / "y$" prefix to recover the bare column name.
      rx <- "^[^$]+[$]"
      x_name <- sub(rx, "", cnd$x_arg)
      y_name <- sub(rx, "", cnd$y_arg)
      bullets <- c(
        glue("Can't join on `x${x_name}` x `y${y_name}` because of incompatible types."),
        # FIX: stray extra ">" in both messages ("<type>>." -> "<type>.").
        i = glue("`x${x_name}` is of type <{x_type}>.", x_type = vec_ptype_full(cnd$x)),
        i = glue("`y${y_name}` is of type <{y_type}>.", y_type = vec_ptype_full(cnd$y))
      )
      abort(bullets, call = error_call)
    }
  )
  y_loc <- y_split$loc[matches]

  if (type == "left" || type == "full") {
    # Unmatched x rows pair with an NA y location.
    if (anyNA(matches)) {
      y_loc <- vec_assign(y_loc, vec_equal_na(matches), list(NA_integer_))
    }
  }

  x_loc <- seq_len(vec_size(x_key))
  # flatten index list: repeat each x row once per matching y row
  x_loc <- rep(x_loc, lengths(y_loc))
  y_loc <- index_flatten(y_loc)

  y_extra <- integer()
  if (type == "right" || type == "full") {
    miss_x <- !vec_in(y_key, x_key, na_equal = na_equal)
    if (!na_equal) {
      # With na_equal = FALSE, NA comparisons are unknown: treat as unmatched.
      miss_x[is.na(miss_x)] <- TRUE
    }
    if (any(miss_x)) {
      y_extra <- seq_len(vec_size(y_key))[miss_x]
    }
  }

  list(x = x_loc, y = y_loc, y_extra = y_extra)
}

# TODO: Replace with `vec_unchop(x, ptype = integer())`
# once performance of `vec_c()` matches `unlist()`. See #4964.
index_flatten <- function(x) {
  unlist(x, recursive = FALSE, use.names = FALSE)
}

# ---- tar member: dplyr/R/na_if.R ----
#' Convert values to NA
#'
#' This is a translation of the SQL command `NULLIF`. It is useful
#' if you want to convert an annoying value to `NA`.
#'
#' @param x Vector to modify
#' @param y Value to replace with NA
#' @return A modified version of `x` that replaces any values that
#'   are equal to `y` with NA.
#' @seealso [coalesce()] to replace missing values with a specified
#'   value.
#'
#'   [tidyr::replace_na()] to replace `NA` with a value.
#'
#'   [recode()] to more generally replace values.
#' @export
#' @examples
#' na_if(1:5, 5:1)
#'
#' x <- c(1, -1, 0, 10)
#' 100 / x
#' 100 / na_if(x, 0)
#'
#' y <- c("abc", "def", "", "ghi")
#' na_if(y, "")
#'
#' # na_if() is particularly useful inside mutate(),
#' # and is meant for use with vectors rather than entire data frames
#' starwars %>%
#'   select(name, eye_color) %>%
#'   mutate(eye_color = na_if(eye_color, "unknown"))
#'
#' # na_if() can also be used with mutate() and across()
#' # to mutate multiple columns
#' starwars %>%
#'   mutate(across(where(is.character), ~na_if(., "unknown")))
na_if <- function(x, y) {
  check_length(y, x, "`y`", glue("same as `x`"))
  # NA positions in `x` stay NA: `x == y` is NA there and NA-subsetting
  # assignment leaves them untouched.
  x[x == y] <- NA
  x
}

# ---- tar member: dplyr/R/deprec-location.R ----
#' Print the location in memory of a data frame
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' This is useful for understand how and when dplyr makes copies of data
#' frames
#'
#' @param df a data frame
#' @param x,y two data frames to compare
#' @keywords internal
#' @export
#'
#' @examples
#' location(mtcars)
#' # ->
#' lobstr::ref(mtcars)
#'
#' mtcars2 <- mutate(mtcars, cyl2 = cyl * 2)
#' # ->
#' lobstr::ref(mtcars2)
#'
#' changes(mtcars, mtcars2)
#' # ->
#' lobstr::ref(mtcars, mtcars2)
location <- function(df) {
  # FIX: typo in the replacement hint ("lobst::ref()" -> "lobstr::ref()").
  lifecycle::deprecate_warn("1.0.0", "location()", "lobstr::ref()")
  check_installed("lobstr", "to compute package locations.")
  if (!is.data.frame(df)) {
    abort("`location()` is meant for data frames.")
  }
  attrs <- attributes(df)

  structure(list(
    df = lobstr::obj_addr(df),
    vars = set_names(lobstr::obj_addrs(df), names(df)),
    attr = set_names(lobstr::obj_addrs(attrs), names(attrs))
  ), class = "location")
}

#' @export
print.location <- function(x, ...) {
  cat("<", x$df, ">\n", sep = "")

  width <- max(nchar(c(names(x$vars), names(x$attr)))) + 1
  # Render a name/address pair as an aligned " * name:  <addr>" line.
  def_list <- function(x) {
    term <- format(paste0(names(x), ":"), width = width)
    paste0(" * ", term, " <", format(x), ">")
  }

  vars <- paste0(def_list(x$vars), collapse = "\n")
  cat("Variables:\n", vars, "\n", sep = "")

  attr <- paste0(def_list(x$attr), collapse = "\n")
  cat("Attributes:\n", attr, "\n", sep = "")
  invisible(x)
}

#' @rdname location
#' @export
changes <- function(x, y) {
  lifecycle::deprecate_warn("1.0.0", "changes()", "lobstr::ref()")
  x <- location(x)
  y <- location(y)
  if (x$df == y$df) {
    # NOTE(review): this string looks truncated by text extraction (upstream
    # prints a descriptive `<...>` tag here) -- verify against dplyr sources.
    cat("\n")
    return(invisible())
  }

  # match up x vars to y vars
  vars <- match_up(x$vars, y$vars)
  attr <- match_up(x$attr, y$attr)

  width <- max(nchar(rownames(vars)), nchar(rownames(attr)))
  if (nrow(vars) > 0) rownames(vars) <- format(rownames(vars), width = width)
  if (nrow(attr) > 0) rownames(attr) <- format(rownames(attr), width = width)

  if (nrow(vars) > 0) {
    cat("Changed variables:\n")
    print(vars, quote = FALSE)
  }

  # FIX: make the second operand of && an explicit logical instead of
  # relying on numeric truthiness.
  if (nrow(vars) > 0 && nrow(attr) > 0) cat("\n")

  if (nrow(attr) > 0) {
    cat("Changed attributes:\n")
    print(attr, quote = FALSE)
  }
}

# Align two named character vectors of addresses and return a two-column
# ("old"/"new") matrix keeping only the entries that differ.
match_up <- function(x, y) {
  both <- intersect(names(x), names(y))
  added <- setdiff(names(x), names(y))
  deleted <- setdiff(names(y), names(x))

  out <- cbind(
    old = c(x[both], x[added], rep("", length(deleted))),
    new = c(y[both], rep("", length(added)), y[deleted])
  )
  rownames(out) <- c(both, added, deleted)
  out[out[, "old"] != out[, "new"], , drop = FALSE]
}

# ---- tar member: dplyr/R/summarise.R ----
#' Summarise each group to fewer rows
#'
#' @description
#' `summarise()` creates a new data frame. It will have one (or more) rows for
#' each combination of grouping variables; if there are no grouping variables,
#' the output will have a single row summarising all observations in the input.
#' It will contain one column for each grouping variable and one column
#' for each of the summary statistics that you have specified.
#'
#' `summarise()` and `summarize()` are synonyms.
#'
#' @section Useful functions:
#'
#' * Center: [mean()], [median()]
#' * Spread: [sd()], [IQR()], [mad()]
#' * Range: [min()], [max()], [quantile()]
#' * Position: [first()], [last()], [nth()]
#' * Count: [n()], [n_distinct()]
#' * Logical: [any()], [all()]
#'
#' @section Backend variations:
#'
#' The data frame backend supports creating a variable and using it in the
#' same summary. This means that previously created summary variables can be
#' further transformed or combined within the summary, as in [mutate()].
#' However, it also means that summary variables with the same names as previous
#' variables overwrite them, making those variables unavailable to later summary
#' variables.
#'
#' This behaviour may not be supported in other backends. To avoid unexpected
#' results, consider using new names for your summary variables, especially when
#' creating multiple summaries.
#'
#' @export
#' @inheritParams arrange
#' @param ... <[`data-masking`][dplyr_data_masking]> Name-value pairs of summary
#'   functions. The name will be the name of the variable in the result.
#'
#'   The value can be:
#'
#'   * A vector of length 1, e.g. `min(x)`, `n()`, or `sum(is.na(y))`.
#'   * A vector of length `n`, e.g. `quantile()`.
#'   * A data frame, to add multiple columns from a single expression.
#' @param .groups `r lifecycle::badge("experimental")` Grouping structure of the result.
#'
#'   * "drop_last": dropping the last level of grouping. This was the
#'     only supported option before version 1.0.0.
#'   * "drop": All levels of grouping are dropped.
#'   * "keep": Same grouping structure as `.data`.
#'   * "rowwise": Each row is its own group.
#'
#'   When `.groups` is not specified, it is chosen
#'   based on the number of rows of the results:
#'   * If all the results have 1 row, you get "drop_last".
#'   * If the number of rows varies, you get "keep".
#'
#'   In addition, a message informs you of that choice, unless the result is ungrouped,
#'   the option "dplyr.summarise.inform" is set to `FALSE`,
#'   or when `summarise()` is called from a function in a package.
#'
#' @family single table verbs
#' @return
#' An object _usually_ of the same type as `.data`.
#'
#' * The rows come from the underlying [group_keys()].
#' * The columns are a combination of the grouping keys and the summary
#'   expressions that you provide.
#' * The grouping structure is controlled by the `.groups=` argument, the
#'   output may be another [grouped_df], a [tibble] or a [rowwise] data frame.
#' * Data frame attributes are **not** preserved, because `summarise()`
#'   fundamentally creates a new data frame.
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}.
#' @examples
#' # A summary applied to ungrouped tbl returns a single row
#' mtcars %>%
#'   summarise(mean = mean(disp), n = n())
#'
#' # Usually, you'll want to group first
#' mtcars %>%
#'   group_by(cyl) %>%
#'   summarise(mean = mean(disp), n = n())
#'
#' # dplyr 1.0.0 allows to summarise to more than one value:
#' mtcars %>%
#'   group_by(cyl) %>%
#'   summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75))
#'
#' # You use a data frame to create multiple columns so you can wrap
#' # this up into a function:
#' my_quantile <- function(x, probs) {
#'   tibble(x = quantile(x, probs), probs = probs)
#' }
#' mtcars %>%
#'   group_by(cyl) %>%
#'   summarise(my_quantile(disp, c(0.25, 0.75)))
#'
#' # Each summary call removes one grouping level (since that group
#' # is now just a single row)
#' mtcars %>%
#'   group_by(cyl, vs) %>%
#'   summarise(cyl_n = n()) %>%
#'   group_vars()
#'
#' # BEWARE: reusing variables may lead to unexpected results
#' mtcars %>%
#'   group_by(cyl) %>%
#'   summarise(disp = mean(disp), sd = sd(disp))
#'
#' # Refer to column names stored as strings with the `.data` pronoun:
#' var <- "mass"
#' summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE))
#' # Learn more in ?dplyr_data_masking
summarise <- function(.data, ..., .groups = NULL) {
  UseMethod("summarise")
}

#' @rdname summarise
#' @export
summarize <- summarise

#' @export
summarise.data.frame <- function(.data, ..., .groups = NULL) {
  cols <- summarise_cols(.data, dplyr_quosures(...), caller_env = caller_env())
  out <- summarise_build(.data, cols)
  if (identical(.groups, "rowwise")) {
    out <- rowwise_df(out, character())
  }
  out
}

#' @export
summarise.grouped_df <- function(.data, ..., .groups = NULL) {
  cols <- summarise_cols(.data, dplyr_quosures(...), caller_env = caller_env())
  out <- summarise_build(.data, cols)
  verbose <- summarise_verbose(.groups, caller_env())

  # Default: peel one grouping level when every group produced one row,
  # otherwise keep the full grouping.
  if (is.null(.groups)) {
    if (cols$all_one) {
      .groups <- "drop_last"
    } else {
      .groups <- "keep"
    }
  }

  group_vars <- group_vars(.data)
  if (identical(.groups, "drop_last")) {
    n <- length(group_vars)
    if (n > 1) {
      if (verbose) {
        new_groups <- glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ")
        summarise_inform("has grouped output by {new_groups}")
      }
      out <- grouped_df(out, group_vars[-n], group_by_drop_default(.data))
    }
  } else if (identical(.groups, "keep")) {
    if (verbose) {
      new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
      summarise_inform("has grouped output by {new_groups}")
    }
    out <- grouped_df(out, group_vars, group_by_drop_default(.data))
  } else if (identical(.groups, "rowwise")) {
    out <- rowwise_df(out, group_vars)
  } else if (!identical(.groups, "drop")) {
    # FIX: style -- space after `if`.
    bullets <- c(
      paste0("`.groups` can't be ", as_label(.groups)),
      i = 'Possible values are NULL (default), "drop_last", "drop", "keep", and "rowwise"'
    )
    abort(bullets)
  }
  out
}

#' @export
summarise.rowwise_df <- function(.data, ..., .groups = NULL) {
  cols <- summarise_cols(.data, dplyr_quosures(...), caller_env = caller_env())
  out <- summarise_build(.data, cols)
  verbose <- summarise_verbose(.groups, caller_env())

  group_vars <- group_vars(.data)
  if (is.null(.groups) || identical(.groups, "keep")) {
    if (verbose && length(group_vars)) {
      new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
      summarise_inform("has grouped output by {new_groups}")
    }
    out <- grouped_df(out, group_vars)
  } else if (identical(.groups, "rowwise")) {
    out <- rowwise_df(out, group_vars)
  } else if (!identical(.groups, "drop")) {
    bullets <- c(
      paste0("`.groups` can't be ", as_label(.groups)),
      i = 'Possible values are NULL (default), "drop", "keep", and "rowwise"'
    )
    abort(bullets)
  }
  out
}

# Evaluate the summary quosures against a data mask and return the new
# columns along with the per-group result sizes.
summarise_cols <- function(.data, dots, caller_env, error_call = caller_env()) {
  error_call <- dplyr_error_call(error_call)

  mask <- DataMask$new(.data, caller_env, "summarise", error_call = error_call)
  old_current_column <- context_peek_bare("column")

  # Restore the "current column" context and release the mask on exit,
  # whether we succeed or error.
  on.exit(context_poke("column", old_current_column), add = TRUE)
  on.exit(mask$forget(), add = TRUE)

  cols <-
list() sizes <- 1L chunks <- vector("list", length(dots)) types <- vector("list", length(dots)) chunks <- list() results <- list() types <- list() out_names <- character() withCallingHandlers({ for (i in seq_along(dots)) { context_poke("column", old_current_column) # - expand quosures <- expand_across(dots[[i]]) # - compute quosures_results <- map(quosures, summarise_eval_one, mask = mask) # - structure for (k in seq_along(quosures)) { quo <- quosures[[k]] quo_data <- attr(quo, "dplyr:::data") quo_result <- quosures_results[[k]] if (is.null(quo_result)) { next } types_k <- quo_result$types chunks_k <- quo_result$chunks results_k <- quo_result$results if (!quo_data$is_named && is.data.frame(types_k)) { chunks_extracted <- .Call(dplyr_extract_chunks, chunks_k, types_k) types_k_names <- names(types_k) for (j in seq_along(chunks_extracted)) { mask$add_one( name = types_k_names[j], chunks = chunks_extracted[[j]], result = results_k[[j]] ) } chunks <- append(chunks, chunks_extracted) types <- append(types, as.list(types_k)) results <- append(results, results_k) out_names <- c(out_names, types_k_names) } else { name <- quo_data$name_auto mask$add_one(name = name, chunks = chunks_k, result = results_k) chunks <- append(chunks, list(chunks_k)) types <- append(types, list(types_k)) results <- append(results, list(results_k)) out_names <- c(out_names, name) } } } recycle_info <- .Call(`dplyr_summarise_recycle_chunks`, chunks, mask$get_rows(), types, results) chunks <- recycle_info$chunks sizes <- recycle_info$sizes results <- recycle_info$results # materialize columns for (i in seq_along(chunks)) { result <- results[[i]] %||% vec_c(!!!chunks[[i]], .ptype = types[[i]]) cols[[ out_names[i] ]] <- result } }, error = function(e) { what <- "computing" index <- i if (inherits(e, "dplyr:::summarise_incompatible_size")) { index <- e$dplyr_error_data$index what <- "recycling" } local_error_context(dots = dots, .index = index, mask = mask) bullets <- c( cnd_bullet_header(what), 
summarise_bullets(e) ) abort(bullets, call = error_call, parent = skip_internal_condition(e) ) }) list(new = cols, size = sizes, all_one = identical(sizes, 1L)) } summarise_eval_one <- function(quo, mask) { quo_data <- attr(quo, "dplyr:::data") if (!is.null(quo_data$column)) { context_poke("column", quo_data$column) # wrap the error when this has been expanded chunks_k <- withCallingHandlers( mask$eval_all_summarise(quo), error = function(cnd) { msg <- glue("Problem while computing column `{quo_data$name_auto}`.") abort(msg, call = call("across"), parent = cnd) } ) } else { # no wrapping otherwise chunks_k <- mask$eval_all_summarise(quo) } if (is.null(chunks_k)) { return(NULL) } types_k <- dplyr_vec_ptype_common(chunks_k, quo_data$name_auto) chunks_k <- vec_cast_common(!!!chunks_k, .to = types_k) result_k <- vec_c(!!!chunks_k, .ptype = types_k) list(chunks = chunks_k, types = types_k, results = result_k) } summarise_build <- function(.data, cols) { out <- group_keys(.data) if (!cols$all_one) { out <- vec_slice(out, rep(seq_len(nrow(out)), cols$size)) } dplyr_col_modify(out, cols$new) } summarise_bullets <- function(cnd, ..) { UseMethod("summarise_bullets") } #' @export summarise_bullets.default <- function(cnd, ...) { c(i = cnd_bullet_cur_group_label()) } #' @export `summarise_bullets.dplyr:::error_incompatible_combine` <- function(cnd, ...) { c() } #' @export `summarise_bullets.dplyr:::summarise_unsupported_type` <- function(cnd, ...) { result <- cnd$dplyr_error_data$result error_name <- peek_error_context()$error_name c( x = glue("`{error_name}` must be a vector, not {friendly_type_of(result)}."), i = cnd_bullet_rowwise_unlist(), i = cnd_bullet_cur_group_label() ) } #' @export `summarise_bullets.dplyr:::summarise_incompatible_size` <- function(cnd, ...) 
{ expected_size <- cnd$dplyr_error_data$expected_size size <- cnd$dplyr_error_data$size group <- cnd$dplyr_error_data$group error_context <- peek_error_context() error_name <- error_context$error_name # so that cnd_bullet_cur_group_label() correctly reports the faulty group peek_mask()$set_current_group(group) c( x = glue("`{error_name}` must be size {or_1(expected_size)}, not {size}."), i = glue("An earlier column had size {expected_size}."), i = cnd_bullet_cur_group_label() ) } #' @export `summarise_bullets.dplyr:::summarise_mixed_null` <- function(cnd, ...) { error_name <- peek_error_context()$error_name c( x = glue("`{error_name}` must return compatible vectors across groups."), x = "Can't combine NULL and non NULL results." ) } # messaging --------------------------------------------------------------- summarise_verbose <- function(.groups, .env) { is.null(.groups) && is_reference(topenv(.env), global_env()) && !identical(getOption("dplyr.summarise.inform"), FALSE) } summarise_inform <- function(..., .env = parent.frame()) { inform(paste0( "`summarise()` ", glue(..., .envir = .env), '. You can override using the `.groups` argument.' )) } dplyr/R/nest_by.R0000644000176200001440000000657514151641776013424 0ustar liggesusers#' Nest by one or more variables #' #' @description #' `r lifecycle::badge("experimental")` #' #' `nest_by()` is closely related to [group_by()]. However, instead of storing #' the group structure in the metadata, it is made explicit in the data, #' giving each group key a single row along with a list-column of data frames #' that contain all the other data. #' #' `nest_by()` returns a [rowwise] data frame, which makes operations on the #' grouped data particularly elegant. See `vignette("rowwise")` for more #' details. 
#'
#' @details
#' Note that `df %>% nest_by(x, y)` is roughly equivalent to
#'
#' ```
#' df %>%
#'   group_by(x, y) %>%
#'   summarise(data = list(cur_data())) %>%
#'   rowwise()
#' ```
#'
#' If you want to unnest a nested data frame, you can either use
#' `tidyr::unnest()` or take advantage of `summarise()`s multi-row behaviour:
#'
#' ```
#' nested %>%
#'   summarise(data)
#' ```
#'
#' @return
#' A [rowwise] data frame. The output has the following properties:
#'
#' * The rows come from the underlying [group_keys()].
#' * The columns are the grouping keys plus one list-column of data frames.
#' * Data frame attributes are **not** preserved, because `nest_by()`
#'   fundamentally creates a new data frame.
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_by")}.
#'
#' @inheritParams group_by
#' @param .key Name of the list column
#' @param .keep Should the grouping columns be kept in the list column.
#' @return A tbl with one row per unique combination of the grouping variables.
#'   The first columns are the grouping variables, followed by a list column of tibbles
#'   with matching rows of the remaining columns.
#' @keywords internal
#' @export
#' @examples
#' # After nesting, you get one row per group
#' iris %>% nest_by(Species)
#' starwars %>% nest_by(species)
#'
#' # The output is grouped by row, which makes modelling particularly easy
#' models <- mtcars %>%
#'   nest_by(cyl) %>%
#'   mutate(model = list(lm(mpg ~ wt, data = data)))
#' models
#'
#' models %>% summarise(rsq = summary(model)$r.squared)
#' @examplesIf requireNamespace("broom", quietly = TRUE)
#'
#' # This is particularly elegant with the broom functions
#' models %>% summarise(broom::glance(model))
#' models %>% summarise(broom::tidy(model))
#' @examples
#'
#' # Note that you can also summarise to unnest the data
#' models %>% summarise(data)
nest_by <- function(.data, ..., .key = "data", .keep = FALSE) {
  lifecycle::signal_stage("experimental", "nest_by()")
  UseMethod("nest_by")
}

#' @export
nest_by.data.frame <- function(.data, ..., .key = "data", .keep = FALSE) {
  # Group first, then delegate to the grouped_df method.
  .data <- group_by(.data, ...)
  nest_by.grouped_df(.data, .key = .key, .keep = .keep)
}

#' @export
nest_by.grouped_df <- function(.data, ..., .key = "data", .keep = FALSE) {
  if (!missing(...)) {
    bullets <- c(
      "Can't re-group while nesting",
      # FIX: unterminated backtick around `nest_by()` in the hint.
      i = "Either `ungroup()` first or don't supply arguments to `nest_by()`"
    )
    abort(bullets)
  }
  vars <- group_vars(.data)
  keys <- group_keys(.data)
  # `.env$.data` forces lookup of the grouped frame in the calling
  # environment rather than the data mask inside mutate().
  keys <- mutate(keys, !!.key := group_split(.env$.data, .keep = .keep))
  rowwise(keys, tidyselect::all_of(vars))
}

# ---- tar member: dplyr/R/zzz.r ----
.onLoad <- function(libname, pkgname) {
  op <- options()
  op.dplyr <- list(
    dplyr.show_progress = TRUE
  )
  # Only set options that the user has not already set.
  toset <- !(names(op.dplyr) %in% names(op))
  if (any(toset)) options(op.dplyr[toset])

  .Call(dplyr_init_library, ns_env("dplyr"), ns_env("vctrs"), ns_env("rlang"))

  # Register SQL methods here only when dbplyr doesn't provide them itself.
  has_dbplyr <- is_installed("dbplyr")
  if (!has_dbplyr || !exists("count.tbl_sql", ns_env("dbplyr"))) {
    s3_register("dplyr::count", "tbl_sql")
    s3_register("dplyr::tally", "tbl_sql")
  }

  run_on_load()

  invisible()
}

.onAttach <-
function(libname, pkgname) { setHook(packageEvent("plyr", "attach"), function(...) { packageStartupMessage(rule()) packageStartupMessage( "You have loaded plyr after dplyr - this is likely ", "to cause problems.\nIf you need functions from both plyr and dplyr, ", "please load plyr first, then dplyr:\nlibrary(plyr); library(dplyr)" ) packageStartupMessage(rule()) }) } .onDetach <- function(libpath) { setHook(packageEvent("plyr", "attach"), NULL, "replace") } dplyr/R/dplyr.r0000644000176200001440000000111414154402214013114 0ustar liggesusers#' @description #' To learn more about dplyr, start with the vignettes: #' `browseVignettes(package = "dplyr")` #' @useDynLib dplyr, .registration = TRUE #' @keywords internal #' @import rlang #' @rawNamespace import(vctrs, except = data_frame) #' @importFrom glue glue glue_collapse glue_data #' @importFrom stats setNames update #' @importFrom utils head tail #' @importFrom methods is #' @importFrom lifecycle deprecated "_PACKAGE" # We're importing vctrs without `data_frame()` because we currently # reexport the deprecated `tibble::data_frame()` function on_load(local_use_cli()) dplyr/R/top-n.R0000644000176200001440000000412014151641776012776 0ustar liggesusers#' Select top (or bottom) n rows (by value) #' #' @description #' `r lifecycle::badge("superseded")` #' `top_n()` has been superseded in favour of [slice_min()]/[slice_max()]. #' While it will not be deprecated in the near future, retirement means #' that we will only perform critical bug fixes, so we recommend moving to the #' newer alternatives. #' #' `top_n()` was superseded because the name was fundamentally confusing as #' it returned what you might reasonably consider to be the _bottom_ #' rows. Additionally, the `wt` variable had a confusing name, and strange #' default (the last column in the data frame). Unfortunately we could not #' see an easy way to fix the existing `top_n()` function without breaking #' existing code, so we created a new alternative. 
#'
#' @param x A data frame.
#' @param n Number of rows to return for `top_n()`, fraction of rows to
#'   return for `top_frac()`. If `n` is positive, selects the top rows.
#'   If negative, selects the bottom rows.
#'   If `x` is grouped, this is the number (or fraction) of rows per group.
#'   Will include more rows if there are ties.
#' @param wt (Optional). The variable to use for ordering. If not
#'   specified, defaults to the last variable in the tbl.
#' @keywords internal
#' @export
#' @examples
#' df <- data.frame(x = c(6, 4, 1, 10, 3, 1, 1))
#'
#' df %>% top_n(2) # highest values
#' df %>% top_n(-2) # lowest values
#' # now use
#' df %>% slice_max(x, n = 2)
#' df %>% slice_min(x, n = 2)
#'
#' # top_frac() -> prop argument of slice_min()/slice_max()
#' df %>% top_frac(.5)
#' # ->
#' df %>% slice_max(x, prop = 0.5)
top_n <- function(x, n, wt) {
  lifecycle::signal_stage("superseded", "top_n()")
  wt <- enquo(wt)
  if (quo_is_missing(wt)) {
    # No ordering variable supplied: fall back to the last column of the
    # table and tell the user which one was chosen.
    vars <- tbl_vars(x)
    wt_name <- vars[length(vars)]
    inform(glue("Selecting by ", wt_name))
    wt <- sym(wt_name)
  }

  filter(x, top_n_rank({{ n }}, !!wt))
}

# Build the logical filter condition: a positive `n` keeps the `n` largest
# values (rank on the descending order), a negative `n` keeps the `abs(n)`
# smallest. Ties share a rank, so more than `n` rows may be kept.
top_n_rank <- function(n, wt) {
  if (n > 0) {
    min_rank(desc(wt)) <= n
  } else {
    min_rank(wt) <= abs(n)
  }
}

#' @export
#' @rdname top_n
top_frac <- function(x, n, wt) {
  # Fraction-of-rows variant: scale `n` by the (per-group) row count n()
  # and reuse top_n().
  top_n(x, {{ n }} * n(), {{ wt }})
}
dplyr/R/colwise-mutate.R0000644000176200001440000003470214154402214014675 0ustar liggesusers#' Summarise multiple columns
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [across()] in an existing verb. See `vignette("colwise")` for details.
#'
#' The [scoped] variants of [summarise()] make it easy to apply the same
#' transformation to multiple variables.
#' There are three variants.
#' * `summarise_all()` affects every variable #' * `summarise_at()` affects variables selected with a character vector or #' vars() #' * `summarise_if()` affects variables selected with a predicate function #' #' @inheritParams scoped #' @param .cols This argument has been renamed to `.vars` to fit #' dplyr's terminology and is deprecated. #' @return A data frame. By default, the newly created columns have the shortest #' names needed to uniquely identify the output. To force inclusion of a name, #' even when not needed, name the input (see examples for details). #' @seealso [The other scoped verbs][scoped], [vars()] #' #' @section Grouping variables: #' #' If applied on a grouped tibble, these operations are *not* applied #' to the grouping variables. The behaviour depends on whether the #' selection is **implicit** (`all` and `if` selections) or #' **explicit** (`at` selections). #' #' * Grouping variables covered by explicit selections in #' `summarise_at()` are always an error. Add `-group_cols()` to the #' [vars()] selection to avoid this: #' #' ``` #' data %>% #' summarise_at(vars(-group_cols(), ...), myoperation) #' ``` #' #' Or remove `group_vars()` from the character vector of column names: #' #' ``` #' nms <- setdiff(nms, group_vars(data)) #' data %>% summarise_at(nms, myoperation) #' ``` #' #' * Grouping variables covered by implicit selections are silently #' ignored by `summarise_all()` and `summarise_if()`. #' #' @section Naming: #' #' The names of the new columns are derived from the names of the #' input variables and the names of the functions. #' #' - if there is only one unnamed function (i.e. 
if `.funs` is an unnamed list #' of length one), #' the names of the input variables are used to name the new columns; #' #' - for `_at` functions, if there is only one unnamed variable (i.e., #' if `.vars` is of the form `vars(a_single_column)`) and `.funs` has length #' greater than one, #' the names of the functions are used to name the new columns; #' #' - otherwise, the new names are created by #' concatenating the names of the input variables and the names of the #' functions, separated with an underscore `"_"`. #' #' The `.funs` argument can be a named or unnamed list. #' If a function is unnamed and the name cannot be derived automatically, #' a name of the form "fn#" is used. #' Similarly, [vars()] accepts named and unnamed arguments. #' If a variable in `.vars` is named, a new column by that name will be created. #' #' Name collisions in the new columns are disambiguated using a unique suffix. #' #' @section Life cycle: #' #' The functions are maturing, because the naming scheme and the #' disambiguation algorithm are subject to change in dplyr 0.9.0. #' #' @examples #' # The _at() variants directly support strings: #' starwars %>% #' summarise_at(c("height", "mass"), mean, na.rm = TRUE) #' # -> #' starwars %>% summarise(across(c("height", "mass"), ~ mean(.x, na.rm = TRUE))) #' #' # You can also supply selection helpers to _at() functions but you have #' # to quote them with vars(): #' starwars %>% #' summarise_at(vars(height:mass), mean, na.rm = TRUE) #' # -> #' starwars %>% #' summarise(across(height:mass, ~ mean(.x, na.rm = TRUE))) #' #' # The _if() variants apply a predicate function (a function that #' # returns TRUE or FALSE) to determine the relevant subset of #' # columns. 
Here we apply mean() to the numeric columns: #' starwars %>% #' summarise_if(is.numeric, mean, na.rm = TRUE) #' starwars %>% #' summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) #' #' by_species <- iris %>% #' group_by(Species) #' #' # If you want to apply multiple transformations, pass a list of #' # functions. When there are multiple functions, they create new #' # variables instead of modifying the variables in place: #' by_species %>% #' summarise_all(list(min, max)) #' # -> #' by_species %>% #' summarise(across(everything(), list(min = min, max = max))) #' @export #' @keywords internal summarise_all <- function(.tbl, .funs, ...) { lifecycle::signal_stage("superseded", "summarise_all()") funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "summarise_all") summarise(.tbl, !!!funs) } #' @rdname summarise_all #' @export summarise_if <- function(.tbl, .predicate, .funs, ...) { lifecycle::signal_stage("superseded", "summarise_if()") funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), ..., .caller = "summarise_if") summarise(.tbl, !!!funs) } #' @rdname summarise_all #' @export summarise_at <- function(.tbl, .vars, .funs, ..., .cols = NULL) { lifecycle::signal_stage("superseded", "summarise_at()") .vars <- check_dot_cols(.vars, .cols) funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), ..., .caller = "summarise_at") summarise(.tbl, !!!funs) } #' @rdname summarise_all #' @export summarize_all <- summarise_all #' @rdname summarise_all #' @export summarize_if <- summarise_if #' @rdname summarise_all #' @export summarize_at <- summarise_at #' Mutate multiple columns #' #' @description #' `r lifecycle::badge("superseded")` #' #' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of #' [across()] in an existing verb. See `vignette("colwise")` for details. #' #' The [scoped] variants of [mutate()] and [transmute()] make it easy to apply #' the same transformation to multiple variables. 
There are three variants: #' * _all affects every variable #' * _at affects variables selected with a character vector or vars() #' * _if affects variables selected with a predicate function: #' #' @inheritParams scoped #' @inheritParams summarise_all #' @return A data frame. By default, the newly created columns have the shortest #' names needed to uniquely identify the output. To force inclusion of a name, #' even when not needed, name the input (see examples for details). #' @seealso [The other scoped verbs][scoped], [vars()] #' #' @section Grouping variables: #' #' If applied on a grouped tibble, these operations are *not* applied #' to the grouping variables. The behaviour depends on whether the #' selection is **implicit** (`all` and `if` selections) or #' **explicit** (`at` selections). #' #' * Grouping variables covered by explicit selections in #' `mutate_at()` and `transmute_at()` are always an error. Add #' `-group_cols()` to the [vars()] selection to avoid this: #' #' ``` #' data %>% mutate_at(vars(-group_cols(), ...), myoperation) #' ``` #' #' Or remove `group_vars()` from the character vector of column names: #' #' ``` #' nms <- setdiff(nms, group_vars(data)) #' data %>% mutate_at(vars, myoperation) #' ``` #' #' * Grouping variables covered by implicit selections are ignored by #' `mutate_all()`, `transmute_all()`, `mutate_if()`, and #' `transmute_if()`. #' #' @inheritSection summarise_all Naming #' @inheritSection summarise_all Life cycle #' #' @examples #' iris <- as_tibble(iris) #' #' # All variants can be passed functions and additional arguments, #' # purrr-style. The _at() variants directly support strings. 
Here #' # we'll scale the variables `height` and `mass`: #' scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm) #' starwars %>% mutate_at(c("height", "mass"), scale2) #' # -> #' starwars %>% mutate(across(c("height", "mass"), scale2)) #' #' # You can pass additional arguments to the function: #' starwars %>% mutate_at(c("height", "mass"), scale2, na.rm = TRUE) #' starwars %>% mutate_at(c("height", "mass"), ~scale2(., na.rm = TRUE)) #' # -> #' starwars %>% mutate(across(c("height", "mass"), ~ scale2(.x, na.rm = TRUE))) #' #' # You can also supply selection helpers to _at() functions but you have #' # to quote them with vars(): #' iris %>% mutate_at(vars(matches("Sepal")), log) #' iris %>% mutate(across(matches("Sepal"), log)) #' #' # The _if() variants apply a predicate function (a function that #' # returns TRUE or FALSE) to determine the relevant subset of #' # columns. Here we divide all the numeric columns by 100: #' starwars %>% mutate_if(is.numeric, scale2, na.rm = TRUE) #' starwars %>% mutate(across(where(is.numeric), ~ scale2(.x, na.rm = TRUE))) #' #' # mutate_if() is particularly useful for transforming variables from #' # one type to another #' iris %>% mutate_if(is.factor, as.character) #' iris %>% mutate_if(is.double, as.integer) #' # -> #' iris %>% mutate(across(where(is.factor), as.character)) #' iris %>% mutate(across(where(is.double), as.integer)) #' #' # Multiple transformations ---------------------------------------- #' #' # If you want to apply multiple transformations, pass a list of #' # functions. 
When there are multiple functions, they create new #' # variables instead of modifying the variables in place: #' iris %>% mutate_if(is.numeric, list(scale2, log)) #' iris %>% mutate_if(is.numeric, list(~scale2(.), ~log(.))) #' iris %>% mutate_if(is.numeric, list(scale = scale2, log = log)) #' # -> #' iris %>% #' as_tibble() %>% #' mutate(across(where(is.numeric), list(scale = scale2, log = log))) #' #' # When there's only one function in the list, it modifies existing #' # variables in place. Give it a name to instead create new variables: #' iris %>% mutate_if(is.numeric, list(scale2)) #' iris %>% mutate_if(is.numeric, list(scale = scale2)) #' @export #' @keywords internal mutate_all <- function(.tbl, .funs, ...) { lifecycle::signal_stage("superseded", "mutate_all()") check_grouped(.tbl, "mutate", "all", alt = TRUE) funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "mutate_all") mutate(.tbl, !!!funs) } #' @rdname mutate_all #' @export mutate_if <- function(.tbl, .predicate, .funs, ...) { lifecycle::signal_stage("superseded", "mutate_if()") check_grouped(.tbl, "mutate", "if") funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), ..., .caller = "mutate_if") mutate(.tbl, !!!funs) } #' @rdname mutate_all #' @export mutate_at <- function(.tbl, .vars, .funs, ..., .cols = NULL) { lifecycle::signal_stage("superseded", "mutate_at()") .vars <- check_dot_cols(.vars, .cols) funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "mutate_at") mutate(.tbl, !!!funs) } #' @rdname mutate_all #' @export transmute_all <- function(.tbl, .funs, ...) { lifecycle::signal_stage("superseded", "transmute_all()") check_grouped(.tbl, "transmute", "all", alt = TRUE) funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "transmute_all") transmute(.tbl, !!!funs) } #' @rdname mutate_all #' @export transmute_if <- function(.tbl, .predicate, .funs, ...) 
{ lifecycle::signal_stage("superseded", "transmute_if()") check_grouped(.tbl, "transmute", "if") funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), ..., .caller = "transmute_if") transmute(.tbl, !!!funs) } #' @rdname mutate_all #' @export transmute_at <- function(.tbl, .vars, .funs, ..., .cols = NULL) { lifecycle::signal_stage("superseded", "transmute_at()") .vars <- check_dot_cols(.vars, .cols) funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "transmute_at") transmute(.tbl, !!!funs) } # Helpers ----------------------------------------------------------------- manip_all <- function(.tbl, .funs, .quo, .env, ..., .include_group_vars = FALSE, .caller, error_call = caller_env()) { if (.include_group_vars) { syms <- syms(tbl_vars(.tbl)) } else { syms <- syms(tbl_nongroup_vars(.tbl)) } funs <- as_fun_list(.funs, .env, ..., .caller = .caller, error_call = error_call) manip_apply_syms(funs, syms, .tbl) } manip_if <- function(.tbl, .predicate, .funs, .quo, .env, ..., .include_group_vars = FALSE, .caller, error_call = caller_env()) { vars <- tbl_if_syms(.tbl, .predicate, .env, .include_group_vars = .include_group_vars, error_call = error_call) funs <- as_fun_list(.funs, .env, ..., .caller = .caller, error_call = error_call) manip_apply_syms(funs, vars, .tbl) } manip_at <- function(.tbl, .vars, .funs, .quo, .env, ..., .include_group_vars = FALSE, .caller, error_call = caller_env()) { syms <- tbl_at_syms(.tbl, .vars, .include_group_vars = .include_group_vars, error_call = error_call) funs <- as_fun_list(.funs, .env, ..., .caller = .caller, error_call = error_call) manip_apply_syms(funs, syms, .tbl) } check_grouped <- function(tbl, verb, suffix, alt = FALSE) { if (is_grouped_df(tbl)) { if (alt) { alt_line <- sprintf("Use `%s_at(df, vars(-group_cols()), myoperation)` to silence the message.", verb) } else { alt_line <- chr() } inform(c( sprintf("`%s_%s()` ignored the following grouping variables:", 
verb, suffix), set_names(fmt_cols(group_vars(tbl)), "*"), "i" = alt_line )) } } check_dot_cols <- function(vars, cols) { if (is_null(cols)) { vars } else { inform("`.cols` has been renamed and is deprecated, please use `.vars`") if (missing(vars)) cols else vars } } manip_apply_syms <- function(funs, syms, tbl) { out <- vector("list", length(syms) * length(funs)) dim(out) <- c(length(syms), length(funs)) syms_position <- match(as.character(syms), tbl_vars(tbl)) for (i in seq_along(syms)) { pos <- syms_position[i] for (j in seq_along(funs)) { fun <- funs[[j]] if (is_quosure(fun)) { out[[i, j]] <- expr_substitute(funs[[j]], quote(.), syms[[i]]) } else { out[[i, j]] <- call2(funs[[j]], syms[[i]]) } attr(out[[i, j]], "position") <- pos } } dim(out) <- NULL # Use symbols as default names unnamed <- !have_name(syms) names(syms)[unnamed] <- map_chr(syms[unnamed], as_string) if (length(funs) == 1 && !attr(funs, "have_name")) { names(out) <- names(syms) } else { nms <- names(funs) %||% rep("", length(funs)) is_fun <- nms == "" | nms == "" nms[is_fun] <- paste0("fn", seq_len(sum(is_fun))) nms <- unique_names(nms, quiet = TRUE) names(funs) <- nms if (length(syms) == 1 && all(unnamed)) { names(out) <- names(funs) } else { syms_names <- ifelse(unnamed, map_chr(syms, as_string), names(syms)) grid <- expand.grid(var = syms_names, call = names(funs)) names(out) <- paste(grid$var, grid$call, sep = "_") } } out } dplyr/R/data-storms.R0000644000176200001440000000307414151641776014206 0ustar liggesusers#' Storm tracks data #' #' This data is a subset of the NOAA Atlantic hurricane database best track #' data, \url{https://www.nhc.noaa.gov/data/#hurdat}. The data includes the #' positions and attributes of storms from 1975-2020, measured every six hours #' during the lifetime of a storm. 
#' #' @seealso The script to create the storms data set: \url{https://github.com/tidyverse/dplyr/blob/main/data-raw/storms.R} #' #' @format A tibble with 11,859 observations and 13 variables: #' \describe{ #' \item{name}{Storm Name} #' \item{year,month,day}{Date of report} #' \item{hour}{Hour of report (in UTC)} #' \item{lat,long}{Location of storm center} #' \item{status}{Storm classification (Tropical Depression, Tropical Storm, #' or Hurricane)} #' \item{category}{Saffir-Simpson storm category (estimated from wind speed. #' -1 = Tropical Depression, 0 = Tropical Storm)} #' \item{wind}{storm's maximum sustained wind speed (in knots)} #' \item{pressure}{Air pressure at the storm's center (in millibars)} #' \item{tropicalstorm_force_diameter}{Diameter (in nautical miles) of the area experiencing tropical storm strength winds (34 knots or above)} #' \item{hurricane_force_diameter}{Diameter (in nautical miles) of the area experiencing hurricane strength winds (64 knots or above)} #' } #' @examples #' #' # show a plot of the storm paths #' if (requireNamespace("ggplot2", quietly = TRUE)) { #' library(ggplot2) #' ggplot(storms) + #' aes(x=long, y=lat, color=paste(year, name)) + #' geom_path() + #' guides(color='none') + #' facet_wrap(~year) #' } #' #' storms "storms" dplyr/R/deprec-do.r0000644000176200001440000001607214174551644013653 0ustar liggesusers#' Do anything #' #' @description #' `r lifecycle::badge("superseded")` #' #' `do()` is superseded as of dplyr 1.0.0, because its syntax never really #' felt like it belonged with the rest of dplyr. It's replaced by a combination #' of [summarise()] (which can now produce multiple rows and multiple columns), #' [nest_by()] (which creates a [rowwise] tibble of nested data), #' and [across()] (which allows you to access the data for the "current" group). #' #' @param .data a tbl #' @param ... Expressions to apply to each group. If named, results will be #' stored in a new column. If unnamed, must return a data frame. 
You can #' use `.` to refer to the current group. You can not mix named and #' unnamed arguments. #' @keywords internal #' @export #' @examples #' # do() with unnamed arguments becomes summarise() #' # . becomes across() #' by_cyl <- mtcars %>% group_by(cyl) #' by_cyl %>% do(head(., 2)) #' # -> #' by_cyl %>% summarise(head(across(), 2)) #' by_cyl %>% slice_head(n = 2) #' #' # Can refer to variables directly #' by_cyl %>% do(mean = mean(.$vs)) #' # -> #' by_cyl %>% summarise(mean = mean(vs)) #' #' # do() with named arguments becomes nest_by() + mutate() & list() #' models <- by_cyl %>% do(mod = lm(mpg ~ disp, data = .)) #' # -> #' models <- mtcars %>% #' nest_by(cyl) %>% #' mutate(mod = list(lm(mpg ~ disp, data = data))) #' models %>% summarise(rsq = summary(mod)$r.squared) #' #' # use broom to turn models into data #' models %>% do(data.frame( #' var = names(coef(.$mod)), #' coef(summary(.$mod))) #' ) #' @examplesIf requireNamespace("broom", quietly = TRUE) #' # -> #' models %>% summarise(broom::tidy(mod)) do <- function(.data, ...) { lifecycle::signal_stage("superseded", "do()") UseMethod("do") } #' @export do.NULL <- function(.data, ...) { NULL } #' @export do.grouped_df <- function(.data, ...) { index <- group_rows(.data) labels <- select(group_data(.data), -last_col()) attr(labels, ".drop") <- NULL # Create ungroup version of data frame suitable for subsetting group_data <- ungroup(.data) args <- enquos(...) 
named <- named_args(args) mask <- new_data_mask(new_environment()) n <- length(index) m <- length(args) # Special case for zero-group/zero-row input if (n == 0) { if (named) { out <- rep_len(list(list()), length(args)) out <- set_names(out, names(args)) out <- label_output_list(labels, out, groups(.data)) } else { env_bind_do_pronouns(mask, group_data) out <- eval_tidy(args[[1]], mask) out <- out[0, , drop = FALSE] out <- label_output_dataframe(labels, list(list(out)), group_vars(.data), group_by_drop_default(.data)) } return(out) } # Add pronouns with active bindings that resolve to the current # subset. `_i` is found in environment of this function because of # usual scoping rules. group_slice <- function(value) { if (missing(value)) { group_data[index[[`_i`]], , drop = FALSE] } else { group_data[index[[`_i`]], ] <<- value } } env_bind_do_pronouns(mask, group_slice) out <- replicate(m, vector("list", n), simplify = FALSE) names(out) <- names(args) p <- rlang::with_options( lifecycle_verbosity = "quiet", progress_estimated(n * m, min_time = 2) ) for (`_i` in seq_len(n)) { for (j in seq_len(m)) { out[[j]][`_i`] <- list(eval_tidy(args[[j]], mask)) p$tick()$print() } } if (!named) { label_output_dataframe(labels, out, group_vars(.data), group_by_drop_default(.data)) } else { label_output_list(labels, out, group_vars(.data)) } } #' @export do.data.frame <- function(.data, ...) { args <- enquos(...) 
named <- named_args(args) # Create custom data mask with `.` pronoun mask <- new_data_mask(new_environment()) env_bind_do_pronouns(mask, .data) if (!named) { out <- eval_tidy(args[[1]], mask) if (!inherits(out, "data.frame")) { msg <- glue("Result must be a data frame, not {fmt_classes(out)}.") abort(msg) } } else { out <- map(args, function(arg) list(eval_tidy(arg, mask))) names(out) <- names(args) out <- tibble::as_tibble(out, .name_repair = "minimal") } out } # Helper functions ------------------------------------------------------------- env_bind_do_pronouns <- function(env, data) { if (is_function(data)) { bind <- env_bind_active } else { bind <- env_bind } # Use `:=` for `.` to avoid partial matching with `.env` bind(env, "." := data, .data = data) } label_output_dataframe <- function(labels, out, groups, .drop, error_call = caller_env()) { data_frame <- vapply(out[[1]], is.data.frame, logical(1)) if (any(!data_frame)) { msg <- glue( "Results {bad} must be data frames, not {first_bad_class}.", bad = fmt_comma(which(!data_frame)), first_bad_class = fmt_classes(out[[1]][[which.min(data_frame)]]) ) abort(msg, call = error_call) } rows <- vapply(out[[1]], nrow, numeric(1)) out <- bind_rows(out[[1]]) if (!is.null(labels)) { # Remove any common columns from labels labels <- labels[setdiff(names(labels), names(out))] # Repeat each row to match data labels <- labels[rep(seq_len(nrow(labels)), rows), , drop = FALSE] rownames(labels) <- NULL grouped_df(bind_cols(labels, out), groups, .drop) } else { rowwise(out) } } label_output_list <- function(labels, out, groups) { if (!is.null(labels)) { labels[names(out)] <- out rowwise(labels) } else { class(out) <- "data.frame" attr(out, "row.names") <- .set_row_names(length(out[[1]])) rowwise(out) } } named_args <- function(args, error_call = caller_env()) { # Arguments must either be all named or all unnamed. 
named <- sum(names2(args) != "") if (!(named == 0 || named == length(args))) { msg <- "Arguments must either be all named or all unnamed." abort(msg, call = error_call) } if (named == 0 && length(args) > 1) { msg <- glue("Can only supply one unnamed argument, not {length(args)}.") abort(msg, call = error_call) } named != 0 } #' @export do.rowwise_df <- function(.data, ...) { # Create ungroup version of data frame suitable for subsetting group_data <- ungroup(.data) args <- enquos(...) named <- named_args(args) # Create new environment, inheriting from parent, with an active binding # for . that resolves to the current subset. `_i` is found in environment # of this function because of usual scoping rules. mask <- new_data_mask(new_environment()) current_row <- function() lapply(group_data[`_i`, , drop = FALSE], "[[", 1) env_bind_do_pronouns(mask, current_row) n <- nrow(.data) m <- length(args) out <- replicate(m, vector("list", n), simplify = FALSE) names(out) <- names(args) p <- rlang::with_options( lifecycle_verbosity = "quiet", progress_estimated(n * m, min_time = 2) ) for (`_i` in seq_len(n)) { for (j in seq_len(m)) { out[[j]][`_i`] <- list(eval_tidy(args[[j]], mask)) p$tick()$print() } } if (!named) { label_output_dataframe(NULL, out, groups(.data), group_by_drop_default(.data)) } else { label_output_list(NULL, out, groups(.data)) } } dplyr/R/data-starwars.R0000644000176200001440000000210314121112104014464 0ustar liggesusers#' Starwars characters #' #' The original data, from SWAPI, the Star Wars API, , has been revised #' to reflect additional research into gender and sex determinations of characters. 
#' #' @format A tibble with 87 rows and 14 variables: #' \describe{ #' \item{name}{Name of the character} #' \item{height}{Height (cm)} #' \item{mass}{Weight (kg)} #' \item{hair_color,skin_color,eye_color}{Hair, skin, and eye colors} #' \item{birth_year}{Year born (BBY = Before Battle of Yavin)} #' \item{sex}{The biological sex of the character, namely male, female, hermaphroditic, or none (as in the case for Droids).} #' \item{gender}{The gender role or gender identity of the character as determined by their personality or the way they were programmed (as in the case for Droids).} #' \item{homeworld}{Name of homeworld} #' \item{species}{Name of species} #' \item{films}{List of films the character appeared in} #' \item{vehicles}{List of vehicles the character has piloted} #' \item{starships}{List of starships the character has piloted} #' } #' @examples #' starwars "starwars" dplyr/R/colwise-filter.R0000644000176200001440000001261414151641776014700 0ustar liggesusers#' Filter within a selection of variables #' #' @description #' `r lifecycle::badge("superseded")` #' #' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of #' [across()] in an existing verb. See `vignette("colwise")` for details. #' #' These [scoped] filtering verbs apply a predicate expression to a #' selection of variables. The predicate expression should be quoted #' with [all_vars()] or [any_vars()] and should mention the pronoun #' `.` to refer to variables. #' #' @inheritParams scoped #' @param .vars_predicate A quoted predicate expression as returned by #' [all_vars()] or [any_vars()]. #' #' Can also be a function or purrr-like formula. In this case, the #' intersection of the results is taken by default and there's #' currently no way to request the union. #' @param .preserve when `FALSE` (the default), the grouping structure #' is recalculated based on the resulting data, otherwise it is kept as is. 
#' @export #' #' @section Grouping variables: #' #' The grouping variables that are part of the selection are taken #' into account to determine filtered rows. #' #' @keywords internal #' @examples #' # While filter() accepts expressions with specific variables, the #' # scoped filter verbs take an expression with the pronoun `.` and #' # replicate it over all variables. This expression should be quoted #' # with all_vars() or any_vars(): #' all_vars(is.na(.)) #' any_vars(is.na(.)) #' #' #' # You can take the intersection of the replicated expressions: #' filter_all(mtcars, all_vars(. > 150)) #' # -> #' filter(mtcars, if_all(everything(), ~ .x > 150)) #' #' # Or the union: #' filter_all(mtcars, any_vars(. > 150)) #' # -> #' filter(mtcars, if_any(everything(), ~ . > 150)) #' #' #' # You can vary the selection of columns on which to apply the #' # predicate. filter_at() takes a vars() specification: #' filter_at(mtcars, vars(starts_with("d")), any_vars((. %% 2) == 0)) #' # -> #' filter(mtcars, if_any(starts_with("d"), ~ (.x %% 2) == 0)) #' #' # And filter_if() selects variables with a predicate function: #' filter_if(mtcars, ~ all(floor(.) == .), all_vars(. 
!= 0)) #' # -> #' is_int <- function(x) all(floor(x) == x) #' filter(mtcars, if_all(where(is_int), ~ .x != 0)) filter_all <- function(.tbl, .vars_predicate, .preserve = FALSE) { lifecycle::signal_stage("superseded", "filter_all()") syms <- syms(tbl_vars(.tbl)) pred <- apply_filter_syms(.vars_predicate, syms, .tbl) filter(.tbl, !!pred, .preserve = .preserve) } #' @rdname filter_all #' @export filter_if <- function(.tbl, .predicate, .vars_predicate, .preserve = FALSE) { lifecycle::signal_stage("superseded", "filter_if()") syms <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE) pred <- apply_filter_syms(.vars_predicate, syms, .tbl) filter(.tbl, !!pred, .preserve = .preserve) } #' @rdname filter_all #' @export filter_at <- function(.tbl, .vars, .vars_predicate, .preserve = FALSE) { lifecycle::signal_stage("superseded", "filter_at()") syms <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE) pred <- apply_filter_syms(.vars_predicate, syms, .tbl) filter(.tbl, !!pred, .preserve = .preserve) } apply_filter_syms <- function(pred, syms, tbl, error_call = caller_env()) { if (is_empty(syms)) { msg <- glue("`.predicate` must match at least one column.") abort(msg, call = error_call) } joiner <- all_exprs if (inherits_any(pred, c("all_vars", "any_vars"))) { if (inherits(pred, "any_vars")) { joiner <- any_exprs } pred <- map(syms, function(sym) expr_substitute(pred, quote(.), sym)) } else if (is_bare_formula(pred) || is_function(pred)) { pred <- as_function(pred) pred <- map(syms, function(sym) call2(pred, sym)) } else { msg <- glue("`.vars_predicate` must be a function or a call to `all_vars()` or `any_vars()`, not {friendly_type_of(pred)}.") abort(msg, call = error_call) } joiner(!!!pred) } ## Return the union or intersection of predicate expressions. ## ## `all_exprs()` and `any_exprs()` take predicate expressions and join them ## into a single predicate. They assume vectorised expressions by ## default and join them with `&` or `|`. 
Note that this will also ## work with scalar predicates, but if you want to be explicit you can ## set `.vectorised` to `FALSE` to join by `&&` or `||`. ## ## @param ... Predicate expressions. ## @param .vectorised If `TRUE`, predicates are joined with `&` or ## `|`. Otherwise, they are joined with `&&` or `||`. ## @return A [quosure][rlang::quo]. ## @export ## @examples ## all_exprs(cyl > 3, am == 1) ## any_exprs(cyl > 3, am == 1) ## any_exprs(cyl > 3, am == 1, .vectorised = FALSE) all_exprs <- function(..., .vectorised = TRUE) { op <- if (.vectorised) quote(`&`) else quote(`&&`) quo_reduce(..., .op = op) } ## @rdname all_exprs ## @export any_exprs <- function(..., .vectorised = TRUE) { op <- if (.vectorised) quote(`|`) else quote(`||`) quo_reduce(..., .op = op) } ## @param .op Can be a function or a quoted name of a function. If a ## quoted name, the default environment is the [base ## environment][rlang::base_env] unless you supply a ## [quosure][rlang::quo]. quo_reduce <- function(..., .op) { stopifnot(is_symbol(.op) || is_function(.op)) dots <- enquos(...) if (length(dots) == 1) { return(dots[[1]]) } op_quo <- as_quosure(.op, base_env()) op <- quo_get_expr(op_quo) expr <- reduce(dots, function(x, y) expr((!!op)((!!x), (!!y)))) new_quosure(expr, quo_get_env(op_quo)) } dplyr/R/desc.r0000644000176200001440000000061614121112104012674 0ustar liggesusers#' Descending order #' #' Transform a vector into a format that will be sorted in descending order. #' This is useful within [arrange()]. #' #' @param x vector to transform #' @export #' @examples #' desc(1:10) #' desc(factor(letters)) #' #' first_day <- seq(as.Date("1910/1/1"), as.Date("1920/1/1"), "years") #' desc(first_day) #' #' starwars %>% arrange(desc(mass)) desc <- function(x) -xtfrm(x) dplyr/R/dbplyr.R0000644000176200001440000001200714121112104013207 0ustar liggesusers#' Database and SQL generics. #' #' The `sql_` generics are used to build the different types of SQL queries. 
#' The default implementations in dbplyr generate ANSI 92 compliant SQL.
#' The `db_` generics execute actions on the database. The default
#' implementations in dbplyr typically just call the standard DBI S4
#' method.
#'
#' A few backend methods do not call the standard DBI S4 methods including
#'
#' * `db_data_type()`: Calls [DBI::dbDataType()] for every field
#' (e.g. data frame column) and returns a vector of corresponding SQL data
#' types
#'
#' * `db_save_query()`: Builds and executes a
#' `CREATE [TEMPORARY] TABLE <table> ...` SQL command.
#'
#' * `db_create_index()`: Builds and executes a
#' `CREATE INDEX <name> ON <table>` SQL command.
#'
#' * `db_drop_table()`: Builds and executes a
#' `DROP TABLE [IF EXISTS] <table>` SQL command.
#'
#' * `db_analyze()`: Builds and executes an
#' `ANALYZE <table>
` SQL command.
#'
#' Currently, [copy_to()] is the only user of `db_begin()`, `db_commit()`,
#' `db_rollback()`, `db_write_table()`, `db_create_indexes()`, `db_drop_table()` and
#' `db_analyze()`. If you find yourself overriding many of these
#' functions it may suggest that you should just override `copy_to()`
#' instead.
#'
#' `db_create_table()` and `db_insert_into()` have been deprecated
#' in favour of `db_write_table()`.
#'
#' @return Usually a logical value indicating success. Most failures should generate
#'   an error. However, `db_has_table()` should return `NA` if
#'   temporary tables cannot be listed with [DBI::dbListTables()] (due to backend
#'   API limitations for example). As a result, your methods will rely on the
#'   backend to throw an error if a table exists when it shouldn't.
#' @name backend_dbplyr
#' @param con A database connection.
#' @keywords internal
NULL

#' @name backend_dbplyr
#' @export
db_desc <- function(x) UseMethod("db_desc")
#' @name backend_dbplyr
#' @export
sql_translate_env <- function(con) UseMethod("sql_translate_env")

#' @name backend_dbplyr
#' @export
db_list_tables <- function(con) UseMethod("db_list_tables")
#' @name backend_dbplyr
#' @export
#' @param table A string, the table name.
db_has_table <- function(con, table) UseMethod("db_has_table")
#' @name backend_dbplyr
#' @export
#' @param fields A list of fields, as in a data frame.
db_data_type <- function(con, fields) UseMethod("db_data_type")
#' @name backend_dbplyr
#' @export
db_save_query <- function(con, sql, name, temporary = TRUE, ...) {
  UseMethod("db_save_query")
}

# Transaction management generics: used together by copy_to().
#' @name backend_dbplyr
#' @export
db_begin <- function(con, ...) UseMethod("db_begin")
#' @name backend_dbplyr
#' @export
db_commit <- function(con, ...) UseMethod("db_commit")
#' @name backend_dbplyr
#' @export
db_rollback <- function(con, ...) UseMethod("db_rollback")

#' @name backend_dbplyr
#' @export
db_write_table <- function(con, table, types, values, temporary = FALSE, ...)
{
  UseMethod("db_write_table")
}

# Deprecated in favour of db_write_table().
#' @name backend_dbplyr
#' @export
db_create_table <- function(con, table, types, temporary = FALSE, ...) {
  UseMethod("db_create_table")
}
# Deprecated in favour of db_write_table().
#' @name backend_dbplyr
#' @export
db_insert_into <- function(con, table, values, ...) {
  UseMethod("db_insert_into")
}

#' @name backend_dbplyr
#' @export
db_create_indexes <- function(con, table, indexes = NULL, unique = FALSE, ...) {
  UseMethod("db_create_indexes")
}
#' @name backend_dbplyr
#' @export
db_create_index <- function(con, table, columns, name = NULL, unique = FALSE, ...) {
  UseMethod("db_create_index")
}

#' @name backend_dbplyr
#' @export
db_drop_table <- function(con, table, force = FALSE, ...) {
  UseMethod("db_drop_table")
}
#' @name backend_dbplyr
#' @export
db_analyze <- function(con, table, ...) UseMethod("db_analyze")

#' @export
#' @rdname backend_dbplyr
db_explain <- function(con, sql, ...) {
  UseMethod("db_explain")
}
#' @rdname backend_dbplyr
#' @export
db_query_fields <- function(con, sql, ...) {
  UseMethod("db_query_fields")
}
#' @rdname backend_dbplyr
#' @export
db_query_rows <- function(con, sql, ...) {
  UseMethod("db_query_rows")
}

# SQL generation generics: each builds one clause/query kind for a backend.
#' @rdname backend_dbplyr
#' @export
sql_select <- function(con, select, from, where = NULL, group_by = NULL,
                       having = NULL, order_by = NULL, limit = NULL,
                       distinct = FALSE, ...) {
  UseMethod("sql_select")
}
#' @export
#' @rdname backend_dbplyr
sql_subquery <- function(con, from, name = random_table_name(), ...) {
  UseMethod("sql_subquery")
}
#' @rdname backend_dbplyr
#' @export
sql_join <- function(con, x, y, vars, type = "inner", by = NULL, ...) {
  UseMethod("sql_join")
}
#' @rdname backend_dbplyr
#' @export
sql_semi_join <- function(con, x, y, anti = FALSE, by = NULL, ...)
{ UseMethod("sql_semi_join") } #' @rdname backend_dbplyr #' @export sql_set_op <- function(con, x, y, method) { UseMethod("sql_set_op") } #' @rdname backend_dbplyr #' @export sql_escape_string <- function(con, x) UseMethod("sql_escape_string") #' @rdname backend_dbplyr #' @export sql_escape_ident <- function(con, x) UseMethod("sql_escape_ident") dplyr/R/group_nest.R0000644000176200001440000000452314151641776014135 0ustar liggesusers group_nest_impl <- function(.tbl, .key, keep = FALSE){ mutate(group_keys(.tbl), !!.key := group_split(.tbl, .keep = keep)) } #' Nest a tibble using a grouping specification #' #' @description #' `r lifecycle::badge("experimental")` #' #' Nest a tibble using a grouping specification #' #' @section Grouped data frames: #' #' The primary use case for [group_nest()] is with already grouped data frames, #' typically a result of [group_by()]. In this case [group_nest()] only uses #' the first argument, the grouped tibble, and warns when `...` is used. #' #' @section Ungrouped data frames: #' #' When used on ungrouped data frames, [group_nest()] forwards the `...` to #' [group_by()] before nesting, therefore the `...` are subject to the data mask. #' #' @param .tbl A tbl #' @param ... Grouping specification, forwarded to [group_by()] #' @param .key the name of the list column #' @param keep Should the grouping columns be kept in the list column. #' @return A tbl with one row per unique combination of the grouping variables. #' The first columns are the grouping variables, followed by a list column of tibbles #' with matching rows of the remaining columns. 
#' @keywords internal #' @family grouping functions #' @export #' @examples #' #' #----- use case 1: a grouped data frame #' iris %>% #' group_by(Species) %>% #' group_nest() #' #' # this can be useful if the grouped data has been altered before nesting #' iris %>% #' group_by(Species) %>% #' filter(Sepal.Length > mean(Sepal.Length)) %>% #' group_nest() #' #' #----- use case 2: using group_nest() on a ungrouped data frame with #' # a grouping specification that uses the data mask #' starwars %>% #' group_nest(species, homeworld) group_nest <- function(.tbl, ..., .key = "data", keep = FALSE){ lifecycle::signal_stage("experimental", "group_nest()") UseMethod("group_nest") } #' @export group_nest.data.frame <- function(.tbl, ..., .key = "data", keep = FALSE) { if (dots_n(...)) { group_nest_impl(group_by(.tbl, ...), .key = .key, keep = keep) } else { tibble(!!.key := list(.tbl)) } } #' @export group_nest.grouped_df <- function(.tbl, ..., .key = "data", keep = FALSE) { if (dots_n(...)) { warn("... is ignored in group_nest(), please use group_by(..., .add = TRUE) %>% group_nest()") } group_nest_impl(.tbl, .key = .key, keep = keep) } dplyr/R/compat-dbplyr.R0000644000176200001440000000356514164534554014531 0ustar liggesusers#' dbplyr compatibility functions #' #' @description #' In dplyr 0.7.0, a number of database and SQL functions moved from dplyr to #' dbplyr. The generic functions stayed in dplyr (since there is no easy way #' to conditionally import a generic from different packages), but many other #' SQL and database helper functions moved. If you have written a backend, #' these functions generate the code you need to work with both dplyr 0.5.0 #' dplyr 0.7.0. 
#' #' @keywords internal #' @export #' @examplesIf requireNamespace("dbplyr", quietly = TRUE) #' wrap_dbplyr_obj("build_sql") #' wrap_dbplyr_obj("base_agg") check_dbplyr <- function() { check_installed("dbplyr", "to communicate with database backends.") } #' @export #' @rdname check_dbplyr wrap_dbplyr_obj <- function(obj_name) { # Silence R CMD check NOTE `UQ<-` <- NULL obj <- getExportedValue("dbplyr", obj_name) obj_sym <- sym(obj_name) dbplyr_sym <- call("::", quote(dbplyr), obj_sym) dplyr_sym <- call("::", quote(dplyr), obj_sym) if (is.function(obj)) { args <- formals() pass_on <- map(set_names(names(args)), sym) dbplyr_call <- expr((!!dbplyr_sym)(!!!pass_on)) dplyr_call <- expr((!!dplyr_sym)(!!!pass_on)) } else { args <- list() dbplyr_call <- dbplyr_sym dplyr_call <- dplyr_sym } body <- expr({ if (utils::packageVersion("dplyr") > "0.5.0") { dplyr::check_dbplyr() !!dbplyr_call } else { !!dplyr_call } }) wrapper <- new_function(args, body, caller_env()) expr(!!obj_sym <- !!get_expr(wrapper)) } utils::globalVariables("!<-") #' @inherit dbplyr::sql #' @export sql <- function(...) { check_dbplyr() dbplyr::sql(...) } #' @inherit dbplyr::ident #' @export #' @examples #' # Identifiers are escaped with " #' @examplesIf requireNamespace("dbplyr", quietly = TRUE) #' ident("x") ident <- function(...) { check_dbplyr() dbplyr::ident(...) } dplyr/R/join.r0000644000176200001440000003612714151641776012754 0ustar liggesusers#' Mutating joins #' #' @description #' The mutating joins add columns from `y` to `x`, matching rows based on the #' keys: #' #' * `inner_join()`: includes all rows in `x` and `y`. #' * `left_join()`: includes all rows in `x`. #' * `right_join()`: includes all rows in `y`. #' * `full_join()`: includes all rows in `x` or `y`. #' #' If a row in `x` matches multiple rows in `y`, all the rows in `y` will be returned #' once for each matching row in `x`. #' #' @return #' An object of the same type as `x`. 
The order of the rows and columns of `x` #' is preserved as much as possible. The output has the following properties: #' #' * For `inner_join()`, a subset of `x` rows. #' For `left_join()`, all `x` rows. #' For `right_join()`, a subset of `x` rows, followed by unmatched `y` rows. #' For `full_join()`, all `x` rows, followed by unmatched `y` rows. #' * For all joins, rows will be duplicated if one or more rows in `x` matches #' multiple rows in `y`. #' * Output columns include all `x` columns and all `y` columns. If columns in #' `x` and `y` have the same name (and aren't included in `by`), `suffix`es are #' added to disambiguate. #' * Output columns included in `by` are coerced to common type across #' `x` and `y`. #' * Groups are taken from `x`. #' @section Methods: #' These functions are **generic**s, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' Methods available in currently loaded packages: #' #' * `inner_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. #' * `left_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. #' * `right_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. #' * `full_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. #' @param x,y A pair of data frames, data frame extensions (e.g. a tibble), or #' lazy data frames (e.g. from dbplyr or dtplyr). See *Methods*, below, for #' more details. #' @param by A character vector of variables to join by. #' #' If `NULL`, the default, `*_join()` will perform a natural join, using all #' variables in common across `x` and `y`. A message lists the variables so that you #' can check they're correct; suppress the message by supplying `by` explicitly. #' #' To join by different variables on `x` and `y`, use a named vector. 
#' For example, `by = c("a" = "b")` will match `x$a` to `y$b`. #' #' To join by multiple variables, use a vector with length > 1. #' For example, `by = c("a", "b")` will match `x$a` to `y$a` and `x$b` to #' `y$b`. Use a named vector to match different variables in `x` and `y`. #' For example, `by = c("a" = "b", "c" = "d")` will match `x$a` to `y$b` and #' `x$c` to `y$d`. #' #' To perform a cross-join, generating all combinations of `x` and `y`, #' use `by = character()`. #' @param copy If `x` and `y` are not from the same data source, #' and `copy` is `TRUE`, then `y` will be copied into the #' same src as `x`. This allows you to join tables across srcs, but #' it is a potentially expensive operation so you must opt into it. #' @param suffix If there are non-joined duplicate variables in `x` and #' `y`, these suffixes will be added to the output to disambiguate them. #' Should be a character vector of length 2. #' @param keep Should the join keys from both `x` and `y` be preserved in the #' output? #' @param ... Other parameters passed onto methods. #' @param na_matches Should `NA` and `NaN` values match one another? #' #' The default, `"na"`, treats two `NA` or `NaN` values as equal, like #' `%in%`, [match()], [merge()]. #' #' Use `"never"` to always treat two `NA` or `NaN` values as different, like #' joins for database sources, similarly to `merge(incomparables = FALSE)`. 
#' @family joins
#' @examples
#' band_members %>% inner_join(band_instruments)
#' band_members %>% left_join(band_instruments)
#' band_members %>% right_join(band_instruments)
#' band_members %>% full_join(band_instruments)
#'
#' # To suppress the message about joining variables, supply `by`
#' band_members %>% inner_join(band_instruments, by = "name")
#' # This is good practice in production code
#'
#' # Use a named `by` if the join variables have different names
#' band_members %>% full_join(band_instruments2, by = c("name" = "artist"))
#' # By default, the join keys from `x` and `y` are coalesced in the output; use
#' # `keep = TRUE` to keep the join keys from both `x` and `y`
#' band_members %>%
#'   full_join(band_instruments2, by = c("name" = "artist"), keep = TRUE)
#'
#' # If a row in `x` matches multiple rows in `y`, all the rows in `y` will be
#' # returned once for each matching row in `x`
#' df1 <- tibble(x = 1:3)
#' df2 <- tibble(x = c(1, 1, 2), y = c("first", "second", "third"))
#' df1 %>% left_join(df2)
#'
#' # By default, NAs match other NAs so that there are two
#' # rows in the output of this join:
#' df1 <- data.frame(x = c(1, NA), y = 2)
#' df2 <- data.frame(x = c(1, NA), z = 3)
#' left_join(df1, df2)
#'
#' # You can optionally request that NAs don't match, giving a
#' # result that more closely resembles SQL joins
#' left_join(df1, df2, na_matches = "never")
#' @aliases join join.data.frame
#' @name mutate-joins
NULL

#' @export
#' @rdname mutate-joins
inner_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                       keep = FALSE) {
  UseMethod("inner_join")
}

#' @export
#' @rdname mutate-joins
inner_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                                  keep = FALSE, na_matches = c("na", "never")) {
  # `copy = TRUE` brings `y` into the same src as `x` before joining.
  y <- auto_copy(x, y, copy = copy)
  join_mutate(x, y, by = by, type = "inner", suffix = suffix, na_matches = na_matches, keep = keep)
}

#' @export
#' @rdname mutate-joins
left_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                      keep = FALSE) {
  UseMethod("left_join")
}

#' @export
#' @rdname mutate-joins
left_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                                 keep = FALSE, na_matches = c("na", "never")) {
  y <- auto_copy(x, y, copy = copy)
  join_mutate(x, y, by = by, type = "left", suffix = suffix, na_matches = na_matches, keep = keep)
}

#' @export
#' @rdname mutate-joins
right_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                       keep = FALSE) {
  UseMethod("right_join")
}

#' @export
#' @rdname mutate-joins
right_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                                  keep = FALSE, na_matches = c("na", "never")) {
  y <- auto_copy(x, y, copy = copy)
  join_mutate(x, y, by = by, type = "right", suffix = suffix, na_matches = na_matches, keep = keep)
}

#' @export
#' @rdname mutate-joins
full_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                      keep = FALSE) {
  UseMethod("full_join")
}

#' @export
#' @rdname mutate-joins
full_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
                                 keep = FALSE, na_matches = c("na", "never")) {
  y <- auto_copy(x, y, copy = copy)
  join_mutate(x, y, by = by, type = "full", suffix = suffix, na_matches = na_matches, keep = keep)
}

#' Filtering joins
#'
#' @description
#' Filtering joins filter rows from `x` based on the presence or absence
#' of matches in `y`:
#'
#' * `semi_join()` returns all rows from `x` with a match in `y`.
#' * `anti_join()` returns all rows from `x` with**out** a match in `y`.
#'
#' @param x,y A pair of data frames, data frame extensions (e.g. a tibble), or
#'   lazy data frames (e.g. from dbplyr or dtplyr). See *Methods*, below, for
#'   more details.
#' @inheritParams left_join
#' @return
#' An object of the same type as `x`. The output has the following properties:
#'
#' * Rows are a subset of the input, but appear in the same order.
#' * Columns are not modified.
#' * Data frame attributes are preserved.
#' * Groups are taken from `x`. The number of groups may be reduced.
#' @section Methods:
#' These functions are **generic**s, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' Methods available in currently loaded packages:
#'
#' * `semi_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("semi_join")}.
#' * `anti_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("anti_join")}.
#' @family joins
#' @examples
#' # "Filtering" joins keep cases from the LHS
#' band_members %>% semi_join(band_instruments)
#' band_members %>% anti_join(band_instruments)
#'
#' # To suppress the message about joining variables, supply `by`
#' band_members %>% semi_join(band_instruments, by = "name")
#' # This is good practice in production code
#' @name filter-joins
NULL

#' @export
#' @rdname filter-joins
semi_join <- function(x, y, by = NULL, copy = FALSE, ...) {
  UseMethod("semi_join")
}

#' @export
#' @rdname filter-joins
semi_join.data.frame <- function(x, y, by = NULL, copy = FALSE, ...,
                                 na_matches = c("na", "never")) {
  # `copy = TRUE` brings `y` into the same src as `x` before joining.
  y <- auto_copy(x, y, copy = copy)
  join_filter(x, y, by = by, type = "semi", na_matches = na_matches)
}

#' @export
#' @rdname filter-joins
anti_join <- function(x, y, by = NULL, copy = FALSE, ...) {
  UseMethod("anti_join")
}

#' @export
#' @rdname filter-joins
anti_join.data.frame <- function(x, y, by = NULL, copy = FALSE, ...,
                                 na_matches = c("na", "never")) {
  y <- auto_copy(x, y, copy = copy)
  join_filter(x, y, by = by, type = "anti", na_matches = na_matches)
}

#' Nest join
#'
#' `nest_join()` returns all rows and columns in `x` with a new nested-df column
#' that contains all matches from `y`. When there is no match, the list column
#' is a 0-row tibble.
#'
#' In some sense, a `nest_join()` is the most fundamental join since you can
#' recreate the other joins from it:
#'
#' * `inner_join()` is a `nest_join()` plus [tidyr::unnest()]
#' * `left_join()` `nest_join()` plus `unnest(.drop = FALSE)`.
#' * `semi_join()` is a `nest_join()` plus a `filter()` where you check
#'   that every element of data has at least one row,
#' * `anti_join()` is a `nest_join()` plus a `filter()` where you check every
#'   element has zero rows.
#'
#' @param x,y A pair of data frames, data frame extensions (e.g. a tibble), or
#'   lazy data frames (e.g. from dbplyr or dtplyr). See *Methods*, below, for
#'   more details.
#' @param name The name of the list column nesting joins create.
#'   If `NULL` the name of `y` is used.
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_join")}.
#' @inheritParams left_join
#' @family joins
#' @export
#' @examples
#' band_members %>% nest_join(band_instruments)
nest_join <- function(x, y, by = NULL, copy = FALSE, keep = FALSE, name = NULL, ...) {
  UseMethod("nest_join")
}

#' @export
#' @rdname nest_join
nest_join.data.frame <- function(x, y, by = NULL, copy = FALSE, keep = FALSE, name = NULL, ...)
{
  # Default name of the list column: the expression supplied for `y`.
  name_var <- name %||% as_label(enexpr(y))

  # Empty suffixes: output columns keep their original names.
  vars <- join_cols(tbl_vars(x), tbl_vars(y), by = by, suffix = c("", ""), keep = keep)
  y <- auto_copy(x, y, copy = copy)

  x_in <- as_tibble(x, .name_repair = "minimal")
  y_in <- as_tibble(y, .name_repair = "minimal")

  x_key <- set_names(x_in[vars$x$key], names(vars$x$key))
  y_key <- set_names(y_in[vars$y$key], names(vars$y$key))

  # Group `y` rows by key once, then match each `x` key to a group location.
  y_split <- vec_group_loc(y_key)
  matches <- vec_match(x_key, y_split$key)
  y_loc <- y_split$loc[matches]

  out <- set_names(x_in[vars$x$out], names(vars$x$out))

  # Modify all columns in one step so that we only need to re-group once
  # Currently, this regroups too often, because it looks like we're always
  # changing the key vars because of the cast
  new_cols <- vec_cast(out[names(x_key)], vec_ptype2(x_key, y_key))

  y_out <- set_names(y_in[vars$y$out], names(vars$y$out))
  # One list element per `x` row: the matching slice of `y` (0 rows if no match).
  new_cols[[name_var]] <- map(y_loc, vec_slice, x = y_out)

  out <- dplyr_col_modify(out, new_cols)
  dplyr_reconstruct(out, x)
}

# helpers -----------------------------------------------------------------

# Shared implementation of the four mutating joins (inner/left/right/full).
# `type` selects the join semantics via join_rows().
join_mutate <- function(x, y, by, type,
                        suffix = c(".x", ".y"),
                        na_matches = c("na", "never"),
                        keep = FALSE,
                        error_call = caller_env()
                        ) {
  vars <- join_cols(tbl_vars(x), tbl_vars(y), by = by, suffix = suffix, keep = keep, error_call = error_call)
  na_equal <- check_na_matches(na_matches)

  x_in <- as_tibble(x, .name_repair = "minimal")
  y_in <- as_tibble(y, .name_repair = "minimal")

  x_key <- set_names(x_in[vars$x$key], names(vars$x$key))
  y_key <- set_names(y_in[vars$y$key], names(vars$y$key))

  rows <- join_rows(x_key, y_key, type = type, na_equal = na_equal, error_call = error_call)

  x_out <- set_names(x_in[vars$x$out], names(vars$x$out))
  y_out <- set_names(y_in[vars$y$out], names(vars$y$out))

  # `rows$y_extra` holds unmatched `y` rows (right/full joins); they are
  # appended after the matched rows, with NA slots on the `x` side.
  if (length(rows$y_extra) > 0L) {
    x_slicer <- c(rows$x, rep_along(rows$y_extra, NA_integer_))
    y_slicer <- c(rows$y, rows$y_extra)
  } else {
    x_slicer <- rows$x
    y_slicer <- rows$y
  }

  out <- vec_slice(x_out, x_slicer)
  out[names(y_out)] <- vec_slice(y_out, y_slicer)

  if (!keep) {
    # Keys are coalesced: cast to the common type, then fill the appended
    # rows' key columns from the unmatched `y` keys.
    key_type <- vec_ptype_common(x_key, y_key)
    out[names(x_key)] <- vec_cast(out[names(x_key)], key_type)

    if (length(rows$y_extra) > 0L) {
      new_rows <- length(rows$x) + seq_along(rows$y_extra)
      out[new_rows, names(y_key)] <- vec_cast(vec_slice(y_key, rows$y_extra), key_type)
    }
  }

  dplyr_reconstruct(out, x)
}

# Shared implementation of the filtering joins (semi/anti): keeps or drops
# `x` rows based on key membership in `y`, without adding columns.
join_filter <- function(x, y, by = NULL, type, na_matches = c("na", "never"), error_call = caller_env()) {
  vars <- join_cols(tbl_vars(x), tbl_vars(y), by = by, error_call = error_call)
  na_equal <- check_na_matches(na_matches)

  x_in <- as_tibble(x, .name_repair = "minimal")
  y_in <- as_tibble(y, .name_repair = "minimal")

  x_key <- set_names(x_in[vars$x$key], names(vars$x$key))
  y_key <- set_names(y_in[vars$y$key], names(vars$y$key))

  idx <- switch(type,
    semi = vec_in(x_key, y_key, na_equal = na_equal),
    anti = !vec_in(x_key, y_key, na_equal = na_equal)
  )

  # With na_equal = FALSE, vec_in() can return NA; resolve it per join type
  # (semi drops NA keys, anti keeps them).
  if (!na_equal) {
    idx <- switch(type,
      semi = idx & !is.na(idx),
      anti = idx | is.na(idx)
    )
  }

  dplyr_row_slice(x, idx)
}

# Validate `na_matches` and return TRUE when NAs should match each other.
# Also warns about the retired pkgconfig-based global setting.
check_na_matches <- function(na_matches = c("na", "never")) {
  if (isNamespaceLoaded("pkgconfig")) {
    conf <- asNamespace("pkgconfig")$get_config("dplyr::na_matches")
    if (!is.null(conf)) {
      warn(c(
        "`dplyr::na_matches` pkgconfig options is now ignored.",
        "Please set `na_matches` directly."
)) } } arg_match(na_matches) == "na" } dplyr/R/vctrs.R0000644000176200001440000000115314154653101013071 0ustar liggesuserscommon_handler <- function(name){ function(cnd) { bullets <- c( glue("`{name}` must return compatible vectors across groups."), i = cnd_bullet_combine_details(cnd$x, cnd$x_arg), i = cnd_bullet_combine_details(cnd$y, cnd$y_arg) ) abort(bullets, class = "dplyr:::error_incompatible_combine") } } dplyr_vec_cast_common <- function(chunks, name) { withCallingHandlers( vec_cast_common(!!!chunks), error = common_handler(name) ) } dplyr_vec_ptype_common <- function(chunks, name) { withCallingHandlers( vec_ptype_common(!!!chunks), error = common_handler(name) ) } dplyr/R/deprec-tidyselect.R0000644000176200001440000000216214151641776015356 0ustar liggesusers#' Select variables #' #' @description #' `r lifecycle::badge("deprecated")` #' #' These functions now live in the tidyselect package as #' [tidyselect::vars_select()], [tidyselect::vars_rename()] and #' [tidyselect::vars_pull()]. #' #' @keywords internal #' @export select_vars <- function(vars = chr(), ..., include = chr(), exclude = chr()) { lifecycle::deprecate_warn("0.8.4", "select_vars()", "tidyselect::vars_select()") tidyselect::vars_select(.vars = vars, ..., .include = include, .exclude = exclude) } #' @rdname select_vars #' @export rename_vars <- function(vars = chr(), ..., strict = TRUE) { lifecycle::deprecate_warn("0.8.4", "rename_vars()", "tidyselect::vars_rename()") tidyselect::vars_rename(.vars = vars, ..., .strict = strict) } #' @rdname select_vars #' @export select_var <- function(vars, var = -1) { lifecycle::deprecate_warn("0.8.4", "select_var()", "tidyselect::vars_pull()") tidyselect::vars_pull(vars, !!enquo(var)) } #' @rdname select_vars #' @export current_vars <- function(...) { lifecycle::deprecate_warn("0.8.4", "current_vars()", "tidyselect::peek_vars()") tidyselect::peek_vars(...) 
} dplyr/R/deprec-tibble.R0000644000176200001440000000230214151641776014442 0ustar liggesusers#' Coerce to a tibble #' #' `r lifecycle::badge("deprecated")` #' Please use [tibble::as_tibble()] instead. #' #' @export #' @keywords internal #' @param data,x Object to coerce tbl_df <- function(data) { lifecycle::deprecate_warn("1.0.0", "tbl_df()", "tibble::as_tibble()") # Works in tibble < 1.5.0 too, because .name_repair will be # swallowed by the ellipsis as_tibble(data, .name_repair = "check_unique") } #' @export #' @rdname tbl_df as.tbl <- function(x, ...) { lifecycle::deprecate_warn("1.0.0", "as.tbl()", "tibble::as_tibble()") UseMethod("as.tbl") } #' @export as.tbl.tbl <- function(x, ...) x #' @export as.tbl.data.frame <- function(x, ...) { as_tibble(x) } #' Convert row names to an explicit variable. #' #' `r lifecycle::badge("deprecated")` #' Please use [tibble::rownames_to_column()] instead. #' #' @param df Input data frame with rownames. #' @param var Name of variable to use #' @keywords internal #' @export add_rownames <- function(df, var = "rowname") { lifecycle::deprecate_warn("1.0.0", "add_rownames()", "tibble::rownames_to_column()") stopifnot(is.data.frame(df)) rn <- as_tibble(setNames(list(rownames(df)), var)) rownames(df) <- NULL bind_cols(rn, df) } dplyr/R/colwise-funs.R0000644000176200001440000000627214151641776014371 0ustar liggesusers as_fun_list <- function(.funs, .env, ..., .caller, .caller_arg = "...", error_call = caller_env()) { args <- list2(...) 
if (is_fun_list(.funs)) { if (!is_empty(args)) { .funs[] <- map(.funs, call_modify, !!!args) } return(.funs) } if (is_list(.funs) && length(.funs) > 1) { .funs <- auto_name_formulas(.funs) } if (!is_character(.funs) && !is_list(.funs)) { .funs <- list(.funs) } if(is_character(.funs) && is_null(names(.funs)) && length(.funs) != 1L) { names(.funs) <- .funs } funs <- map(.funs, function(.x){ if (is_formula(.x)) { if (is_quosure(.x)) { what <- paste0( "dplyr::", .caller, "(", .caller_arg, " = ", "'can\\'t contain quosures.')" ) lifecycle::deprecate_warn( "0.8.3", what, details = "Please use a one-sided formula, a function, or a function name.", env = .env ) .x <- new_formula(NULL, quo_squash(.x), quo_get_env(.x)) } .x <- as_inlined_function(.x, env = .env) } else { if (is_character(.x)) { .x <- get(.x, .env, mode = "function") } else if (!is_function(.x)) { msg <- "`.funs` must be a one sided formula, a function, or a function name." abort(msg, call = error_call) } if (length(args)) { .x <- new_quosure( call2(.x, quote(.), !!!args), env = .env ) } } .x }) attr(funs, "have_name") <- any(names2(funs) != "") funs } auto_name_formulas <- function(funs) { where <- !have_name(funs) & map_lgl(funs, function(x) is_bare_formula(x) && is_call(f_rhs(x))) names(funs)[where] <- map_chr(funs[where], function(x) as_label(f_rhs(x)[[1]])) funs } as_fun <- function(.x, .env, .args, error_call = caller_env()) { quo <- as_quosure(.x, .env) # For legacy reasons, we support strings. Those are enclosed in the # empty environment and need to be switched to the caller environment. 
quo <- quo_set_env(quo, fun_env(quo, .env)) expr <- quo_get_expr(quo) if (is_call(expr, c("function", "~"))) { top_level <- as_string(expr[[1]]) msg <- glue("`{quo_text(expr)}` must be a function name (quoted or unquoted) or an unquoted call, not `{top_level}`.") abort(msg, call = error_call) } if (is_call(expr) && !is_call(expr, c("::", ":::"))) { expr <- call_modify(expr, !!!.args) } else { expr <- call2(expr, quote(.), !!!.args) } set_expr(quo, expr) } quo_as_function <- function(quo) { new_function(exprs(. = ), quo_get_expr(quo), quo_get_env(quo)) } fun_env <- function(quo, default_env) { env <- quo_get_env(quo) if (is_null(env) || identical(env, empty_env())) { default_env } else { env } } is_fun_list <- function(x) { inherits(x, "fun_list") } #' @export `[.fun_list` <- function(x, i) { structure(NextMethod(), class = "fun_list", has_names = attr(x, "has_names") ) } #' @export print.fun_list <- function(x, ..., width = getOption("width")) { cat("\n") names <- format(names(x)) code <- map_chr(x, function(x) deparse_trunc(quo_get_expr(x), width - 2 - nchar(names[1]))) cat(paste0("$ ", names, ": ", code, collapse = "\n")) cat("\n") invisible(x) } dplyr/R/group_map.R0000644000176200001440000001502214151641776013735 0ustar liggesusers as_group_map_function <- function(.f, error_call = caller_env()) { .f <- rlang::as_function(.f) if (length(form <- formals(.f)) < 2 && ! "..." %in% names(form)){ bullets <- c( "`.f` must accept at least two arguments.", i = "You can use `...` to absorb unused components." ) abort(bullets, call = error_call) } .f } #' Apply a function to each group #' #' @description #' `r lifecycle::badge("experimental")` #' #' `group_map()`, `group_modify()` and `group_walk()` are purrr-style functions that can #' be used to iterate on grouped tibbles. #' #' @details #' Use `group_modify()` when `summarize()` is too limited, in terms of what you need #' to do and return for each group. `group_modify()` is good for "data frame in, data frame out". 
#' If that is too limited, you need to use a [nested][group_nest()] or [split][group_split()] workflow. #' `group_modify()` is an evolution of [do()], if you have used that before. #' #' Each conceptual group of the data frame is exposed to the function `.f` with two pieces of information: #' #' - The subset of the data for the group, exposed as `.x`. #' - The key, a tibble with exactly one row and columns for each grouping variable, exposed as `.y`. #' #' For completeness, `group_modify()`, `group_map` and `group_walk()` also work on #' ungrouped data frames, in that case the function is applied to the #' entire data frame (exposed as `.x`), and `.y` is a one row tibble with no #' column, consistently with [group_keys()]. #' #' @family grouping functions #' #' @param .data A grouped tibble #' @param .f A function or formula to apply to each group. #' #' If a __function__, it is used as is. It should have at least 2 formal arguments. #' #' If a __formula__, e.g. `~ head(.x)`, it is converted to a function. #' #' In the formula, you can use #' #' - `.` or `.x` to refer to the subset of rows of `.tbl` #' for the given group #' #' - `.y` to refer to the key, a one row tibble with one column per grouping variable #' that identifies the group #' #' @param ... Additional arguments passed on to `.f` #' @param .keep are the grouping variables kept in `.x` #' #' @return #' - `group_modify()` returns a grouped tibble. In that case `.f` must return a data frame. #' - `group_map()` returns a list of results from calling `.f` on each group. #' - `group_walk()` calls `.f` for side effects and returns the input `.tbl`, invisibly. 
#' #' @examples #' #' # return a list #' mtcars %>% #' group_by(cyl) %>% #' group_map(~ head(.x, 2L)) #' #' # return a tibble grouped by `cyl` with 2 rows per group #' # the grouping data is recalculated #' mtcars %>% #' group_by(cyl) %>% #' group_modify(~ head(.x, 2L)) #' @examplesIf requireNamespace("broom", quietly = TRUE) #' #' # a list of tibbles #' iris %>% #' group_by(Species) %>% #' group_map(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) #' #' # a restructured grouped tibble #' iris %>% #' group_by(Species) %>% #' group_modify(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) #' @examples #' #' # a list of vectors #' iris %>% #' group_by(Species) %>% #' group_map(~ quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75))) #' #' # to use group_modify() the lambda must return a data frame #' iris %>% #' group_by(Species) %>% #' group_modify(~ { #' quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)) %>% #' tibble::enframe(name = "prob", value = "quantile") #' }) #' #' iris %>% #' group_by(Species) %>% #' group_modify(~ { #' .x %>% #' purrr::map_dfc(fivenum) %>% #' mutate(nms = c("min", "Q1", "median", "Q3", "max")) #' }) #' #' # group_walk() is for side effects #' dir.create(temp <- tempfile()) #' iris %>% #' group_by(Species) %>% #' group_walk(~ write.csv(.x, file = file.path(temp, paste0(.y$Species, ".csv")))) #' list.files(temp, pattern = "csv$") #' unlink(temp, recursive = TRUE) #' #' # group_modify() and ungrouped data frames #' mtcars %>% #' group_modify(~ head(.x, 2L)) #' #' @export group_map <- function(.data, .f, ..., .keep = FALSE) { lifecycle::signal_stage("experimental", "group_map()") UseMethod("group_map") } #' @export group_map.data.frame <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) { if (!missing(keep)) { lifecycle::deprecate_warn("1.0.0", "group_map(keep = )", "group_map(.keep = )") .keep <- keep } .f <- as_group_map_function(.f) # call the function on each group chunks <- if (is_grouped_df(.data)) { 
group_split(.data, .keep = isTRUE(.keep)) } else { group_split(.data) } keys <- group_keys(.data) group_keys <- map(seq_len(nrow(keys)), function(i) keys[i, , drop = FALSE]) if (length(chunks)) { map2(chunks, group_keys, .f, ...) } else { # calling .f with .x and .y set to prototypes structure(list(), ptype = .f(attr(chunks, "ptype"), keys[integer(0L), ], ...)) } } #' @rdname group_map #' @export group_modify <- function(.data, .f, ..., .keep = FALSE) { lifecycle::signal_stage("experimental", "group_map()") UseMethod("group_modify") } #' @export group_modify.data.frame <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) { if (!missing(keep)) { lifecycle::deprecate_warn("1.0.0", "group_modify(keep = )", "group_modify(.keep = )") .keep <- keep } .f <- as_group_map_function(.f) .f(.data, group_keys(.data), ...) } #' @export group_modify.grouped_df <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) { if (!missing(keep)) { lifecycle::deprecate_warn("1.0.0", "group_modify(keep = )", "group_modify(.keep = )") .keep <- keep } tbl_group_vars <- group_vars(.data) .f <- as_group_map_function(.f) error_call <- current_env() fun <- function(.x, .y){ res <- .f(.x, .y, ...) if (!inherits(res, "data.frame")) { abort("The result of `.f` must be a data frame.", call = error_call) } if (any(bad <- names(res) %in% tbl_group_vars)) { msg <- glue( "The returned data frame cannot contain the original grouping variables: {names}.", names = paste(names(res)[bad], collapse = ", ") ) abort(msg, call = error_call) } bind_cols(.y[rep(1L, nrow(res)), , drop = FALSE], res) } chunks <- group_map(.data, fun, .keep = .keep) res <- if (length(chunks) > 0L) { bind_rows(!!!chunks) } else { attr(chunks, "ptype") } grouped_df(res, group_vars(.data), group_by_drop_default(.data)) } #' @export #' @rdname group_map group_walk <- function(.data, .f, ...) { lifecycle::signal_stage("experimental", "group_walk()") group_map(.data, .f, ...) 
invisible(.data) } dplyr/R/utils.r0000644000176200001440000000576014154653101013140 0ustar liggesusers#' @importFrom magrittr %>% #' @export magrittr::`%>%` dots <- function(...) { eval_bare(substitute(alist(...))) } deparse_trunc <- function(x, width = getOption("width")) { text <- deparse(x, width.cutoff = width) if (length(text) == 1 && nchar(text) < width) return(text) paste0(substr(text[1], 1, width - 3), "...") } commas <- function(...) paste0(..., collapse = ", ") in_travis <- function() identical(Sys.getenv("TRAVIS"), "true") named <- function(...) { x <- c(...) missing_names <- names2(x) == "" names(x)[missing_names] <- x[missing_names] x } is_1d <- function(x) { # dimension check is for matrices and data.frames (is_atomic(x) || is.list(x)) && length(dim(x)) <= 1 } random_table_name <- function(n = 10) { paste0(sample(letters, n, replace = TRUE), collapse = "") } unstructure <- function(x) { attributes(x) <- NULL x } compact_null <- function(x) { Filter(function(elt) !is.null(elt), x) } paste_line <- function(...) { paste(chr(...), collapse = "\n") } # Until fixed upstream. `vec_data()` should not return lists from data # frames. 
dplyr_vec_data <- function(x) {
  # Underlying vctrs data of `x`. `vec_data()` returns a bare list for data
  # frames (see comment above), so rewrap the result as a bare data frame
  # with the original number of rows.
  out <- vec_data(x)

  if (is.data.frame(x)) {
    new_data_frame(out, n = nrow(x))
  } else {
    out
  }
}

# Until vctrs::new_data_frame() forwards row names automatically
dplyr_new_data_frame <- function(x = data.frame(),
                                 n = NULL,
                                 ...,
                                 row.names = NULL,
                                 class = NULL) {
  # Default the row names to those of `x`; `.row_names_info(, type = 0L)`
  # returns them in internal (possibly compact) form, or NULL if absent.
  row.names <- row.names %||% .row_names_info(x, type = 0L)

  new_data_frame(
    x,
    n = n,
    ...,
    row.names = row.names,
    class = class
  )
}

maybe_restart <- function(restart) {
  # Invoke `restart` only when such a restart is currently established;
  # otherwise do nothing (a bare invokeRestart() would error).
  if (!is_null(findRestart(restart))) {
    invokeRestart(restart)
  }
}

expr_substitute <- function(expr, old, new) {
  # Substitute each occurrence of the symbol `old` with `new` inside `expr`.
  # Operates on a duplicate so the caller's expression is never mutated
  # in place.
  expr <- duplicate(expr)
  switch(typeof(expr),
    language = node_walk_replace(node_cdr(expr), old, new),
    symbol = if (identical(expr, old)) return(new)
  )
  expr
}
node_walk_replace <- function(node, old, new) {
  # Walk a pairlist of call arguments, poking `new` over matching `old`
  # symbols. Recursion deliberately skips anonymous functions and one-sided
  # formulas, but does descend into two-sided formulas
  # (`is_call(x, "~", n = 2)`), which have their own `.`/`.x` semantics.
  while (!is_null(node)) {
    switch(typeof(node_car(node)),
      language = if (!is_call(node_car(node), c("~", "function")) || is_call(node_car(node), "~", n = 2)) node_walk_replace(node_cdar(node), old, new),
      symbol = if (identical(node_car(node), old)) node_poke_car(node, new)
    )
    node <- node_cdr(node)
  }
}

# temporary workaround until vctrs better reports error call
fix_call <- function(expr, call = caller_env()) {
  # Re-signal any error raised while evaluating `expr` with its `call` field
  # overridden, so users see the dplyr verb rather than an internal frame.
  withCallingHandlers(expr, error = function(cnd) {
    cnd$call <- call
    cnd_signal(cnd)
  })
}

# tidyselect creates chained errors
tidyselect_fix_call <- function(expr, call = caller_env()) {
  # Like fix_call(), but additionally drops the chained parent condition
  # that tidyselect attaches to its errors.
  withCallingHandlers(
    expr,
    error = function(cnd) {
      cnd$call <- call
      cnd$parent <- NULL
      cnd_signal(cnd)
    })
}

# Backports for R 3.5.0 utils
...length2 <- function(frame = caller_env()) {
  # Number of arguments in the caller's `...` (backport of base ...length()).
  length(env_get(frame, "..."))
}
...elt2 <- function(i, frame = caller_env()) {
  # The i-th element of the caller's `...` (backport of base ...elt()),
  # fetched by evaluating the `..i` pronoun in the calling frame.
  eval_bare(sym(paste0("..", i)), frame)
}
dplyr/R/rename.R0000644000176200001440000000443514121112104013170 0ustar liggesusers#' Rename columns
#'
#' `rename()` changes the names of individual variables using
#' `new_name = old_name` syntax; `rename_with()` renames columns using a
#' function.
#'
#' @inheritParams arrange
#' @param ...
#'   For `rename()`: <[`tidy-select`][dplyr_tidy_select]> Use
#'   `new_name = old_name` to rename selected variables.
#'
#'   For `rename_with()`: additional arguments passed onto `.fn`.
#' @return
#' An object of the same type as `.data`. The output has the following
#' properties:
#'
#' * Rows are not affected.
#' * Column names are changed; column order is preserved.
#' * Data frame attributes are preserved.
#' * Groups are updated to reflect new names.
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}.
#' @family single table verbs
#' @export
#' @examples
#' iris <- as_tibble(iris) # so it prints a little nicer
#' rename(iris, petal_length = Petal.Length)
#'
#' rename_with(iris, toupper)
#' rename_with(iris, toupper, starts_with("Petal"))
#' rename_with(iris, ~ tolower(gsub(".", "_", .x, fixed = TRUE)))
#' @export
rename <- function(.data, ...) {
  UseMethod("rename")
}

#' @export
rename.data.frame <- function(.data, ...) {
  # Map of new positions to new names for the renamed columns only.
  loc <- tidyselect::eval_rename(expr(c(...)), .data)

  # eval_rename() only returns changes
  names <- names(.data)
  names[loc] <- names(loc)

  set_names(.data, names)
}

#' @export
#' @rdname rename
#' @param .fn A function used to transform the selected `.cols`. Should
#'   return a character vector the same length as the input.
#' @param .cols <[`tidy-select`][dplyr_tidy_select]> Columns to rename;
#'   defaults to all columns.
rename_with <- function(.data, .fn, .cols = everything(), ...) {
  UseMethod("rename_with")
}

#' @export
rename_with.data.frame <- function(.data, .fn, .cols = everything(), ...) {
  .fn <- as_function(.fn)
  cols <- tidyselect::eval_select(enquo(.cols), .data)

  names <- names(.data)
  new_names <- .fn(names[cols], ...)

  # `.fn` must honour its documented contract: a character vector with one
  # element per selected column. Without this check a wrong-length result
  # would silently recycle into `names[cols] <-`, or a non-character result
  # would surface later as a confusing `vec_as_names()` error.
  if (!is.character(new_names) || length(new_names) != length(cols)) {
    msg <- glue("`.fn` must return a character vector as long as its input ({length(cols)}), not {friendly_type_of(new_names)} of length {length(new_names)}.")
    abort(msg)
  }

  names[cols] <- new_names
  # Reject duplicate or empty names produced by `.fn`.
  names <- vec_as_names(names, repair = "check_unique")
  set_names(.data, names)
}
dplyr/R/group_data.R0000644000176200001440000000756014151641776014075 0ustar liggesusers#' Grouping metadata
#'
#' @description
#' * `group_data()` returns a data frame that defines the grouping structure.
#'   The columns give the values of the grouping variables. The last column,
#'   always called `.rows`, is a list of integer vectors that gives the
#'   location of the rows in each group. You can retrieve just the grouping
#'   data with `group_keys()`, and just the locations with `group_rows()`.
#'
#' * `group_indices()` returns an integer vector the same length as `.data`
#'   that gives the group that each row belongs to (cf. `group_rows()` which
#'   returns the rows which each group contains). `group_indices()` with no
#'   argument is deprecated, superseded by [cur_group_id()].
#'
#' * `group_vars()` gives names of grouping variables as character vector;
#'   `groups()` gives the names as a list of symbols.
#'
#' * `group_size()` gives the size of each group, and `n_groups()` gives the
#'   total number of groups.
#'
#' See [context] for equivalent functions that return values for the _current_
#' group.
#' @param .data,.tbl,x A data frame or extension (like a tibble or grouped
#'   tibble).
#' @param ... Use of `...` is now deprecated; please use `group_by()` first
#'   instead.
#' @keywords internal #' @examples #' df <- tibble(x = c(1,1,2,2)) #' group_vars(df) #' group_rows(df) #' group_data(df) #' group_indices(df) #' #' gf <- group_by(df, x) #' group_vars(gf) #' group_rows(gf) #' group_data(gf) #' group_indices(gf) #' @export group_data <- function(.data) { UseMethod("group_data") } #' @export group_data.data.frame <- function(.data) { rows <- new_list_of(list(seq_len(nrow(.data))), ptype = integer()) new_data_frame(list(.rows = rows), n = 1L) } #' @export group_data.tbl_df <- function(.data) { as_tibble(NextMethod()) } #' @export group_data.rowwise_df <- function(.data) { attr(.data, "groups") } #' @export group_data.grouped_df <- function(.data) { error_call <- current_env() withCallingHandlers( validate_grouped_df(.data), error = function(cnd) { msg <- glue("`.data` must be a valid object.") abort(msg, parent = cnd, call = error_call) } ) attr(.data, "groups") } # ------------------------------------------------------------------------- #' @rdname group_data #' @export group_keys <- function(.tbl, ...) { UseMethod("group_keys") } #' @export group_keys.data.frame <- function(.tbl, ...) { if (dots_n(...) > 0) { lifecycle::deprecate_warn( "1.0.0", "group_keys(... = )", details = "Please `group_by()` first" ) .tbl <- group_by(.tbl, ...) } out <- group_data(.tbl) .Call(`dplyr_group_keys`, out) } #' @rdname group_data #' @export group_rows <- function(.data) { group_data(.data)[[".rows"]] } #' @export #' @rdname group_data group_indices <- function(.data, ...) { if (nargs() == 0) { lifecycle::deprecate_warn("1.0.0", "group_indices()", "cur_group_id()") return(cur_group_id()) } UseMethod("group_indices") } #' @export group_indices.data.frame <- function(.data, ...) { if (dots_n(...) > 0) { lifecycle::deprecate_warn( "1.0.0", "group_indices(... = )", details = "Please `group_by()` first" ) .data <- group_by(.data, ...) 
} .Call(`dplyr_group_indices`, .data, group_rows(.data)) } #' @export #' @rdname group_data group_vars <- function(x) { UseMethod("group_vars") } #' @export group_vars.data.frame <- function(x) { setdiff(names(group_data(x)), ".rows") } #' @export #' @rdname group_data groups <- function(x) { UseMethod("groups") } #' @export groups.data.frame <- function(x) { syms(group_vars(x)) } #' @export #' @rdname group_data group_size <- function(x) UseMethod("group_size") #' @export group_size.data.frame <- function(x) { lengths(group_rows(x)) } #' @export #' @rdname group_data n_groups <- function(x) UseMethod("n_groups") #' @export n_groups.data.frame <- function(x) { nrow(group_data(x)) } dplyr/R/bind.r0000644000176200001440000001173414151641776012726 0ustar liggesusers#' Efficiently bind multiple data frames by row and column #' #' This is an efficient implementation of the common pattern of #' `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many #' data frames into one. #' #' The output of `bind_rows()` will contain a column if that column #' appears in any of the inputs. #' #' @param ... Data frames to combine. #' #' Each argument can either be a data frame, a list that could be a data #' frame, or a list of data frames. #' #' When row-binding, columns are matched by name, and any missing #' columns will be filled with NA. #' #' When column-binding, rows are matched by position, so all data #' frames must have the same number of rows. To match by value, not #' position, see [mutate-joins]. #' @param .id Data frame identifier. #' #' When `.id` is supplied, a new column of identifiers is #' created to link each row to its original data frame. The labels #' are taken from the named arguments to `bind_rows()`. When a #' list of data frames is supplied, the labels are taken from the #' names of the list. If no names are found a numeric sequence is #' used instead. #' @param .name_repair One of `"unique"`, `"universal"`, or #' `"check_unique"`. 
See [vctrs::vec_as_names()] for the meaning of these #' options. #' @return `bind_rows()` and `bind_cols()` return the same type as #' the first input, either a data frame, `tbl_df`, or `grouped_df`. #' @examples #' one <- starwars[1:4, ] #' two <- starwars[9:12, ] #' #' # You can supply data frames as arguments: #' bind_rows(one, two) #' #' # The contents of lists are spliced automatically: #' bind_rows(list(one, two)) #' bind_rows(split(starwars, starwars$homeworld)) #' bind_rows(list(one, two), list(two, one)) #' #' #' # In addition to data frames, you can supply vectors. In the rows #' # direction, the vectors represent rows and should have inner #' # names: #' bind_rows( #' c(a = 1, b = 2), #' c(a = 3, b = 4) #' ) #' #' # You can mix vectors and data frames: #' bind_rows( #' c(a = 1, b = 2), #' tibble(a = 3:4, b = 5:6), #' c(a = 7, b = 8) #' ) #' #' #' # When you supply a column name with the `.id` argument, a new #' # column is created to link each row to its original data frame #' bind_rows(list(one, two), .id = "id") #' bind_rows(list(a = one, b = two), .id = "id") #' bind_rows("group 1" = one, "group 2" = two, .id = "groups") #' #' # Columns don't need to match when row-binding #' bind_rows(tibble(x = 1:3), tibble(y = 1:4)) #' #' # Row sizes must be compatible when column-binding #' try(bind_cols(tibble(x = 1:3), tibble(y = 1:2))) #' #' # Even with 0 columns #' try(bind_cols(tibble(x = 1:3), tibble())) #' #' bind_cols(one, two) #' bind_cols(list(one, two)) #' @name bind NULL #' @export #' @rdname bind bind_rows <- function(..., .id = NULL) { dots <- list2(...) 
# bind_rows() has weird legacy squashing behaviour is_flattenable <- function(x) vec_is_list(x) && !is_named(x) if (length(dots) == 1 && is_bare_list(dots[[1]])) { dots <- dots[[1]] } dots <- flatten_if(dots, is_flattenable) dots <- discard(dots, is.null) if (is_named(dots) && !all(map_lgl(dots, dataframe_ish))) { # This is hit by map_dfr() so we can't easily deprecate return(as_tibble(dots)) } for (i in seq_along(dots)) { .x <- dots[[i]] if (!is.data.frame(.x) && !vec_is(.x)) { msg <- glue("Argument {i} must be a data frame or a named atomic vector.") abort(msg) } if (is.null(names(.x))) { msg <- glue("Argument {i} must have names.") abort(msg) } } if (!is_null(.id)) { if (!is_string(.id)) { msg <- glue("`.id` must be a scalar string, not {friendly_type_of(.id)} of length {length(.id)}.") abort(msg) } if (!is_named(dots)) { names(dots) <- seq_along(dots) } } if (!length(dots)) { return(tibble()) } first <- dots[[1L]] dots <- map(dots, function(.x) { if (vec_is_list(.x)) { .x <- vctrs::data_frame(!!!.x, .name_repair = "minimal") } .x }) if (is.null(.id)) { names(dots) <- NULL } out <- fix_call(vec_rbind(!!!dots, .names_to = .id)) if (length(dots)) { if (is.data.frame(first)) { out <- dplyr_reconstruct(out, first) } else { out <- as_tibble(out) } } out } #' @export #' @rdname bind bind_cols <- function(..., .name_repair = c("unique", "universal", "check_unique", "minimal")) { dots <- list2(...) 
dots <- squash_if(dots, vec_is_list) dots <- discard(dots, is.null) # Strip names off of data frame components so that vec_cbind() unpacks them names2(dots)[map_lgl(dots, is.data.frame)] <- "" out <- fix_call(vec_cbind(!!!dots, .name_repair = .name_repair)) if (!any(map_lgl(dots, is.data.frame))) { out <- as_tibble(out) } if (length(dots) && is.data.frame(first <- dots[[1L]])) { out <- dplyr_reconstruct(out, first) } out } # helpers ----------------------------------------------------------------- dataframe_ish <- function(.x) { is.data.frame(.x) || (vec_is(.x) && is_named(.x)) } dplyr/R/rbind.R0000644000176200001440000000221714121112104013013 0ustar liggesuserslist_or_dots <- function(...) { dots <- list2(...) if (!length(dots)) { return(dots) } # Old versions specified that first argument could be a list of # dataframeable objects if (is_list(dots[[1]])) { dots[[1]] <- map_if(dots[[1]], is_dataframe_like, as_tibble) } # Need to ensure that each component is a data frame or a vector # wrapped in a list: dots <- map_if(dots, is_dataframe_like, function(x) list(as_tibble(x))) dots <- map_if(dots, is_atomic, list) dots <- map_if(dots, is.data.frame, list) unlist(dots, recursive = FALSE) } is_dataframe_like <- function(x) { if (is_null(x)) { return(FALSE) } # data frames are not data lists if (is.data.frame(x)) { return(FALSE) } # Must be a list if (!is_list(x)) { return(FALSE) } # 0 length named list (#1515) if (!is_null(names(x)) && length(x) == 0) { return(TRUE) } # With names if (!is_named(x)) { return(FALSE) } # Where each element is an 1d vector or list if (!every(x, is_1d)) { return(FALSE) } # All of which have the same length n <- lengths(x) if (any(n != n[1])) { return(FALSE) } TRUE } dplyr/R/if_else.R0000644000176200001440000000331014174551640013342 0ustar liggesusers#' Vectorised if #' #' Compared to the base [ifelse()], this function is more strict. #' It checks that `true` and `false` are the same type. 
This
#' strictness makes the output type more predictable, and makes it somewhat
#' faster.
#'
#' @param condition Logical vector
#' @param true,false Values to use for `TRUE` and `FALSE` values of
#'   `condition`. They must be either the same length as `condition`,
#'   or length 1. They must also be the same type: `if_else()` checks that
#'   they have the same type and same class. All other attributes are
#'   taken from `true`.
#' @param missing If not `NULL`, will be used to replace missing
#'   values.
#' @return Where `condition` is `TRUE`, the matching value from
#'   `true`, where it's `FALSE`, the matching value from `false`,
#'   otherwise `NA`.
#' @export
#' @examples
#' x <- c(-5:5, NA)
#' if_else(x < 0, NA_integer_, x)
#' if_else(x < 0, "negative", "positive", "missing")
#'
#' # Unlike ifelse, if_else preserves types
#' x <- factor(sample(letters[1:5], 10, replace = TRUE))
#' ifelse(x %in% c("a", "b", "c"), x, factor(NA))
#' if_else(x %in% c("a", "b", "c"), x, factor(NA))
#' # Attributes are taken from the `true` vector,
if_else <- function(condition, true, false, missing = NULL) {
  # `condition` must already be logical: no coercion, so integer/character
  # conditions fail loudly instead of being silently reinterpreted.
  if (!is.logical(condition)) {
    msg <- glue("`condition` must be a logical vector, not {friendly_type_of(condition)}.")
    abort(msg)
  }

  # Prototype output: indexing `true` with NA yields a vector of missings the
  # same length as `condition` that keeps the type, class and attributes of
  # `true` (this is why attributes come from `true`, per the docs above).
  out <- true[rep(NA_integer_, length(condition))]
  out <- replace_with(out, condition       , true   , "`true`"   , "length of `condition`")
  out <- replace_with(out, !condition      , false  , "`false`"  , "length of `condition`")
  # NA conditions are filled from `missing`. NOTE(review): when `missing` is
  # NULL this presumably leaves those slots NA (replace_with() is defined in
  # another file) -- confirm against utils-replace-with.
  out <- replace_with(out, is.na(condition), missing, "`missing`", "length of `condition`")

  out
}
dplyr/R/select-helpers.R0000644000176200001440000000425514151641776014661 0ustar liggesusers#' Select grouping variables
#'
#' This selection helper matches grouping variables. It can be used
#' in [select()] or [vars()] selections.
#'
#' @param data For advanced use only. The default `NULL` automatically
#'   finds the "current" data frames.
#' @param vars Deprecated; please use data instead.
#' @seealso [groups()] and [group_vars()] for retrieving the grouping #' variables outside selection contexts. #' #' @examples #' gdf <- iris %>% group_by(Species) #' gdf %>% select(group_cols()) #' #' # Remove the grouping variables from mutate selections: #' gdf %>% mutate_at(vars(-group_cols()), `/`, 100) #' # -> No longer necessary with across() #' gdf %>% mutate(across(everything(), ~ . / 100)) #' @export group_cols <- function(vars = NULL, data = NULL) { # So group_cols() continues to work in _at() helpers. data <- data %||% tryCatch(tidyselect::peek_data(), error = function(e) NULL) if (!is.null(data)) { match(group_vars(data), tbl_vars(data)) } else { group_cols_legacy(vars) } } group_cols_legacy <- function(vars = NULL) { if (!is.null(vars)) { lifecycle::deprecate_warn( "1.0.0", "group_cols(vars = )", details = "Use `data` with entire dataframe instead" ) } vars <- vars %||% tidyselect::peek_vars() if (is_sel_vars(vars)) { matches <- match(vars %@% groups, vars) if (anyNA(matches)) { abort("Can't find the grouping variables.") } matches } else { int() } } # Alias required for help links in downstream packages #' @aliases select_helpers #' @importFrom tidyselect contains #' @export tidyselect::contains #' @importFrom tidyselect ends_with #' @export tidyselect::ends_with #' @importFrom tidyselect everything #' @export tidyselect::everything #' @importFrom tidyselect matches #' @export tidyselect::matches #' @importFrom tidyselect num_range #' @export tidyselect::num_range #' @importFrom tidyselect one_of #' @export tidyselect::one_of #' @importFrom tidyselect starts_with #' @export tidyselect::starts_with #' @importFrom tidyselect last_col #' @export tidyselect::last_col #' @importFrom tidyselect any_of #' @export tidyselect::any_of #' @importFrom tidyselect all_of #' @export tidyselect::all_of dplyr/R/explain.r0000644000176200001440000000265014144435746013447 0ustar liggesusers#' Explain details of a tbl #' #' This is a generic function which gives more 
details about an object than #' [print()], and is more focused on human readable output than #' [str()]. #' #' @section Databases: #' Explaining a `tbl_sql` will run the SQL `EXPLAIN` command which #' will describe the query plan. This requires a little bit of knowledge about #' how `EXPLAIN` works for your database, but is very useful for #' diagnosing performance problems. #' #' @export #' @param x An object to explain #' @param ... Other parameters possibly used by generic #' @return The first argument, invisibly. #' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE) #' \donttest{ #' lahman_s <- dbplyr::lahman_sqlite() #' batting <- tbl(lahman_s, "Batting") #' batting %>% show_query() #' batting %>% explain() #' #' # The batting database has indices on all ID variables: #' # SQLite automatically picks the most restrictive index #' batting %>% filter(lgID == "NL" & yearID == 2000L) %>% explain() #' #' # OR's will use multiple indexes #' batting %>% filter(lgID == "NL" | yearID == 2000) %>% explain() #' #' # Joins will use indexes in both tables #' teams <- tbl(lahman_s, "Teams") #' batting %>% left_join(teams, c("yearID", "teamID")) %>% explain() #' } explain <- function(x, ...) { UseMethod("explain") } #' @export #' @rdname explain show_query <- function(x, ...) { UseMethod("show_query") } dplyr/R/generics.R0000644000176200001440000002160514176714175013550 0ustar liggesusers#' Extending dplyr with new data frame subclasses #' #' @description #' `r lifecycle::badge("experimental")` #' #' These three functions, along with `names<-` and 1d numeric `[` #' (i.e. `x[loc]`) methods, provide a minimal interface for extending dplyr #' to work with new data frame subclasses. This means that for simple cases #' you should only need to provide a couple of methods, rather than a method #' for every dplyr verb. 
#' #' These functions are a stop-gap measure until we figure out how to solve #' the problem more generally, but it's likely that any code you write to #' implement them will find a home in what comes next. #' #' # Basic advice #' #' This section gives you basic advice if you want to extend dplyr to work with #' your custom data frame subclass, and you want the dplyr methods to behave #' in basically the same way. #' #' * If you have data frame attributes that don't depend on the rows or columns #' (and should unconditionally be preserved), you don't need to do anything. #' #' * If you have __scalar__ attributes that depend on __rows__, implement a #' `dplyr_reconstruct()` method. Your method should recompute the attribute #' depending on rows now present. #' #' * If you have __scalar__ attributes that depend on __columns__, implement a #' `dplyr_reconstruct()` method and a 1d `[` method. For example, if your #' class requires that certain columns be present, your method should return #' a data.frame or tibble when those columns are removed. #' #' * If your attributes are __vectorised__ over __rows__, implement a #' `dplyr_row_slice()` method. This gives you access to `i` so you can #' modify the row attribute accordingly. You'll also need to think carefully #' about how to recompute the attribute in `dplyr_reconstruct()`, and #' you will need to carefully verify the behaviour of each verb, and provide #' additional methods as needed. #' #' * If your attributes that are __vectorised__ over __columns__, implement #' `dplyr_col_modify()`, 1d `[`, and `names<-` methods. All of these methods #' know which columns are being modified, so you can update the column #' attribute according. You'll also need to think carefully about how to #' recompute the attribute in `dplyr_reconstruct()`, and you will need to #' carefully verify the behaviour of each verb, and provide additional #' methods as needed. 
#' #' # Current usage #' #' * `arrange()`, `filter()`, `slice()`, `semi_join()`, and `anti_join()` #' work by generating a vector of row indices, and then subsetting #' with `dplyr_row_slice()`. #' #' * `mutate()` generates a list of new column value (using `NULL` to indicate #' when columns should be deleted), then passes that to `dplyr_col_modify()`. #' `transmute()` does the same then uses 1d `[` to select the columns. #' #' * `summarise()` works similarly to `mutate()` but the data modified by #' `dplyr_col_modify()` comes from `group_data()`. #' #' * `select()` uses 1d `[` to select columns, then `names<-` to rename them. #' `rename()` just uses `names<-`. `relocate()` just uses 1d `[`. #' #' * `inner_join()`, `left_join()`, `right_join()`, and `full_join()` #' coerces `x` to a tibble, modify the rows, then uses `dplyr_reconstruct()` #' to convert back to the same type as `x`. #' #' * `nest_join()` uses `dplyr_col_modify()` to cast the key variables to #' common type and add the nested-df that `y` becomes. #' #' * `distinct()` does a `mutate()` if any expressions are present, then #' uses 1d `[` to select variables to keep, then `dplyr_row_slice()` to #' select distinct rows. #' #' Note that `group_by()` and `ungroup()` don't use any these generics and #' you'll need to provide methods directly. #' #' @keywords internal #' @param data A tibble. We use tibbles because they avoid some inconsistent #' subset-assignment use cases #' @name dplyr_extending NULL #' @export #' @rdname dplyr_extending #' @param i A numeric or logical vector that indexes the rows of `.data`. dplyr_row_slice <- function(data, i, ...) { if (!is.numeric(i) && !is.logical(i)) { abort("`i` must be a numeric or logical vector.") } UseMethod("dplyr_row_slice") } #' @export dplyr_row_slice.data.frame <- function(data, i, ...) 
{ dplyr_reconstruct(vec_slice(data, i), data) } #' @export dplyr_row_slice.grouped_df <- function(data, i, ..., preserve = FALSE) { out <- vec_slice(as.data.frame(data), i) # Index into group_indices, then use that to restore the grouping structure groups <- group_data(data) new_id <- vec_slice(group_indices(data), i) new_grps <- vec_group_loc(new_id) rows <- rep(list_of(integer()), length.out = nrow(groups)) rows[new_grps$key] <- new_grps$loc groups$.rows <- rows if (!preserve && isTRUE(attr(groups, ".drop"))) { groups <- group_data_trim(groups) } new_grouped_df(out, groups) } #' @export dplyr_row_slice.rowwise_df <- function(data, i, ..., preserve = FALSE) { out <- vec_slice(data, i) group_data <- vec_slice(group_keys(data), i) new_rowwise_df(out, group_data) } #' @export #' @rdname dplyr_extending #' @param cols A named list used modify columns. A `NULL` value should remove #' an existing column. dplyr_col_modify <- function(data, cols) { UseMethod("dplyr_col_modify") } #' @export dplyr_col_modify.data.frame <- function(data, cols) { # Must be implemented from first principles to avoiding edge cases in # [.data.frame and [.tibble (2.1.3 and earlier). 
  # Apply tidyverse recycling rules
  cols <- vec_recycle_common(!!!cols, .size = nrow(data))

  # Transform to list to avoid stripping inner names with `[[<-`
  out <- as.list(dplyr_vec_data(data))

  # Normalise names to UTF-8 on both sides so the by-name assignment below
  # matches regardless of the original encodings.
  nms <- as_utf8_character(names2(cols))
  names(out) <- as_utf8_character(names2(out))

  # Assign by name: existing columns are overwritten, new names are appended
  # at the end, and a NULL entry removes the column (list `[[<-` semantics;
  # NULL-as-removal is the documented contract of dplyr_col_modify()).
  for (i in seq_along(cols)) {
    nm <- nms[[i]]
    out[[nm]] <- cols[[i]]
  }

  # Transform back to data frame before reconstruction
  row_names <- .row_names_info(data, type = 0L)
  out <- new_data_frame(out, n = nrow(data), row.names = row_names)

  dplyr_reconstruct(out, data)
}

#' @export
dplyr_col_modify.grouped_df <- function(data, cols) {
  # Modify on a plain tibble first, then restore the grouping.
  out <- dplyr_col_modify(as_tibble(data), cols)

  if (any(names(cols) %in% group_vars(data))) {
    # regroup
    grouped_df(out, group_vars(data), drop = group_by_drop_default(data))
  } else {
    new_grouped_df(out, group_data(data))
  }
}

#' @export
dplyr_col_modify.rowwise_df <- function(data, cols) {
  # Rowwise grouping only records the grouping variable names, so there is
  # nothing to recompute: modify on a tibble and re-wrap.
  out <- dplyr_col_modify(as_tibble(data), cols)
  rowwise_df(out, group_vars(data))
}

#' @param template Template to use for restoring attributes
#' @export
#' @rdname dplyr_extending
dplyr_reconstruct <- function(data, template) {
  # Strip attributes before dispatch to make it easier to implement
  # methods and prevent unexpected leaking of irrelevant attributes.
data <- dplyr_new_data_frame(data) return(dplyr_reconstruct_dispatch(data, template)) UseMethod("dplyr_reconstruct", template) } dplyr_reconstruct_dispatch <- function(data, template) { UseMethod("dplyr_reconstruct", template) } #' @export dplyr_reconstruct.data.frame <- function(data, template) { attrs <- attributes(template) attrs$names <- names(data) attrs$row.names <- .row_names_info(data, type = 0L) attributes(data) <- attrs data } #' @export dplyr_reconstruct.grouped_df <- function(data, template) { group_vars <- group_intersect(template, data) grouped_df(data, group_vars, drop = group_by_drop_default(template)) } #' @export dplyr_reconstruct.rowwise_df <- function(data, template) { group_vars <- group_intersect(template, data) rowwise_df(data, group_vars) } dplyr_col_select <- function(.data, loc, names = NULL, error_call = caller_env()) { loc <- vec_as_location(loc, n = ncol(.data), names = names(.data)) out <- .data[loc] if (!inherits(out, "data.frame")) { classes_data <- glue_collapse(class(.data), sep = "/") classes_out <- glue_collapse(class(out), sep = "/") bullets <- c( "Can't reconstruct data frame.", x = glue("The `[` method for class <{classes_data}> must return a data frame."), i = glue("It returned a <{classes_out}>.") ) abort(bullets, call = error_call) } if (length(out) != length(loc)) { classes_data <- glue_collapse(class(.data), sep = "/") classes_out <- glue_collapse(class(out), sep = "/") s <- function(x) if (length(x) == 1) "" else "s" bullets <- c( "Can't reconstruct data frame.", x = glue("The `[` method for class <{classes_data}> must return a data frame with {length(loc)} column{s(loc)}."), i = glue("It returned a <{classes_out}> of {length(out)} column{s(out)}.") ) abort(bullets, call = error_call) } # Patch base data frames and data.table (#6171) to restore extra attributes that `[.data.frame` drops. # We require `[` methods to keep extra attributes for all data frame subclasses. 
if (identical(class(.data), "data.frame") || identical(class(.data), c("data.table", "data.frame"))) { out <- dplyr_reconstruct(out, .data) } if (!is.null(names)) { names(out) <- names } out } dplyr/R/doc-params.R0000644000176200001440000001014414121112104013741 0ustar liggesusers#' Argument type: data-masking #' #' @description #' This page describes the `` argument modifier which #' indicates that the argument uses tidy evaluation with **data masking**. #' If you've never heard of tidy evaluation before, start with #' `vignette("programming")`. #' #' # Key terms #' #' The primary motivation for tidy evaluation in dplyr is that it provides #' **data masking**, which blurs the distinction between two types of variables: #' #' * __env-variables__ are "programming" variables and live in an environment. #' They are usually created with `<-`. Env-variables can be any type of R #' object. #' #' * __data-variables__ are "statistical" variables and live in a data frame. #' They usually come from data files (e.g. `.csv`, `.xls`), or are created by #' manipulating existing variables. Data-variables live inside data frames, #' so must be vectors. #' #' # General usage #' #' Data masking allows you to refer to variables in the "current" data frame #' (usually supplied in the `.data` argument), without any other prefix. #' It's what allows you to type (e.g.) `filter(diamonds, x == 0 & y == 0 & z == 0)` #' instead of `diamonds[diamonds$x == 0 & diamonds$y == 0 & diamonds$z == 0, ]`. #' #' # Indirection #' #' The main challenge of data masking arises when you introduce some #' indirection, i.e. instead of directly typing the name of a variable you #' want to supply it in a function argument or character vector. #' #' There are two main cases: #' #' * If you want the user to supply the variable (or function of variables) #' in a function argument, embrace the argument, e.g. `filter(df, {{ var }})`. 
#' #' ``` #' dist_summary <- function(df, var) { #' df %>% #' summarise(n = n(), min = min({{ var }}), max = max({{ var }})) #' } #' mtcars %>% dist_summary(mpg) #' mtcars %>% group_by(cyl) %>% dist_summary(mpg) #' ``` #' #' * If you have the column name as a character vector, use the `.data` #' pronoun, e.g. `summarise(df, mean = mean(.data[[var]]))`. #' #' ``` #' for (var in names(mtcars)) { #' mtcars %>% count(.data[[var]]) %>% print() #' } #' #' lapply(names(mtcars), function(var) mtcars %>% count(.data[[var]])) #' ``` #' #' # Dot-dot-dot (...) #' #' When this modifier is applied to `...`, there is one other useful technique #' which solves the problem of creating a new variable with a name supplied by #' the user. Use the interpolation syntax from the glue package: `"{var}" := #' expression`. (Note the use of `:=` instead of `=` to enable this syntax). #' #' ``` #' var_name <- "l100km" #' mtcars %>% mutate("{var_name}" := 235 / mpg) #' ``` #' #' Note that `...` automatically provides indirection, so you can use it as is #' (i.e. without embracing) inside a function: #' #' ``` #' grouped_mean <- function(df, var, ...) { #' df %>% #' group_by(...) %>% #' summarise(mean = mean({{ var }})) #' } #' ``` #' #' @keywords internal #' @name dplyr_data_masking NULL #' Argument type: tidy-select #' #' @description #' This page the describes the `` argument modifier which indicates #' the argument supports **tidy selections**. Tidy selection provides a concise #' dialect of R for selecting variables based on their names or properties. #' #' Tidy selection is a variant of tidy evaluation. This means that inside #' functions, tidy-select arguments require special attention, as described in #' the Indirection section. If you've never heard of tidy evaluation before, #' start with `vignette("programming")`. 
#' #' #' # Overview of selection features #' #' ```{r, child = "man/rmd/overview.Rmd"} #' ``` #' #' #' # Indirection #' #' There are two main cases: #' #' * If you have a character vector of column names, use `all_of()` #' or `any_of()`, depending on whether or not you want unknown variable #' names to cause an error, e.g `select(df, all_of(vars))`, #' `select(df, !any_of(vars))`. #' #' * If you you want the user to supply a tidyselect specification in a #' function argument, embrace the function argument, e.g #' `select(df, {{ vars }})`. #' #' @keywords internal #' @name dplyr_tidy_select NULL dplyr/R/compute-collect.r0000644000176200001440000000420714144435746015106 0ustar liggesusers#' Force computation of a database query #' #' @description #' `compute()` stores results in a remote temporary table. #' `collect()` retrieves data into a local tibble. #' `collapse()` is slightly different: it doesn't force computation, but #' instead forces generation of the SQL query. This is sometimes needed to work #' around bugs in dplyr's SQL generation. #' #' All functions preserve grouping and ordering. #' #' @section Methods: #' These functions are **generics**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' Methods available in currently loaded packages: #' * `compute()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("compute")} #' * `collect()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collect")} #' * `collapse()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collapse")} #' #' @param x A data frame, data frame extension (e.g. a tibble), or a lazy #' data frame (e.g. from dbplyr or dtplyr). See *Methods*, below, for more #' details. #' @param ... Arguments passed on to methods #' @seealso [copy_to()], the opposite of `collect()`: it takes a local data #' frame and uploads it to the remote source. 
#' @export
#' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)
#' mtcars2 <- dbplyr::src_memdb() %>%
#'   copy_to(mtcars, name = "mtcars2-cc", overwrite = TRUE)
#'
#' remote <- mtcars2 %>%
#'   filter(cyl == 8) %>%
#'   select(mpg:drat)
#'
#' # Compute query and save in remote table
#' compute(remote)
#'
#' # Compute query and bring back to this session
#' collect(remote)
#'
#' # Creates a fresh query based on the generated SQL
#' collapse(remote)
compute <- function(x, ...) {
  UseMethod("compute")
}

#' @export
#' @rdname compute
collect <- function(x, ...) {
  UseMethod("collect")
}

#' @export
#' @rdname compute
collapse <- function(x, ...) {
  UseMethod("collapse")
}

# For local data frames all three verbs are no-ops: the data is already
# computed, local, and has no pending query.
#' @export
collect.data.frame <- function(x, ...) x
#' @export
compute.data.frame <- function(x, ...) x
#' @export
collapse.data.frame <- function(x, ...) x
dplyr/R/error.R0000644000176200001440000000427514164534554013074 0ustar liggesusers# ngettext() does extra work, this function is a simpler version
# Returns `msg1` for n == 1, `msg2` otherwise (no translation lookup).
ntext <- function(n, msg1, msg2) {
  if (n == 1) msg1 else msg2
}

# Format positional argument references for error messages,
# e.g. "Argument 1" / "Arguments 1, 2".
fmt_pos_args <- function(x) {
  args <- ntext(length(x), "Argument", "Arguments")
  glue("{args} {fmt_comma(x)}")
}

# Deparse quosures/calls and format them as backtick-quoted, comma-separated.
fmt_calls <- function(...) {
  x <- parse_named_call(...)
  fmt_obj(x)
}

# Format column references, e.g. "Column `x`" / "Columns `x`, `y`".
fmt_cols <- function(x) {
  cols <- ntext(length(x), "Column", "Columns")
  glue("{cols} {fmt_obj(x)}")
}

# Backtick-quote each element, then join with commas (truncating long lists).
fmt_obj <- function(x) {
  fmt_comma(fmt_obj1(x))
}

fmt_obj1 <- function(x) {
  paste0("`", x, "`")
}

# Collapse a class vector to "a/b/c" form.
fmt_classes <- function(x) {
  paste(class(x), collapse = "/")
}

# Format dimensions as "[n x m]".
fmt_dims <- function(x) {
  paste0("[", paste0(x, collapse = " x "), "]")
}

# Comma-join, showing at most `.max` items; the last shown item is replaced
# by "..." when the list is truncated.
fmt_comma <- function(..., .max = 6) {
  x <- paste0(...)
  if (length(x) > .max) {
    length(x) <- .max
    x[[.max]] <- "..."
  }

  commas(x)
}

# Normalise user-supplied variable specs: one-sided formulas become the
# deparsed name of their RHS, everything else is flattened as-is.
parse_args <- function(x) {
  # convert single formula to list of length 1
  x <- unlist(list(x), recursive = FALSE)
  is_fml <- map_lgl(x, is_formula)
  x[is_fml] <- map_chr(map(x[is_fml], "[[", 2), as_string)
  unlist(x)
}

parse_named_call <- function(x) {
  map_chr(x, quo_text)
}

# From rlang
# Human-readable description of an object's type for error messages.
friendly_type_of <- function(x) {
  if (is.object(x)) {
    sprintf("a `%s` object", fmt_classes(x))
  } else {
    as_friendly_type(typeof(x))
  }
}

# Map `typeof()` strings to friendly phrases; falls through to the raw
# `type` string for anything unlisted. Empty `=` entries share the next
# value (standard `switch()` fall-through).
as_friendly_type <- function(type) {
  switch(type,
    logical = "a logical vector",
    integer = "an integer vector",
    numeric = ,
    double = "a double vector",
    complex = "a complex vector",
    character = "a character vector",
    raw = "a raw vector",
    string = "a string",
    list = "a list",
    NULL = "NULL",
    environment = "an environment",
    externalptr = "a pointer",
    weakref = "a weak reference",
    S4 = "an S4 object",
    name = ,
    symbol = "a symbol",
    language = "a call",
    pairlist = "a pairlist node",
    expression = "an expression vector",
    quosure = "a quosure",
    formula = "a formula",
    char = "an internal string",
    promise = "an internal promise",
    ... = "an internal dots object",
    any = "an internal `any` object",
    bytecode = "an internal bytecode object",
    primitive = ,
    builtin = ,
    special = "a primitive function",
    closure = "a function",
    type
  )
}
dplyr/R/colwise-group-by.R0000644000176200001440000000537114151641776015151 0ustar liggesusers#' Group by a selection of variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [across()] in an existing verb. See `vignette("colwise")` for details.
#'
#' These [scoped] variants of [group_by()] group a data frame by a
#' selection of variables. Like [group_by()], they have optional
#' [mutate] semantics.
#'
#' @inheritParams scoped
#' @inheritParams group_by
#' @param .add See [group_by()]
#'
#' @export
#'
#' @section Grouping variables:
#'
#' Existing grouping variables are maintained, even if not included in
#' the selection.
#'
#' @keywords internal
#' @examples
#' # Group a data frame by all variables:
#' group_by_all(mtcars)
#' # ->
#' mtcars %>% group_by(across())
#'
#' # Group by variables selected with a predicate:
#' group_by_if(iris, is.factor)
#' # ->
#' iris %>% group_by(across(where(is.factor)))
#'
#' # Group by variables selected by name:
#' group_by_at(mtcars, vars(vs, am))
#' # ->
#' mtcars %>% group_by(across(c(vs, am)))
#'
#' # Like group_by(), the scoped variants have optional mutate
#' # semantics. These provide a shortcut for group_by() + mutate():
#' d <- tibble(x=c(1,1,2,2), y=c(1,2,1,2))
#' group_by_all(d, as.factor)
#' # ->
#' d %>% group_by(across(everything(), as.factor))
#'
#' group_by_if(iris, is.factor, as.character)
#' # ->
#' iris %>% group_by(across(where(is.factor), as.character))
group_by_all <- function(.tbl, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("superseded", "group_by_all()")
  funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "group_by_all")
  # With no transformation functions, group by every column as-is.
  if (!length(funs)) {
    funs <- syms(tbl_vars(.tbl))
  }
  .group_by_static_drop(.tbl, !!!funs, .add = .add, .drop = .drop)
}

#' @rdname group_by_all
#' @export
group_by_at <- function(.tbl, .vars, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("superseded", "group_by_at()")
  funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "group_by_at")
  # With no transformation functions, group by the selected columns as-is.
  if (!length(funs)) {
    funs <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE)
  }
  .group_by_static_drop(.tbl, !!!funs, .add = .add, .drop = .drop)
}

#' @rdname group_by_all
#' @export
group_by_if <- function(.tbl, .predicate, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("superseded", "group_by_if()")
  funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "group_by_if")
  # With no transformation functions, group by the matching columns as-is.
  if (!length(funs)) {
    funs <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE)
  }
  .group_by_static_drop(.tbl, !!!funs, .add = .add, .drop = .drop)
}
dplyr/R/utils-replace-with.R0000644000176200001440000000345614174551640015465 0ustar liggesusers# Replace the elements of `x` selected by logical index `i` with `val`,
# after validating `val`'s length, type and class against `x`. `val` may be
# length 1 (recycled) or the same length as `x` (aligned element-wise).
# A NULL `val` is a no-op. `name`/`reason` feed the error messages.
replace_with <- function(x, i, val, name, reason = NULL, error_call = caller_env()) {
  if (is.null(val)) {
    return(x)
  }

  check_length(val, x, name, reason, error_call = error_call)
  check_type(val, x, name, error_call = error_call)
  check_class(val, x, name, error_call = error_call)

  # Treat NA selections as "do not replace".
  i[is.na(i)] <- FALSE

  if (length(val) == 1L) {
    x[i] <- val
  } else {
    x[i] <- val[i]
  }

  x
}

# Build the length-mismatch error message, or return NULL (invisibly) when
# `length_x` is acceptable (1 or `n`).
fmt_check_length_val <- function(length_x, n, header, reason = NULL) {
  if (all(length_x %in% c(1L, n))) {
    return()
  }

  if (is.null(reason)) {
    reason <- ""
  } else {
    reason <- glue(" ({reason})")
  }

  if (n == 1) {
    glue("{header} must be length 1{reason}, not {commas(length_x)}.")
  } else {
    glue("{header} must be length {n}{reason} or one, not {commas(length_x)}.")
  }
}

# Abort with the formatted message when the length check fails; no-op when
# `fmt_check_length_val()` returned NULL.
check_length_val <- function(length_x, n, header, reason = NULL, error_call = caller_env()) {
  msg <- fmt_check_length_val(length_x, n, header, reason)
  if (length(msg)) {
    abort(msg, call = error_call)
  }
}

check_length <- function(x, template, header, reason = NULL, error_call = caller_env()) {
  check_length_val(length(x), length(template), header, reason, error_call = error_call)
}

# Require `x` to have the same base type (typeof) as `template`.
check_type <- function(x, template, header, error_call = caller_env()) {
  if (identical(typeof(x), typeof(template))) {
    return()
  }
  msg <- glue("{header} must be {friendly_type_of(template)}, not {friendly_type_of(x)}.")
  abort(msg, call = error_call)
}

# Require `x` to have the same class as `template`, but only when `x` is an
# S3/S4 object (bare vectors are exempt).
check_class <- function(x, template, header, error_call = caller_env()) {
  if (!is.object(x)) {
    return()
  }

  if (identical(class(x), class(template))) {
    return()
  }
  exp_classes <- fmt_classes(template)
  out_classes <- fmt_classes(x)
  msg <- glue("{header} must have class `{exp_classes}`, not class `{out_classes}`.")
  abort(msg, call = error_call)
}
dplyr/R/deprec-src-local.r0000644000176200001440000000360414151641776015116 0ustar liggesusers#' A local source
#'
#' `r lifecycle::badge("deprecated")`
#' This function was deprecated since it existed to support a style of testing
#' dplyr backends that turned out not to be useful.
#'
#' @param tbl name of the function used to generate `tbl` objects
#' @param pkg,env Either the name of a package or an environment object in
#'   which to look for objects.
#' @keywords internal
#' @export
src_local <- function(tbl, pkg = NULL, env = NULL) {
  lifecycle::deprecate_warn("1.0.0", "src_local()")

  # Exactly one of `pkg`/`env` must be supplied.
  if (!xor(is.null(pkg), is.null(env))) {
    msg <- glue("Exactly one of `pkg` and `env` must be non-NULL, not {(!is.null(pkg)) + (!is.null(env))}.")
    abort(msg)
  }

  if (!is.null(pkg)) {
    env <- getNamespaceInfo(pkg, "lazydata")
    # NOTE(review): the empty string below looks like lost text — upstream
    # builds a "<package: {pkg}>" style label here. Confirm against the
    # canonical source before relying on `name`.
    name <- paste0("")
  } else {
    stopifnot(is.environment(env))
    name <- utils::capture.output(print(env))
  }

  structure(
    list(tbl_f = match.fun(tbl), name = name, env = env),
    class = c("src_local", "src")
  )
}

#' @rdname src_local
#' @export
src_df <- function(pkg = NULL, env = NULL) {
  # Convenience wrapper: a local source whose tables are returned as tibbles.
  src_local("as_tibble", pkg, env)
}

#' @export
src_tbls.src_local <- function(x, ...) {
  # List the names of all data frames available in the source environment.
  objs <- ls(envir = x$env, all.names = TRUE)
  Filter(function(obj) is.data.frame(get(obj, envir = x$env)), objs)
}

#' @export
tbl.src_local <- function(src, from, ...) {
  # Look `from` up in the source environment and wrap via the stored
  # constructor (e.g. `as_tibble`).
  src$tbl_f(get(from, src$env))
}

#' @export
copy_to.src_local <- function(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...) {
  # Refuse to clobber an existing object unless `overwrite = TRUE`.
  if (!overwrite && exists(name, envir = dest$env, inherits = FALSE)) {
    msg <- glue("Object with `name` = {fmt_obj(name)} must not already exist, unless `overwrite` = TRUE.")
    abort(msg)
  }

  assign(name, envir = dest$env, df)
  tbl(dest, name)
}

#' @export
format.src_local <- function(x, ...)
{
  paste0(
    "src: ", x$name, "\n",
    wrap("tbls: ", paste0(sort(src_tbls(x)), collapse = ", "))
  )
}
dplyr/R/groups-with.R0000644000176200001440000000265114151641776014230 0ustar liggesusers#' Perform an operation with temporary groups
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' This is an experimental new function that allows you to modify the grouping
#' variables for a single operation.
#'
#' @param .data A data frame
#' @param .groups <[`tidy-select`][dplyr_tidy_select]> One or more variables
#'   to group by. Unlike [group_by()], you can only group by existing variables,
#'   and you can use tidy-select syntax like `c(x, y, z)` to select multiple
#'   variables.
#'
#'   Use `NULL` to temporarily **un**group.
#' @param .f Function to apply to regrouped data.
#'   Supports purrr-style `~` syntax
#' @param ... Additional arguments passed on to `.f`.
#' @export
#' @examples
#' df <- tibble(g = c(1, 1, 2, 2, 3), x = runif(5))
#' df %>%
#'   with_groups(g, mutate, x_mean = mean(x))
#' df %>%
#'   with_groups(g, ~ mutate(.x, x1 = first(x)))
#'
#' df %>%
#'   group_by(g) %>%
#'   with_groups(NULL, mutate, x_mean = mean(x))
#'
#' # NB: grouping can't be restored if you remove the grouping variables
#' df %>%
#'   group_by(g) %>%
#'   with_groups(NULL, mutate, g = NULL)
with_groups <- function(.data, .groups, .f, ...) {
  lifecycle::signal_stage("experimental", "with_groups()")
  # Resolve the tidy-select spec against the data's prototype, regroup by the
  # selected (existing) columns, apply `.f`, then restore `.data`'s original
  # grouping/attributes via dplyr_reconstruct().
  loc <- tidyselect::eval_select(enquo(.groups), data = tbl_ptype(.data))
  val <- syms(names(.data)[loc])
  out <- group_by(.data, !!!val)

  # `~`-style lambdas are accepted via as_function().
  .f <- as_function(.f)
  out <- .f(out, ...)

  dplyr_reconstruct(out, .data)
}
dplyr/R/deprec-lazyeval.R0000644000176200001440000002664414151641776015037 0ustar liggesusers#' Deprecated SE versions of main verbs.
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' dplyr used to offer twin versions of each verb suffixed with an
#' underscore.
These versions had standard evaluation (SE) semantics: #' rather than taking arguments by code, like NSE verbs, they took #' arguments by value. Their purpose was to make it possible to #' program with dplyr. However, dplyr now uses tidy evaluation #' semantics. NSE verbs still capture their arguments, but you can now #' unquote parts of these arguments. This offers full programmability #' with NSE verbs. Thus, the underscored versions are now superfluous. #' #' Unquoting triggers immediate evaluation of its operand and inlines #' the result within the captured expression. This result can be a #' value or an expression to be evaluated later with the rest of the #' argument. See `vignette("programming")` for more information. #' #' @name se-deprecated #' @param .data A data frame. #' @param dots,.dots,... Pair/values of expressions coercible to lazy objects. #' @param vars Various meanings depending on the verb. #' @param args Various meanings depending on the verb. #' @keywords internal NULL lazy_deprec <- function(fun, hint = TRUE) { lifecycle::deprecate_warn("0.7.0", paste0(fun, "_()"), paste0(fun, "()"), details = if (hint) "See vignette('programming') for more help" ) } #' @rdname se-deprecated #' @export add_count_ <- function(x, vars, wt = NULL, sort = FALSE) { lazy_deprec("add_count") vars <- compat_lazy_dots(vars, caller_env()) wt <- wt %||% quo(NULL) wt <- compat_lazy(wt, caller_env()) add_count(x, !!!vars, wt = !!wt, sort = sort) } #' @rdname se-deprecated #' @export add_tally_ <- function(x, wt, sort = FALSE) { lazy_deprec("add_tally") wt <- compat_lazy(wt, caller_env()) add_tally(x, !!wt, sort = sort) } #' @export #' @rdname se-deprecated arrange_ <- function(.data, ..., .dots = list()) { lazy_deprec("arrange") UseMethod("arrange_") } #' @export arrange_.data.frame <- function(.data, ..., .dots = list(), .by_group = FALSE) { dots <- compat_lazy_dots(.dots, caller_env(), ...) 
arrange(.data, !!!dots, .by_group = .by_group) } #' @export arrange_.tbl_df <- function(.data, ..., .dots = list(), .by_group = FALSE) { dots <- compat_lazy_dots(.dots, caller_env(), ...) arrange(.data, !!!dots, .by_group = .by_group) } #' @export #' @rdname se-deprecated count_ <- function(x, vars, wt = NULL, sort = FALSE, .drop = group_by_drop_default(x)) { lazy_deprec("count") vars <- compat_lazy_dots(vars, caller_env()) wt <- wt %||% quo(NULL) wt <- compat_lazy(wt, caller_env()) count(x, !!!vars, wt = !!wt, sort = sort, .drop = .drop) } #' @export #' @rdname se-deprecated #' @inheritParams distinct distinct_ <- function(.data, ..., .dots, .keep_all = FALSE) { lazy_deprec("distinct") UseMethod("distinct_") } #' @export distinct_.data.frame <- function(.data, ..., .dots = list(), .keep_all = FALSE) { dots <- compat_lazy_dots(.dots, caller_env(), ...) distinct(.data, !!!dots, .keep_all = .keep_all) } #' @export # Can't use NextMethod() in R 3.1, r-lib/rlang#486 distinct_.tbl_df <- distinct_.data.frame #' @export distinct_.grouped_df <- function(.data, ..., .dots = list(), .keep_all = FALSE) { dots <- compat_lazy_dots(.dots, caller_env(), ...) distinct(.data, !!!dots, .keep_all = .keep_all) } #' @export #' @rdname se-deprecated do_ <- function(.data, ..., .dots = list()) { lazy_deprec("do") UseMethod("do_") } #' @export do_.NULL <- function(.data, ..., .dots = list()) { NULL } #' @export do_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) do(.data, !!!dots) } #' @export do_.grouped_df <- function(.data, ..., env = caller_env(), .dots = list()) { dots <- compat_lazy_dots(.dots, env, ...) do(.data, !!!dots) } #' @export do_.rowwise_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) 
do(.data, !!!dots) } #' @export #' @rdname se-deprecated filter_ <- function(.data, ..., .dots = list()) { lazy_deprec("filter") UseMethod("filter_") } #' @export filter_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) filter(.data, !!!dots) } #' @export filter_.tbl_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) filter(.data, !!!dots) } #' @export #' @rdname se-deprecated #' @inheritParams funs #' @param env The environment in which functions should be evaluated. funs_ <- function(dots, args = list(), env = base_env()) { lazy_deprec("funs") dots <- compat_lazy_dots(dots, caller_env()) funs(!!!dots, .args = args) } #' @export #' @rdname se-deprecated #' @inheritParams group_by group_by_ <- function(.data, ..., .dots = list(), add = FALSE) { lazy_deprec("group_by") UseMethod("group_by_") } #' @export group_by_.data.frame <- function(.data, ..., .dots = list(), add = FALSE, .drop = group_by_drop_default(.data)) { dots <- compat_lazy_dots(.dots, caller_env(), ...) group_by(.data, !!!dots, .add = add, .drop = .drop) } #' @export group_by_.rowwise_df <- function(.data, ..., .dots = list(), add = FALSE, .drop = group_by_drop_default(.data)) { dots <- compat_lazy_dots(.dots, caller_env(), ...) group_by(.data, !!!dots, .add = add, .drop = .drop) } #' @export #' @rdname se-deprecated group_indices_ <- function(.data, ..., .dots = list()) { lazy_deprec("group_indices", hint = FALSE) UseMethod("group_indices_") } #' @export group_indices.data.frame <- function(.data, ..., .drop = TRUE) { dots <- enquos(...) if (length(dots) == 0L) { return(rep(1L, nrow(.data))) } group_indices(group_by(.data, !!!dots, .drop = .drop)) } #' @export group_indices_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) 
group_indices(.data, !!!dots) } #' @export group_indices_.grouped_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) group_indices(.data, !!!dots) } #' @export group_indices_.rowwise_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) group_indices(.data, !!!dots) } #' @export #' @rdname se-deprecated mutate_ <- function(.data, ..., .dots = list()) { lazy_deprec("mutate") UseMethod("mutate_") } #' @export mutate_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) mutate(.data, !!!dots) } #' @export mutate_.tbl_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ..., .named = TRUE) mutate(.data, !!!dots) } #' @rdname se-deprecated #' @inheritParams tally #' @export tally_ <- function(x, wt, sort = FALSE) { lazy_deprec("tally") wt <- compat_lazy(wt, caller_env()) tally(x, wt = !!wt, sort = sort) } #' @rdname se-deprecated #' @export transmute_ <- function(.data, ..., .dots = list()) { lazy_deprec("transmute") UseMethod("transmute_") } #' @export transmute_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) transmute(.data, !!!dots) } #' @rdname se-deprecated #' @export rename_ <- function(.data, ..., .dots = list()) { lazy_deprec("rename", hint = FALSE) UseMethod("rename_") } #' @export rename_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) rename(.data, !!!dots) } #' @export rename_.grouped_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) 
rename(.data, !!!dots) } #' @export #' @rdname se-deprecated rename_vars_ <- function(vars, args) { lifecycle::deprecate_warn("0.7.0", "rename_vars_()", "tidyselect::vars_rename()") args <- compat_lazy_dots(args, caller_env()) tidyselect::vars_rename(vars, !!!args) } #' @export #' @rdname se-deprecated select_ <- function(.data, ..., .dots = list()) { lazy_deprec("select", hint = FALSE) UseMethod("select_") } #' @export select_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) select(.data, !!!dots) } #' @export select_.grouped_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) select(.data, !!!dots) } #' @rdname se-deprecated #' @param include,exclude Character vector of column names to always #' include/exclude. #' @export select_vars_ <- function(vars, args, include = chr(), exclude = chr()) { lifecycle::deprecate_warn("0.7.0", "select_vars_()", "tidyselect::vars_select()") args <- compat_lazy_dots(args, caller_env()) tidyselect::vars_select(vars, !!!args, .include = include, .exclude = exclude) } #' @export #' @rdname se-deprecated slice_ <- function(.data, ..., .dots = list()) { lazy_deprec("slice", hint = FALSE) UseMethod("slice_") } #' @export slice_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) slice(.data, !!!dots) } #' @export slice_.tbl_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) slice(.data, !!!dots) } #' @export #' @rdname se-deprecated summarise_ <- function(.data, ..., .dots = list()) { lazy_deprec("summarise", hint = FALSE) UseMethod("summarise_") } #' @export summarise_.data.frame <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) 
summarise(.data, !!!dots) } #' @export summarise_.tbl_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ..., .named = TRUE) summarise(.data, !!!dots) } #' @rdname se-deprecated #' @export summarize_ <- summarise_ #' Summarise and mutate multiple columns. #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `mutate_each()` and `summarise_each()` are deprecated in favour of #' the new [across()] function that works within `summarise()` and `mutate()`. #' #' @keywords internal #' @export summarise_each <- function(tbl, funs, ...) { summarise_each_(tbl, funs, enquos(...)) } #' @export #' @rdname summarise_each summarise_each_ <- function(tbl, funs, vars) { lifecycle::deprecate_warn("0.7.0", "summarise_each_()", "across()") if (is_empty(vars)) { vars <- tbl_nongroup_vars(tbl) } else { vars <- compat_lazy_dots(vars, caller_env()) vars <- tidyselect::vars_select(tbl_nongroup_vars(tbl), !!!vars) if (length(vars) == 1 && names(vars) == as_string(vars)) { vars <- unname(vars) } } if (is_character(funs)) { funs <- funs_(funs) } funs <- manip_at(tbl, vars, funs, enquo(funs), caller_env(), .caller = "summarise_each_") summarise(tbl, !!!funs) } #' @export #' @rdname summarise_each mutate_each <- function(tbl, funs, ...) 
{ if (is_character(funs)) { funs <- funs_(funs) } mutate_each_(tbl, funs, enquos(...)) } #' @export #' @rdname summarise_each mutate_each_ <- function(tbl, funs, vars) { lifecycle::deprecate_warn("0.7.0", "mutate_each_()", "across()") if (is_empty(vars)) { vars <- tbl_nongroup_vars(tbl) } else { vars <- compat_lazy_dots(vars, caller_env()) vars <- tidyselect::vars_select(tbl_nongroup_vars(tbl), !!!vars) if (length(vars) == 1 && names(vars) == as_string(vars)) { vars <- unname(vars) } } funs <- manip_at(tbl, vars, funs, enquo(funs), caller_env(), .caller = "mutate_each_") mutate(tbl, !!!funs) } #' @rdname summarise_each #' @export summarize_each <- summarise_each #' @rdname summarise_each #' @export summarize_each_ <- summarise_each_ dplyr/R/doc-methods.R0000644000176200001440000000444214121112104014125 0ustar liggesusers# Adapted from sloop methods_generic <- function(x) { # Return early if generic not defined in global environment. This happens # when the documentation is read before the package is attached, or when # previewing development documentation from RStudio, since it renders the # files in a separate session. 
if (!"package:dplyr" %in% search()) { return(data.frame()) } info <- eval(expr(attr(utils::methods(!!x), "info")), envir = globalenv()) info <- tibble::as_tibble(info, rownames = "method") generic_esc <- gsub("([.\\[])", "\\\\\\1", x) info$class <- gsub(paste0("^", generic_esc, "[.,]"), "", info$method) info$class <- gsub("-method$", "", info$class) info$source <- gsub(paste0(" for ", generic_esc), "", info$from) # Find package methods <- map2( info$generic, info$class, utils::getS3method, optional = TRUE, envir = globalenv() ) envs <- map(methods, environment) info$package <- map_chr(envs, environmentName) # Find help topic, if it exists info$topic <- help_topic(info$method, info$package) # Don't link to self info$topic[info$topic == x] <- NA info[c("generic", "class", "package", "topic", "visible", "source", "isS4")] } methods_rd <- function(x) { methods <- tryCatch(methods_generic(x), error = function(e) data.frame()) if (nrow(methods) == 0) { return("no methods found") } methods <- methods[order(methods$package, methods$class), , drop = FALSE] topics <- unname(split(methods, methods$package)) by_package <- vapply(topics, function(x) { links <- topic_links(x$class, x$package, x$topic) paste0(x$package[[1]], " (", paste0(links, collapse = ", "), ")") }, character(1)) paste0(by_package, collapse = ", ") } topic_links <- function(class, package, topic) { ifelse(is.na(topic), paste0("\\code{", class, "}"), paste0("\\code{\\link[", package, ":", topic, "]{", class, "}}") ) } help_topic <- function(x, pkg) { find_one <- function(topic, pkg) { if (identical(pkg, "")) { return(NA_character_) } path <- system.file("help", "aliases.rds", package = pkg) if (!file.exists(path)) { return(NA_character_) } aliases <- readRDS(path) if (!topic %in% names(aliases)) { return(NA_character_) } aliases[[topic]] } map2_chr(x, pkg, find_one) } dplyr/R/join-cols.R0000644000176200001440000001061314151641776013642 0ustar liggesusersjoin_cols <- function(x_names, y_names, by = NULL, suffix 
= c(".x", ".y"), keep = FALSE, error_call = caller_env()) { check_duplicate_vars(x_names, "x", error_call = error_call) check_duplicate_vars(y_names, "y", error_call = error_call) by <- standardise_join_by(by, x_names = x_names, y_names = y_names, error_call = error_call) suffix <- standardise_join_suffix(suffix, error_call = error_call) x_by <- set_names(match(by$x, x_names), by$x) y_by <- set_names(match(by$y, y_names), by$x) x_loc <- seq_along(x_names) names(x_loc) <- x_names if (!keep) { # in x_out, key variables need to keep the same name, and aux # variables need suffixes for duplicates that appear in y_out y_aux <- setdiff(y_names, c(by$x, if (!keep) by$y)) x_is_aux <- !x_names %in% by$x names(x_loc)[x_is_aux] <- add_suffixes(x_names[x_is_aux], c(by$x, y_aux), suffix$x) } else { # in x_out, key variables and aux variables need suffixes # for duplicates that appear in y_out names(x_loc) <- add_suffixes(x_names, y_names, suffix$x) } y_loc <- seq_along(y_names) names(y_loc) <- add_suffixes(y_names, x_names, suffix$y) if (!keep) { y_loc <- y_loc[!y_names %in% by$y] } # key = named location to use for matching # out = named locations to use in output list( x = list(key = x_by, out = x_loc), y = list(key = y_by, out = y_loc) ) } standardise_join_by <- function(by, x_names, y_names, error_call = caller_env()) { if (is.null(by)) { by <- intersect(x_names, y_names) if (length(by) == 0) { bullets <- c( "`by` must be supplied when `x` and `y` have no common variables.", i = "use by = character()` to perform a cross-join." 
) abort(bullets, call = error_call) } by_quoted <- encodeString(by, quote = '"') if (length(by_quoted) == 1L) { by_code <- by_quoted } else { by_code <- paste0("c(", paste(by_quoted, collapse = ", "), ")") } inform(paste0("Joining, by = ", by_code)) by <- list(x = by, y = by) } else if (is.character(by)) { by_x <- names(by) %||% by by_y <- unname(by) # If x partially named, assume unnamed are the same in both tables by_x[by_x == ""] <- by_y[by_x == ""] by <- list(x = by_x, y = by_y) } else if (is.list(by)) { # TODO: check lengths by <- by[c("x", "y")] } else { msg <- glue("`by` must be a (named) character vector, list, or NULL, not {friendly_type_of(by)}.") abort(msg, call = error_call) } check_join_vars(by$x, x_names, error_call = error_call) check_join_vars(by$y, y_names, error_call = error_call) by } check_join_vars <- function(vars, names, error_call = caller_env()) { if (!is.character(vars)) { abort("join columns must be character vectors.", call = error_call) } na <- is.na(vars) if (any(na)) { bullets <- c( "Join columns must be not NA.", x = glue("Problem at position {err_vars(na)}.") ) abort(bullets, call = error_call) } dup <- duplicated(vars) if (any(dup)) { bullets <- c( "Join columns must be unique.", x = glue("Problem at position {err_vars(dup)}.") ) abort(bullets, call = error_call) } missing <- setdiff(vars, names) if (length(missing) > 0) { bullets <- c( "Join columns must be present in data.", x = glue("Problem with {err_vars(missing)}.") ) abort(bullets, call = error_call) } } check_duplicate_vars <- function(vars, input, error_call = caller_env()) { dup <- duplicated(vars) if (any(dup)) { bullets <- c( glue("Input columns in `{input}` must be unique."), x = glue("Problem with {err_vars(vars[dup])}.") ) abort(bullets, call = error_call) } } standardise_join_suffix <- function(x, error_call = caller_env()) { if (!is.character(x) || length(x) != 2) { bullets <- c( "`suffix` must be a character vector of length 2.", i = glue("`suffix` is 
{friendly_type_of(x)} of length {length(x)}.") ) abort(bullets, call = error_call) } if (any(is.na(x))) { msg <- glue("`suffix` can't be NA.") abort(msg, call = error_call) } list(x = x[[1]], y = x[[2]]) } add_suffixes <- function(x, y, suffix) { if (identical(suffix, "")) { return(x) } out <- rep_along(x, na_chr) for (i in seq_along(x)) { nm <- x[[i]] while (nm %in% y || nm %in% out[seq_len(i - 1)]) { nm <- paste0(nm, suffix) } out[[i]] <- nm } out } dplyr/R/all-equal.r0000644000176200001440000000676114151641776013673 0ustar liggesusers#' Flexible equality comparison for data frames #' #' @description #' `all_equal()` allows you to compare data frames, optionally ignoring #' row and column names. It is questioning as of dplyr 1.0.0, because it #' seems to solve a problem that no longer seems that important. #' #' @param target,current Two data frames to compare. #' @param ignore_col_order Should order of columns be ignored? #' @param ignore_row_order Should order of rows be ignored? #' @param convert Should similar classes be converted? Currently this will #' convert factor to character and integer to double. #' @param ... Ignored. Needed for compatibility with `all.equal()`. #' @return `TRUE` if equal, otherwise a character vector describing #' the reasons why they're not equal. Use [isTRUE()] if using the #' result in an `if` expression. 
#' @export #' @keywords internal #' @examples #' scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))] #' #' # By default, ordering of rows and columns ignored #' all_equal(mtcars, scramble(mtcars)) #' #' # But those can be overriden if desired #' all_equal(mtcars, scramble(mtcars), ignore_col_order = FALSE) #' all_equal(mtcars, scramble(mtcars), ignore_row_order = FALSE) #' #' # By default all_equal is sensitive to variable differences #' df1 <- data.frame(x = "a", stringsAsFactors = FALSE) #' df2 <- data.frame(x = factor("a")) #' all_equal(df1, df2) #' # But you can request dplyr convert similar types #' all_equal(df1, df2, convert = TRUE) all_equal <- function(target, current, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE, ...) { equal_data_frame(target, current, ignore_col_order = ignore_col_order, ignore_row_order = ignore_row_order, convert = convert ) } equal_data_frame <- function(x, y, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE) { compat <- is_compatible_data_frame(x, y, ignore_col_order = ignore_col_order, convert = convert) if (!isTRUE(compat)) { # revert the bulleting from is_compatible_data_frame() return(glue_collapse(compat, sep = "\n")) } nrows_x <- nrow(x) nrows_y <- nrow(y) if (nrows_x != nrows_y) { return("Different number of rows.") } if (ncol(x) == 0L) { return(TRUE) } # suppressMessages({ x <- as_tibble(x, .name_repair = "universal") y <- as_tibble(y, .name_repair = "universal") # }) x_split <- vec_split_id_order(x) y_split <- vec_split_id_order(y[, names(x), drop = FALSE]) # keys must be identical msg <- "" if (any(wrong <- !vec_in(x_split$key, y_split$key))) { rows <- sort(map_int(x_split$loc[which(wrong)], function(.x) .x[1L])) msg <- paste0(msg, "- Rows in x but not in y: ", glue_collapse(rows, sep = ", "), "\n") } if (any(wrong <- !vec_in(y_split$key, x_split$key))) { rows <- sort(map_int(y_split$loc[which(wrong)], function(.x) .x[1L])) msg <- paste0(msg, "- Rows in y but not in x: ", 
glue_collapse(rows, sep = ", "), "\n") } if (msg != "") { return(msg) } # keys are identical, check that rows occur the same number of times if (any(wrong <- lengths(x_split$loc) != lengths(y_split$loc))) { rows <- sort(map_int(x_split$loc[which(wrong)], function(.x) .x[1L])) return(paste0("- Rows with difference occurences in x and y: ", glue_collapse(rows, sep = ", "), "\n" )) } # then if we care about row order, the id need to be identical if (!ignore_row_order && !all(vec_equal(x_split$loc, y_split$loc))) { return("Same row values, but different order") } TRUE } dplyr/R/grouped-df.r0000644000176200001440000002022514151641776014041 0ustar liggesusers#' A grouped data frame. #' #' @description #' The easiest way to create a grouped data frame is to call the `group_by()` #' method on a data frame or tbl: this will take care of capturing #' the unevaluated expressions for you. #' #' These functions are designed for programmatic use. For data analysis #' purposes see [group_data()] for the accessor functions that retrieve #' various metadata from a grouped data frames. #' #' @keywords internal #' @param data a tbl or data frame. #' @param vars A character vector. #' @param drop When `.drop = TRUE`, empty groups are dropped. 
#' #' @export grouped_df <- function(data, vars, drop = group_by_drop_default(data)) { if (!is.data.frame(data)) { abort("`data` must be a data frame.") } if (!is.character(vars)) { abort("`vars` must be a character vector.") } if (length(vars) == 0) { as_tibble(data) } else { groups <- compute_groups(data, vars, drop = drop) new_grouped_df(data, groups) } } compute_groups <- function(data, vars, drop = FALSE) { unknown <- setdiff(vars, names(data)) if (length(unknown) > 0) { vars <- paste0(encodeString(vars, quote = "`"), collapse = ", ") abort(glue("`vars` missing from `data`: {vars}.")) } # Only train the dictionary based on selected columns group_vars <- as_tibble(data)[vars] split_key_loc <- vec_split_id_order(group_vars) old_keys <- split_key_loc$key old_rows <- split_key_loc$loc signal("", class = "dplyr_regroup") groups <- tibble(!!!old_keys, ".rows" := old_rows) if (!isTRUE(drop) && any(map_lgl(old_keys, is.factor))) { # Extra work is needed to auto expand empty groups uniques <- map(old_keys, function(.) { if (is.factor(.)) . else vec_unique(.) }) # Internally we only work with integers # # so for any grouping column that is not a factor # we need to match the values to the unique values positions <- map2(old_keys, uniques, function(.x, .y) { if (is.factor(.x)) .x else vec_match(.x, .y) }) # Expand groups internally adds empty groups recursively # we get back: # - indices: a list of how to vec_slice the current keys # to get the new keys # # - rows: the new list of rows (i.e. 
the same as old rows, # but with some extra empty integer(0) added for empty groups) expanded <- expand_groups(groups, positions, vec_size(old_keys)) new_indices <- expanded$indices new_rows <- expanded$rows # Make the new keys from the old keys and the new_indices new_keys <- pmap(list(old_keys, new_indices, uniques), function(key, index, unique) { if (is.factor(key)) { if (is.ordered(key)) { new_ordered(index, levels = levels(key)) } else { new_factor(index, levels = levels(key)) } } else { vec_slice(unique, index) } }) names(new_keys) <- vars groups <- tibble(!!!new_keys, ".rows" := new_rows) } attr(groups, ".drop") <- drop groups } count_regroups <- function(code) { i <- 0 withCallingHandlers(code, dplyr_regroup = function(cnd) { i <<- i + 1 }) i } show_regroups <- function(code) { withCallingHandlers(code, dplyr_regroup = function(cnd) { cat("Regrouping...\n") }) } #' Low-level construction and validation for the grouped_df and rowwise_df classes #' #' `new_grouped_df()` and `new_rowwise_df()` are constructors designed to be high-performance so only #' check types, not values. This means it is the caller's responsibility #' to create valid values, and hence this is for expert use only. #' #' @param x A data frame #' @param groups The grouped structure, `groups` should be a data frame. #' Its last column should be called `.rows` and be #' a list of 1 based integer vectors that all are between 1 and the number of rows of `.data`. #' @param class additional class, will be prepended to canonical classes. #' @param ... 
additional attributes #' #' @examples #' # 5 bootstrap samples #' tbl <- new_grouped_df( #' tibble(x = rnorm(10)), #' groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) #' ) #' # mean of each bootstrap sample #' summarise(tbl, x = mean(x)) #' #' @importFrom tibble new_tibble #' @keywords internal #' @export new_grouped_df <- function(x, groups, ..., class = character()) { if (!is.data.frame(x)) { abort("`x` must be a data frame.") } if (!is.data.frame(groups)) { abort("`groups` must be a data frame") } if (tail(names(groups), 1L) != ".rows") { abort('The last column of `groups` must be called ".rows".') } new_tibble( x, groups = groups, ..., nrow = NROW(x), class = c(class, "grouped_df") ) } #' @description #' `validate_grouped_df()` and `validate_rowwise_df()` validate the attributes #' of a `grouped_df` or a `rowwise_df`. #' #' @param check_bounds whether to check all indices for out of bounds problems in `grouped_df` objects #' @rdname new_grouped_df #' @export validate_grouped_df <- function(x, check_bounds = FALSE) { if (is.null(attr(x, "groups")) && !is.null(attr(x, "vars"))) { bullets <- c( "Corrupt `grouped_df` using old (< 0.8.0) format.", i = "Strip off old grouping with `ungroup()`." ) abort(bullets) } result <- .Call(`dplyr_validate_grouped_df`, x, check_bounds) if (!is.null(result)) { abort(result) } x } setOldClass(c("grouped_df", "tbl_df", "tbl", "data.frame")) #' @rdname grouped_df #' @export is.grouped_df <- function(x) inherits(x, "grouped_df") #' @rdname grouped_df #' @export is_grouped_df <- is.grouped_df group_sum <- function(x) { grps <- n_groups(x) paste0(commas(group_vars(x)), " [", big_mark(grps), "]") } #' @export tbl_sum.grouped_df <- function(x, ...) { c( NextMethod(), c("Groups" = group_sum(x)) ) } #' @export as.data.frame.grouped_df <- function(x, row.names = NULL, optional = FALSE, ...) { new_data_frame(dplyr_vec_data(x), n = nrow(x)) } #' @export as_tibble.grouped_df <- function(x, ...) 
{ new_tibble(dplyr_vec_data(x), nrow = nrow(x)) } #' @importFrom tibble is_tibble #' @export `[.grouped_df` <- function(x, i, j, drop = FALSE) { out <- NextMethod() if (!is.data.frame(out)) { return(out) } if (drop) { as_tibble(out) } else { groups <- group_intersect(x, out) if ((missing(i) || nargs() == 2) && identical(groups, group_vars(x))) { new_grouped_df(out, group_data(x)) } else { grouped_df(out, groups, group_by_drop_default(x)) } } } #' @export `$<-.grouped_df` <- function(x, name, ..., value) { out <- NextMethod() if (name %in% group_vars(x)) { grouped_df(out, group_intersect(x, out), group_by_drop_default(x)) } else { out } } #' @export `[<-.grouped_df` <- function(x, i, j, ..., value) { out <- NextMethod() grouped_df(out, group_intersect(x, out), group_by_drop_default(x)) } #' @export `[[<-.grouped_df` <- function(x, ..., value) { out <- NextMethod() grouped_df(out, group_intersect(x, out), group_by_drop_default(x)) } #' @export `names<-.grouped_df` <- function(x, value) { data <- as.data.frame(x) names(data) <- value groups <- group_data(x) group_loc <- match(intersect(names(groups), names(x)), names(x)) group_names <- c(value[group_loc], ".rows") if (!identical(group_names, names(groups))) { names(groups) <- c(value[group_loc], ".rows") } new_grouped_df(data, groups) } #' @method rbind grouped_df #' @export rbind.grouped_df <- function(...) { bind_rows(...) } #' @method cbind grouped_df #' @export cbind.grouped_df <- function(...) { bind_cols(...) 
} group_data_trim <- function(group_data, preserve = FALSE) { if (preserve) { return(group_data) } non_empty <- lengths(group_data$".rows") > 0 group_data[non_empty, , drop = FALSE] } # Helpers ----------------------------------------------------------------- expand_groups <- function(old_groups, positions, nr) { .Call(`dplyr_expand_groups`, old_groups, positions, nr) } vec_split_id_order <- function(x) { split_id <- vec_group_loc(x) split_id$loc <- new_list_of(split_id$loc, ptype = integer()) vec_slice(split_id, vec_order(split_id$key)) } group_intersect <- function(x, new) { intersect(group_vars(x), names(new)) } dplyr/R/slice.R0000644000176200001440000003433314177154670013051 0ustar liggesusers#' Subset rows using their positions #' #' @description #' `slice()` lets you index rows by their (integer) locations. It allows you #' to select, remove, and duplicate rows. It is accompanied by a number of #' helpers for common use cases: #' #' * `slice_head()` and `slice_tail()` select the first or last rows. #' * `slice_sample()` randomly selects rows. #' * `slice_min()` and `slice_max()` select rows with highest or lowest values #' of a variable. #' #' If `.data` is a [grouped_df], the operation will be performed on each group, #' so that (e.g.) `slice_head(df, n = 5)` will select the first five rows in #' each group. #' #' @details #' Slice does not work with relational databases because they have no #' intrinsic notion of row order. If you want to perform the equivalent #' operation, use [filter()] and [row_number()]. #' #' @family single table verbs #' @inheritParams arrange #' @inheritParams filter #' @param ... For `slice()`: <[`data-masking`][dplyr_data_masking]> Integer row #' values. #' #' Provide either positive values to keep, or negative values to drop. #' The values provided must be either all positive or all negative. #' Indices beyond the number of rows in the input are silently ignored. 
#' #' For `slice_helpers()`, these arguments are passed on to methods. #' #' @param n,prop Provide either `n`, the number of rows, or `prop`, the #' proportion of rows to select. If neither are supplied, `n = 1` will be #' used. #' #' If a negative value of `n` or `prop` is provided, the specified number or #' proportion of rows will be removed. #' #' If `n` is greater than the number of rows in the group (or `prop > 1`), #' the result will be silently truncated to the group size. If the #' `prop`ortion of a group size does not yield an integer number of rows, the #' absolute value of `prop*nrow(.data)` is rounded down. #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Each row may appear 0, 1, or many times in the output. #' * Columns are not modified. #' * Groups are not modified. #' * Data frame attributes are preserved. #' @section Methods: #' These function are **generic**s, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' Methods available in currently loaded packages: #' #' * `slice()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. #' * `slice_head()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}. #' * `slice_tail()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}. #' * `slice_min()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}. #' * `slice_max()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}. #' * `slice_sample()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}. 
#' @export #' @examples #' mtcars %>% slice(1L) #' # Similar to tail(mtcars, 1): #' mtcars %>% slice(n()) #' mtcars %>% slice(5:n()) #' # Rows can be dropped with negative indices: #' slice(mtcars, -(1:4)) #' #' # First and last rows based on existing order #' mtcars %>% slice_head(n = 5) #' mtcars %>% slice_tail(n = 5) #' #' # Rows with minimum and maximum values of a variable #' mtcars %>% slice_min(mpg, n = 5) #' mtcars %>% slice_max(mpg, n = 5) #' #' # slice_min() and slice_max() may return more rows than requested #' # in the presence of ties. Use with_ties = FALSE to suppress #' mtcars %>% slice_min(cyl, n = 1) #' mtcars %>% slice_min(cyl, n = 1, with_ties = FALSE) #' #' # slice_sample() allows you to random select with or without replacement #' mtcars %>% slice_sample(n = 5) #' mtcars %>% slice_sample(n = 5, replace = TRUE) #' #' # you can optionally weight by a variable - this code weights by the #' # physical weight of the cars, so heavy cars are more likely to get #' # selected #' mtcars %>% slice_sample(weight_by = wt, n = 5) #' #' # Group wise operation ---------------------------------------- #' df <- tibble( #' group = rep(c("a", "b", "c"), c(1, 2, 4)), #' x = runif(7) #' ) #' #' # All slice helpers operate per group, silently truncating to the group #' # size, so the following code works without error #' df %>% group_by(group) %>% slice_head(n = 2) #' #' # When specifying the proportion of rows to include non-integer sizes #' # are rounded down, so group a gets 0 rows #' df %>% group_by(group) %>% slice_head(prop = 0.5) #' #' # Filter equivalents -------------------------------------------- #' # slice() expressions can often be written to use `filter()` and #' # `row_number()`, which can also be translated to SQL. For many databases, #' # you'll need to supply an explicit variable to use to compute the row number. 
#' filter(mtcars, row_number() == 1L) #' filter(mtcars, row_number() == n()) #' filter(mtcars, between(row_number(), 5, n())) slice <- function(.data, ..., .preserve = FALSE) { UseMethod("slice") } #' @export slice.data.frame <- function(.data, ..., .preserve = FALSE) { loc <- slice_rows(.data, ..., caller_env = caller_env(), error_call = current_env()) dplyr_row_slice(.data, loc, preserve = .preserve) } #' @export #' @rdname slice slice_head <- function(.data, ..., n, prop) { UseMethod("slice_head") } #' @export slice_head.data.frame <- function(.data, ..., n, prop) { check_slice_dots(..., n = n, prop = prop) size <- get_slice_size(n = n, prop = prop) idx <- function(n) { to <- size(n) if (to > n) { to <- n } seq2(1, to) } dplyr_local_error_call() slice(.data, idx(dplyr::n())) } #' @export #' @rdname slice slice_tail <- function(.data, ..., n, prop) { UseMethod("slice_tail") } #' @export slice_tail.data.frame <- function(.data, ..., n, prop) { check_slice_dots(..., n = n, prop = prop) size <- get_slice_size(n = n, prop = prop) idx <- function(n) { from <- n - size(n) + 1 if (from < 1L) { from <- 1L } seq2(from, n) } dplyr_local_error_call() slice(.data, idx(dplyr::n())) } #' @export #' @rdname slice #' @param order_by Variable or function of variables to order by. #' @param with_ties Should ties be kept together? The default, `TRUE`, #' may return more rows than you request. Use `FALSE` to ignore ties, #' and return the first `n` rows. 
slice_min <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { UseMethod("slice_min") } #' @export slice_min.data.frame <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { check_required(order_by) check_slice_dots(..., n = n, prop = prop) size <- get_slice_size(n = n, prop = prop) if (with_ties) { idx <- function(x, n) head(order(x), smaller_ranks(x, size(n))) } else { idx <- function(x, n) head(order(x), size(n)) } dplyr_local_error_call() slice(.data, local({ order_by <- {{ order_by }} n <- dplyr::n() x <- fix_call(vec_assert(order_by, size = n, arg = "order_by"), NULL) idx(x, n) })) } #' @export #' @rdname slice slice_max <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { UseMethod("slice_max") } #' @export slice_max.data.frame <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { check_required(order_by) check_slice_dots(..., n = n, prop = prop) size <- get_slice_size(n = n, prop = prop) if (with_ties) { idx <- function(x, n) head( order(x, decreasing = TRUE), smaller_ranks(desc(x), size(n)) ) } else { idx <- function(x, n) head(order(x, decreasing = TRUE), size(n)) } dplyr_local_error_call() slice(.data, local({ order_by <- {{ order_by }} n <- dplyr::n() order_by <- fix_call(vec_assert(order_by, size = n, arg = "order_by"), NULL) idx(order_by, n) })) } #' @export #' @rdname slice #' @param replace Should sampling be performed with (`TRUE`) or without #' (`FALSE`, the default) replacement. #' @param weight_by Sampling weights. This must evaluate to a vector of #' non-negative numbers the same length as the input. Weights are #' automatically standardised to sum to 1. 
slice_sample <- function(.data, ..., n, prop, weight_by = NULL, replace = FALSE) { UseMethod("slice_sample") } #' @export slice_sample.data.frame <- function(.data, ..., n, prop, weight_by = NULL, replace = FALSE) { check_slice_dots(..., n = n, prop = prop) size <- get_slice_size(n = n, prop = prop, allow_negative = FALSE) dplyr_local_error_call() slice(.data, local({ weight_by <- {{ weight_by }} n <- dplyr::n() if (!is.null(weight_by)) { weight_by <- fix_call(vec_assert(weight_by, size = n, arg = "weight_by"), NULL) } sample_int(n, size(n), replace = replace, wt = weight_by) })) } # helpers ----------------------------------------------------------------- slice_rows <- function(.data, ..., caller_env, error_call = caller_env()) { error_call <- dplyr_error_call(error_call) dots <- enquos(...) if (is_empty(dots)) { return(TRUE) } mask <- DataMask$new(.data, caller_env, "slice", error_call = error_call) on.exit(mask$forget(), add = TRUE) chunks <- slice_eval(mask, dots, error_call = error_call) slice_indices <- slice_combine(chunks, mask = mask, error_call = error_call) vec_c(!!!slice_indices, .ptype = integer()) } is_slice_call <- function(error_call) { is_slice <- TRUE if (is_environment(error_call) && !identical(error_call$.Generic, "slice")) { is_slice <- FALSE } is_slice } slice_eval <- function(mask, dots, error_call = caller_env()) { index <- 0L impl <- function(...) { n <- ...length2() out <- vector("list", n) for (i in seq_len(n)) { index <<- i out[[i]] <- ...elt2(i) } index <<- 0L fix_call( vec_c(!!!out), call = NULL ) } withCallingHandlers( mask$eval_all(quo(impl(!!!dots))), error = function(cnd) { if (index && is_slice_call(error_call)) { local_error_context(dots = dots, .index = index, mask = mask) header <- cnd_bullet_header("evaluating") } else { header <- "Problem while computing indices." 
} bullets <- c(header, i = cnd_bullet_cur_group_label()) abort(bullets, call = error_call, parent = cnd) } ) } slice_combine <- function(chunks, mask, error_call = caller_env()) { rows <- mask$get_rows() slice_indices <- new_list(length(rows)) withCallingHandlers( for (group in seq_along(rows)) { current_rows <- rows[[group]] res <- chunks[[group]] if (is.logical(res) && all(is.na(res))) { res <- integer() } else if (is.numeric(res)) { if (is.matrix(res) && ncol(res) == 1) { res <- as.vector(res) } res <- fix_call(vec_cast(res, integer()), NULL) } else { bullets <- c( glue("Invalid result of type <{vec_ptype_full(res)}>."), i = "Indices must be positive or negative integers." ) abort(bullets, call = NULL) } if (length(res) == 0L) { # nothing to do } else if (all(res >= 0, na.rm = TRUE)) { res <- res[!is.na(res) & res <= length(current_rows) & res > 0] } else if (all(res <= 0, na.rm = TRUE)) { res <- setdiff(seq_along(current_rows), -res) } else { mask$set_current_group(group) n_positive <- sum(res >= 0, na.rm = TRUE) n_negative <- sum(res <= 0, na.rm = TRUE) bullets <- c( "Indices must be all positive or all negative.", i = glue("Got {n_positive} positives, {n_negative} negatives.") ) abort(bullets, call = NULL) } slice_indices[[group]] <- current_rows[res] }, error = function(cnd) { mask$set_current_group(group) bullets <- c( "Problem while computing indices.", i = cnd_bullet_cur_group_label() ) abort(bullets, call = error_call, parent = cnd) }) slice_indices } check_constant <- function(x, name, error_call = caller_env()) { withCallingHandlers(force(x), error = function(e) { bullets <- c( glue("`{name}` must be a constant.") ) abort(bullets, parent = e, call = error_call) }) } check_slice_dots <- function(..., n, prop, error_call = caller_env()) { # special case to capture e.g. slice_head(2) if (missing(n) && missing(prop)) { # capture as quosure so that we can label dots <- enquos(...) 
if (length(dots) == 1L && names2(dots)[1] == "") { slice_call <- error_call$.Generic bullets <- c( "`n` must be explicitly named.", i = glue("Did you mean `{slice_call}(n = {as_label(dots[[1]])})`?") ) abort(bullets, call = error_call) } } # otherwise, we have either `n` or `prop` so ... must be empty check_dots_empty(call = error_call) } check_slice_n_prop <- function(n, prop, error_call = caller_env()) { if (missing(n) && missing(prop)) { list(type = "n", n = 1L) } else if (!missing(n) && missing(prop)) { n <- check_constant(n, "n", error_call = error_call) if (!is.numeric(n) || length(n) != 1 || is.na(n)) { abort("`n` must be a single number.", call = error_call) } list(type = "n", n = n) } else if (!missing(prop) && missing(n)) { prop <- check_constant(prop, "prop", error_call = error_call) if (!is.numeric(prop) || length(prop) != 1 || is.na(prop)) { abort("`prop` must be a single number.", call = error_call) } list(type = "prop", prop = prop) } else { abort("Must supply `n` or `prop`, but not both.", call = error_call) } } get_slice_size <- function(n, prop, allow_negative = TRUE, error_call = caller_env()) { slice_input <- check_slice_n_prop(n, prop, error_call = error_call) if (slice_input$type == "n") { if (slice_input$n > 0) { function(n) floor(slice_input$n) } else if (allow_negative) { function(n) ceiling(n + slice_input$n) } else { abort("`n` must be positive.", call = error_call) } } else if (slice_input$type == "prop") { if (slice_input$prop > 0) { function(n) floor(slice_input$prop * n) } else if (allow_negative) { function(n) ceiling(n + slice_input$prop * n) } else { abort("`prop` must be positive.", call = error_call) } } } sample_int <- function(n, size, replace = FALSE, wt = NULL) { if (size == 0L) { integer(0) } else { sample.int(n, size, prob = wt, replace = replace) } } smaller_ranks <- function(x, y) { sum(min_rank(x) <= y, na.rm = TRUE) } dplyr/R/case_when.R0000644000176200001440000001653114176714177013711 0ustar liggesusers#' A general 
vectorised if #' #' This function allows you to vectorise multiple [if_else()] #' statements. It is an R equivalent of the SQL `CASE WHEN` statement. #' If no cases match, `NA` is returned. #' #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A sequence of two-sided formulas. The left hand side (LHS) #' determines which values match this case. The right hand side (RHS) #' provides the replacement value. #' #' The LHS must evaluate to a logical vector. The RHS does not need to be #' logical, but all RHSs must evaluate to the same type of vector. #' #' Both LHS and RHS may have the same length of either 1 or `n`. The #' value of `n` must be consistent across all cases. The case of #' `n == 0` is treated as a variant of `n != 1`. #' #' `NULL` inputs are ignored. #' @export #' @return A vector of length 1 or `n`, matching the length of the logical #' input or output vectors, with the type (and attributes) of the first #' RHS. Inconsistent lengths or types will generate an error. #' @examples #' x <- 1:50 #' case_when( #' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' TRUE ~ as.character(x) #' ) #' #' # Like an if statement, the arguments are evaluated in order, so you must #' # proceed from the most specific to the most general. This won't work: #' case_when( #' TRUE ~ as.character(x), #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' x %% 35 == 0 ~ "fizz buzz" #' ) #' #' # If none of the cases match, NA is used: #' case_when( #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' x %% 35 == 0 ~ "fizz buzz" #' ) #' #' # Note that NA values in the vector x do not get special treatment. If you want #' # to explicitly handle NA values you can use the `is.na` function: #' x[2:4] <- NA_real_ #' case_when( #' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' is.na(x) ~ "nope", #' TRUE ~ as.character(x) #' ) #' #' # All RHS values need to be of the same type. Inconsistent types will throw an error. 
#' # This applies also to NA values used in RHS: NA is logical, use #' # typed values like NA_real_, NA_complex, NA_character_, NA_integer_ as appropriate. #' case_when( #' x %% 35 == 0 ~ NA_character_, #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' TRUE ~ as.character(x) #' ) #' case_when( #' x %% 35 == 0 ~ 35, #' x %% 5 == 0 ~ 5, #' x %% 7 == 0 ~ 7, #' TRUE ~ NA_real_ #' ) #' #' # case_when() evaluates all RHS expressions, and then constructs its #' # result by extracting the selected (via the LHS expressions) parts. #' # In particular NaNs are produced in this case: #' y <- seq(-2, 2, by = .5) #' case_when( #' y >= 0 ~ sqrt(y), #' TRUE ~ y #' ) #' #' # This throws an error as NA is logical not numeric #' try(case_when( #' x %% 35 == 0 ~ 35, #' x %% 5 == 0 ~ 5, #' x %% 7 == 0 ~ 7, #' TRUE ~ NA #' )) #' #' # case_when is particularly useful inside mutate when you want to #' # create a new variable that relies on a complex combination of existing #' # variables #' starwars %>% #' select(name:mass, gender, species) %>% #' mutate( #' type = case_when( #' height > 200 | mass > 200 ~ "large", #' species == "Droid" ~ "robot", #' TRUE ~ "other" #' ) #' ) #' #' #' # `case_when()` is not a tidy eval function. If you'd like to reuse #' # the same patterns, extract the `case_when()` call in a normal #' # function: #' case_character_type <- function(height, mass, species) { #' case_when( #' height > 200 | mass > 200 ~ "large", #' species == "Droid" ~ "robot", #' TRUE ~ "other" #' ) #' } #' #' case_character_type(150, 250, "Droid") #' case_character_type(150, 150, "Droid") #' #' # Such functions can be used inside `mutate()` as well: #' starwars %>% #' mutate(type = case_character_type(height, mass, species)) %>% #' pull(type) #' #' # `case_when()` ignores `NULL` inputs. This is useful when you'd #' # like to use a pattern only under certain conditions. 
Here we'll #' # take advantage of the fact that `if` returns `NULL` when there is #' # no `else` clause: #' case_character_type <- function(height, mass, species, robots = TRUE) { #' case_when( #' height > 200 | mass > 200 ~ "large", #' if (robots) species == "Droid" ~ "robot", #' TRUE ~ "other" #' ) #' } #' #' starwars %>% #' mutate(type = case_character_type(height, mass, species, robots = FALSE)) %>% #' pull(type) case_when <- function(...) { fs <- compact_null(list2(...)) n <- length(fs) error_call <- current_env() if (n == 0) { abort("No cases provided.", call = error_call) } query <- vector("list", n) value <- vector("list", n) default_env <- caller_env() quos_pairs <- map2( fs, seq_along(fs), validate_formula, default_env = default_env, dots_env = current_env(), error_call = error_call ) for (i in seq_len(n)) { pair <- quos_pairs[[i]] query[[i]] <- eval_tidy(pair$lhs, env = default_env) value[[i]] <- eval_tidy(pair$rhs, env = default_env) if (!is.logical(query[[i]])) { abort_case_when_logical(pair$lhs, i, query[[i]], error_call = error_call) } } m <- validate_case_when_length(query, value, fs, error_call = error_call) out <- value[[1]][rep(NA_integer_, m)] replaced <- rep(FALSE, m) for (i in seq_len(n)) { out <- replace_with(out, query[[i]] & !replaced, value[[i]], NULL, error_call = error_call) replaced <- replaced | (query[[i]] & !is.na(query[[i]])) } out } validate_formula <- function(x, i, default_env, dots_env, error_call = caller_env()) { # Formula might be quosured if (is_quosure(x)) { default_env <- quo_get_env(x) x <- quo_get_expr(x) } if (!is_formula(x)) { arg <- substitute(...(), dots_env)[[1]] abort_case_when_formula(arg, i, x, error_call = error_call) } if (is_null(f_lhs(x))) { abort("Formulas must be two-sided.", call = error_call) } # Formula might be unevaluated, e.g. 
if it's been quosured env <- f_env(x) %||% default_env list( lhs = new_quosure(f_lhs(x), env), rhs = new_quosure(f_rhs(x), env) ) } abort_case_when_formula <- function(arg, i, obj, error_call = caller_env()) { deparsed <- fmt_obj1(deparse_trunc(arg)) type <- friendly_type_of(obj) msg <- glue("Case {i} ({deparsed}) must be a two-sided formula, not {type}.") abort(msg, call = error_call) } abort_case_when_logical <- function(lhs, i, query, error_call = caller_env()) { deparsed <- fmt_obj1(deparse_trunc(quo_squash(lhs))) type <- friendly_type_of(query) msg <- glue("LHS of case {i} ({deparsed}) must be a logical vector, not {type}.") abort(msg, call = error_call) } validate_case_when_length <- function(query, value, fs, error_call = caller_env()) { lhs_lengths <- lengths(query) rhs_lengths <- lengths(value) all_lengths <- unique(c(lhs_lengths, rhs_lengths)) if (length(all_lengths) <= 1) { return(all_lengths[[1]]) } non_atomic_lengths <- all_lengths[all_lengths != 1] len <- non_atomic_lengths[[1]] if (length(non_atomic_lengths) == 1) { return(len) } inconsistent_lengths <- non_atomic_lengths[-1] lhs_problems <- lhs_lengths %in% inconsistent_lengths rhs_problems <- rhs_lengths %in% inconsistent_lengths problems <- lhs_problems | rhs_problems check_length_val(inconsistent_lengths, len, header = fmt_calls(fs[problems]), error_call = error_call) } dplyr/R/pull.R0000644000176200001440000000422714176476356012734 0ustar liggesusers#' Extract a single column #' #' `pull()` is similar to `$`. It's mostly useful because it looks a little #' nicer in pipes, it also works with remote data frames, and it can optionally #' name the output. #' #' @inheritParams arrange #' @inheritParams tidyselect::vars_pull #' @param name An optional parameter that specifies the column to be used #' as names for a named vector. Specified in a similar manner as \code{var}. #' @param ... For use by methods. #' @return A vector the same size as `.data`. 
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}.
#' @export
#' @examples
#' mtcars %>% pull(-1)
#' mtcars %>% pull(1)
#' mtcars %>% pull(cyl)
#' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)
#'
#' # Also works for remote sources
#' df <- dbplyr::memdb_frame(x = 1:10, y = 10:1, .name = "pull-ex")
#' df %>%
#'   mutate(z = x * y) %>%
#'   pull()
#' @examples
#'
#' # Pull a named vector
#' starwars %>% pull(height, name)
pull <- function(.data, var = -1, name = NULL, ...) {
  check_dots_used()
  UseMethod("pull")
}

#' @export
pull.data.frame <- function(.data, var = -1, name = NULL, ...) {
  # Resolve `var` (a bare name, string, or positive/negative position) to a
  # single column name.
  var <- tidyselect::vars_pull(names(.data), !!enquo(var))
  name <- enquo(name)
  if (quo_is_null(name)) {
    # No `name` column requested: return the bare column vector.
    return(.data[[var]])
  }
  # `name` selects a second column whose values become the names of the result.
  name <- tidyselect::vars_pull(names(.data), !!name)
  set_names(.data[[var]], nm = .data[[name]])
}

# Resolve a quoted column selector to a column name. `expr` is evaluated in a
# data mask where each column name is bound to its position, so both names and
# (possibly negative, i.e. counted from the right) positions are supported.
find_var <- function(expr, vars) {
  var_env <- set_names(as.list(seq_along(vars)), vars)
  var <- eval_tidy(expr, var_env)
  if (!is.numeric(var) || length(var) != 1) {
    abort("`var` must evaluate to a single number.")
  }
  var <- as.integer(var)
  n <- length(vars)
  if (is.na(var) || abs(var) > n || var == 0L) {
    # glue() is required here: without it the `{-n}`, `{n}` and `{var}`
    # placeholders were emitted literally instead of being interpolated.
    msg <- glue("`var` must be a value between {-n} and {n} (excluding zero), not {var}.")
    abort(msg)
  }
  if (var < 0) {
    # Negative positions count from the right.
    var <- var + n + 1
  }
  vars[[var]]
}
dplyr/R/near.R0000644000176200001440000000066214121112104012650 0ustar liggesusers#' Compare two numeric vectors
#'
#' This is a safe way of comparing if two vectors of floating point numbers
#' are (pairwise) equal. This is safer than using `==`, because it has
#' a built in tolerance
#'
#' @param x,y Numeric vectors to compare
#' @param tol Tolerance of comparison.
#' @export #' @examples #' sqrt(2) ^ 2 == 2 #' near(sqrt(2) ^ 2, 2) near <- function(x, y, tol = .Machine$double.eps^0.5) { abs(x - y) < tol } dplyr/R/helper-error-msg.R0000644000176200001440000000015714121112104015110 0ustar liggesuserscapture_error_msg <- function(expr) { cat(rlang::catch_cnd(expr, classes = "error")$message, "\n", sep="") } dplyr/R/defunct.r0000644000176200001440000000075514151641776013443 0ustar liggesusers#' Defunct functions #' #' `r lifecycle::badge("deprecated")` #' #' Executing these functions will tell you which function replaces them. #' #' @keywords internal #' @name defunct NULL #' @export #' @rdname defunct id <- function(.variables, drop = FALSE) { lifecycle::deprecate_stop("0.5.0", "id()", "vctrs::vec_group_id()") } #' @export #' @rdname defunct failwith <- function(default = NULL, f, quiet = FALSE) { lifecycle::deprecate_stop("0.7.0", "failwith()", "purrr::possibly()") } dplyr/R/deprec-funs.R0000644000176200001440000000436314151641776014165 0ustar liggesusers#' Create a list of function calls #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `funs()` is deprecated; please use `list()` instead. We deprecated this #' function because it provided a unique way of specifying anonymous functions, #' rather than adopting the conventions used by purrr and other packages #' in the tidyverse. #' #' @param ... <[`data-masking`][dplyr_data_masking]> A list of functions #' specified by: #' #' - Their name, `"mean"` #' - The function itself, `mean` #' - A call to the function with `.` as a dummy argument, #' `mean(., na.rm = TRUE)` #' #' The following notations are **not** supported, see examples: #' #' - An anonymous function, `function(x) mean(x, na.rm = TRUE)` #' - An anonymous function in \pkg{purrr} notation, `~mean(., na.rm = TRUE)` #' #' @param .args,args A named list of additional arguments to be added to all #' function calls. 
As `funs()` is being deprecated, use other methods to #' supply arguments: `...` argument in [scoped verbs][summarise_at()] or make #' own functions with [purrr::partial()]. #' @export #' @keywords internal #' @examples #' funs("mean", mean(., na.rm = TRUE)) #' # -> #' list(mean = mean, mean = ~ mean(.x, na.rm = TRUE)) #' #' funs(m1 = mean, m2 = "mean", m3 = mean(., na.rm = TRUE)) #' # -> #' list(m1 = mean, m2 = "mean", m3 = ~ mean(.x, na.rm = TRUE)) funs <- function(..., .args = list()) { lifecycle::deprecate_warn("0.8.0", "funs()", details = paste_line( "Please use a list of either functions or lambdas: ", "", " # Simple named list: ", " list(mean = mean, median = median)", "", " # Auto named with `tibble::lst()`: ", " tibble::lst(mean, median)", "", " # Using lambdas", " list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))" )) dots <- enquos(...) default_env <- caller_env() error_call <- current_env() funs <- map(dots, function(quo) as_fun(quo, default_env, .args, error_call = error_call)) new_funs(funs) } new_funs <- function(funs) { attr(funs, "have_name") <- any(names2(funs) != "") # Workaround until rlang:::label() is exported temp <- map(funs, function(fn) node_car(quo_get_expr(fn))) temp <- exprs_auto_name(temp) names(funs) <- names(temp) class(funs) <- "fun_list" funs } dplyr/R/progress.R0000644000176200001440000001026714151641776013616 0ustar liggesusers#' Progress bar with estimated time. #' #' @description #' `r lifecycle::badge("deprecated")` #' #' This progress bar has been deprecated since providing progress bars is not #' the responsibility of dplyr. Instead, you might try the more powerful #' [progress](https://github.com/r-lib/progress) package. #' #' This reference class represents a text progress bar displayed estimated #' time remaining. When finished, it displays the total duration. The #' automatic progress bar can be disabled by setting option #' `dplyr.show_progress` to `FALSE`. 
#' #' @param n Total number of items #' @param min_time Progress bar will wait until at least `min_time` #' seconds have elapsed before displaying any results. #' @return A ref class with methods `tick()`, `print()`, #' `pause()`, and `stop()`. #' @keywords internal #' @export #' @examples #' p <- progress_estimated(3) #' p$tick() #' p$tick() #' p$tick() #' #' p <- progress_estimated(3) #' for (i in 1:3) p$pause(0.1)$tick()$print() #' #' p <- progress_estimated(3) #' p$tick()$print()$ #' pause(1)$stop() #' #' # If min_time is set, progress bar not shown until that many #' # seconds have elapsed #' p <- progress_estimated(3, min_time = 3) #' for (i in 1:3) p$pause(0.1)$tick()$print() #' #' \dontrun{ #' p <- progress_estimated(10, min_time = 3) #' for (i in 1:10) p$pause(0.5)$tick()$print() #' } progress_estimated <- function(n, min_time = 0) { lifecycle::deprecate_soft("1.0.0", "dplyr::progress_estimated()") Progress$new(n, min_time = min_time) } #' @importFrom R6 R6Class Progress <- R6::R6Class("Progress", public = list( n = NULL, i = 0, init_time = NULL, stopped = FALSE, stop_time = NULL, min_time = NULL, last_update = NULL, initialize = function(n, min_time = 0, ...) { self$n <- n self$min_time <- min_time self$begin() }, begin = function() { "Initialise timer. Call this before beginning timing." self$i <- 0 self$last_update <- self$init_time <- now() self$stopped <- FALSE self }, pause = function(x) { "Sleep for x seconds. Useful for testing." Sys.sleep(x) self }, width = function() { getOption("width") - nchar("|100% ~ 99.9 h remaining") - 2 }, tick = function() { "Process one element" if (self$stopped) return(self) if (self$i == self$n) abort("No more ticks.") self$i <- self$i + 1 self }, stop = function() { if (self$stopped) return(self) self$stopped <- TRUE self$stop_time <- now() self }, print = function(...) 
{ if (!isTRUE(getOption("dplyr.show_progress")) || # user specifies no progress !interactive() || # not an interactive session !is.null(getOption("knitr.in.progress"))) { # dplyr used within knitr document return(invisible(self)) } now_ <- now() if (now_ - self$init_time < self$min_time || now_ - self$last_update < 0.05) { return(invisible(self)) } self$last_update <- now_ if (self$stopped) { overall <- show_time(self$stop_time - self$init_time) if (self$i == self$n) { cat_line("Completed after ", overall) cat("\n") } else { cat_line("Killed after ", overall) cat("\n") } return(invisible(self)) } avg <- (now() - self$init_time) / self$i time_left <- (self$n - self$i) * avg nbars <- trunc(self$i / self$n * self$width()) cat_line( "|", str_rep("=", nbars), str_rep(" ", self$width() - nbars), "|", format(round(self$i / self$n * 100), width = 3), "% ", "~", show_time(time_left), " remaining" ) invisible(self) } ) ) cat_line <- function(...) { msg <- paste(..., sep = "", collapse = "") gap <- max(c(0, getOption("width") - nchar(msg, "width"))) cat("\r", msg, rep.int(" ", gap), sep = "") utils::flush.console() } str_rep <- function(x, i) { paste(rep.int(x, i), collapse = "") } show_time <- function(x) { if (x < 60) { paste(round(x), "s") } else if (x < 60 * 60) { paste(round(x / 60), "m") } else { paste(round(x / (60 * 60)), "h") } } now <- function() proc.time()[[3]] dplyr/R/deprec-dbi.R0000644000176200001440000000676314164534554013755 0ustar liggesusers#' Source for database backends #' #' @description #' `r lifecycle::badge("deprecated")` #' #' These functions have been deprecated; instead please use [tbl()] #' directly on an `DBIConnection`. See for #' more details. #' #' @param dbname Database name #' @param host,port Host name and port number of database #' @param user,username,password User name and password. #' #' Generally, you should avoid saving username and password in your #' scripts as it is easy to accidentally expose valuable credentials. 
#' Instead, retrieve them from environment variables, or use database #' specific credential scores. For example, with MySQL you can set up `my.cnf` #' as described in [RMySQL::MySQL()]. #' @param ... for the src, other arguments passed on to the underlying #' database connector, [DBI::dbConnect()]. For the tbl, included for #' compatibility with the generic, but otherwise ignored. #' @return An S3 object with class `src_dbi`, `src_sql`, `src`. #' @keywords internal #' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE) #' con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") #' copy_to(con, mtcars) #' #' # To retrieve a single table from a source, use `tbl()` #' mtcars <- con %>% tbl("mtcars") #' mtcars #' #' # You can also use pass raw SQL if you want a more sophisticated query #' con %>% tbl(sql("SELECT * FROM mtcars WHERE cyl == 8")) #' @name src_dbi NULL #' @rdname src_dbi #' @export src_mysql <- function(dbname, host = NULL, port = 0L, username = "root", password = "", ...) { check_dbplyr() check_installed("RMySQL", "to connect to MySQL/MariaDB.") lifecycle::deprecate_warn( "1.0.0", "dplyr::src_mysql()", details = "Please use `tbl()` directly with a database connection" ) con <- DBI::dbConnect( RMySQL::MySQL(), dbname = dbname, host = host, port = port, username = username, password = password, ... ) dbplyr::src_dbi(con, auto_disconnect = TRUE) } #' @rdname src_dbi #' @export src_postgres <- function(dbname = NULL, host = NULL, port = NULL, user = NULL, password = NULL, ...) { check_dbplyr() check_installed("RPostgreSQL", "to connect to PostgreSQL.") lifecycle::deprecate_warn( "1.0.0", "dplyr::src_postgres()", details = "Please use `tbl()` directly with a database connection" ) user <- user %||% if (in_travis()) "postgres" else "" con <- DBI::dbConnect( RPostgreSQL::PostgreSQL(), host = host %||% "", dbname = dbname %||% "", user = user, password = password %||% "", port = port %||% "", ... 
) dbplyr::src_dbi(con, auto_disconnect = TRUE) } #' @rdname src_dbi #' @export #' @param path Path to SQLite database. You can use the special path #' ":memory:" to create a temporary in memory database. #' @param create if `FALSE`, `path` must already exist. If #' `TRUE`, will create a new SQLite3 database at `path` if #' `path` does not exist and connect to the existing database if #' `path` does exist. src_sqlite <- function(path, create = FALSE) { check_dbplyr() lifecycle::deprecate_warn( "1.0.0", "dplyr::src_sqlite()", details = "Please use `tbl()` directly with a database connection" ) if (!create && !file.exists(path)) { msg <- glue("`path` must already exist, unless `create` = TRUE.") abort(msg) } con <- DBI::dbConnect(RSQLite::SQLite(), path) RSQLite::initExtension(con) dbplyr::src_dbi(con, auto_disconnect = TRUE) } dplyr/R/rank.R0000644000176200001440000000610114121112104012644 0ustar liggesusers#' Windowed rank functions. #' #' Six variations on ranking functions, mimicking the ranking functions #' described in SQL2003. They are currently implemented using the built in #' `rank` function, and are provided mainly as a convenience when #' converting between R and SQL. All ranking functions map smallest inputs #' to smallest outputs. Use [desc()] to reverse the direction. #' #' * `row_number()`: equivalent to `rank(ties.method = "first")` #' #' * `min_rank()`: equivalent to `rank(ties.method = "min")` #' #' * `dense_rank()`: like `min_rank()`, but with no gaps between #' ranks #' #' * `percent_rank()`: a number between 0 and 1 computed by #' rescaling `min_rank` to `[0, 1]` #' #' * `cume_dist()`: a cumulative distribution function. Proportion #' of all values less than or equal to the current rank. #' #' * `ntile()`: a rough rank, which breaks the input vector into #' `n` buckets. The size of the buckets may differ by up to one, #' larger buckets have lower rank. #' #' @name ranking #' @param x a vector of values to rank. Missing values are left as is. 
#' If you want to treat them as the smallest or largest values, replace
#' with Inf or -Inf before ranking.
#' @examples
#' x <- c(5, 1, 3, 2, 2, NA)
#' row_number(x)
#' min_rank(x)
#' dense_rank(x)
#' percent_rank(x)
#' cume_dist(x)
#'
#' ntile(x, 2)
#' ntile(1:8, 3)
#'
#' # row_number can be used with single table verbs without specifying x
#' # (for data frames and databases that support windowing)
#' mutate(mtcars, row_number() == 1L)
#' mtcars %>% filter(between(row_number(), 1, 10))
NULL

#' @export
#' @rdname ranking
row_number <- function(x) {
  if (missing(x)) {
    # Called without `x` inside a data mask: one rank per row of the group.
    seq_len(n())
  } else {
    # Ties broken by position of first occurrence; NAs stay NA (not ranked).
    rank(x, ties.method = "first", na.last = "keep")
  }
}

# Definition from
# https://techcommunity.microsoft.com/t5/sql-server/ranking-functions-rank-dense-rank-and-ntile/ba-p/383384

#' @param n number of groups to split up into.
#' @export
#' @rdname ranking
ntile <- function(x = row_number(), n) {
  # Avoid recomputation in default case:
  # row_number(row_number(x)) == row_number(x)
  if (!missing(x)) {
    x <- row_number(x)
  }
  # Non-integer `n` is truncated, as before. Previously an `n` of 0, a
  # negative value, an NA, or a vector silently produced NA/NaN output plus
  # a coercion warning (`len %% 0` is NaN); fail loudly instead.
  n <- as.integer(floor(n))
  if (length(n) != 1L || is.na(n) || n < 1L) {
    abort("`n` must be a single positive number.")
  }
  len <- length(x) - sum(is.na(x))
  if (len == 0L) {
    rep(NA_integer_, length(x))
  } else {
    # The first `n_larger` buckets hold `larger_size` rows each, the
    # remaining `n_smaller` buckets hold `smaller_size` rows each, so bucket
    # sizes differ by at most one and larger buckets get the lower ranks.
    n_larger <- as.integer(len %% n)
    n_smaller <- as.integer(n - n_larger)
    size <- len / n
    larger_size <- as.integer(ceiling(size))
    smaller_size <- as.integer(floor(size))
    larger_threshold <- larger_size * n_larger
    # Map each rank to its bucket. Logical subsetting is equivalent to the
    # previous `if_else()` (NA ranks yield NA bins) and keeps this numeric
    # helper self-contained.
    bins <- (x + (larger_size - 1L)) / larger_size
    high <- !is.na(x) & x > larger_threshold
    bins[high] <- (x[high] + (-larger_threshold + smaller_size - 1L)) / smaller_size + n_larger
    as.integer(floor(bins))
  }
}

#' @export
#' @rdname ranking
min_rank <- function(x) rank(x, ties.method = "min", na.last = "keep")

#' @export
#' @rdname ranking
dense_rank <- function(x) {
  # Rank within the sorted unique values: ties share a rank, no gaps.
  match(x, sort(unique(x)))
}

#' @export
#' @rdname ranking
percent_rank <- function(x) {
  # min_rank rescaled to [0, 1]; NaN when fewer than two non-NA values.
  (min_rank(x) - 1) / (sum(!is.na(x)) - 1)
}

#' @export
#' @rdname ranking
cume_dist <- function(x) {
  # Proportion of non-NA values less than or equal to each value.
  rank(x, ties.method = "max", na.last = "keep") / sum(!is.na(x))
}
dplyr/R/arrange.R0000644000176200001440000001271514154654644013372 0ustar liggesusers#' Arrange rows by column values #' #' @description #' `arrange()` orders the rows of a data frame by the values of selected #' columns. #' #' Unlike other dplyr verbs, `arrange()` largely ignores grouping; you #' need to explicitly mention grouping variables (or use `.by_group = TRUE`) #' in order to group by them, and functions of variables are evaluated #' once per data frame, not once per group. #' #' @details #' ## Locales #' The sort order for character vectors will depend on the collating sequence #' of the locale in use: see [locales()]. #' #' ## Missing values #' Unlike base sorting with `sort()`, `NA` are: #' * always sorted to the end for local data, even when wrapped with `desc()`. #' * treated differently for remote data, depending on the backend. #' #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * All rows appear in the output, but (usually) in a different place. #' * Columns are not modified. #' * Groups are not modified. #' * Data frame attributes are preserved. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("arrange")}. #' @export #' @param .data A data frame, data frame extension (e.g. a tibble), or a #' lazy data frame (e.g. from dbplyr or dtplyr). See *Methods*, below, for #' more details. #' @param ... <[`data-masking`][dplyr_data_masking]> Variables, or functions of #' variables. Use [desc()] to sort a variable in descending order. #' @param .by_group If `TRUE`, will sort first by grouping variable. Applies to #' grouped data frames only. 
#' @family single table verbs #' @examples #' arrange(mtcars, cyl, disp) #' arrange(mtcars, desc(disp)) #' #' # grouped arrange ignores groups #' by_cyl <- mtcars %>% group_by(cyl) #' by_cyl %>% arrange(desc(wt)) #' # Unless you specifically ask: #' by_cyl %>% arrange(desc(wt), .by_group = TRUE) #' #' # use embracing when wrapping in a function; #' # see ?dplyr_data_masking for more details #' tidy_eval_arrange <- function(.data, var) { #' .data %>% #' arrange({{ var }}) #' } #' tidy_eval_arrange(mtcars, mpg) #' #' # use across() access select()-style semantics #' iris %>% arrange(across(starts_with("Sepal"))) #' iris %>% arrange(across(starts_with("Sepal"), desc)) arrange <- function(.data, ..., .by_group = FALSE) { UseMethod("arrange") } #' @export arrange.data.frame <- function(.data, ..., .by_group = FALSE) { dots <- enquos(...) if (.by_group) { dots <- c(quos(!!!groups(.data)), dots) } loc <- arrange_rows(.data, dots) dplyr_row_slice(.data, loc) } # Helpers ----------------------------------------------------------------- arrange_rows <- function(.data, dots, error_call = caller_env()) { error_call <- dplyr_error_call(error_call) if (length(dots) == 0L) { out <- seq_len(nrow(.data)) return(out) } directions <- map_chr(dots, function(quosure) { if(quo_is_call(quosure, "desc")) "desc" else "asc" }) quosures <- map(dots, function(quosure) { if (quo_is_call(quosure, "desc", ns = c("", "dplyr"))) { expr <- quo_get_expr(quosure) if (!has_length(expr, 2L)) { abort("`desc()` must be called with exactly one argument.", call = error_call) } quosure <- new_quosure(node_cadr(expr), quo_get_env(quosure)) } quosure }) # give the quosures arbitrary names so that # data has the right number of columns below after transmute() names(quosures) <- paste0("^^--arrange_quosure_", seq_along(quosures)) # TODO: not quite that because when the quosure is some expression # it should be evaluated by groups. 
# for now this abuses transmute so that we get just one # column per quosure # # revisit when we have something like mutate_one() to # evaluate one quosure in the data mask data <- withCallingHandlers({ transmute(new_data_frame(.data), !!!quosures) }, error = function(cnd) { if (inherits(cnd, "dplyr:::mutate_error")) { # reverse the name mangling bullets <- gsub("^^--arrange_quosure_", "..", cnd$bullets, fixed = TRUE) # only name bullets that aren't already named names <- names2(bullets) names[names == ""] <- "x" bullets <- set_names(bullets, names) # skip the parent as this has reworked the bullets # and this would be confusing to have them parent <- cnd$parent } else { parent <- cnd bullets <- c() } bullets <- c( "Problem with the implicit `transmute()` step. ", bullets ) abort(bullets, call = error_call, parent = parent) }) # we can't just use vec_compare_proxy(data) because we need to apply # direction for each column, so we get a list of proxies instead # and then mimic vctrs:::order_proxy # # should really be map2(quosures, directions, ...) proxies <- map2(data, directions, function(column, direction) { proxy <- vec_proxy_order(column) desc <- identical(direction, "desc") if (is.data.frame(proxy)) { proxy <- order(vec_order(proxy, direction = direction, na_value = if(desc) "smallest" else "largest" )) } else if(desc) { proxy <- desc(proxy) } proxy }) exec("order", !!!unname(proxies), decreasing = FALSE, na.last = TRUE) } dplyr/R/ts.R0000644000176200001440000000034014151641776012367 0ustar liggesusers#' @export filter.ts <- function(.data, ...) { bullets <- c( "Incompatible data source.", x = '`.data` is a object, not a data source.', i = "Did you want to use `stats::filter()`?" ) abort(bullets) } dplyr/R/deprec-bench-compare.r0000644000176200001440000000650414164534554015753 0ustar liggesusers#' Evaluate, compare, benchmark operations of a set of srcs. 
#' #' `r lifecycle::badge("deprecated")` #' These functions are deprecated because we now believe that you're #' better of performing the comparisons directly, yourself, in order to #' generate more informative test failures. #' #' @param tbls,tbls_x,tbls_y A list of [tbl()]s. #' @param op A function with a single argument, called often with each #' element of `tbls`. #' @param ref For checking, a data frame to test results against. If not #' supplied, defaults to the results from the first `src`. #' @param compare A function used to compare the results. Defaults to #' `equal_data_frame` which ignores the order of rows and columns. #' @param times For benchmarking, the number of times each operation is #' repeated. #' @param \dots #' For `compare_tbls()`: additional parameters passed on the #' `compare()` function #' #' For `bench_tbls()`: additional benchmarks to run. #' @return #' `eval_tbls()`: a list of data frames. #' #' `compare_tbls()`: an invisible `TRUE` on success, otherwise #' an error is thrown. #' #' `bench_tbls()`: an object of class #' [microbenchmark::microbenchmark()] #' @name bench_compare #' @keywords internal NULL #' @export #' @rdname bench_compare bench_tbls <- function(tbls, op, ..., times = 10) { lifecycle::deprecate_warn("1.0.0", "bench_tbls()") check_installed("microbenchmark", "to compute table benchmarks.") # Generate call to microbenchmark function that evaluates op for each tbl calls <- lapply(seq_along(tbls), function(i) { substitute(op(tbls[[i]]), list(i = i)) }) names(calls) <- names(tbls) mb <- as.call(c( quote(microbenchmark::microbenchmark), calls, dots(...), list(times = times) )) eval(mb) } #' @export #' @rdname bench_compare compare_tbls <- function(tbls, op, ref = NULL, compare = equal_data_frame, ...) { lifecycle::deprecate_warn("1.0.0", "compare_tbls()") results <- eval_tbls(tbls, op) expect_equal_tbls(results, compare = compare, ...) 
} #' @export #' @rdname bench_compare compare_tbls2 <- function(tbls_x, tbls_y, op, ref = NULL, compare = equal_data_frame, ...) { lifecycle::deprecate_warn("1.0.0", "compare_tbls2()") results <- eval_tbls2(tbls_x, tbls_y, op) expect_equal_tbls(results, compare = compare, ...) } expect_equal_tbls <- function(results, ref = NULL, compare = equal_data_frame, ...) { check_installed("testthat", "to compare tables.") if (length(results) < 2 && is.null(ref)) { testthat::skip("Need at least two srcs to compare") } if (is.null(ref)) { ref <- results[[1]] ref_name <- names(results)[1] rest <- results[-1] } else { rest <- results ref_name <- "supplied comparison" } for (i in seq_along(rest)) { ok <- compare(ref, rest[[i]], ...) # if (!ok) browser() msg <- paste0( names(rest)[[i]], " not equal to ", ref_name, "\n", attr(ok, "comment") ) testthat::expect_true(ok, info = msg) } invisible(TRUE) } #' @export #' @rdname bench_compare eval_tbls <- function(tbls, op) { lifecycle::deprecate_warn("1.0.0", "eval_tbls()") lapply(tbls, function(x) as.data.frame(op(x))) } #' @export #' @rdname bench_compare eval_tbls2 <- function(tbls_x, tbls_y, op) { lifecycle::deprecate_warn("1.0.0", "eval_tbls2()") Map(function(x, y) as.data.frame(op(x, y)), tbls_x, tbls_y) } dplyr/R/deprec-combine.R0000644000176200001440000000161114151641776014617 0ustar liggesusers#' Combine vectors #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `combine()` is deprecated in favour of [vctrs::vec_c()]. `combine()` #' attempted to automatically guess whether you wanted [c()] or [unlist()], #' but could fail in surprising ways. We now believe it's better to be explicit. #' #' @param ... Vectors to combine. #' @keywords internal #' @export #' @examples #' f1 <- factor("a") #' f2 <- factor("b") #' #' combine(f1, f2) #' # -> #' vctrs::vec_c(f1, f1) #' #' combine(list(f1, f2)) #' # -> #' vctrs::vec_c(!!!list(f1, f2)) combine <- function(...) 
{ lifecycle::deprecate_warn("1.0.0", "combine()", "vctrs::vec_c()") args <- list2(...) if (length(args) == 1 && is.list(args[[1]])) { args <- args[[1]] } args <- keep(args, function(.x) !is.null(.x)) names(args) <- NULL if (length(args) == 0) { logical() } else { fix_call(vec_c(!!!args)) } } dplyr/R/colwise-arrange.R0000644000176200001440000000404614151641776015032 0ustar liggesusers#' Arrange rows by a selection of variables #' #' @description #' `r lifecycle::badge("superseded")` #' #' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of #' [across()] in an existing verb. See `vignette("colwise")` for details. #' #' These [scoped] variants of [arrange()] sort a data frame by a #' selection of variables. Like [arrange()], you can modify the #' variables before ordering with the `.funs` argument. #' #' @inheritParams scoped #' @inheritParams arrange #' #' @section Grouping variables: #' #' The grouping variables that are part of the selection participate #' in the sorting of the data frame. 
#' #' @export #' @keywords internal #' @examples #' df <- as_tibble(mtcars) #' arrange_all(df) #' # -> #' arrange(df, across()) #' #' arrange_all(df, desc) #' # -> #' arrange(df, across(everything(), desc)) arrange_all <- function(.tbl, .funs = list(), ..., .by_group = FALSE) { lifecycle::signal_stage("superseded", "arrange_all()") funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_all") if (!length(funs)) { funs <- syms(tbl_vars(.tbl)) } arrange(.tbl, !!!funs, .by_group = .by_group) } #' @rdname arrange_all #' @export arrange_at <- function(.tbl, .vars, .funs = list(), ..., .by_group = FALSE) { lifecycle::signal_stage("superseded", "arrange_at()") funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_at") if (!length(funs)) { funs <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE) } arrange(.tbl, !!!funs, .by_group = .by_group) } #' @rdname arrange_all #' @export arrange_if <- function(.tbl, .predicate, .funs = list(), ..., .by_group = FALSE) { lifecycle::signal_stage("superseded", "arrange_if()") funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_if") if (!length(funs)) { funs <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE) } arrange(.tbl, !!!funs, .by_group = .by_group) } dplyr/R/select.R0000644000176200001440000001124014154654644013222 0ustar liggesusers#' Subset columns using their names and types #' #' @description #' #' Select (and optionally rename) variables in a data frame, using a concise #' mini-language that makes it easy to refer to variables based on their name #' (e.g. `a:f` selects all columns from `a` on the left to `f` on the #' right). You can also use predicate functions like [is.numeric] to select #' variables based on their properties. 
#' #' #' ## Overview of selection features #' #' ```{r, child = "man/rmd/overview.Rmd"} #' ``` #' #' @inheritParams arrange #' @param ... <[`tidy-select`][dplyr_tidy_select]> One or more unquoted #' expressions separated by commas. Variable names can be used as if they #' were positions in the data frame, so expressions like `x:y` can #' be used to select a range of variables. #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Rows are not affected. #' * Output columns are a subset of input columns, potentially with a different #' order. Columns will be renamed if `new_name = old_name` form is used. #' * Data frame attributes are preserved. #' * Groups are maintained; you can't select off grouping variables. #' #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. #' #' @section Examples: #' ```{r, child = "man/rmd/setup.Rmd"} #' ``` #' #' Here we show the usage for the basic selection operators. See the #' specific help pages to learn about helpers like [starts_with()]. #' #' The selection language can be used in functions like #' `dplyr::select()` or `tidyr::pivot_longer()`. Let's first attach #' the tidyverse: #' #' ```{r, comment = "#>", collapse = TRUE} #' library(tidyverse) #' #' # For better printing #' iris <- as_tibble(iris) #' ``` #' #' Select variables by name: #' #' ```{r, comment = "#>", collapse = TRUE} #' starwars %>% select(height) #' #' iris %>% pivot_longer(Sepal.Length) #' ``` #' #' Select multiple variables by separating them with commas. 
#' @export
select.data.frame <- function(.data, ...) {
  error_call <- dplyr_error_call()
  # Evaluate the tidyselect specification against the data. Wrapping the dots
  # in `c(...)` lets multiple selections be supplied as separate arguments.
  # `loc` is a named integer vector of column positions; names carry any
  # renaming requested via `new_name = old_name`.
  loc <- tidyselect_fix_call(
    tidyselect::eval_select(expr(c(...)), .data),
    call = error_call
  )
  # Grouping variables can never be deselected; re-add any that are missing.
  loc <- ensure_group_vars(loc, .data, notify = TRUE)
  dplyr_col_select(.data, loc, names(loc))
}

# Helpers -----------------------------------------------------------------

# Ensure all grouping variables of `data` appear in the selection `loc`
# (a named integer vector of column positions). Missing grouping columns are
# prepended to the selection; when `notify` is TRUE, the user is informed
# which ones were added.
ensure_group_vars <- function(loc, data, notify = TRUE) {
  group_loc <- match(group_vars(data), names(data))
  missing <- setdiff(group_loc, loc)

  if (length(missing) > 0) {
    vars <- names(data)[missing]
    added_group_loc <- set_names(missing, vars)

    # don't add grouping variables with same name as new variable (#5841)
    added_group_loc <- added_group_loc[! vars %in% names(loc)]

    if (length(added_group_loc) > 0 && notify) {
      inform(glue(
        "Adding missing grouping variables: ",
        paste0("`", names(added_group_loc), "`", collapse = ", ")
      ))
    }

    loc <- c(added_group_loc, loc)
  }
  loc
}
#' @export
#' @examples
#' between(1:12, 7, 9)
#'
#' x <- rnorm(1e2)
#' x[between(x, -1, 1)]
#'
#' ## Or on a tibble using filter
#' filter(starwars, between(height, 100, 150))
between <- function(x, left, right) {
  # Warn when `x` carries an S3 class other than Date/POSIXct: the C routine
  # operates on the bare numeric payload and ignores attributes, so results
  # for arbitrary S3 vectors may not be what the caller expects.
  # (Fixed: removed a stray `;` terminator after this warn() call.)
  if (!is.null(attr(x, "class")) && !inherits(x, c("Date", "POSIXct"))) {
    warn("between() called on numeric vector with S3 class")
  }

  # Boundaries must be scalars; vectors would silently recycle in C.
  if (length(left) != 1) {
    abort("`left` must be length 1")
  }
  if (length(right) != 1) {
    abort("`right` must be length 1")
  }

  # The C implementation expects doubles; coerce integers and friends.
  if (!is.double(x)) {
    x <- as.numeric(x)
  }
  .Call(`dplyr_between`, x, as.numeric(left), as.numeric(right))
}
# Collapse the inputs into a single string and re-wrap it to the console
# width. The first line is indented by `indent` spaces; continuation lines
# get two extra spaces so wrapped text reads as a hanging paragraph.
wrap <- function(..., indent = 0) {
  text <- paste0(..., collapse = "")
  pieces <- strwrap(
    text,
    indent = indent,
    exdent = indent + 2,
    width = getOption("width")
  )
  paste0(pieces, collapse = "\n")
}
# Join the character inputs with newlines, optionally appending a trailing
# newline (useful when the result is fed straight to cat()).
paste_line <- function(..., .trailing = FALSE) {
  joined <- paste(chr(...), collapse = "\n")
  if (.trailing) {
    return(paste0(joined, "\n"))
  }
  joined
}
#' #' Methods available in currently loaded packages: #' #' * `group_by()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}. #' * `ungroup()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}. #' @export #' @examples #' by_cyl <- mtcars %>% group_by(cyl) #' #' # grouping doesn't change how the data looks (apart from listing #' # how it's grouped): #' by_cyl #' #' # It changes how it acts with the other dplyr verbs: #' by_cyl %>% summarise( #' disp = mean(disp), #' hp = mean(hp) #' ) #' by_cyl %>% filter(disp == max(disp)) #' #' # Each call to summarise() removes a layer of grouping #' by_vs_am <- mtcars %>% group_by(vs, am) #' by_vs <- by_vs_am %>% summarise(n = n()) #' by_vs #' by_vs %>% summarise(n = sum(n)) #' #' # To removing grouping, use ungroup #' by_vs %>% #' ungroup() %>% #' summarise(n = sum(n)) #' #' # By default, group_by() overrides existing grouping #' by_cyl %>% #' group_by(vs, am) %>% #' group_vars() #' #' # Use add = TRUE to instead append #' by_cyl %>% #' group_by(vs, am, .add = TRUE) %>% #' group_vars() #' #' # You can group by expressions: this is a short-hand #' # for a mutate() followed by a group_by() #' mtcars %>% #' group_by(vsam = vs + am) #' #' # The implicit mutate() step is always performed on the #' # ungrouped data. Here we get 3 groups: #' mtcars %>% #' group_by(vs) %>% #' group_by(hp_cut = cut(hp, 3)) #' #' # If you want it to be performed by groups, #' # you have to use an explicit mutate() call. 
#' @export
ungroup.grouped_df <- function(x, ...) {
  # With no selection, drop the grouping entirely.
  if (missing(...)) {
    return(as_tibble(x))
  }

  # Otherwise remove only the selected variables from the grouping and
  # regroup on whatever remains.
  current <- group_vars(x)
  dropped <- fix_call(tidyselect::vars_select(names(x), ...))
  remaining <- setdiff(current, dropped)
  group_by(x, !!!syms(remaining))
}
#' @return A list
#'   \item{data}{Modified tbl}
#'   \item{groups}{Modified groups}
#' @export
#' @keywords internal
group_by_prepare <- function(.data,
                             ...,
                             caller_env = caller_env(2),
                             .add = FALSE,
                             .dots = deprecated(),
                             add = deprecated(),
                             error_call = caller_env()) {
  # `add` was renamed to `.add` in dplyr 1.0.0; honour the old spelling
  # with a deprecation warning.
  if (!missing(add)) {
    lifecycle::deprecate_warn("1.0.0", "group_by(add = )", "group_by(.add = )")
    .add <- add
  }

  new_groups <- enquos(..., .ignore_empty = "all")
  if (!missing(.dots)) {
    # Used by dbplyr 1.4.2 so can't aggressively deprecate
    lifecycle::deprecate_warn("1.0.0", "group_by(.dots = )")
    new_groups <- c(new_groups, compat_lazy_dots(.dots, env = caller_env))
  }

  # If any calls, use mutate to add new columns, then group by those
  computed_columns <- add_computed_columns(.data, new_groups,
    caller_env = caller_env, error_call = error_call
  )

  out <- computed_columns$data
  group_names <- computed_columns$added_names

  # `.add = TRUE` appends to (rather than replaces) the existing grouping.
  if (.add) {
    group_names <- union(group_vars(.data), group_names)
  }

  # Every requested grouping variable must exist in the (possibly mutated)
  # data; report the missing ones with a classed error in the caller's frame.
  unknown <- setdiff(group_names, tbl_vars(out))
  if (length(unknown) > 0) {
    bullets <- c(
      "Must group by variables found in `.data`.",
      x = glue("Column `{unknown}` is not found.")
    )
    abort(bullets, call = error_call)
  }

  list(
    data = out,
    groups = syms(group_names),
    group_names = group_names
  )
}
# Does `quo` refer to a column directly, i.e. a bare symbol, `.data$x`, or
# `.data[["x"]]`? Such quosures need no mutate() step before grouping.
quo_is_variable_reference <- function(quo) {
  if (quo_is_symbol(quo)) {
    return(TRUE)
  }
  # Only two-argument calls can be `.data$x` / `.data[["x"]]`.
  if (!quo_is_call(quo, n = 2)) {
    return(FALSE)
  }
  expr <- quo_get_expr(quo)
  if (!is_call(expr, c("$", "[["))) {
    return(FALSE)
  }
  if (!identical(expr[[2]], sym(".data"))) {
    return(FALSE)
  }
  field <- expr[[3]]
  is_symbol(field) || is_string(field)
}
# Convert a lazyeval-style input (quosure, formula, bare expression, string,
# scalar, or legacy `lazy` object) into an rlang quosure.
#
# Fixed: the `list` branch of the switch used a spurious `lazy = ...`
# assignment (with `=`); the quosure is now returned directly.
compat_lazy <- function(lazy, env = caller_env(), warn = TRUE) {
  if (warn) warn_underscored()

  if (missing(lazy)) {
    return(quo())
  }
  if (is_quosure(lazy)) {
    return(lazy)
  }
  if (is_formula(lazy)) {
    return(as_quosure(lazy, env))
  }

  out <- switch(typeof(lazy),
    symbol = ,
    language = new_quosure(lazy, env),
    character = {
      # Parsing text is deprecated; warn once per dots (see compat_lazy_dots).
      if (warn) warn_text_se()
      parse_quo(lazy[[1]], env)
    },
    logical = ,
    integer = ,
    double = {
      if (length(lazy) > 1) {
        warn("Truncating vector to length 1")
        lazy <- lazy[[1]]
      }
      new_quosure(lazy, env)
    },
    list = if (inherits(lazy, "lazy")) {
      new_quosure(lazy$expr, lazy$env)
    }
  )

  # Unhandled types fall through to NULL and are rejected here.
  if (is_null(out)) {
    abort(sprintf("Can't convert a %s to a quosure.", typeof(lazy)))
  } else {
    out
  }
}
<[`data-masking`][dplyr_data_masking]> Optional variables to use #' when determining uniqueness. If there are multiple rows for a given #' combination of inputs, only the first row will be preserved. If omitted, #' will use all variables. #' @param .keep_all If `TRUE`, keep all variables in `.data`. #' If a combination of `...` is not distinct, this keeps the #' first row of values. #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Rows are a subset of the input but appear in the same order. #' * Columns are not modified if `...` is empty or `.keep_all` is `TRUE`. #' Otherwise, `distinct()` first calls `mutate()` to create new columns. #' * Groups are not modified. #' * Data frame attributes are preserved. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}. 
#' @export #' @examples #' df <- tibble( #' x = sample(10, 100, rep = TRUE), #' y = sample(10, 100, rep = TRUE) #' ) #' nrow(df) #' nrow(distinct(df)) #' nrow(distinct(df, x, y)) #' #' distinct(df, x) #' distinct(df, y) #' #' # You can choose to keep all other variables as well #' distinct(df, x, .keep_all = TRUE) #' distinct(df, y, .keep_all = TRUE) #' #' # You can also use distinct on computed variables #' distinct(df, diff = abs(x - y)) #' #' # use across() to access select()-style semantics #' distinct(starwars, across(contains("color"))) #' #' # Grouping ------------------------------------------------- #' # The same behaviour applies for grouped data frames, #' # except that the grouping variables are always included #' df <- tibble( #' g = c(1, 1, 2, 2), #' x = c(1, 1, 2, 1) #' ) %>% group_by(g) #' df %>% distinct(x) #' distinct <- function(.data, ..., .keep_all = FALSE) { UseMethod("distinct") } #' Same basic philosophy as group_by_prepare(): lazy_dots comes in, list of data and #' vars (character vector) comes out. 
#' @export
distinct.data.frame <- function(.data, ..., .keep_all = FALSE) {
  prep <- distinct_prepare(
    .data,
    vars = enquos(...),
    group_vars = group_vars(.data),
    .keep_all = .keep_all,
    caller_env = caller_env()
  )

  out <- prep$data
  # Compute uniqueness on a tibble view so column subsetting is well defined,
  # then slice the original object so its class/attributes are preserved.
  # (Removed a stale commented-out `as_tibble()` coercion of `out` itself.)
  loc <- vec_unique_loc(as_tibble(out)[prep$vars])
  dplyr_row_slice(out[prep$keep], loc)
}
# workaround so that methods that do not have the .drop argument yet
# don't create the auto mutate .drop column
#
# things like count() and group_by_all()
# can call .group_by_static_drop() instead of group_by()
# so that .drop is only part of the group_by() call if it is FALSE
#
# this is only meant to stay in dplyr until 0.8.0 to give
# implementers of group_by() methods a chance to add .drop in their
# arguments
.group_by_static_drop <- function(..., .drop) {
  # `...` is forwarded unevaluated (NSE); `.drop` is only mentioned in the
  # group_by() call when it is FALSE, so methods lacking that argument work.
  if (.drop) {
    group_by(...)
  } else {
    group_by(..., .drop = FALSE)
  }
}
#' #' * [expr()][rlang::expr] #' * [enquo()][rlang::enquo] #' * [enquos()][rlang::enquos] #' * [sym()][rlang::sym] #' * [syms()][rlang::syms] #' * [as_label()][rlang::as_label] #' * [quo()][rlang::quo] #' * [quos()][rlang::quos] #' * [quo_name()][rlang::quo_name] #' * [ensym()][rlang::ensym] #' * [ensyms()][rlang::ensyms] #' * [enexpr()][rlang::enexpr] #' * [enexprs()][rlang::enexprs] #' #' @keywords internal #' @name tidyeval-compat #' @aliases expr enquo enquos sym syms as_label #' @export expr enquo enquos sym syms as_label .data #' @aliases quo quos quo_name ensym ensyms enexpr enexprs #' @export quo quos quo_name ensym ensyms enexpr enexprs NULL dplyr/R/data-bands.R0000644000176200001440000000141314121112104013710 0ustar liggesusers#' Band membership #' #' These data sets describe band members of the Beatles and Rolling Stones. They #' are toy data sets that can be displayed in their entirety on a slide (e.g. to #' demonstrate a join). #' #' `band_instruments` and `band_instruments2` contain the same data but use #' different column names for the first column of the data set. #' `band_instruments` uses `name`, which matches the name of the key column of #' `band_members`; `band_instruments2` uses `artist`, which does not. #' #' @format Each is a tibble with two variables and three observations #' @examples #' band_members #' band_instruments #' band_instruments2 "band_members" #' @rdname band_members #' @format NULL "band_instruments" #' @rdname band_members #' @format NULL "band_instruments2" dplyr/R/context.R0000644000176200001440000001001214151641776013422 0ustar liggesusers#' Context dependent expressions #' #' @description #' These functions return information about the "current" group or "current" #' variable, so only work inside specific contexts like `summarise()` and #' `mutate()` #' #' * `n()` gives the current group size. #' * `cur_data()` gives the current data for the current group (excluding #' grouping variables). 
#' * `cur_data_all()` gives the current data for the current group (including #' grouping variables) #' * `cur_group()` gives the group keys, a tibble with one row and one column #' for each grouping variable. #' * `cur_group_id()` gives a unique numeric identifier for the current group. #' * `cur_group_rows()` gives the row indices for the current group. #' * `cur_column()` gives the name of the current column (in [across()] only). #' #' See [group_data()] for equivalent functions that return values for all #' groups. #' #' @section data.table: #' If you're familiar with data.table: #' #' * `cur_data()` <-> `.SD` #' * `cur_group_id()` <-> `.GRP` #' * `cur_group()` <-> `.BY` #' * `cur_group_rows()` <-> `.I` #' #' @examples #' df <- tibble( #' g = sample(rep(letters[1:3], 1:3)), #' x = runif(6), #' y = runif(6) #' ) #' gf <- df %>% group_by(g) #' #' gf %>% summarise(n = n()) #' #' gf %>% mutate(id = cur_group_id()) #' gf %>% summarise(row = cur_group_rows()) #' gf %>% summarise(data = list(cur_group())) #' gf %>% summarise(data = list(cur_data())) #' gf %>% summarise(data = list(cur_data_all())) #' #' gf %>% mutate(across(everything(), ~ paste(cur_column(), round(.x, 2)))) #' @name context NULL #' @rdname context #' @export n <- function() { length(peek_mask("n")$current_rows()) } #' @rdname context #' @export cur_data <- function() { mask <- peek_mask("cur_data") vars <- mask$current_non_group_vars() mask$pick(vars) } #' @rdname context #' @export cur_data_all <- function() { mask <- peek_mask("cur_data_all") vars <- mask$current_vars() mask$pick(vars) } #' @rdname context #' @export cur_group <- function() { peek_mask("cur_group()")$current_key() } #' @rdname context #' @export cur_group_id <- function() { # [] to get a copy because the current group is dealt with internally # if we don't get a copy, code like this won't give correct result: # summarise(id = cur_group_id()) peek_mask("cur_group_id")$get_current_group()[] } #' @rdname context #' @export 
# Human-readable label for the current group, used in error/progress messages:
# "group <id>: <key values>" for grouped data frames, "row <id>" for rowwise
# data frames, and "" otherwise (including zero-row inputs).
cur_group_label <- function() {
  mask <- peek_mask("cur_group_label")
  data <- mask$full_data()
  has_rows <- nrow(data) > 0

  if (has_rows && is_grouped_df(data)) {
    return(glue(
      "group {id}: {label}",
      id = cur_group_id(),
      label = group_labels_details(cur_group())
    ))
  }
  if (has_rows && inherits(data, "rowwise_df")) {
    return(paste0("row ", cur_group_id()))
  }
  ""
}
#' #' These [scoped] variants of [distinct()] extract distinct rows by a #' selection of variables. Like `distinct()`, you can modify the #' variables before ordering with the `.funs` argument. #' #' @param .keep_all If `TRUE`, keep all variables in `.data`. #' If a combination of `...` is not distinct, this keeps the #' first row of values. #' @inheritParams scoped #' @export #' #' @section Grouping variables: #' #' The grouping variables that are part of the selection are taken #' into account to determine distinct rows. #' #' @keywords internal #' @examples #' df <- tibble(x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2) #' #' distinct_all(df) #' # -> #' distinct(df, across()) #' #' distinct_at(df, vars(x,y)) #' # -> #' distinct(df, across(c(x, y))) #' #' distinct_if(df, is.numeric) #' # -> #' distinct(df, across(where(is.numeric))) #' #' # You can supply a function that will be applied before extracting the distinct values #' # The variables of the sorted tibble keep their original values. 
#' @export
distinct_all <- function(.tbl, .funs = list(), ..., .keep_all = FALSE) {
  # Superseded scoped verb: prefer distinct(across(...)).
  lifecycle::signal_stage("superseded", "distinct_all()")
  # Build the list of transformations to apply to every column; grouping
  # variables are included in the selection.
  funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "distinct_all")
  # With no functions supplied, fall back to distinct on all columns as-is.
  if (!length(funs)) {
    funs <- syms(tbl_vars(.tbl))
  }
  distinct(.tbl, !!!funs, .keep_all = .keep_all)
}
.Call(dplyr_lazy_vec_chop_impl, data, rows) private$mask <- .Call(dplyr_data_masks_setup, private$chops, data, rows) private$keys <- group_keys(data) private$group_vars <- group_vars(data) private$verb <- verb }, add_one = function(name, chunks, result) { if (inherits(private$data, "rowwise_df")){ is_scalar_list <- function(.x) { vec_is_list(.x) && length(.x) == 1L } if (all(map_lgl(chunks, is_scalar_list))) { chunks <- map(chunks, `[[`, 1L) } } .Call(`dplyr_mask_add`, private, name, result, chunks) }, remove = function(name) { .Call(`dplyr_mask_remove`, private, name) }, resolve = function(name) { private$chops[[name]] }, eval_all = function(quo) { .Call(`dplyr_mask_eval_all`, quo, private) }, eval_all_summarise = function(quo) { .Call(`dplyr_mask_eval_all_summarise`, quo, private) }, eval_all_mutate = function(quo) { .Call(`dplyr_mask_eval_all_mutate`, quo, private) }, eval_all_filter = function(quos, env_filter) { .Call(`dplyr_mask_eval_all_filter`, quos, private, nrow(private$data), env_filter) }, pick = function(vars) { cols <- self$current_cols(vars) if (inherits(private$data, "rowwise_df")) { cols <- map2(cols, names(cols), function(col, name) { if (vec_is_list(private$current_data[[name]])) { col <- list(col) } col }) } nrow <- length(self$current_rows()) new_tibble(cols, nrow = nrow) }, current_cols = function(vars) { env_get_list(parent.env(private$mask), vars) }, current_rows = function() { private$rows[[self$get_current_group()]] }, current_key = function() { vec_slice(private$keys, self$get_current_group()) }, current_vars = function() { names(private$current_data) }, current_non_group_vars = function() { setdiff(self$current_vars(), private$group_vars) }, get_current_group = function() { parent.env(private$chops)$.current_group }, set_current_group = function(group) { parent.env(private$chops)$.current_group[] <- group }, full_data = function() { private$data }, get_used = function() { .Call(env_resolved, private$chops, names(private$current_data)) }, 
unused_vars = function() { used <- self$get_used() current_vars <- self$current_vars() current_vars[!used] }, get_rows = function() { private$rows }, across_cols = function() { private$current_data[self$current_non_group_vars()] }, forget = function() { names_bindings <- self$current_vars() verb <- private$verb osbolete_promise_fn <- function(name) { abort(c( "Obsolete data mask.", x = glue("Too late to resolve `{name}` after the end of `dplyr::{verb}()`."), i = glue("Did you save an object that uses `{name}` lazily in a column in the `dplyr::{verb}()` expression ?") ), call = NULL) } promises <- map(names_bindings, function(.x) expr(osbolete_promise_fn(!!.x))) bindings <- self$get_env_bindings() suppressWarnings({ rm(list = names_bindings, envir = bindings) env_bind_lazy(bindings, !!!set_names(promises, names_bindings)) }) }, get_env_bindings = function() { parent.env(private$mask) }, get_rlang_mask = function() { private$mask }, get_caller_env = function() { private$caller } ), private = list( # the input data data = NULL, # environment that contains lazy vec_chop()s for each input column # and list of result chunks as they get added. # # The parent environment of chops has: # - .indices: the list of indices # - .current_group: scalar integer that identifies the current group chops = NULL, # dynamic data mask, with active bindings for each column # this is an rlang data mask, as such the bindings are actually # in the parent environment of `mask` mask = NULL, # ptypes of all the variables current_data = list(), # names of the grouping variables group_vars = character(), # list of indices, one integer vector per group rows = NULL, # data frame of keys, one row per group keys = NULL, # caller environment of the verb (summarise(), ...) 
caller = NULL, verb = character() ) ) dplyr/R/copy-to.r0000644000176200001440000000333614151641776013403 0ustar liggesusers#' Copy a local data frame to a remote src #' #' This function uploads a local data frame into a remote data source, creating #' the table definition as needed. Wherever possible, the new object will be #' temporary, limited to the current connection to the source. #' #' @param dest remote data source #' @param df local data frame #' @param name name for new remote table. #' @param overwrite If `TRUE`, will overwrite an existing table with #' name `name`. If `FALSE`, will throw an error if `name` already #' exists. #' @param ... other parameters passed to methods. #' @seealso [collect()] for the opposite action; downloading remote data into #' a local dbl. #' @return a `tbl` object in the remote source #' @export #' @examples #' \dontrun{ #' iris2 <- dbplyr::src_memdb() %>% copy_to(iris, overwrite = TRUE) #' iris2 #' } copy_to <- function(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...) { UseMethod("copy_to") } #' Copy tables to same source, if necessary #' #' @param x,y `y` will be copied to `x`, if necessary. #' @param copy If `x` and `y` are not from the same data source, #' and `copy` is `TRUE`, then `y` will be copied into the #' same src as `x`. This allows you to join tables across srcs, but #' it is a potentially expensive operation so you must opt into it. #' @param ... Other arguments passed on to methods. #' @export auto_copy <- function(x, y, copy = FALSE, ...) { if (same_src(x, y)) return(y) if (!copy) { bullets <- c( "`x` and `y` must share the same src.", i = "set `copy` = TRUE (may be slow)." ) abort(bullets) } UseMethod("auto_copy") } #' @export auto_copy.data.frame <- function(x, y, copy = FALSE, ...) 
{ as.data.frame(y) } dplyr/R/colwise-select.R0000644000176200001440000001122414176714175014667 0ustar liggesusers#' Select and rename a selection of variables #' #' @description #' `r lifecycle::badge("superseded")` #' #' `rename_if()`, `rename_at()`, and `rename_all()` have been superseded by #' `rename_with()`. The matching select statements have been superseded by the #' combination of a `select()` + `rename_with()`. Any predicate functions passed #' as arguments to `select()` or `rename_with()` must be wrapped in [where()]. #' #' These functions were superseded because `mutate_if()` and friends were #' superseded by `across()`. `select_if()` and `rename_if()` already use tidy #' selection so they can't be replaced by `across()` and instead we need a new #' function. #' #' @inheritParams scoped #' @keywords internal #' @param .funs A function `fun`, a purrr style lambda `~ fun(.)` or a list of either form. #' @examples #' mtcars <- as_tibble(mtcars) # for nicer printing #' #' mtcars %>% rename_all(toupper) #' # -> #' mtcars %>% rename_with(toupper) #' #' # NB: the transformation comes first in rename_with #' is_whole <- function(x) all(floor(x) == x) #' mtcars %>% rename_if(is_whole, toupper) #' # -> #' mtcars %>% rename_with(toupper, where(is_whole)) #' #' mtcars %>% rename_at(vars(mpg:hp), toupper) #' # -> #' mtcars %>% rename_with(toupper, mpg:hp) #' #' # You now must select() and then rename #' #' mtcars %>% select_all(toupper) #' # -> #' mtcars %>% rename_with(toupper) #' #' # Selection drops unselected variables: #' mtcars %>% select_if(is_whole, toupper) #' # -> #' mtcars %>% select(where(is_whole)) %>% rename_with(toupper) #' #' mtcars %>% select_at(vars(-contains("ar"), starts_with("c")), toupper) #' # -> #' mtcars %>% #' select(!contains("ar") | starts_with("c")) %>% #' rename_with(toupper) #' @export select_all <- function(.tbl, .funs = list(), ...) 
{ lifecycle::signal_stage("superseded", "select_all()") funs <- as_fun_list(.funs, caller_env(), ..., .caller = "select_all") vars <- tbl_vars(.tbl) syms <- vars_select_syms(vars, funs, .tbl) select(.tbl, !!!syms) } #' @rdname select_all #' @export rename_all <- function(.tbl, .funs = list(), ...) { lifecycle::signal_stage("superseded", "rename_with()") funs <- as_fun_list(.funs, caller_env(), ..., .caller = "rename_all") vars <- tbl_vars(.tbl) syms <- vars_select_syms(vars, funs, .tbl, strict = TRUE) rename(.tbl, !!!syms) } #' @rdname select_all #' @export select_if <- function(.tbl, .predicate, .funs = list(), ...) { funs <- as_fun_list(.funs, caller_env(), ..., .caller = "select_if") if (!is_logical(.predicate)) { .predicate <- as_fun_list(.predicate, caller_env(), .caller = "select_if", .caller_arg = ".predicate") } vars <- tbl_if_vars(.tbl, .predicate, caller_env(), .include_group_vars = TRUE) syms <- vars_select_syms(vars, funs, .tbl) select(.tbl, !!!syms) } #' @rdname select_all #' @export rename_if <- function(.tbl, .predicate, .funs = list(), ...) { funs <- as_fun_list(.funs, caller_env(), ..., .caller = "rename_if") if (!is_logical(.predicate)) { .predicate <- as_fun_list(.predicate, caller_env(), .caller = "rename_if", .caller_arg = ".predicate") } vars <- tbl_if_vars(.tbl, .predicate, caller_env(), .include_group_vars = TRUE) syms <- vars_select_syms(vars, funs, .tbl, strict = TRUE) rename(.tbl, !!!syms) } #' @rdname select_all #' @export select_at <- function(.tbl, .vars, .funs = list(), ...) { vars <- tbl_at_vars(.tbl, .vars, .include_group_vars = TRUE) funs <- as_fun_list(.funs, caller_env(), ..., .caller = "select_at") syms <- vars_select_syms(vars, funs, .tbl) select(.tbl, !!!syms) } #' @rdname select_all #' @export rename_at <- function(.tbl, .vars, .funs = list(), ...) 
{ vars <- tbl_at_vars(.tbl, .vars, .include_group_vars = TRUE) funs <- as_fun_list(.funs, caller_env(), ..., .caller = "rename_at") syms <- vars_select_syms(vars, funs, .tbl, strict = TRUE) rename(.tbl, !!!syms) } vars_select_syms <- function(vars, funs, tbl, strict = FALSE, error_call = caller_env()) { if (length(funs) > 1) { msg <- glue("`.funs` must contain one renaming function, not {length(funs)}.") abort(msg, call = error_call) } else if (length(funs) == 1) { fun <- funs[[1]] if (is_quosure(fun)) { fun <- quo_as_function(fun) } syms <- if (length(vars)) { set_names(syms(vars), fun(as.character(vars))) } else { set_names(syms(vars)) } } else if (!strict) { syms <- syms(vars) } else { msg <- glue("`.funs` must specify a renaming function.") abort(msg, call = error_call) } group_vars <- group_vars(tbl) group_syms <- syms(group_vars) has_group_sym <- group_syms %in% syms new_group_syms <- set_names(group_syms[!has_group_sym], group_vars[!has_group_sym]) c(new_group_syms, syms) } dplyr/R/tbl.r0000644000176200001440000000417214121112104012540 0ustar liggesusers#' Create a table from a data source #' #' This is a generic method that dispatches based on the first argument. #' #' @param src A data source #' @param ... Other arguments passed on to the individual methods #' @export tbl <- function(src, ...) { UseMethod("tbl") } #' Create a "tbl" object #' #' `tbl()` is the standard constructor for tbls. `as.tbl()` coerces, #' and `is.tbl()` tests. #' #' @keywords internal #' @export #' @param subclass name of subclass. "tbl" is an abstract base class, so you #' must supply this value. `tbl_` is automatically prepended to the #' class name #' @param object to test/coerce. #' @param ... For `tbl()`, other fields used by class. For `as.tbl()`, #' other arguments passed to methods. make_tbl <- function(subclass, ...) 
{ subclass <- paste0("tbl_", subclass) structure(list(...), class = c(subclass, "tbl")) } #' @rdname tbl #' @param x Any object #' @export is.tbl <- function(x) inherits(x, "tbl") tbl_vars_dispatch <- function(x) { UseMethod("tbl_vars") } new_sel_vars <- function(vars, group_vars) { structure( vars, groups = group_vars, class = c("dplyr_sel_vars", "character") ) } #' List variables provided by a tbl. #' #' `tbl_vars()` returns all variables while `tbl_nongroup_vars()` #' returns only non-grouping variables. The `groups` attribute #' of the object returned by `tbl_vars()` is a character vector of the #' grouping columns. #' #' @export #' @param x A tbl object #' @seealso [group_vars()] for a function that returns grouping #' variables. #' @keywords internal tbl_vars <- function(x) { return(new_sel_vars(tbl_vars_dispatch(x), group_vars(x))) # For roxygen and static analysis UseMethod("tbl_vars") } #' @export tbl_vars.data.frame <- function(x) { names(x) } #' @rdname tbl_vars #' @export tbl_nongroup_vars <- function(x) { setdiff(tbl_vars(x), group_vars(x)) } is_sel_vars <- function(x) { inherits(x, "dplyr_sel_vars") } #' @export print.dplyr_sel_vars <- function(x, ...) { cat("\n") print(unstructure(x)) groups <- attr(x, "groups") if (length(groups)) { cat("Groups:\n") print(groups) } invisible(x) } dplyr/R/count-tally.R0000644000176200001440000001327414151641776014226 0ustar liggesusers#' Count observations by group #' #' @description #' `count()` lets you quickly count the unique values of one or more variables: #' `df %>% count(a, b)` is roughly equivalent to #' `df %>% group_by(a, b) %>% summarise(n = n())`. #' `count()` is paired with `tally()`, a lower-level helper that is equivalent #' to `df %>% summarise(n = n())`. Supply `wt` to perform weighted counts, #' switching the summary from `n = n()` to `n = sum(wt)`. 
#' #' `add_count()` and `add_tally()` are equivalents to `count()` and `tally()` #' but use `mutate()` instead of `summarise()` so that they add a new column #' with group-wise counts. #' #' @param x A data frame, data frame extension (e.g. a tibble), or a #' lazy data frame (e.g. from dbplyr or dtplyr). #' @param ... <[`data-masking`][dplyr_data_masking]> Variables to group by. #' @param wt <[`data-masking`][dplyr_data_masking]> Frequency weights. #' Can be `NULL` or a variable: #' #' * If `NULL` (the default), counts the number of rows in each group. #' * If a variable, computes `sum(wt)` for each group. #' @param sort If `TRUE`, will show the largest groups at the top. #' @param name The name of the new column in the output. #' #' If omitted, it will default to `n`. If there's already a column called `n`, #' it will error, and require you to specify the name. #' @param .drop For `count()`: if `FALSE` will include counts for empty groups #' (i.e. for levels of factors that don't exist in the data). Deprecated in #' `add_count()` since it didn't actually affect the output. #' @return #' An object of the same type as `.data`. `count()` and `add_count()` #' group transiently, so the output has the same groups as the input. #' @export #' @examples #' # count() is a convenient way to get a sense of the distribution of #' # values in a dataset #' starwars %>% count(species) #' starwars %>% count(species, sort = TRUE) #' starwars %>% count(sex, gender, sort = TRUE) #' starwars %>% count(birth_decade = round(birth_year, -1)) #' #' # use the `wt` argument to perform a weighted count. 
This is useful #' # when the data has already been aggregated once #' df <- tribble( #' ~name, ~gender, ~runs, #' "Max", "male", 10, #' "Sandra", "female", 1, #' "Susan", "female", 4 #' ) #' # counts rows: #' df %>% count(gender) #' # counts runs: #' df %>% count(gender, wt = runs) #' #' # tally() is a lower-level function that assumes you've done the grouping #' starwars %>% tally() #' starwars %>% group_by(species) %>% tally() #' #' # both count() and tally() have add_ variants that work like #' # mutate() instead of summarise #' df %>% add_count(gender, wt = runs) #' df %>% add_tally(wt = runs) count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { UseMethod("count") } #' @export count.data.frame <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x)) { if (!missing(...)) { out <- group_by(x, ..., .add = TRUE, .drop = .drop) } else { out <- x } out <- tally(out, wt = !!enquo(wt), sort = sort, name = name) # Ensure grouping is transient if (is.data.frame(x)) { out <- dplyr_reconstruct(out, x) } out } count.tbl_sql <- count.data.frame #' @export #' @rdname count tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { UseMethod("tally") } #' @export tally.data.frame <- function(x, wt = NULL, sort = FALSE, name = NULL) { n <- tally_n(x, {{ wt }}) name <- check_name(name, group_vars(x)) local_options(dplyr.summarise.inform = FALSE) out <- summarise(x, !!name := !!n) if (sort) { arrange(out, desc(!!sym(name))) } else { out } } tally.tbl_sql <- tally.data.frame #' @export #' @rdname count add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) { UseMethod("add_count") } #' @export add_count.default <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) { if (!missing(.drop)) { lifecycle::deprecate_warn("1.0.0", "add_count(.drop = )") } if (!missing(...)) { out <- group_by(x, ..., .add = TRUE) } else { out <- x } add_tally(out, wt = !!enquo(wt), sort = sort, 
name = name) } #' @export add_count.data.frame <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) { if (!missing(.drop)) { lifecycle::deprecate_warn("1.0.0", "add_count(.drop = )") } if (!missing(...)) { out <- group_by(x, ..., .add = TRUE) } else { out <- x } out <- add_tally(out, wt = !!enquo(wt), sort = sort, name = name) dplyr_reconstruct(out, x) } #' @rdname count #' @export add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { n <- tally_n(x, {{ wt }}) name <- check_name(name, tbl_vars(x)) out <- mutate(x, !!name := !!n) if (sort) { arrange(out, desc(!!sym(name))) } else { out } } # Helpers ----------------------------------------------------------------- tally_n <- function(x, wt) { wt <- enquo(wt) if (is_call(quo_get_expr(wt), "n", n = 0)) { # Provided only by dplyr 1.0.0. See #5349 for discussion. warn(c( "`wt = n()` is deprecated", i = "You can now omit the `wt` argument" )) wt <- quo(NULL) } if (quo_is_null(wt)) { expr(n()) } else { expr(sum(!!wt, na.rm = TRUE)) } } check_name <- function(name, vars) { if (is.null(name)) { name <- n_name(vars) if (name != "n") { inform(c( glue("Storing counts in `{name}`, as `n` already present in input"), i = "Use `name = \"new_name\"` to pick a new name." )) } } else if (!is.character(name) || length(name) != 1) { abort("`name` must be a single string.") } name } n_name <- function(x) { name <- "n" while (name %in% x) { name <- paste0("n", name) } name } dplyr/R/conditions.R0000644000176200001440000000664614154654644014132 0ustar liggesusers#' Local error call for dplyr verbs #' @noRd dplyr_local_error_call <- function(call = frame, frame = caller_env()) { # This doesn't implement the semantics of a `local_` function # perfectly in order to be as fast as possible frame$.__dplyr_error_call__. <- call invisible(NULL) } # Takes the local call by default. If the caller of the verb has # called `dplyr_local_error_call()`, we used that call instead. 
This # logic is slightly different than in checking functions or error # helpers, where the error call is always taken from the parent by # default. dplyr_error_call <- function(call) { if (is_missing(call)) { call <- caller_env() } if (is_environment(call)) { caller <- eval_bare(quote(base::parent.frame()), call) caller_call <- caller[[".__dplyr_error_call__."]] if (!is_null(caller_call)) { call <- caller_call } } call } arg_name <- function(quos, index) { name <- names(quos)[index] if (is.null(name) || name == "") { name <- glue("..{index}") } name } cnd_bullet_cur_group_label <- function(what = "error") { label <- cur_group_label() if (label != "") { glue("The {what} occurred in {label}.") } } cnd_bullet_rowwise_unlist <- function() { data <- peek_mask()$full_data() if (inherits(data, "rowwise_df")) { glue_data(peek_error_context(), "Did you mean: `{error_name} = list({error_expression})` ?") } } or_1 <- function(x) { if(x == 1L) { "1" } else { glue("{x} or 1") } } # Common ------------------------------------------------------------------ is_data_pronoun <- function(x) { is_call(x, c("[[", "$")) && identical(node_cadr(x), sym(".data")) } # Because as_label() strips off .data$<> and .data[[<>]] quo_as_label <- function(quo) { expr <- quo_get_expr(quo) if (is_data_pronoun(expr)) { deparse(expr)[[1]] } else{ as_label(expr) } } local_error_context <- function(dots, .index, mask, frame = caller_env()) { expr <- dots[[.index]] if (quo_is_call(expr, "invisible")) { expr <- "" } else { expr <- quo_as_label(dots[[.index]]) } error_context <- env( error_name = arg_name(dots, .index), error_expression = expr, mask = mask ) context_local("dplyr_error_context", error_context, frame = frame) } peek_error_context <- function() { context_peek("dplyr_error_context", "peek_error_context", "dplyr error handling") } cnd_bullet_header <- function(what) { error_context <- peek_error_context() error_name <- error_context$error_name error_expression <- error_context$error_expression 
if (nzchar(error_expression)) { sep <- " = " } else { sep <- "" } glue("Problem while {what} `{error_name}{sep}{error_expression}`.") } cnd_bullet_combine_details <- function(x, arg) { group <- as.integer(sub("^..", "", arg)) keys <- group_keys(peek_mask()$full_data())[group, ] details <- group_labels_details(keys) glue("Result type for group {group} ({details}): <{vec_ptype_full(x)}>.") } err_vars <- function(x) { if (is.logical(x)) { x <- which(x) } if (is.character(x)) { x <- encodeString(x, quote = "`") } glue_collapse(x, sep = ", ", last = if (length(x) <= 2) " and " else ", and ") } dplyr_internal_error <- function(class = NULL, data = list()) { abort(class = c(class, "dplyr:::internal_error"), dplyr_error_data = data) } skip_internal_condition <- function(cnd) { if (inherits(cnd, "dplyr:::internal_error")) { cnd$parent } else { cnd } } dplyr/R/src.r0000644000176200001440000000265214121112104012547 0ustar liggesusers#' Create a "src" object #' #' `src()` is the standard constructor for srcs and `is.src()` tests. #' #' @keywords internal #' @export #' @param subclass name of subclass. "src" is an abstract base class, so you #' must supply this value. `src_` is automatically prepended to the #' class name #' @param ... fields used by object. #' #' These dots are evaluated with [explicit splicing][rlang::dots_list]. #' @param x object to test for "src"-ness. src <- function(subclass, ...) { subclass <- paste0("src_", subclass) structure(dots_list(...), class = c(subclass, "src")) } #' @rdname src #' @export is.src <- function(x) inherits(x, "src") #' @export print.src <- function(x, ...) { cat(format(x, ...), "\n", sep = "") } #' List all tbls provided by a source. #' #' This is a generic method which individual src's will provide methods for. #' Most methods will not be documented because it's usually pretty obvious what #' possible results will be. #' #' @param x a data src. #' @param ... other arguments passed on to the individual methods. 
#' @export #' @keywords internal src_tbls <- function(x, ...) { UseMethod("src_tbls") } #' Figure out if two sources are the same (or two tbl have the same source) #' #' @param x,y src or tbls to test #' @return a logical flag #' @export #' @keywords internal same_src <- function(x, y) { UseMethod("same_src") } #' @export same_src.data.frame <- function(x, y) { is.data.frame(y) } dplyr/R/lead-lag.R0000644000176200001440000000546214151641776013421 0ustar liggesusers#' Compute lagged or leading values #' #' Find the "previous" (`lag()`) or "next" (`lead()`) values in a vector. #' Useful for comparing values behind of or ahead of the current values. #' #' @param x Vector of values #' @param n Positive integer of length 1, giving the number of positions to #' lead or lag by #' @param default Value used for non-existent rows. Defaults to `NA`. #' @param order_by Override the default ordering to use another vector or column #' @param ... Needed for compatibility with lag generic. #' @importFrom stats lag #' @examples #' lag(1:5) #' lead(1:5) #' #' x <- 1:5 #' tibble(behind = lag(x), x, ahead = lead(x)) #' #' # If you want to look more rows behind or ahead, use `n` #' lag(1:5, n = 1) #' lag(1:5, n = 2) #' #' lead(1:5, n = 1) #' lead(1:5, n = 2) #' #' # If you want to define a value for non-existing rows, use `default` #' lag(1:5) #' lag(1:5, default = 0) #' #' lead(1:5) #' lead(1:5, default = 6) #' #' # If data are not already ordered, use `order_by` #' scrambled <- slice_sample(tibble(year = 2000:2005, value = (0:5) ^ 2), prop = 1) #' #' wrong <- mutate(scrambled, previous_year_value = lag(value)) #' arrange(wrong, year) #' #' right <- mutate(scrambled, previous_year_value = lag(value, order_by = year)) #' arrange(right, year) #' @name lead-lag NULL #' @export #' @rdname lead-lag lag <- function(x, n = 1L, default = NA, order_by = NULL, ...) 
{ if (!is.null(order_by)) { return(with_order(order_by, lag, x, n = n, default = default)) } if (inherits(x, "ts")) { msg <- "`x` must be a vector, not a ts object, do you want `stats::lag()`?" abort(msg) } if (length(n) != 1 || !is.numeric(n) || n < 0) { msg <- glue("`n` must be a positive integer, not {friendly_type_of(n)} of length {length(n)}.") abort(msg) } if (n == 0) return(x) if (vec_size(default) != 1L) { msg <- glue("`default` must be size 1, not size {vec_size(default)}") abort(msg) } xlen <- vec_size(x) n <- pmin(n, xlen) inputs <- fix_call(vec_cast_common(default = default, x = x)) vec_c( vec_rep(inputs$default, n), vec_slice(inputs$x, seq_len(xlen - n)) ) } #' @export #' @rdname lead-lag lead <- function(x, n = 1L, default = NA, order_by = NULL, ...) { if (!is.null(order_by)) { return(with_order(order_by, lead, x, n = n, default = default)) } if (length(n) != 1 || !is.numeric(n) || n < 0) { msg <- glue("`n` must be a positive integer, not {friendly_type_of(n)} of length {length(n)}.") abort(msg) } if (n == 0) return(x) if (vec_size(default) != 1L) { msg <- glue("`default` must be size 1, not size {vec_size(default)}") abort(msg) } xlen <- vec_size(x) n <- pmin(n, xlen) inputs <- fix_call(vec_cast_common(default = default, x = x)) vec_c( vec_slice(inputs$x, -seq_len(n)), vec_rep(inputs$default, n) ) } dplyr/R/recode.R0000644000176200001440000002331114151641776013205 0ustar liggesusers#' Recode values #' #' @description #' This is a vectorised version of [switch()]: you can replace #' numeric values based on their position or their name, and character or factor #' values only by their name. This is an S3 generic: dplyr provides methods for #' numeric, character, and factors. For logical vectors, use [if_else()]. For #' more complicated criteria, use [case_when()]. #' #' You can use `recode()` directly with factors; it will preserve the existing #' order of levels while changing the values. 
Alternatively, you can #' use `recode_factor()`, which will change the order of levels to match #' the order of replacements. See the [forcats](https://forcats.tidyverse.org/) #' package for more tools for working with factors and their levels. #' #' `recode()` is questioning because the arguments are in the wrong order. #' We have `new <- old`, `mutate(df, new = old)`, and `rename(df, new = old)` #' but `recode(x, old = new)`. We don't yet know how to fix this problem, but #' it's likely to involve creating a new function then retiring or deprecating #' `recode()`. #' #' @param .x A vector to modify #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Replacements. For character and factor `.x`, these should be named #' and replacement is based only on their name. For numeric `.x`, these can be #' named or not. If not named, the replacement is done based on position i.e. #' `.x` represents positions to look for in replacements. See examples. #' #' When named, the argument names should be the current values to be replaced, and the #' argument values should be the new (replacement) values. #' #' All replacements must be the same type, and must have either #' length one or the same length as `.x`. #' @param .default If supplied, all values not otherwise matched will #' be given this value. If not supplied and if the replacements are #' the same type as the original values in `.x`, unmatched #' values are not changed. If not supplied and if the replacements #' are not compatible, unmatched values are replaced with `NA`. #' #' `.default` must be either length 1 or the same length as #' `.x`. #' @param .missing If supplied, any missing values in `.x` will be #' replaced by this value. Must be either length 1 or the same length as #' `.x`. #' @param .ordered If `TRUE`, `recode_factor()` creates an #' ordered factor. #' @return A vector the same length as `.x`, and the same type as #' the first of `...`, `.default`, or `.missing`. 
#' `recode_factor()` returns a factor whose levels are in the same order as #' in `...`. The levels in `.default` and `.missing` come last. #' @seealso [na_if()] to replace specified values with a `NA`. #' #' [coalesce()] to replace missing values with a specified value. #' #' [tidyr::replace_na()] to replace `NA` with a value. #' @export #' @examples #' # For character values, recode values with named arguments only. Unmatched #' # values are unchanged. #' char_vec <- sample(c("a", "b", "c"), 10, replace = TRUE) #' recode(char_vec, a = "Apple") #' recode(char_vec, a = "Apple", b = "Banana") #' #' # Use .default as replacement for unmatched values. Note that NA and #' # replacement values need to be of the same type. For more information, see #' # https://adv-r.hadley.nz/vectors-chap.html#missing-values #' recode(char_vec, a = "Apple", b = "Banana", .default = NA_character_) #' #' # Throws an error as NA is logical, not character. #' try(recode(char_vec, a = "Apple", b = "Banana", .default = NA)) #' #' # Use a named character vector for unquote splicing with !!! #' level_key <- c(a = "apple", b = "banana", c = "carrot") #' recode(char_vec, !!!level_key) #' #' # For numeric values, named arguments can also be used #' num_vec <- c(1:4, NA) #' recode(num_vec, `2` = 20L, `4` = 40L) #' #' # Or if you don't name the arguments, recode() matches by position. #' # (Only works for numeric vector) #' recode(num_vec, "a", "b", "c", "d") #' # .x (position given) looks in (...), then grabs (... value at position) #' # so if nothing at position (here 5), it uses .default or NA. #' recode(c(1,5,3), "a", "b", "c", "d", .default = "nothing") #' #' # Note that if the replacements are not compatible with .x, #' # unmatched values are replaced by NA and a warning is issued. 
#' recode(num_vec, `2` = "b", `4` = "d") #' # use .default to change the replacement value #' recode(num_vec, "a", "b", "c", .default = "other") #' # use .missing to replace missing values in .x #' recode(num_vec, "a", "b", "c", .default = "other", .missing = "missing") #' #' # For factor values, use only named replacements #' # and supply default with levels() #' factor_vec <- factor(c("a", "b", "c")) #' recode(factor_vec, a = "Apple", .default = levels(factor_vec)) #' #' # Use recode_factor() to create factors with levels ordered as they #' # appear in the recode call. The levels in .default and .missing #' # come last. #' recode_factor(num_vec, `1` = "z", `2` = "y", `3` = "x") #' recode_factor(num_vec, `1` = "z", `2` = "y", `3` = "x", #' .default = "D") #' recode_factor(num_vec, `1` = "z", `2` = "y", `3` = "x", #' .default = "D", .missing = "M") #' #' # When the input vector is a compatible vector (character vector or #' # factor), it is reused as default. #' recode_factor(letters[1:3], b = "z", c = "y") #' recode_factor(factor(letters[1:3]), b = "z", c = "y") #' #' # Use a named character vector to recode factors with unquote splicing. #' level_key <- c(a = "apple", b = "banana", c = "carrot") #' recode_factor(char_vec, !!!level_key) recode <- function(.x, ..., .default = NULL, .missing = NULL) { UseMethod("recode") } #' @export recode.numeric <- function(.x, ..., .default = NULL, .missing = NULL) { values <- list2(...) nms <- have_name(values) if (all(nms)) { vals <- as.double(names(values)) } else if (all(!nms)) { vals <- seq_along(values) } else { msg <- "Either all values must be named, or none must be named." 
abort(msg) } n <- length(.x) template <- find_template(values, .default, .missing) out <- template[rep(NA_integer_, n)] replaced <- rep(FALSE, n) for (i in seq_along(values)) { out <- replace_with(out, .x == vals[i], values[[i]], paste0("Vector ", i)) replaced[.x == vals[i]] <- TRUE } .default <- validate_recode_default(.default, .x, out, replaced) out <- replace_with(out, !replaced & !is.na(.x), .default, "`.default`") out <- replace_with(out, is.na(.x), .missing, "`.missing`") out } #' @export recode.character <- function(.x, ..., .default = NULL, .missing = NULL) { .x <- as.character(.x) values <- list2(...) if (!all(have_name(values))) { bad <- which(!have_name(values)) + 1 msg <- glue("{fmt_pos_args(bad)} must be named.") abort(msg) } n <- length(.x) template <- find_template(values, .default, .missing) out <- template[rep(NA_integer_, n)] replaced <- rep(FALSE, n) for (nm in names(values)) { out <- replace_with(out, .x == nm, values[[nm]], paste0("`", nm, "`")) replaced[.x == nm] <- TRUE } .default <- validate_recode_default(.default, .x, out, replaced) out <- replace_with(out, !replaced & !is.na(.x), .default, "`.default`") out <- replace_with(out, is.na(.x), .missing, "`.missing`") out } #' @export recode.factor <- function(.x, ..., .default = NULL, .missing = NULL) { values <- list2(...) 
if (length(values) == 0) { abort("No replacements provided.") } if (!all(have_name(values))) { bad <- which(!have_name(values)) + 1 msg <- glue("{fmt_pos_args(bad)} must be named.") abort(msg) } if (!is.null(.missing)) { msg <- glue("`.missing` is not supported for factors.") abort(msg) } n <- length(levels(.x)) template <- find_template(values, .default, .missing) out <- template[rep(NA_integer_, n)] replaced <- rep(FALSE, n) for (nm in names(values)) { out <- replace_with( out, levels(.x) == nm, values[[nm]], paste0("`", nm, "`") ) replaced[levels(.x) == nm] <- TRUE } .default <- validate_recode_default(.default, .x, out, replaced) out <- replace_with(out, !replaced, .default, "`.default`") if (is.character(out)) { levels(.x) <- out .x } else { out[as.integer(.x)] } } find_template <- function(values, .default = NULL, .missing = NULL, error_call = caller_env()) { x <- compact(c(values, .default, .missing)) if (length(x) == 0) { abort("No replacements provided.", call = error_call) } x[[1]] } validate_recode_default <- function(default, x, out, replaced) { default <- recode_default(x, default, out) if (is.null(default) && sum(replaced & !is.na(x)) < length(out[!is.na(x)])) { bullets <- c( "Unreplaced values treated as NA as `.x` is not compatible. ", "Please specify replacements exhaustively or supply `.default`." ) warn(bullets) } default } recode_default <- function(x, default, out) { UseMethod("recode_default") } recode_default.default <- function(x, default, out) { same_type <- identical(typeof(x), typeof(out)) if (is.null(default) && same_type) { x } else { default } } recode_default.factor <- function(x, default, out) { if (is.null(default)) { if ((is.character(out) || is.factor(out)) && is.factor(x)) { levels(x) } else { out[NA_integer_] } } else { default } } #' @rdname recode #' @export recode_factor <- function(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE) { values <- list2(...) 
recoded <- recode(.x, !!!values, .default = .default, .missing = .missing) all_levels <- unique(c(values, recode_default(.x, .default, recoded), .missing)) recoded_levels <- if (is.factor(recoded)) levels(recoded) else unique(recoded) levels <- intersect(all_levels, recoded_levels) factor(recoded, levels, ordered = .ordered) } dplyr/R/across.R0000644000176200001440000004407414174551541013242 0ustar liggesusers#' Apply a function (or functions) across multiple columns #' #' @description #' `across()` makes it easy to apply the same transformation to multiple #' columns, allowing you to use [select()] semantics inside in "data-masking" #' functions like [summarise()] and [mutate()]. See `vignette("colwise")` for #' more details. #' #' `if_any()` and `if_all()` apply the same #' predicate function to a selection of columns and combine the #' results into a single logical vector: `if_any()` is `TRUE` when #' the predicate is `TRUE` for *any* of the selected columns, `if_all()` #' is `TRUE` when the predicate is `TRUE` for *all* selected columns. #' #' `across()` supersedes the family of "scoped variants" like #' `summarise_at()`, `summarise_if()`, and `summarise_all()`. #' #' @param .cols,cols <[`tidy-select`][dplyr_tidy_select]> Columns to transform. #' Because `across()` is used within functions like `summarise()` and #' `mutate()`, you can't select or compute upon grouping variables. #' @param .fns Functions to apply to each of the selected columns. #' Possible values are: #' #' - A function, e.g. `mean`. #' - A purrr-style lambda, e.g. `~ mean(.x, na.rm = TRUE)` #' - A list of functions/lambdas, e.g. #' `list(mean = mean, n_miss = ~ sum(is.na(.x))` #' - `NULL`: the default value, returns the selected columns in a data #' frame without applying a transformation. This is useful for when you want to #' use a function that takes a data frame. 
#' #' Within these functions you can use [cur_column()] and [cur_group()] #' to access the current column and grouping keys respectively. #' @param ... Additional arguments for the function calls in `.fns`. Using these #' `...` is strongly discouraged because of issues of timing of evaluation. #' @param .names A glue specification that describes how to name the output #' columns. This can use `{.col}` to stand for the selected column name, and #' `{.fn}` to stand for the name of the function being applied. The default #' (`NULL`) is equivalent to `"{.col}"` for the single function case and #' `"{.col}_{.fn}"` for the case where a list is used for `.fns`. #' #' @returns #' `across()` returns a tibble with one column for each column in `.cols` and each function in `.fns`. #' #' `if_any()` and `if_all()` return a logical vector. #' #' @section Timing of evaluation: #' R code in dplyr verbs is generally evaluated once per group. #' Inside `across()` however, code is evaluated once for each #' combination of columns and groups. If the evaluation timing is #' important, for example if you're generating random variables, think #' about when it should happen and place your code in consequence. 
#' #' ```{r} #' gdf <- #' tibble(g = c(1, 1, 2, 3), v1 = 10:13, v2 = 20:23) %>% #' group_by(g) #' #' set.seed(1) #' #' # Outside: 1 normal variate #' n <- rnorm(1) #' gdf %>% mutate(across(v1:v2, ~ .x + n)) #' #' # Inside a verb: 3 normal variates (ngroup) #' gdf %>% mutate(n = rnorm(1), across(v1:v2, ~ .x + n)) #' #' # Inside `across()`: 6 normal variates (ncol * ngroup) #' gdf %>% mutate(across(v1:v2, ~ .x + rnorm(1))) #' ```` #' #' @examples #' # across() ----------------------------------------------------------------- #' # Different ways to select the same set of columns #' # See for details #' iris %>% #' as_tibble() %>% #' mutate(across(c(Sepal.Length, Sepal.Width), round)) #' iris %>% #' as_tibble() %>% #' mutate(across(c(1, 2), round)) #' iris %>% #' as_tibble() %>% #' mutate(across(1:Sepal.Width, round)) #' iris %>% #' as_tibble() %>% #' mutate(across(where(is.double) & !c(Petal.Length, Petal.Width), round)) #' #' # A purrr-style formula #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), ~ mean(.x, na.rm = TRUE))) #' #' # A named list of functions #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd))) #' #' # Use the .names argument to control the output names #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), mean, .names = "mean_{.col}")) #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd), .names = "{.col}.{.fn}")) #' #' # When the list is not named, .fn is replaced by the function's position #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}")) #' #' # across() returns a data frame, which can be used as input of another function #' df <- data.frame( #' x1 = c(1, 2, NA), #' x2 = c(4, NA, 6), #' y = c("a", "b", "c") #' ) #' df %>% #' mutate(x_complete = complete.cases(across(starts_with("x")))) #' df %>% #' 
filter(complete.cases(across(starts_with("x")))) #' #' # if_any() and if_all() ---------------------------------------------------- #' iris %>% #' filter(if_any(ends_with("Width"), ~ . > 4)) #' iris %>% #' filter(if_all(ends_with("Width"), ~ . > 2)) #' #' @export #' @seealso [c_across()] for a function that returns a vector across <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { setup <- across_setup( {{ .cols }}, fns = .fns, names = .names, .caller_env = caller_env(), inline = FALSE ) vars <- setup$vars if (length(vars) == 0L) { return(new_tibble(list(), nrow = 1L)) } fns <- setup$fns names <- setup$names mask <- peek_mask() data <- mask$current_cols(vars) if (is.null(fns)) { nrow <- length(mask$current_rows()) data <- new_data_frame(data, n = nrow, class = c("tbl_df", "tbl")) if (is.null(names)) { return(data) } else { return(set_names(data, names)) } } n_cols <- length(data) n_fns <- length(fns) seq_n_cols <- seq_len(n_cols) seq_fns <- seq_len(n_fns) k <- 1L out <- vector("list", n_cols * n_fns) # Reset `cur_column()` info on exit old_var <- context_peek_bare("column") on.exit(context_poke("column", old_var), add = TRUE) # Loop in such an order that all functions are applied # to a single column before moving on to the next column withCallingHandlers( for (i in seq_n_cols) { var <- vars[[i]] col <- data[[i]] context_poke("column", var) for (j in seq_fns) { fn <- fns[[j]] out[[k]] <- fn(col, ...) 
k <- k + 1L } }, error = function(cnd) { bullets <- c( glue("Problem while computing column `{names[k]}`.") ) abort(bullets, call = call(setup$across_if_fn), parent = cnd) } ) size <- vec_size_common(!!!out) out <- vec_recycle_common(!!!out, .size = size) names(out) <- names new_data_frame(out, n = size, class = c("tbl_df", "tbl")) } #' @rdname across #' @export if_any <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { context_local("across_if_fn", "if_any") if_across(`|`, across({{ .cols }}, .fns, ..., .names = .names)) } #' @rdname across #' @export if_all <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { context_local("across_if_fn", "if_all") if_across(`&`, across({{ .cols }}, .fns, ..., .names = .names)) } if_across <- function(op, df) { n <- nrow(df) if (!length(df)) { return(TRUE) } combine <- function(x, y) { if (is_null(x)) { y } else { op(x, y) } } reduce(df, combine, .init = NULL) } #' Combine values from multiple columns #' #' @description #' `c_across()` is designed to work with [rowwise()] to make it easy to #' perform row-wise aggregations. It has two differences from `c()`: #' #' * It uses tidy select semantics so you can easily select multiple variables. #' See `vignette("rowwise")` for more details. #' #' * It uses [vctrs::vec_c()] in order to give safer outputs. #' #' @inheritParams across #' @seealso [across()] for a function that returns a tibble. 
#' @export #' @examples #' df <- tibble(id = 1:4, w = runif(4), x = runif(4), y = runif(4), z = runif(4)) #' df %>% #' rowwise() %>% #' mutate( #' sum = sum(c_across(w:z)), #' sd = sd(c_across(w:z)) #' ) c_across <- function(cols = everything()) { cols <- enquo(cols) vars <- c_across_setup(!!cols) mask <- peek_mask("c_across") cols <- mask$current_cols(vars) vec_c(!!!cols, .name_spec = zap()) } across_glue_mask <- function(.col, .fn, .caller_env) { glue_mask <- env(.caller_env, .col = .col, .fn = .fn) # TODO: we can make these bindings louder later env_bind_active( glue_mask, col = function() glue_mask$.col, fn = function() glue_mask$.fn ) glue_mask } across_setup <- function(cols, fns, names, .caller_env, mask = peek_mask("across"), inline = FALSE) { cols <- enquo(cols) # `across()` is evaluated in a data mask so we need to remove the # mask layer from the quosure environment (#5460) cols <- quo_set_env(cols, data_mask_top(quo_get_env(cols), recursive = FALSE, inherit = FALSE)) across_if_fn <- context_peek_bare("across_if_fn") %||% "across" # TODO: call eval_select with a calling handler to intercept # classed error, after https://github.com/r-lib/tidyselect/issues/233 if (is.null(fns) && quo_is_call(cols, "~")) { bullets <- c( "Must supply a column selection.", i = glue("You most likely meant: `{across_if_fn}(everything(), {as_label(cols)})`."), i = "The first argument `.cols` selects a set of columns.", i = "The second argument `.fns` operates on each selected columns." 
) abort(bullets, call = call(across_if_fn)) } across_cols <- mask$across_cols() vars <- fix_call( tidyselect::eval_select(cols, data = across_cols), call = call(across_if_fn) ) names_vars <- names(vars) vars <- names(across_cols)[vars] if (is.null(fns)) { if (!is.null(names)) { glue_mask <- across_glue_mask(.caller_env, .col = names_vars, .fn = "1") names <- fix_call( vec_as_names(glue(names, .envir = glue_mask), repair = "check_unique"), call = call(across_if_fn) ) } else { names <- names_vars } value <- list(vars = vars, fns = fns, names = names) return(value) } # apply `.names` smart default if (is.function(fns) || is_formula(fns)) { names <- names %||% "{.col}" fns <- list("1" = fns) } else { names <- names %||% "{.col}_{.fn}" } if (!is.list(fns)) { msg <- c("`.fns` must be NULL, a function, a formula, or a list of functions/formulas.") abort(msg, call = call(across_if_fn)) } # make sure fns has names, use number to replace unnamed if (is.null(names(fns))) { names_fns <- seq_along(fns) } else { names_fns <- names(fns) empties <- which(names_fns == "") if (length(empties)) { names_fns[empties] <- empties } } glue_mask <- across_glue_mask(.caller_env, .col = rep(names_vars, each = length(fns)), .fn = rep(names_fns , length(vars)) ) names <- fix_call( vec_as_names(glue(names, .envir = glue_mask), repair = "check_unique"), call = call(across_if_fn) ) if (!inline) { fns <- map(fns, as_function) } list(vars = vars, fns = fns, names = names, across_if_fn = across_if_fn) } # FIXME: This pattern should be encapsulated by rlang data_mask_top <- function(env, recursive = FALSE, inherit = FALSE) { while (env_has(env, ".__tidyeval_data_mask__.", inherit = inherit)) { env <- env_parent(env_get(env, ".top_env", inherit = inherit)) if (!recursive) { return(env) } } env } c_across_setup <- function(cols) { mask <- peek_mask("c_across") cols <- enquo(cols) across_cols <- mask$across_cols() vars <- tidyselect::eval_select(expr(!!cols), across_cols) value <- names(vars) value } 
new_dplyr_quosure <- function(quo, ...) {
  # Attach dplyr-internal metadata (e.g. `name_given`, `name_auto`,
  # `is_named`, `index`, `column`) to a quosure under the "dplyr:::data"
  # attribute, so downstream error messages can report which input failed.
  attr(quo, "dplyr:::data") <- list2(...)
  quo
}

dplyr_quosures <- function(...) {
  # Capture `...` as quosures and tag each with naming metadata:
  # - `name_given`: the user-supplied name ("" when unnamed)
  # - `name_auto`:  the name rlang derives automatically (`.named = TRUE`)
  # - `is_named`:   whether the user supplied an explicit name
  # - `index`:      position of the input within `...`
  quosures <- enquos(..., .ignore_empty = "all")
  names_given <- names2(quosures)
  names_auto <- names(enquos(..., .named = TRUE, .ignore_empty = "all"))
  for (i in seq_along(quosures)) {
    quosures[[i]] <- new_dplyr_quosure(
      quosures[[i]],
      name_given = names_given[i],
      name_auto = names_auto[i],
      is_named = names_given[i] != "",
      index = i
    )
  }
  quosures
}

# When mutate() or summarise() have an unnamed call to across() at the top level, e.g.
# summarise(across(<...>)) or mutate(across(<...>))
#
# a call to top_across(<...>) is evaluated instead.
# top_across() returns a flattened list of expressions along with some
# information about the "current column" for each expression
# in the "columns" attribute:
#
# For example with: summarise(across(c(x, y), mean, .names = "mean_{.col}")) top_across() will return
# something like:
#
# structure(
#   list(mean_x = expr(mean(x)), mean_y = expr(mean(y)))
#   columns = c("x", "y")
# )

# Technically this always returns a single quosure but we wrap it in a
# list to follow the pattern in `expand_across()`
expand_if_across <- function(quo) {
  # Only unnamed top-level calls to `if_any()`/`if_all()` are expanded;
  # anything else is passed through untouched.
  if (!quo_is_call(quo, c("if_any", "if_all"), ns = c("", "dplyr"))) {
    return(list(quo))
  }

  call <- match.call(
    definition = if_any,
    call = quo_get_expr(quo),
    expand.dots = FALSE,
    envir = quo_get_env(quo)
  )

  # Can't expand when extra arguments are passed through `...`: they must
  # be evaluated in the data mask (same constraint as `expand_across()`).
  if (!is_null(call$...)) {
    return(list(quo))
  }

  if (is_call(call, "if_any")) {
    op <- "|"
    if_fn <- "if_any"
  } else {
    op <- "&"
    if_fn <- "if_all"
  }

  # Record which wrapper we're expanding so error messages mention it.
  context_local("across_if_fn", if_fn)

  # Rewrite the call as `across()` and expand it into per-column quosures.
  call[[1]] <- quote(across)
  quos <- expand_across(quo_set_expr(quo, call))

  # Select all rows if there are no inputs
  if (!length(quos)) {
    return(list(quo(TRUE)))
  }

  # Fold the per-column expressions together with `|` (if_any) or
  # `&` (if_all).
  combine <- function(x, y) {
    if (is_null(x)) {
      y
    } else {
      call(op, x, y)
    }
  }
  expr <- reduce(quos, combine, .init = NULL)

  # Use `as_quosure()` instead of `new_quosure()` to avoid rewrapping
  # quosure in case of single input
  list(as_quosure(expr, env = baseenv()))
}

expand_across <- function(quo) {
  # Expand an unnamed top-level `across()` call into one quosure per
  # (column, function) pair; named calls and non-`across()` quosures
  # pass through unchanged.
  quo_data <- attr(quo, "dplyr:::data")
  if (!quo_is_call(quo, "across", ns = c("", "dplyr")) || quo_data$is_named) {
    return(list(quo))
  }

  # Expand dots in lexical env
  env <- quo_get_env(quo)
  expr <- match.call(
    definition = across,
    call = quo_get_expr(quo),
    expand.dots = FALSE,
    envir = env
  )

  # Abort expansion if there are any expression supplied because dots
  # must be evaluated once per group in the data mask. Expanding the
  # `across()` call would lead to either `n_group * n_col` evaluations
  # if dots are delayed or only 1 evaluation if they are eagerly
  # evaluated.
  if (!is_null(expr$...)) {
    return(list(quo))
  }

  dplyr_mask <- peek_mask()
  mask <- dplyr_mask$get_rlang_mask()

  # Differentiate between missing and null (`match.call()` doesn't
  # expand default argument)
  if (".cols" %in% names(expr)) {
    cols <- expr$.cols
  } else {
    cols <- quote(everything())
  }
  cols <- as_quosure(cols, env)

  setup <- across_setup(
    !!cols,
    fns = eval_tidy(expr$.fns, mask, env = env),
    names = eval_tidy(expr$.names, mask, env = env),
    .caller_env = dplyr_mask$get_caller_env(),
    inline = TRUE
  )

  vars <- setup$vars

  # Empty expansion
  if (length(vars) == 0L) {
    return(new_expanded_quosures(list()))
  }

  fns <- setup$fns
  names <- setup$names %||% vars

  # No functions, so just return a list of symbols
  if (is.null(fns)) {
    expressions <- pmap(list(vars, names, seq_along(vars)), function(var, name, k) {
      quo <- new_quosure(sym(var), empty_env())
      new_dplyr_quosure(
        quo,
        name_given = name,
        name_auto = name,
        is_named = TRUE,
        index = c(quo_data$index, k),
        column = var
      )
    })
    names(expressions) <- names
    return(new_expanded_quosures(expressions))
  }

  n_vars <- length(vars)
  n_fns <- length(fns)
  seq_vars <- seq_len(n_vars)
  seq_fns <- seq_len(n_fns)
  expressions <- vector(mode = "list", n_vars * n_fns)
  k <- 1L

  # Emit all functions for a column before moving on to the next column,
  # matching the column-major order produced by `across()` itself.
  for (i in seq_vars) {
    var <- vars[[i]]
    for (j in seq_fns) {
      fn_call <- as_across_fn_call(fns[[j]], var, env, mask)
      name <- names[[k]]
      expressions[[k]] <- new_dplyr_quosure(
        fn_call,
        name_given = name,
        name_auto = name,
        is_named = TRUE,
        index = c(quo_data$index, k),
        column = var
      )
      k <- k + 1L
    }
  }
  names(expressions) <- names

  new_expanded_quosures(expressions)
}

new_expanded_quosures <- function(x) {
  # Lightweight class marker distinguishing expanded `across()` quosures
  # from ordinary quosure lists.
  structure(x, class = "dplyr_expanded_quosures")
}

# TODO: Take unevaluated `.fns` and inline calls to `function`. This
# will enable support for R 4.1 lambdas. Note that unlike formulas,
# only unevaluated `function` calls can be inlined. This will have
# performance implications for lists of lambdas where formulas will
# have better performance. It is possible that we will be able to
# inline evaluated functions with strictness annotations.
as_across_fn_call <- function(fn, var, env, mask) {
  # Turn one `.fns` entry into a quosure calling it on column `var`.
  # Inlinable formulas have `.`/`.x` substituted with the column symbol
  # directly; anything else is wrapped as `as_function(fn)(var)`.
  if (is_inlinable_formula(fn, mask)) {
    # Don't need to worry about arguments passed through `...`
    # because we cancel expansion in that case
    expr <- f_rhs(fn)
    expr <- expr_substitute(expr, quote(.), sym(var))
    expr <- expr_substitute(expr, quote(.x), sym(var))

    # If the formula environment is the data mask it means the formula
    # was unevaluated, and in that case we can use the original
    # quosure environment. Otherwise, use the formula environment
    # which might include local data that is not reachable from the
    # data mask.
    f_env <- f_env(fn)
    if (identical(f_env, mask)) {
      f_env <- env
    }

    new_quosure(expr, f_env)
  } else {
    fn_call <- call2(as_function(fn), sym(var))
    new_quosure(fn_call, env)
  }
}

# Don't inline formulas that don't inherit directly from the mask
# because of a tidyeval bug/limitation that causes an infinite loop.
# If the formula env is the data mask, we replace it with the original
# quosure environment (which is maskable) later on to work around that
# bug.
is_inlinable_formula <- function(x, mask) { if (is_formula(x, lhs = FALSE, scoped = TRUE)) { env <- f_env(x) identical(env, mask) || !env_inherits(env, mask) } else { FALSE } } dplyr/R/filter.R0000644000176200001440000001704014154654644013234 0ustar liggesusers#' Subset rows using column values #' #' The `filter()` function is used to subset a data frame, #' retaining all rows that satisfy your conditions. #' To be retained, the row must produce a value of `TRUE` for all conditions. #' Note that when a condition evaluates to `NA` #' the row will be dropped, unlike base subsetting with `[`. #' #' The `filter()` function is used to subset the rows of #' `.data`, applying the expressions in `...` to the column values to determine which #' rows should be retained. It can be applied to both grouped and ungrouped data (see [group_by()] and #' [ungroup()]). However, dplyr is not yet smart enough to optimise the filtering #' operation on grouped datasets that do not need grouped calculations. For this #' reason, filtering is often considerably faster on ungrouped data. #' #' @section Useful filter functions: #' #' There are many functions and operators that are useful when constructing the #' expressions used to filter the data: #' #' * [`==`], [`>`], [`>=`] etc #' * [`&`], [`|`], [`!`], [xor()] #' * [is.na()] #' * [between()], [near()] #' #' @section Grouped tibbles: #' #' Because filtering expressions are computed within groups, they may #' yield different results on grouped tibbles. This will be the case #' as soon as an aggregating, lagging, or ranking function is #' involved. 
Compare this ungrouped filtering: #' #' ``` #' starwars %>% filter(mass > mean(mass, na.rm = TRUE)) #' ``` #' #' With the grouped equivalent: #' #' ``` #' starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) #' ``` #' #' In the ungrouped version, `filter()` compares the value of `mass` in each row to #' the global average (taken over the whole data set), keeping only the rows with #' `mass` greater than this global average. In contrast, the grouped version calculates #' the average mass separately for each `gender` group, and keeps rows with `mass` greater #' than the relevant within-gender average. #' #' @family single table verbs #' @inheritParams arrange #' @param ... <[`data-masking`][dplyr_data_masking]> Expressions that return a #' logical value, and are defined in terms of the variables in `.data`. #' If multiple expressions are included, they are combined with the `&` operator. #' Only rows for which all conditions evaluate to `TRUE` are kept. #' @param .preserve Relevant when the `.data` input is grouped. #' If `.preserve = FALSE` (the default), the grouping structure #' is recalculated based on the resulting data, otherwise the grouping is kept as is. #' @return #' An object of the same type as `.data`. The output has the following properties: #' #' * Rows are a subset of the input, but appear in the same order. #' * Columns are not modified. #' * The number of groups may be reduced (if `.preserve` is not `TRUE`). #' * Data frame attributes are preserved. #' #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}. 
#' @export #' @examples #' # Filtering by one criterion #' filter(starwars, species == "Human") #' filter(starwars, mass > 1000) #' #' # Filtering by multiple criteria within a single logical expression #' filter(starwars, hair_color == "none" & eye_color == "black") #' filter(starwars, hair_color == "none" | eye_color == "black") #' #' # When multiple expressions are used, they are combined using & #' filter(starwars, hair_color == "none", eye_color == "black") #' #' #' # The filtering operation may yield different results on grouped #' # tibbles because the expressions are computed within groups. #' # #' # The following filters rows where `mass` is greater than the #' # global average: #' starwars %>% filter(mass > mean(mass, na.rm = TRUE)) #' #' # Whereas this keeps rows with `mass` greater than the gender #' # average: #' starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) #' #' #' # To refer to column names that are stored as strings, use the `.data` pronoun: #' vars <- c("mass", "height") #' cond <- c(80, 150) #' starwars %>% #' filter( #' .data[[vars[[1]]]] > cond[[1]], #' .data[[vars[[2]]]] > cond[[2]] #' ) #' # Learn more in ?dplyr_data_masking filter <- function(.data, ..., .preserve = FALSE) { UseMethod("filter") } #' @export filter.data.frame <- function(.data, ..., .preserve = FALSE) { loc <- filter_rows(.data, ..., caller_env = caller_env()) dplyr_row_slice(.data, loc, preserve = .preserve) } filter_rows <- function(.data, ..., caller_env, error_call = caller_env()) { error_call <- dplyr_error_call(error_call) dots <- dplyr_quosures(...) 
check_filter(dots, error_call = error_call) mask <- DataMask$new(.data, caller_env, "filter", error_call = error_call) on.exit(mask$forget(), add = TRUE) dots <- filter_expand(dots, mask = mask, error_call = error_call) filter_eval(dots, mask = mask, error_call = error_call) } check_filter <- function(dots, error_call = error_call) { named <- have_name(dots) for (i in which(named)) { quo <- dots[[i]] # only allow named logical vectors, anything else # is suspicious expr <- quo_get_expr(quo) if (!is.logical(expr)) { name <- names(dots)[i] bullets <- c( "We detected a named input.", i = glue("This usually means that you've used `=` instead of `==`."), i = glue("Did you mean `{name} == {as_label(expr)}`?") ) abort(bullets, call = error_call) } } } filter_expand <- function(dots, mask, error_call = caller_env()) { env_filter <- env() filter_expand_one <- function(dot, index) { env_filter$current_expression <- index expand_if_across(dot) } dots <- withCallingHandlers( imap(unname(dots), filter_expand_one), error = function(cnd) { local_error_context(dots = dots, .index = env_filter$current_expression, mask = mask) abort(cnd_bullet_header("expanding"), call = error_call, parent = cnd) } ) new_quosures(flatten(dots)) } filter_eval <- function(dots, mask, error_call = caller_env()) { env_filter <- env() withCallingHandlers({ mask$eval_all_filter(dots, env_filter) }, error = function(e) { local_error_context(dots = dots, .index = env_filter$current_expression, mask = mask) bullets <- c( cnd_bullet_header("computing"), filter_bullets(e) ) abort(bullets, call = error_call, parent = skip_internal_condition(e)) }) } filter_bullets <- function(cnd, ...) { UseMethod("filter_bullets") } #' @export filter_bullets.default <- function(cnd, ...) { c(i = cnd_bullet_cur_group_label()) } #' @export `filter_bullets.dplyr:::filter_incompatible_type` <- function(cnd, ...) 
{ column_name <- cnd$dplyr_error_data$column_name index <- cnd$dplyr_error_data$index result <- cnd$dplyr_error_data$result input_name <- if (is.null(column_name)) { glue("..{index}") } else { glue("..{index}${column_name}") } c( x = glue("Input `{input_name}` must be a logical vector, not a {vec_ptype_full(result)}."), i = cnd_bullet_cur_group_label() ) } #' @export `filter_bullets.dplyr:::filter_incompatible_size` <- function(cnd, ...) { index <- cnd$dplyr_error_data$index expected_size <- cnd$dplyr_error_data$expected_size size <- cnd$dplyr_error_data$size c( x = glue("Input `..{index}` must be of size {or_1(expected_size)}, not size {size}."), i = cnd_bullet_cur_group_label() ) } dplyr/R/coalesce.R0000644000176200001440000000356114151641776013527 0ustar liggesusers#' Find first non-missing element #' #' Given a set of vectors, `coalesce()` finds the first non-missing value #' at each position. This is inspired by the SQL `COALESCE` function #' which does the same thing for `NULL`s. #' #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Vectors. Inputs should be #' recyclable (either be length 1 or same length as the longest vector) and #' coercible to a common type. If data frames, they are coalesced column by #' column. #' @return A vector the same length as the first `...` argument with #' missing values replaced by the first non-missing value. #' @seealso [na_if()] to replace specified values with a `NA`. #' [tidyr::replace_na()] to replace `NA` with a value #' @export #' @examples #' # Use a single value to replace all missing values #' x <- sample(c(1:5, NA, NA, NA)) #' coalesce(x, 0L) #' #' # Or match together a complete vector from missing pieces #' y <- c(1, 2, NA, NA, 5) #' z <- c(NA, NA, 3, 4, 5) #' coalesce(y, z) #' #' # Supply lists by with dynamic dots #' vecs <- list( #' c(1, 2, NA, NA, 5), #' c(NA, NA, 3, 4, 5) #' ) #' coalesce(!!!vecs) coalesce <- function(...) 
{ if (missing(..1)) { abort("At least one argument must be supplied.") } values <- list2(...) values <- fix_call(vec_cast_common(!!!values)) values <- fix_call(vec_recycle_common(!!!values)) x <- values[[1]] values <- values[-1] if (is.array(x) && length(dim(x)) > 1) { abort("Can't coalesce matrices.") } if (is.data.frame(x)) { df_coalesce(x, values) } else { vec_coalesce(x, values) } } vec_coalesce <- function(x, values) { for (i in seq_along(values)) { x_miss <- is.na(x) vec_slice(x, x_miss) <- vec_slice(values[[i]], x_miss) } x } df_coalesce <- function(x, values) { for (i in seq_along(x)) { col_values <- map(values, `[[`, i) x[[i]] <- vec_coalesce(x[[i]], col_values) } x } dplyr/R/join-common-by.R0000644000176200001440000000340214151641776014600 0ustar liggesusers#' Extract out common by variables #' #' @export #' @keywords internal common_by <- function(by = NULL, x, y) UseMethod("common_by", by) #' @export common_by.character <- function(by, x, y) { by <- common_by_from_vector(by) common_by.list(by, x, y) } common_by_from_vector <- function(by) { by <- by[!duplicated(by)] by_x <- names(by) %||% by by_y <- unname(by) # If x partially named, assume unnamed are the same in both tables by_x[by_x == ""] <- by_y[by_x == ""] list(x = by_x, y = by_y) } #' @export common_by.list <- function(by, x, y) { x_vars <- tbl_vars(x) if (!all(by$x %in% x_vars)) { msg <- glue("`by` can't contain join column {missing} which is missing from LHS.", missing = fmt_obj(setdiff(by$x, x_vars)) ) abort(msg) } y_vars <- tbl_vars(y) if (!all(by$y %in% y_vars)) { msg <- glue("`by` can't contain join column {missing} which is missing from RHS.", missing = fmt_obj(setdiff(by$y, y_vars)) ) abort(msg) } by } #' @export common_by.NULL <- function(by, x, y) { by <- intersect(tbl_vars(x), tbl_vars(y)) by <- by[!is.na(by)] if (length(by) == 0) { msg <- glue("`by` required, because the data sources have no common variables.") abort(msg) } inform(auto_by_msg(by)) list( x = by, y = by ) } auto_by_msg 
<- function(by) { by_quoted <- encodeString(by, quote = '"') if (length(by_quoted) == 1L) { by_code <- by_quoted } else { by_code <- paste0("c(", paste(by_quoted, collapse = ", "), ")") } paste0("Joining, by = ", by_code) } #' @export common_by.default <- function(by, x, y) { msg <- glue("`by` must be a (named) character vector, list, or NULL for natural joins (not recommended in production code), not {friendly_type_of(by)}.") abort(msg) } dplyr/R/sample.R0000644000176200001440000001164614154676633013240 0ustar liggesusers#' Sample n rows from a table #' #' @description #' `r lifecycle::badge("superseded")` #' `sample_n()` and `sample_frac()` have been superseded in favour of #' [slice_sample()]. While they will not be deprecated in the near future, #' retirement means that we will only perform critical bug fixes, so we recommend #' moving to the newer alternative. #' #' These functions were superseded because we realised it was more convenient to #' have two mutually exclusive arguments to one function, rather than two #' separate functions. This also made it to clean up a few other smaller #' design issues with `sample_n()`/`sample_frac`: #' #' * The connection to `slice()` was not obvious. #' * The name of the first argument, `tbl`, is inconsistent with other #' single table verbs which use `.data`. #' * The `size` argument uses tidy evaluation, which is surprising and #' undocumented. #' * It was easier to remove the deprecated `.env` argument. #' * `...` was in a suboptimal position. #' #' @keywords internal #' @param tbl A data.frame. #' @param size <[`tidy-select`][dplyr_tidy_select]> #' For `sample_n()`, the number of rows to select. #' For `sample_frac()`, the fraction of rows to select. #' If `tbl` is grouped, `size` applies to each group. #' @param replace Sample with or without replacement? #' @param weight <[`tidy-select`][dplyr_tidy_select]> Sampling weights. #' This must evaluate to a vector of non-negative numbers the same length as #' the input. 
Weights are automatically standardised to sum to 1. #' @param .env DEPRECATED. #' @param ... ignored #' @examples #' by_cyl <- mtcars %>% group_by(cyl) #' #' # sample_n() -> slice_sample() ---------------------------------------------- #' sample_n(mtcars, 10) #' sample_n(mtcars, 50, replace = TRUE) #' sample_n(mtcars, 10, weight = mpg) #' #' # Changes: #' # * explicitly name the `n` argument, #' # * the `weight` argument is now `weight_by`. #' #' slice_sample(mtcars, n = 10) #' slice_sample(mtcars, n = 50, replace = TRUE) #' slice_sample(mtcars, n = 10, weight_by = mpg) #' #' # Note that sample_n() would error if n was bigger than the group size #' # slice_sample() will just use the available rows for consistency with #' # the other slice helpers like slice_head() #' #' # sample_frac() -> slice_sample() ------------------------------------------- #' sample_frac(mtcars) #' sample_frac(mtcars, replace = TRUE) #' #' # Changes: #' # * use prop = 1 to randomly sample all rows #' #' slice_sample(mtcars, prop = 1) #' slice_sample(mtcars, prop = 1, replace = TRUE) #' #' @export sample_n <- function(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) { lifecycle::signal_stage("superseded", "sample_n()") UseMethod("sample_n") } #' @export sample_n.default <- function(tbl, size, replace = FALSE, weight = NULL, .env = parent.frame(), ...) { msg <- glue("`tbl` must be a data frame, not {friendly_type_of(tbl)}.") abort(msg) } #' @export sample_n.data.frame <- function(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) { if (!is_null(.env)) { inform("`sample_n() argument `.env` is deprecated and no longer has any effect.") } size <- enquo(size) weight <- enquo(weight) dplyr_local_error_call() slice(tbl, local({ size <- check_size(!!size, n(), replace = replace) sample.int(n(), size, replace = replace, prob = !!weight) })) } #' @rdname sample_n #' @export sample_frac <- function(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) 
{ lifecycle::signal_stage("superseded", "sample_frac()") UseMethod("sample_frac") } #' @export sample_frac.default <- function(tbl, size = 1, replace = FALSE, weight = NULL, .env = parent.frame(), ...) { msg <- glue("`tbl` must be a data frame, not {friendly_type_of(tbl)}.") abort(msg) } #' @export sample_frac.data.frame <- function(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) { if (!is_null(.env)) { inform("`.env` is deprecated and no longer has any effect") } size <- enquo(size) weight <- enquo(weight) dplyr_local_error_call() slice(tbl, local({ size <- round(n() * check_frac(!!size, replace = replace)) sample.int(n(), size, replace = replace, prob = !!weight) })) } # Helper functions ------------------------------------------------------------- check_size <- function(size, n, replace = FALSE) { if (size <= n || replace) return(invisible(size)) bullets <- c( glue("`size` must be less than or equal to {n} (size of data)."), i = "set `replace = TRUE` to use sampling with replacement." ) abort(bullets, call = NULL) } check_frac <- function(size, replace = FALSE) { if (size <= 1 || replace) return(invisible(size)) bullets <- c( glue("`size` of sampled fraction must be less or equal to one."), i = "set `replace = TRUE` to use sampling with replacement." ) abort(bullets, call = NULL) } dplyr/R/relocate.R0000644000176200001440000000614714121112104013521 0ustar liggesusers#' Change column order #' #' Use `relocate()` to change column positions, using the same syntax as #' `select()` to make it easy to move blocks of columns at once. #' #' @inheritParams arrange #' @param ... <[`tidy-select`][dplyr_tidy_select]> Columns to move. #' @param .before,.after <[`tidy-select`][dplyr_tidy_select]> Destination of #' columns selected by `...`. Supplying neither will move columns to the #' left-hand side; specifying both is an error. #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Rows are not affected. 
#' * The same columns appear in the output, but (usually) in a different place.
#' * Data frame attributes are preserved.
#' * Groups are not affected.
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("relocate")}.
#' @export
#' @examples
#' df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a")
#' df %>% relocate(f)
#' df %>% relocate(a, .after = c)
#' df %>% relocate(f, .before = b)
#' df %>% relocate(a, .after = last_col())
#'
#' # relocated columns can change name
#' df %>% relocate(ff = f)
#'
#' # Can also select variables based on their type
#' df %>% relocate(where(is.character))
#' df %>% relocate(where(is.numeric), .after = last_col())
#' # Or with any other select helper
#' df %>% relocate(any_of(c("a", "e", "i", "o", "u")))
#'
#' # When .before or .after refers to multiple variables they will be
#' # moved to be immediately before/after the selected variables.
#' df2 <- tibble(a = 1, b = "a", c = 1, d = "a")
#' df2 %>% relocate(where(is.numeric), .after = where(is.character))
#' df2 %>% relocate(where(is.numeric), .before = where(is.character))
relocate <- function(.data, ..., .before = NULL, .after = NULL) {
  UseMethod("relocate")
}

#' @export
relocate.data.frame <- function(.data, ..., .before = NULL, .after = NULL) {
  # Columns to move, named by any renames supplied in `...`.
  sel <- tidyselect::eval_select(expr(c(...)), .data)

  before_quo <- enquo(.before)
  after_quo <- enquo(.after)
  has_before <- !quo_is_null(before_quo)
  has_after <- !quo_is_null(after_quo)

  if (has_before && has_after) {
    abort("Must supply only one of `.before` and `.after`.")
  }

  # Determine the anchor column and fold it into the selection so the moved
  # block ends up contiguous with it. With `.before`, the anchor joins on the
  # right of the block; with `.after`, on the left; with neither, columns are
  # moved to the far left (anchored at column 1).
  if (has_before) {
    anchor <- min(unname(tidyselect::eval_select(before_quo, .data)))
    if (!anchor %in% sel) {
      sel <- c(sel, anchor)
    }
  } else if (has_after) {
    anchor <- max(unname(tidyselect::eval_select(after_quo, .data)))
    if (!anchor %in% sel) {
      sel <- c(anchor, sel)
    }
  } else {
    anchor <- 1L
    if (!anchor %in% sel) {
      sel <- c(sel, anchor)
    }
  }

  remaining_lhs <- setdiff(seq2(1, anchor - 1), sel)
  remaining_rhs <- setdiff(seq2(anchor + 1, ncol(.data)), sel)
  pos <- vec_unique(c(remaining_lhs, sel, remaining_rhs))

  out <- .data[pos]

  # Apply any renames carried through on the names of `pos`.
  renamed <- names(pos)
  if (!is.null(renamed)) {
    names(out)[renamed != ""] <- renamed[renamed != ""]
  }

  out
}
dplyr/R/compat-purrr.R0000644000176200001440000001140114154402214014355 0ustar liggesusers# nocov start - compat-purrr.R
# Latest version: https://github.com/r-lib/rlang/blob/master/R/compat-purrr.R

# This file provides a minimal shim to provide a purrr-like API on top of
# base R functions. They are not drop-in replacements but allow a similar style
# of programming.
#
# Changelog:
#
# 2020-04-14:
# * Removed `pluck*()` functions
# * Removed `*_cpl()` functions
# * Used `as_function()` to allow use of `~`
# * Used `.` prefix for helpers
#
# 2021-05-21:
# * Fixed "object `x` not found" error in `imap()` (@mgirlich)

map <- function(.x, .f, ...) {
  .f <- as_function(.f, env = global_env())
  lapply(.x, .f, ...)
}

walk <- function(.x, .f, ...) {
  map(.x, .f, ...)
  invisible(.x)
}

map_lgl <- function(.x, .f, ...) {
  .rlang_purrr_map_mold(.x, .f, logical(1), ...)
}

map_int <- function(.x, .f, ...) {
  .rlang_purrr_map_mold(.x, .f, integer(1), ...)
}

map_dbl <- function(.x, .f, ...) {
  .rlang_purrr_map_mold(.x, .f, double(1), ...)
}

map_chr <- function(.x, .f, ...) {
  .rlang_purrr_map_mold(.x, .f, character(1), ...)
}

# Shared worker for the typed map variants: vapply with an explicit mold,
# re-attaching input names afterwards.
.rlang_purrr_map_mold <- function(.x, .f, .mold, ...) {
  .f <- as_function(.f, env = global_env())
  out <- vapply(.x, .f, .mold, ..., USE.NAMES = FALSE)
  names(out) <- names(.x)
  out
}

map2 <- function(.x, .y, .f, ...) {
  .f <- as_function(.f, env = global_env())
  out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE)
  if (length(out) == length(.x)) {
    set_names(out, names(.x))
  } else {
    set_names(out, NULL)
  }
}

map2_lgl <- function(.x, .y, .f, ...) {
  as.vector(map2(.x, .y, .f, ...), "logical")
}
map2_int <- function(.x, .y, .f, ...) {
  as.vector(map2(.x, .y, .f, ...), "integer")
}
map2_dbl <- function(.x, .y, .f, ...) {
  as.vector(map2(.x, .y, .f, ...), "double")
}
map2_chr <- function(.x, .y, .f, ...) {
  as.vector(map2(.x, .y, .f, ...), "character")
}

imap <- function(.x, .f, ...) {
  map2(.x, names(.x) %||% seq_along(.x), .f, ...)
}

pmap <- function(.l, .f, ...) {
  # Fixed: was `as.function(.f)`, which does not support `~` lambdas.
  # `as_function()` matches the other helpers and the 2020-04-14 changelog
  # entry above.
  .f <- as_function(.f, env = global_env())
  args <- .rlang_purrr_args_recycle(.l)
  do.call("mapply", c(
    FUN = list(quote(.f)),
    args,
    MoreArgs = quote(list(...)),
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  ))
}

# Recycle length-1 elements of `args` to the common length; errors (via
# stopifnot) on any other length mismatch.
.rlang_purrr_args_recycle <- function(args) {
  lengths <- map_int(args, length)
  n <- max(lengths)

  stopifnot(all(lengths == 1L | lengths == n))
  to_recycle <- lengths == 1L
  args[to_recycle] <- map(args[to_recycle], function(x) rep.int(x, n))

  args
}

keep <- function(.x, .f, ...) {
  .x[.rlang_purrr_probe(.x, .f, ...)]
}

discard <- function(.x, .p, ...) {
  sel <- .rlang_purrr_probe(.x, .p, ...)
  .x[is.na(sel) | !sel]
}

map_if <- function(.x, .p, .f, ...) {
  matches <- .rlang_purrr_probe(.x, .p)
  .x[matches] <- map(.x[matches], .f, ...)
  .x
}

# Turn a predicate (function, `~` lambda, or logical vector) into a logical
# mask over `.x`.
.rlang_purrr_probe <- function(.x, .p, ...) {
  if (is_logical(.p)) {
    stopifnot(length(.p) == length(.x))
    .p
  } else {
    .p <- as_function(.p, env = global_env())
    map_lgl(.x, .p, ...)
  }
}

compact <- function(.x) {
  Filter(length, .x)
}

transpose <- function(.l) {
  inner_names <- names(.l[[1]])
  if (is.null(inner_names)) {
    fields <- seq_along(.l[[1]])
  } else {
    fields <- set_names(inner_names)
  }

  map(fields, function(i) {
    map(.l, .subset2, i)
  })
}

every <- function(.x, .p, ...) {
  .p <- as_function(.p, env = global_env())

  for (i in seq_along(.x)) {
    if (!rlang::is_true(.p(.x[[i]], ...))) return(FALSE)
  }
  TRUE
}

some <- function(.x, .p, ...) {
  .p <- as_function(.p, env = global_env())

  for (i in seq_along(.x)) {
    if (rlang::is_true(.p(.x[[i]], ...))) return(TRUE)
  }
  FALSE
}

negate <- function(.p) {
  .p <- as_function(.p, env = global_env())
  function(...) !.p(...)
}

reduce <- function(.x, .f, ..., .init) {
  f <- function(x, y) .f(x, y, ...)
  Reduce(f, .x, init = .init)
}

reduce_right <- function(.x, .f, ..., .init) {
  f <- function(x, y) .f(y, x, ...)
  Reduce(f, .x, init = .init, right = TRUE)
}

accumulate <- function(.x, .f, ..., .init) {
  f <- function(x, y) .f(x, y, ...)
  Reduce(f, .x, init = .init, accumulate = TRUE)
}

accumulate_right <- function(.x, .f, ..., .init) {
  f <- function(x, y) .f(y, x, ...)
  Reduce(f, .x, init = .init, right = TRUE, accumulate = TRUE)
}

detect <- function(.x, .f, ..., .right = FALSE, .p = is_true) {
  .p <- as_function(.p, env = global_env())
  .f <- as_function(.f, env = global_env())

  for (i in .rlang_purrr_index(.x, .right)) {
    if (.p(.f(.x[[i]], ...))) {
      return(.x[[i]])
    }
  }
  NULL
}

detect_index <- function(.x, .f, ..., .right = FALSE, .p = is_true) {
  .p <- as_function(.p, env = global_env())
  .f <- as_function(.f, env = global_env())

  for (i in .rlang_purrr_index(.x, .right)) {
    if (.p(.f(.x[[i]], ...))) {
      return(i)
    }
  }
  0L
}

.rlang_purrr_index <- function(x, right = FALSE) {
  idx <- seq_along(x)
  if (right) {
    idx <- rev(idx)
  }
  idx
}

# nocov end
dplyr/R/compat-name-repair.R0000644000176200001440000001247414134265615015420 0ustar liggesusers# compat-name-repair (last updated: tibble 2.0.1.9000)

# This file serves as a reference for compatibility functions for
# name repair in tibble, until name repair is available in rlang.

error_name_length_required <- function() {
  "`n` must be specified, when the `names` attribute is `NULL`."
}

minimal_names <- function(name, n) {
  if (is.null(name) && missing(n)) {
    abort(error_name_length_required())
  }

  ## TODO: address scenarios where name is not NULL and n != length(name)?
if (is.null(name)) { rep_len("", n) } else { name %|% "" } } set_minimal_names <- function(x) { new_names <- minimal_names(names(x), n = length(x)) set_names(x, new_names) } unique_names <- function(name, quiet = FALSE, transform = identity) { min_name <- minimal_names(name) naked_name <- strip_pos(min_name) naked_is_empty <- (naked_name == "") new_name <- transform(naked_name) new_name <- append_pos(new_name, needs_suffix = naked_is_empty) duped_after <- duplicated(new_name) | duplicated(new_name, fromLast = TRUE) new_name <- append_pos(new_name, duped_after) if (!quiet) { describe_repair(name, new_name) } new_name } set_unique_names <- function(x, quiet = FALSE) { x <- set_minimal_names(x) new_names <- unique_names(names(x), quiet = quiet) set_names(x, new_names) } universal_names <- function(name, quiet = FALSE) { unique_names(name, quiet = quiet, transform = make_syntactic) } set_universal_names <- function(x, quiet = FALSE) { x <- set_minimal_names(x) new_names <- universal_names(names(x), quiet = quiet) set_names(x, new_names) } ## makes each individual name syntactic ## does not enforce unique-ness make_syntactic <- function(name) { name[is.na(name)] <- "" name[name == ""] <- "." name[name == "..."] <- "...." name <- sub("^_", "._", name) new_name <- make.names(name) X_prefix <- grepl("^X", new_name) & !grepl("^X", name) new_name[X_prefix] <- sub("^X", "", new_name[X_prefix]) dot_suffix <- which(new_name == paste0(name, ".")) new_name[dot_suffix] <- sub("^(.*)[.]$", ".\\1", new_name[dot_suffix]) ## illegal characters have been replaced with '.' via make.names() ## however, we have: ## * declined its addition of 'X' prefixes ## * turned its '.' suffixes to '.' 
prefixes regex <- paste0( "^(?[.]{0,2})", "(?[0-9]*)", "(?[^0-9]?.*$)" ) re <- re_match(new_name, pattern = regex) needs_dots <- which(re$numbers != "") needs_third_dot <- (re$leftovers[needs_dots] == "") re$leading_dots[needs_dots] <- ifelse(needs_third_dot, "...", "..") new_name <- paste0(re$leading_dots, re$numbers, re$leftovers) new_name } append_pos <- function(name, needs_suffix) { need_append_pos <- which(needs_suffix) name[need_append_pos] <- paste0(name[need_append_pos], "..", need_append_pos) name } strip_pos <- function(name) { rx <- "[.][.][1-9][0-9]*$" gsub(rx, "", name) %|% "" } describe_repair <- function(orig_name, name) { if(length(orig_name) != length(name)) { abort(c( "`orig_name` and `name` have different sizes.", i = glue("`orig_name` is of size {length(orig_name)}."), i = glue("`name` is of size {length(name)}.") )) } new_names <- name != minimal_names(orig_name) if (any(new_names)) { msg <- bullets( "New names:", paste0( tick_if_needed(orig_name[new_names]), " -> ", tick_if_needed(name[new_names]), .problem = "" ) ) message(msg) } } ## from rematch2, except we don't add tbl_df or tbl classes to the return value re_match <- function(text, pattern, perl = TRUE, ...) { if (!is.character(pattern) || length(pattern) != 1L || is.na(pattern)) { abort(c( "incompatible `pattern`.", i = "`pattern` should be a scalar string." )) } text <- as.character(text) match <- regexpr(pattern, text, perl = perl, ...) 
start <- as.vector(match) length <- attr(match, "match.length") end <- start + length - 1L matchstr <- substring(text, start, end) matchstr[ start == -1 ] <- NA_character_ res <- data.frame( stringsAsFactors = FALSE, .text = text, .match = matchstr ) if (!is.null(attr(match, "capture.start"))) { gstart <- attr(match, "capture.start") glength <- attr(match, "capture.length") gend <- gstart + glength - 1L groupstr <- substring(text, gstart, gend) groupstr[ gstart == -1 ] <- NA_character_ dim(groupstr) <- dim(gstart) res <- cbind(groupstr, res, stringsAsFactors = FALSE) } names(res) <- c(attr(match, "capture.names"), ".text", ".match") res } # A better version (with far more dependencies) exists in msg-format.R bullets <- function(header, ..., .problem) { problems <- c(...) MAX_BULLETS <- 6L if (length(problems) >= MAX_BULLETS) { n_more <- length(problems) - MAX_BULLETS + 1L problems[[MAX_BULLETS]] <- "..." length(problems) <- MAX_BULLETS } paste0( header, "\n", paste0("* ", problems, collapse = "\n") ) } # FIXME: Also exists in pillar, do we need to export? tick <- function(x) { ifelse(is.na(x), "NA", encodeString(x, quote = "`")) } is_syntactic <- function(x) { ret <- (make_syntactic(x) == x) ret[is.na(x)] <- FALSE ret } tick_if_needed <- function(x) { needs_ticks <- !is_syntactic(x) x[needs_ticks] <- tick(x[needs_ticks]) x } dplyr/R/rowwise.r0000644000176200001440000001165614151641776013514 0ustar liggesusers#' Group input by rows #' #' @description #' `rowwise()` allows you to compute on a data frame a row-at-a-time. #' This is most useful when a vectorised function doesn't exist. #' #' Most dplyr verbs preserve row-wise grouping. The exception is [summarise()], #' which return a [grouped_df]. You can explicitly ungroup with [ungroup()] #' or [as_tibble()], or convert to a [grouped_df] with [group_by()]. #' #' @section List-columns: #' Because a rowwise has exactly one row per group it offers a small #' convenience for working with list-columns. 
Normally, `summarise()` and #' `mutate()` extract a groups worth of data with `[`. But when you index #' a list in this way, you get back another list. When you're working with #' a `rowwise` tibble, then dplyr will use `[[` instead of `[` to make your #' life a little easier. #' #' @param data Input data frame. #' @param ... <[`tidy-select`][dplyr_tidy_select]> Variables to be preserved #' when calling [summarise()]. This is typically a set of variables whose #' combination uniquely identify each row. #' #' **NB**: unlike `group_by()` you can not create new variables here but #' instead you can select multiple variables with (e.g.) `everything()`. #' @seealso [nest_by()] for a convenient way of creating rowwise data frames #' with nested data. #' @return A row-wise data frame with class `rowwise_df`. Note that a #' `rowwise_df` is implicitly grouped by row, but is not a `grouped_df`. #' @export #' @examples #' df <- tibble(x = runif(6), y = runif(6), z = runif(6)) #' # Compute the mean of x, y, z in each row #' df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) #' # use c_across() to more easily select many variables #' df %>% rowwise() %>% mutate(m = mean(c_across(x:z))) #' #' # Compute the minimum of x and y in each row #' df %>% rowwise() %>% mutate(m = min(c(x, y, z))) #' # In this case you can use an existing vectorised function: #' df %>% mutate(m = pmin(x, y, z)) #' # Where these functions exist they'll be much faster than rowwise #' # so be on the lookout for them. #' #' # rowwise() is also useful when doing simulations #' params <- tribble( #' ~sim, ~n, ~mean, ~sd, #' 1, 1, 1, 1, #' 2, 2, 2, 4, #' 3, 3, -1, 2 #' ) #' # Here I supply variables to preserve after the summary #' params %>% #' rowwise(sim) %>% #' summarise(z = rnorm(n, mean, sd)) #' #' # If you want one row per simulation, put the results in a list() #' params %>% #' rowwise(sim) %>% #' summarise(z = list(rnorm(n, mean, sd))) rowwise <- function(data, ...) 
{ UseMethod("rowwise") } #' @export rowwise.data.frame <- function(data, ...) { vars <- tidyselect::eval_select(expr(c(...)), data) rowwise_df(data, vars) } #' @export rowwise.grouped_df <- function(data, ...) { if (!missing(...)) { bullets <- c( "Can't re-group when creating rowwise data.", i = "Either first `ungroup()` or call `rowwise()` without arguments." ) abort(bullets) } rowwise_df(data, group_vars(data)) } # Constructor + helper ---------------------------------------------------- rowwise_df <- function(data, group_vars) { group_data <- as_tibble(data)[group_vars] new_rowwise_df(data, group_data) } #' @rdname new_grouped_df #' @export new_rowwise_df <- function(data, group_data = NULL, ..., class = character()) { nrow <- nrow(data) if (!is.null(group_data)) { if (!is_tibble(group_data) || has_name(group_data, ".rows")) { msg <- "`group_data` must be a tibble without a `.rows` column." abort(msg) } group_data <- new_tibble(dplyr_vec_data(group_data), nrow = nrow) # strip attributes } else { group_data <- new_tibble(list(), nrow = nrow) } group_data$.rows <- new_list_of(as.list(seq_len(nrow)), ptype = integer()) new_tibble( data, groups = group_data, ..., nrow = nrow, class = c(class, "rowwise_df") ) } #' @rdname new_grouped_df #' @export validate_rowwise_df <- function(x) { result <- .Call(`dplyr_validate_rowwise_df`, x) if (!is.null(result)) { abort(result) } x } setOldClass(c("rowwise_df", "tbl_df", "tbl", "data.frame")) # methods ----------------------------------------------------------------- #' @export tbl_sum.rowwise_df <- function(x, ...) { c( NextMethod(), "Rowwise" = commas(group_vars(x)) ) } #' @export as_tibble.rowwise_df <- function(x, ...) 
{ new_tibble(dplyr_vec_data(x), nrow = nrow(x)) } #' @importFrom tibble is_tibble #' @export `[.rowwise_df` <- function(x, i, j, drop = FALSE) { out <- NextMethod() if (!is.data.frame(out)) { return(out) } group_vars <- intersect(names(out), group_vars(x)) rowwise_df(out, group_vars) } #' @export `[<-.rowwise_df` <- function(x, i, j, ..., value) { out <- NextMethod() group_vars <- intersect(names(out), group_vars(x)) rowwise_df(out, group_vars) } #' @export `names<-.rowwise_df` <- function(x, value) { data <- NextMethod() group_vars <- value[match(group_vars(x), names(x))] rowwise_df(data, group_vars) } dplyr/R/sets.r0000644000176200001440000001153014151641776012762 0ustar liggesusers#' Set operations #' #' These functions override the set functions provided in base to make them #' generic so that efficient versions for data frames and other tables can be #' provided. The default methods call the base versions. Beware that #' `intersect()`, `union()` and `setdiff()` remove duplicates. #' #' @param x,y objects to perform set function on (ignoring order) #' @inheritParams rlang::args_dots_empty #' @name setops #' @examples #' mtcars$model <- rownames(mtcars) #' first <- mtcars[1:20, ] #' second <- mtcars[10:32, ] #' #' intersect(first, second) #' union(first, second) #' setdiff(first, second) #' setdiff(second, first) #' #' union_all(first, second) #' setequal(mtcars, mtcars[32:1, ]) #' #' # Handling of duplicates: #' a <- data.frame(column = c(1:10, 10)) #' b <- data.frame(column = c(1:5, 5)) #' #' # intersection is 1 to 5, duplicates removed (5) #' intersect(a, b) #' #' # union is 1 to 10, duplicates removed (5 and 10) #' union(a, b) #' #' # set difference, duplicates removed (10) #' setdiff(a, b) #' #' # union all does not remove duplicates #' union_all(a, b) NULL #' @rdname setops #' @export union_all <- function(x, y, ...) UseMethod("union_all") #' @export union_all.default <- function (x, y, ...) 
{ check_dots_empty() vec_c(x, y) } #' @importFrom generics intersect #' @export generics::intersect #' @importFrom generics union #' @export generics::union #' @importFrom generics setdiff #' @export generics::setdiff #' @importFrom generics setequal #' @export generics::setequal #' @export intersect.data.frame <- function(x, y, ...) { check_dots_empty() check_compatible(x, y) cast <- vec_cast_common(x = x, y = y) new_x <- cast[[1L]] new_y <- cast[[2L]] out <- vec_unique(vec_slice(new_x, vec_in(new_x, new_y))) reconstruct_set(out, x) } #' @export union.data.frame <- function(x, y, ...) { check_dots_empty() check_compatible(x, y) cast <- vec_cast_common(x, y) out <- vec_unique(vec_rbind(!!!cast)) reconstruct_set(out, x) } #' @export union_all.data.frame <- function(x, y, ...) { check_dots_empty() out <- bind_rows(x, y) reconstruct_set(out, x) } #' @export setdiff.data.frame <- function(x, y, ...) { check_dots_empty() check_compatible(x, y) cast <- vec_cast_common(x, y) new_x <- cast[[1L]] new_y <- cast[[2L]] out <- vec_unique(vec_slice(new_x, !vec_in(new_x, new_y))) reconstruct_set(out, x) } #' @export setequal.data.frame <- function(x, y, ...) 
{ check_dots_empty() isTRUE(equal_data_frame(x, y)) } reconstruct_set <- function(out, x) { if (is_grouped_df(x)) { out <- grouped_df(out, group_vars(x), group_by_drop_default(x)) } out } # Helpers ----------------------------------------------------------------- is_compatible_data_frame <- function(x, y, ignore_col_order = TRUE, convert = TRUE) { nc <- ncol(x) if (nc != ncol(y)) { return( c(x = glue("Different number of columns: {nc} vs {ncol(y)}.")) ) } names_x <- names(x) names_y <- names(y) names_y_not_in_x <- setdiff(names_y, names_x) names_x_not_in_y <- setdiff(names_x, names_y) if (length(names_y_not_in_x) == 0L && length(names_x_not_in_y) == 0L) { # check if same order if (!isTRUE(ignore_col_order)) { if (!identical(names_x, names_y)) { return(c(x = "Same column names, but different order.")) } } } else { # names are not the same, explain why msg <- c() if (length(names_y_not_in_x)) { wrong <- glue_collapse(glue('`{names_y_not_in_x}`'), sep = ", ") msg <- c( msg, x = glue("Cols in `y` but not `x`: {wrong}.") ) } if (length(names_x_not_in_y)) { wrong <- glue_collapse(glue('`{names_x_not_in_y}`'), sep = ", ") msg <- c( msg, x = glue("Cols in `x` but not `y`: {wrong}.") ) } return(msg) } msg <- c() for (name in names_x) { x_i <- x[[name]] y_i <- y[[name]] if (convert) { tryCatch( vec_ptype2(x_i, y_i), error = function(e) { msg <<- c( msg, x = glue("Incompatible types for column `{name}`: {vec_ptype_full(x_i)} vs {vec_ptype_full(y_i)}.") ) } ) } else { if (!identical(vec_ptype(x_i), vec_ptype(y_i))) { msg <- c( msg, x = glue("Different types for column `{name}`: {vec_ptype_full(x_i)} vs {vec_ptype_full(y_i)}.") ) } } } if (length(msg)) { return(msg) } TRUE } check_compatible <- function(x, y, ignore_col_order = TRUE, convert = TRUE, error_call = caller_env()) { if (!is.data.frame(y)) { abort("`y` must be a data frame. 
", call = error_call) } compat <- is_compatible_data_frame(x, y, ignore_col_order = ignore_col_order, convert = convert) if (is.character(compat)) { bullets <- c( "`x` and `y` are not compatible.", compat ) abort(bullets, call = error_call) } } dplyr/R/reexport-tibble.r0000644000176200001440000000404514176714175015117 0ustar liggesusers# dataframe --------------------------------------------------------------- #' @importFrom tibble data_frame #' @export tibble::data_frame #' @importFrom tibble data_frame_ #' @export tibble::data_frame_ #' @importFrom tibble as_data_frame #' @export tibble::as_data_frame #' @importFrom tibble lst #' @export tibble::lst #' @importFrom tibble lst_ #' @export tibble::lst_ #' @importFrom tibble add_row #' @export tibble::add_row # type_sum ---------------------------------------------------------------- #' @importFrom tibble type_sum #' @export tibble::type_sum # glimpse ----------------------------------------------------------------- #' Get a glimpse of your data #' #' @description #' `glimpse()` is like a transposed version of `print()`: #' columns run down the page, and data runs across. #' This makes it possible to see every column in a data frame. #' It's a little like [str()] applied to a data frame #' but it tries to show you as much data as possible. #' (And it always shows the underlying data, even when applied #' to a remote data source.) #' #' `glimpse()` is provided by the pillar package, and re-exported #' by dplyr. See [pillar::glimpse()] for more details. #' #' @return x original x is (invisibly) returned, allowing `glimpse()` to be #' used within a data pipeline. #' @examples #' glimpse(mtcars) #' #' # Note that original x is (invisibly) returned, allowing `glimpse()` to be #' # used within a pipeline. 
#' mtcars %>% #' glimpse() %>% #' select(1:3) #' #' glimpse(starwars) #' @importFrom pillar glimpse #' @export #' @name glimpse glimpse # frame-data -------------------------------------------------------------- #' @importFrom tibble frame_data #' @export tibble::frame_data #' @importFrom tibble tribble #' @export tibble::tribble #' @importFrom tibble tibble #' @export tibble::tibble #' @importFrom tibble as_tibble #' @export tibble::as_tibble #' @importFrom tibble view tibble::view # utils ------------------------------------------------------------------- #' @importFrom tibble tbl_sum #' @export tibble::tbl_sum dplyr/R/order-by.R0000644000176200001440000000404114151641776013466 0ustar liggesusers#' A helper function for ordering window function output #' #' This function makes it possible to control the ordering of window functions #' in R that don't have a specific ordering parameter. When translated to SQL #' it will modify the order clause of the OVER function. #' #' This function works by changing the `call` to instead call #' [with_order()] with the appropriate arguments. 
#'
#' @param order_by a vector to order_by
#' @param call a function call to a window function, where the first argument
#'   is the vector being operated on
#' @export
#' @examples
#' order_by(10:1, cumsum(1:10))
#' x <- 10:1
#' y <- 1:10
#' order_by(x, cumsum(y))
#'
#' df <- data.frame(year = 2000:2005, value = (0:5) ^ 2)
#' scrambled <- df[sample(nrow(df)), ]
#'
#' wrong <- mutate(scrambled, running = cumsum(value))
#' arrange(wrong, year)
#'
#' right <- mutate(scrambled, running = order_by(year, cumsum(value)))
#' arrange(right, year)
order_by <- function(order_by, call) {
  call_quo <- enquo(call)
  call_expr <- quo_get_expr(call_quo)

  if (!is_call(call_expr)) {
    if (is_symbol(call_expr)) {
      bullets <- c(
        glue("`call` must be a function call, not a symbol."),
        i = glue("Did you mean `arrange({as_label(enquo(order_by))}, {call_expr})`?")
      )
      abort(bullets)
    }

    type <- friendly_type_of(call_expr)
    abort(glue("`call` must be a function call, not { type }."))
  }

  # Rewrite `fn(x, ...)` into `with_order(order_by, fn, x, ...)`, keeping
  # the function and each argument as quosures in the caller's environment.
  fn <- set_expr(call_quo, node_car(call_expr))
  args <- map(node_cdr(call_expr), new_quosure, quo_get_env(call_quo))
  eval_tidy(expr(with_order(!!order_by, !!fn, !!!args)))
}

#' Run a function with one order, translating result back to original order
#'
#' This is used to power the ordering parameters of dplyr's window functions
#'
#' @param order_by vector to order by
#' @param fun window function
#' @param x,... arguments to `f`
#' @keywords internal
#' @export
with_order <- function(order_by, fun, x, ...) {
  # Sorting permutation and its inverse: applying `perm` then `inverse`
  # restores the original element order.
  perm <- vec_order(order_by)
  inverse <- vec_match(seq_along(order_by), perm)

  sorted_out <- fun(vec_slice(x, perm), ...)
  vec_slice(sorted_out, inverse)
}
dplyr/R/mutate.R0000644000176200001440000004354414164534554013244 0ustar liggesusers#' Create, modify, and delete columns
#'
#' `mutate()` adds new variables and preserves existing ones;
#' `transmute()` adds new variables and drops existing ones.
#' New variables overwrite existing variables of the same name.
#' Variables can be removed by setting their value to `NULL`.
#' #' @section Useful mutate functions: #' #' * [`+`], [`-`], [log()], etc., for their usual mathematical meanings #' #' * [lead()], [lag()] #' #' * [dense_rank()], [min_rank()], [percent_rank()], [row_number()], #' [cume_dist()], [ntile()] #' #' * [cumsum()], [cummean()], [cummin()], [cummax()], [cumany()], [cumall()] #' #' * [na_if()], [coalesce()] #' #' * [if_else()], [recode()], [case_when()] #' #' @section Grouped tibbles: #' #' Because mutating expressions are computed within groups, they may #' yield different results on grouped tibbles. This will be the case #' as soon as an aggregating, lagging, or ranking function is #' involved. Compare this ungrouped mutate: #' #' ``` #' starwars %>% #' select(name, mass, species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' ``` #' #' With the grouped equivalent: #' #' ``` #' starwars %>% #' select(name, mass, species) %>% #' group_by(species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' ``` #' #' The former normalises `mass` by the global average whereas the #' latter normalises by the averages within species levels. #' #' @export #' @inheritParams arrange #' @param ... <[`data-masking`][dplyr_data_masking]> Name-value pairs. #' The name gives the name of the column in the output. #' #' The value can be: #' #' * A vector of length 1, which will be recycled to the correct length. #' * A vector the same length as the current group (or the whole data frame #' if ungrouped). #' * `NULL`, to remove the column. #' * A data frame or tibble, to create multiple columns in the output. #' @family single table verbs #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * For `mutate()`: #' * Columns from `.data` will be preserved according to the `.keep` argument. #' * Existing columns that are modified by `...` will always be returned in #' their original location. 
#' * New columns created through `...` will be placed according to the #' `.before` and `.after` arguments. #' * For `transmute()`: #' * Columns created or modified through `...` will be returned in the order #' specified by `...`. #' * Unmodified grouping columns will be placed at the front. #' * The number of rows is not affected. #' * Columns given the value `NULL` will be removed. #' * Groups will be recomputed if a grouping variable is mutated. #' * Data frame attributes are preserved. #' @section Methods: #' These function are **generic**s, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' Methods available in currently loaded packages: #' #' * `mutate()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. #' * `transmute()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("transmute")}. #' @examples #' # Newly created variables are available immediately #' starwars %>% #' select(name, mass) %>% #' mutate( #' mass2 = mass * 2, #' mass2_squared = mass2 * mass2 #' ) #' #' # As well as adding new variables, you can use mutate() to #' # remove variables and modify existing variables. #' starwars %>% #' select(name, height, mass, homeworld) %>% #' mutate( #' mass = NULL, #' height = height * 0.0328084 # convert to feet #' ) #' #' # Use across() with mutate() to apply a transformation #' # to multiple columns in a tibble. #' starwars %>% #' select(name, homeworld, species) %>% #' mutate(across(!name, as.factor)) #' # see more in ?across #' #' # Window functions are useful for grouped mutates: #' starwars %>% #' select(name, mass, homeworld) %>% #' group_by(homeworld) %>% #' mutate(rank = min_rank(desc(mass))) #' # see `vignette("window-functions")` for more details #' #' # By default, new columns are placed on the far right. 
#' # Experimental: you can override with `.before` or `.after` #' df <- tibble(x = 1, y = 2) #' df %>% mutate(z = x + y) #' df %>% mutate(z = x + y, .before = 1) #' df %>% mutate(z = x + y, .after = x) #' #' # By default, mutate() keeps all columns from the input data. #' # Experimental: You can override with `.keep` #' df <- tibble(x = 1, y = 2, a = "a", b = "b") #' df %>% mutate(z = x + y, .keep = "all") # the default #' df %>% mutate(z = x + y, .keep = "used") #' df %>% mutate(z = x + y, .keep = "unused") #' df %>% mutate(z = x + y, .keep = "none") # same as transmute() #' #' # Grouping ---------------------------------------- #' # The mutate operation may yield different results on grouped #' # tibbles because the expressions are computed within groups. #' # The following normalises `mass` by the global average: #' starwars %>% #' select(name, mass, species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' #' # Whereas this normalises `mass` by the averages within species #' # levels: #' starwars %>% #' select(name, mass, species) %>% #' group_by(species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' #' # Indirection ---------------------------------------- #' # Refer to column names stored as strings with the `.data` pronoun: #' vars <- c("mass", "height") #' mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]]) #' # Learn more in ?dplyr_data_masking mutate <- function(.data, ...) { UseMethod("mutate") } #' @rdname mutate #' @param .keep `r lifecycle::badge("experimental")` #' Control which columns from `.data` are retained in the output. Grouping #' columns and columns created by `...` are always kept. #' #' * `"all"` retains all columns from `.data`. This is the default. #' * `"used"` retains only the columns used in `...` to create new #' columns. This is useful for checking your work, as it displays inputs #' and outputs side-by-side. 
#'   * `"unused"` retains only the columns _not_ used in `...` to create new
#'     columns. This is useful if you generate new columns, but no longer need
#'     the columns used to generate them.
#'   * `"none"` doesn't retain any extra columns from `.data`. Only the grouping
#'     variables and columns created by `...` are kept.
#' @param .before,.after `r lifecycle::badge("experimental")`
#'   <[`tidy-select`][dplyr_tidy_select]> Optionally, control where new columns
#'   should appear (the default is to add to the right hand side). See
#'   [relocate()] for more details.
#' @export
mutate.data.frame <- function(.data, ...,
                              .keep = c("all", "used", "unused", "none"),
                              .before = NULL, .after = NULL) {
  keep <- arg_match(.keep)

  # Evaluate the mutate expressions; `cols` is a named list of new column
  # values carrying a logical "used" attribute (which existing columns were
  # referenced during evaluation).
  cols <- mutate_cols(.data, dplyr_quosures(...), caller_env = caller_env())
  used <- attr(cols, "used")
  out <- dplyr_col_modify(.data, cols)

  # Compact out `NULL` columns that got removed.
  # These won't exist in `out`, but we don't want them to look "new".
  # Note that `dplyr_col_modify()` makes it impossible to `NULL` a group column,
  # which we rely on below.
  cols <- compact_null(cols)

  cols_data <- names(.data)
  cols_group <- group_vars(.data)

  # Split expression results into "modified an existing column" vs
  # "created a brand-new column" -- only new columns are relocatable.
  cols_expr <- names(cols)
  cols_expr_modified <- intersect(cols_expr, cols_data)
  cols_expr_new <- setdiff(cols_expr, cols_expr_modified)

  # Partition the remaining original columns (excluding group vars and
  # modified columns) by whether they fed into any expression.
  cols_used <- setdiff(cols_data, c(cols_group, cols_expr_modified, names(used)[!used]))
  cols_unused <- setdiff(cols_data, c(cols_group, cols_expr_modified, names(used)[used]))

  .before <- enquo(.before)
  .after <- enquo(.after)
  if (!quo_is_null(.before) || !quo_is_null(.after)) {
    # Only change the order of new columns
    out <- relocate(out, all_of(cols_expr_new), .before = !!.before, .after = !!.after)
  }

  # Apply the `.keep` policy; grouping variables and expression results
  # survive every option.
  cols_out <- names(out)
  if (keep == "all") {
    cols_retain <- cols_out
  } else if (keep == "used") {
    cols_retain <- setdiff(cols_out, cols_unused)
  } else if (keep == "unused") {
    cols_retain <- setdiff(cols_out, cols_used)
  } else if (keep == "none") {
    cols_retain <- setdiff(cols_out, c(cols_used, cols_unused))
  }

  dplyr_col_select(out, cols_retain)
}

#' @rdname mutate
#' @export
transmute <- function(.data, ...) {
  UseMethod("transmute")
}

#' @export
transmute.data.frame <- function(.data, ...) {
  # Reject mutate()-only arguments (`.keep`, `.before`, `.after`) up front.
  dots <- check_transmute_args(...)
  dots <- dplyr_quosures(!!!dots)

  cols <- mutate_cols(.data, dots, caller_env = caller_env())
  out <- dplyr_col_modify(.data, cols)

  # Compact out `NULL` columns that got removed.
  # These won't exist in `out`, but we don't want them to look "new".
  # Note that `dplyr_col_modify()` makes it impossible to `NULL` a group column,
  # which we rely on below.
cols <- compact_null(cols) # Retain expression columns in order of their appearance cols_expr <- names(cols) # Retain untouched group variables up front cols_group <- group_vars(.data) cols_group <- setdiff(cols_group, cols_expr) cols_retain <- c(cols_group, cols_expr) dplyr_col_select(out, cols_retain) } # Helpers ----------------------------------------------------------------- check_transmute_args <- function(..., .keep, .before, .after, error_call = caller_env()) { if (!missing(.keep)) { abort("The `.keep` argument is not supported.", call = error_call) } if (!missing(.before)) { abort("The `.before` argument is not supported.", call = error_call) } if (!missing(.after)) { abort("The `.after` argument is not supported.", call = error_call) } enquos(...) } mutate_cols <- function(.data, dots, caller_env, error_call = caller_env()) { error_call <- dplyr_error_call(error_call) mask <- DataMask$new(.data, caller_env, "mutate", error_call = error_call) old_current_column <- context_peek_bare("column") on.exit(context_poke("column", old_current_column), add = TRUE) on.exit(mask$forget(), add = TRUE) rows <- mask$get_rows() new_columns <- set_names(list(), character()) withCallingHandlers({ for (i in seq_along(dots)) { context_poke("column", old_current_column) # get results from all the quosures that are expanded from ..i # then ingest them after quosures <- expand_across(dots[[i]]) quosures_results <- vector(mode = "list", length = length(quosures)) for (k in seq_along(quosures)) { quo <- quosures[[k]] quo_data <- attr(quo, "dplyr:::data") if (!is.null(quo_data$column)) { context_poke("column", quo_data$column) } # a list in which each element is the result of # evaluating the quosure in the "sliced data mask" # recycling it appropriately to match the group size # # TODO: reinject hybrid evaluation at the R level chunks <- NULL # result after unchopping the chunks result <- NULL if (quo_is_symbol(quo)){ name <- as_string(quo_get_expr(quo)) if (name %in% 
names(new_columns)) { # already have result and chunks result <- new_columns[[name]] chunks <- mask$resolve(name) } else if (name %in% names(.data)) { # column from the original data result <- .data[[name]] chunks <- mask$resolve(name) } if (inherits(.data, "rowwise_df") && vec_is_list(result)) { sizes <- list_sizes(result) wrong <- which(sizes != 1) if (length(wrong)) { # same error as would have been generated by mask$eval_all_mutate() group <- wrong[1L] mask$set_current_group(group) abort( class = c("dplyr:::mutate_incompatible_size", "dplyr:::internal_error"), dplyr_error_data = list(result_size = sizes[group], expected_size = 1) ) } } } else if (!quo_is_symbolic(quo) && !is.null(quo_get_expr(quo))) { # constant, we still need both `result` and `chunks` result <- quo_get_expr(quo) result <- withCallingHandlers( vec_recycle(result, vec_size(.data)), error = function(cnd) { abort( class = c("dplyr:::mutate_constant_recycle_error", "dplyr:::internal_error"), constant_size = vec_size(result), data_size = vec_size(.data) ) } ) chunks <- vec_chop(result, rows) } if (is.null(chunks)) { if (is.null(quo_data$column)) { chunks <- mask$eval_all_mutate(quo) } else { chunks <- withCallingHandlers( mask$eval_all_mutate(quo), error = function(cnd) { msg <- glue("Problem while computing column `{quo_data$name_auto}`.") abort(msg, call = call("across"), parent = cnd) } ) } } if (is.null(chunks)) { next } # only unchop if needed if (is.null(result)) { if (length(rows) == 1) { result <- chunks[[1]] } else { chunks <- dplyr_vec_cast_common(chunks, quo_data$name_auto) result <- vec_unchop(chunks, rows) } } quosures_results[[k]] <- list(result = result, chunks = chunks) } for (k in seq_along(quosures)) { quo <- quosures[[k]] quo_data <- attr(quo, "dplyr:::data") quo_result <- quosures_results[[k]] if (is.null(quo_result)) { if (quo_data$is_named) { name <- quo_data$name_given new_columns[[name]] <- zap() mask$remove(name) } next } result <- quo_result$result chunks <- 
quo_result$chunks if (!quo_data$is_named && is.data.frame(result)) { types <- vec_ptype(result) types_names <- names(types) chunks_extracted <- .Call(dplyr_extract_chunks, chunks, types) for (j in seq_along(types)) { mask$add_one(types_names[j], chunks_extracted[[j]], result = result[[j]]) } new_columns[types_names] <- result } else { # treat as a single output otherwise name <- quo_data$name_auto mask$add_one(name = name, chunks = chunks, result = result) new_columns[[name]] <- result } } } }, error = function(e) { local_error_context(dots = dots, .index = i, mask = mask) bullets <- c( cnd_bullet_header("computing"), mutate_bullets(e) ) abort( bullets, class = "dplyr:::mutate_error", parent = skip_internal_condition(e), bullets = bullets, call = error_call ) }, warning = function(w) { # Check if there is an upstack calling handler that would muffle # the warning. This avoids doing the expensive work below for a # silenced warning (#5675). if (check_muffled_warning(w)) { maybe_restart("muffleWarning") } local_error_context(dots = dots, .index = i, mask = mask) warn(c( cnd_bullet_header("computing"), i = cnd_header(w), i = cnd_bullet_cur_group_label(what = "warning") )) # Cancel `w` maybe_restart("muffleWarning") }) is_zap <- map_lgl(new_columns, inherits, "rlang_zap") new_columns[is_zap] <- rep(list(NULL), sum(is_zap)) used <- mask$get_used() names(used) <- mask$current_vars() attr(new_columns, "used") <- used new_columns } mutate_bullets <- function(cnd, ...) { UseMethod("mutate_bullets") } #' @export mutate_bullets.default <- function(cnd, ...) { c(i = cnd_bullet_cur_group_label()) } #' @export `mutate_bullets.dplyr:::mutate_incompatible_size` <- function(cnd, ...) 
{ error_context <- peek_error_context() error_name <- error_context$error_name result_size <- cnd$dplyr_error_data$result_size expected_size <- cnd$dplyr_error_data$expected_size c( x = glue("`{error_name}` must be size {or_1(expected_size)}, not {result_size}."), i = cnd_bullet_rowwise_unlist(), i = cnd_bullet_cur_group_label() ) } #' @export `mutate_bullets.dplyr:::mutate_mixed_null` <- function(cnd, ...) { error_name <- peek_error_context()$error_name c( x = glue("`{error_name}` must return compatible vectors across groups."), x = "Can't combine NULL and non NULL results.", i = cnd_bullet_rowwise_unlist() ) } #' @export `mutate_bullets.dplyr:::mutate_not_vector` <- function(cnd, ...) { error_name <- peek_error_context()$error_name result <- cnd$dplyr_error_data$result c( x = glue("`{error_name}` must be a vector, not {friendly_type_of(result)}."), i = cnd_bullet_rowwise_unlist(), i = cnd_bullet_cur_group_label() ) } #' @export `mutate_bullets.dplyr:::error_incompatible_combine` <- function(cnd, ...) { # the details are included in the parent error c() } #' @export `mutate_bullets.dplyr:::mutate_constant_recycle_error` <- function(cnd, ...) { error_name <- peek_error_context()$error_name constant_size <- cnd$constant_size data_size <- cnd$data_size c( glue("Inlined constant `{error_name}` must be size {or_1(data_size)}, not {constant_size}.") ) } check_muffled_warning <- function(cnd) { early_exit <- TRUE # Cancel early exits, e.g. from an exiting handler. This way we can # still instrument caught warnings to avoid confusing # inconsistencies. This doesn't work on versions of R older than # 3.5.0 because they don't include this change: # https://github.com/wch/r-source/commit/688eaebf. So with # `tryCatch(warning = )`, the original warning `cnd` will be caught # instead of the instrumented warning. on.exit( if (can_return_from_exit && early_exit) { return(FALSE) } ) muffled <- withRestarts( muffleWarning = function(...) 
TRUE, { signalCondition(cnd) FALSE } ) early_exit <- FALSE muffled } on_load( can_return_from_exit <- getRversion() >= "3.5.0" ) dplyr/NEWS.md0000644000176200001440000032210114177236447012520 0ustar liggesusers# dplyr 1.0.8 * Better display of error messages thanks to rlang 1.0.0. * `mutate(.keep = "none")` is no longer identical to `transmute()`. `transmute()` has not been changed, and completely ignores the column ordering of the existing data, instead relying on the ordering of expressions supplied through `...`. `mutate(.keep = "none")` has been changed to ensure that pre-existing columns are never moved, which aligns more closely with the other `.keep` options (#6086). * `filter()` forbids matrix results (#5973) and warns about data frame results, especially data frames created from `across()` with a hint to use `if_any()` or `if_all()`. * `slice()` helpers (`slice_head()`, `slice_tail()`, `slice_min()`, `slice_max()`) now accept negative values for `n` and `prop` (#5961). * `slice()` now indicates which group produces an error (#5931). * `cur_data()` and `cur_data_all()` don't simplify list columns in rowwise data frames (#5901). * dplyr now uses `rlang::check_installed()` to prompt you whether to install required packages that are missing. * `storms` data updated to 2020 (@steveharoz, #5899). * `coalesce()` accepts 1-D arrays (#5557). * The deprecated `trunc_mat()` is no longer reexported from dplyr (#6141). # dplyr 1.0.7 * `across()` uses the formula environment when inlining them (#5886). * `summarise.rowwise_df()` is quiet when the result is ungrouped (#5875). * `c_across()` and `across()` key deparsing not confused by long calls (#5883). * `across()` handles named selections (#5207). # dplyr 1.0.6 * `add_count()` is now generic (#5837). * `if_any()` and `if_all()` abort when a predicate is mistakingly used as `.cols=` (#5732). * Multiple calls to `if_any()` and/or `if_all()` in the same expression are now properly disambiguated (#5782). 
* `filter()` now inlines `if_any()` and `if_all()` expressions. This greatly improves performance with grouped data frames. * Fixed behaviour of `...` in top-level `across()` calls (#5813, #5832). * `across()` now inlines lambda-formulas. This is slightly more performant and will allow more optimisations in the future. * Fixed issue in `bind_rows()` causing lists to be incorrectly transformed as data frames (#5417, #5749). * `select()` no longer creates duplicate variables when renaming a variable to the same name as a grouping variable (#5841). * `dplyr_col_select()` keeps attributes for bare data frames (#5294, #5831). * Fixed quosure handling in `dplyr::group_by()` that caused issues with extra arguments (tidyverse/lubridate#959). * Removed the `name` argument from the `compute()` generic (@ianmcook, #5783). * row-wise data frames of 0 rows and list columns are supported again (#5804). # dplyr 1.0.5 * Fixed edge case of `slice_sample()` when `weight_by=` is used and there 0 rows (#5729). * `across()` can again use columns in functions defined inline (#5734). * Using testthat 3rd edition. * Fixed bugs introduced in `across()` in previous version (#5765). * `group_by()` keeps attributes unrelated to the grouping (#5760). * The `.cols=` argument of `if_any()` and `if_all()` defaults to `everything()`. # dplyr 1.0.4 * Improved performance for `across()`. This makes `summarise(across())` and `mutate(across())` perform as well as the superseded colwise equivalents (#5697). * New functions `if_any()` and `if_all()` (#4770, #5713). * `summarise()` silently ignores NULL results (#5708). * Fixed a performance regression in `mutate()` when warnings occur once per group (#5675). We no longer instrument warnings with debugging information when `mutate()` is called within `suppressWarnings()`. # dplyr 1.0.3 * `summarise()` no longer informs when the result is ungrouped (#5633). * `group_by(.drop = FALSE)` preserves ordered factors (@brianrice2, #5545). 
* `count()` and `tally()` are now generic. * Removed default fallbacks to lazyeval methods; this will yield better error messages when you call a dplyr function with the wrong input, and is part of our long term plan to remove the deprecated lazyeval interface. * `inner_join()` gains a `keep` parameter for consistency with the other mutating joins (@patrickbarks, #5581). * Improved performance with many columns, with a dynamic data mask using active bindings and lazy chops (#5017). * `mutate()` and friends preserves row names in data frames once more (#5418). * `group_by()` uses the ungrouped data for the implicit mutate step (#5598). You might have to define an `ungroup()` method for custom classes. For example, see https://github.com/hadley/cubelyr/pull/3. * `relocate()` can rename columns it relocates (#5569). * `distinct()` and `group_by()` have better error messages when the mutate step fails (#5060). * Clarify that `between()` is not vectorised (#5493). * Fixed `across()` issue where data frame columns would could not be referred to with `all_of()` in the nested case (`mutate()` within `mutate()`) (#5498). * `across()` handles data frames with 0 columns (#5523). * `mutate()` always keeps grouping variables, unconditional to `.keep=` (#5582). * dplyr now depends on R 3.3.0 # dplyr 1.0.2 * Fixed `across()` issue where data frame columns would mask objects referred to from `all_of()` (#5460). * `bind_cols()` gains a `.name_repair` argument, passed to `vctrs::vec_cbind()` (#5451) * `summarise(.groups = "rowwise")` makes a rowwise data frame even if the input data is not grouped (#5422). # dplyr 1.0.1 * New function `cur_data_all()` similar to `cur_data()` but includes the grouping variables (#5342). * `count()` and `tally()` no longer automatically weights by column `n` if present (#5298). dplyr 1.0.0 introduced this behaviour because of Hadley's faulty memory. 
Historically `tally()` automatically weighted and `count()` did not, but this behaviour was accidentally changed in 0.8.2 (#4408) so that neither automatically weighted by `n`. Since 0.8.2 is almost a year old, and the automatically weighting behaviour was a little confusing anyway, we've removed it from both `count()` and `tally()`. Use of `wt = n()` is now deprecated; now just omit the `wt` argument. * `coalesce()` now supports data frames correctly (#5326). * `cummean()` no longer has off-by-one indexing problem (@cropgen, #5287). * The call stack is preserved on error. This makes it possible to `recover()` into problematic code called from dplyr verbs (#5308). # dplyr 1.0.0 ## Breaking changes * `bind_cols()` no longer converts to a tibble, returns a data frame if the input is a data frame. * `bind_rows()`, `*_join()`, `summarise()` and `mutate()` use vctrs coercion rules. There are two main user facing changes: * Combining factor and character vectors silently creates a character vector; previously it created a character vector with a warning. * Combining multiple factors creates a factor with combined levels; previously it created a character vector with a warning. * `bind_rows()` and other functions use vctrs name repair, see `?vctrs::vec_as_names`. * `all.equal.tbl_df()` removed. * Data frames, tibbles and grouped data frames are no longer considered equal, even if the data is the same. * Equality checks for data frames no longer ignore row order or groupings. * `expect_equal()` uses `all.equal()` internally. When comparing data frames, tests that used to pass may now fail. * `distinct()` keeps the original column order. * `distinct()` on missing columns now raises an error, it has been a compatibility warning for a long time. * `group_modify()` puts the grouping variable to the front. * `n()` and `row_number()` can no longer be called directly when dplyr is not loaded, and this now generates an error: `dplyr::mutate(mtcars, x = n())`. 
Fix by prefixing with `dplyr::` as in `dplyr::mutate(mtcars, x = dplyr::n())` * The old data format for `grouped_df` is no longer supported. This may affect you if you have serialized grouped data frames to disk, e.g. with `saveRDS()` or when using knitr caching. * `lead()` and `lag()` are stricter about their inputs. * Extending data frames requires that the extra class or classes are added first, not last. Having the extra class at the end causes some vctrs operations to fail with a message like: ``` Input must be a vector, not a `` object ``` * `right_join()` no longer sorts the rows of the resulting tibble according to the order of the RHS `by` argument in tibble `y`. ## New features * The `cur_` functions (`cur_data()`, `cur_group()`, `cur_group_id()`, `cur_group_rows()`) provide a full set of options to you access information about the "current" group in dplyr verbs. They are inspired by data.table's `.SD`, `.GRP`, `.BY`, and `.I`. * The `rows_` functions (`rows_insert()`, `rows_update()`, `rows_upsert()`, `rows_patch()`, `rows_delete()`) provide a new API to insert and delete rows from a second data frame or table. Support for updating mutable backends is planned (#4654). * `mutate()` and `summarise()` create multiple columns from a single expression if you return a data frame (#2326). * `select()` and `rename()` use the latest version of the tidyselect interface. Practically, this means that you can now combine selections using Boolean logic (i.e. `!`, `&` and `|`), and use predicate functions with `where()` (e.g. `where(is.character)`) to select variables by type (#4680). It also makes it possible to use `select()` and `rename()` to repair data frames with duplicated names (#4615) and prevents you from accidentally introducing duplicate names (#4643). This also means that dplyr now re-exports `any_of()` and `all_of()` (#5036). 
* `slice()` gains a new set of helpers: * `slice_head()` and `slice_tail()` select the first and last rows, like `head()` and `tail()`, but return `n` rows _per group_. * `slice_sample()` randomly selects rows, taking over from `sample_frac()` and `sample_n()`. * `slice_min()` and `slice_max()` select the rows with the minimum or maximum values of a variable, taking over from the confusing `top_n()`. * `summarise()` can create summaries of greater than length 1 if you use a summary function that returns multiple values. * `summarise()` gains a `.groups=` argument to control the grouping structure. * New `relocate()` verb makes it easy to move columns around within a data frame (#4598). * New `rename_with()` is designed specifically for the purpose of renaming selected columns with a function (#4771). * `ungroup()` can now selectively remove grouping variables (#3760). * `pull()` can now return named vectors by specifying an additional column name (@ilarischeinin, #4102). ## Experimental features * `mutate()` (for data frames only), gains experimental new arguments `.before` and `.after` that allow you to control where the new columns are placed (#2047). * `mutate()` (for data frames only), gains an experimental new argument called `.keep` that allows you to control which variables are kept from the input `.data`. `.keep = "all"` is the default; it keeps all variables. `.keep = "none"` retains no input variables (except for grouping keys), so behaves like `transmute()`. `.keep = "unused"` keeps only variables not used to make new columns. `.keep = "used"` keeps only the input variables used to create new columns; it's useful for double checking your work (#3721). * New, experimental, `with_groups()` makes it easy to temporarily group or ungroup (#4711). ## across() * New function `across()` that can be used inside `summarise()`, `mutate()`, and other verbs to apply a function (or a set of functions) to a selection of columns. 
See `vignette("colwise")` for more details.

* New function `c_across()` that can be used inside `summarise()` and
  `mutate()` in row-wise data frames to easily (e.g.) compute a row-wise mean
  of all numeric variables. See `vignette("rowwise")` for more details.

## rowwise()

* `rowwise()` is no longer questioning; we now understand that it's an
  important tool when you don't have vectorised code. It now also allows you to
  specify additional variables that should be preserved in the output when
  summarising (#4723). The rowwise-ness is preserved by all operations; you
  need to explicitly drop it with `as_tibble()` or `group_by()`.

* New, experimental, `nest_by()`. It has the same interface as `group_by()`,
  but returns a rowwise data frame of grouping keys, supplemented with a
  list-column of data frames containing the rest of the data.

## vctrs

* The implementation of all dplyr verbs has been changed to use primitives
  provided by the vctrs package. This makes it easier to add support for new
  types of vector, radically simplifies the implementation, and makes all dplyr
  verbs more consistent.

* The place where you are most likely to be impacted by the coercion changes
  is when working with factors in joins or grouped mutates: now when combining
  factors with different levels, dplyr creates a new factor with the union of
  the levels. This matches base R more closely, and while perhaps strictly less
  correct, is much more convenient.

* dplyr dropped its two heaviest dependencies: Rcpp and BH. This should make it
  considerably easier and faster to build from source.

* The implementation of all verbs has been carefully thought through. This
  mostly makes implementation simpler but should hopefully increase consistency,
  and also makes it easier to adapt dplyr to new data structures in the near
  future. Pragmatically, the biggest difference for most people will be that
  each verb documents its return value in terms of rows, columns, groups, and
  data frame attributes.
* Row names are now preserved when working with data frames. ## Grouping * `group_by()` uses hashing from the `vctrs` package. * Grouped data frames now have `names<-`, `[[<-`, `[<-` and `$<-` methods that re-generate the underlying grouping. Note that modifying grouping variables in multiple steps (i.e. `df$grp1 <- 1; df$grp2 <- 1`) will be inefficient since the data frame will be regrouped after each modification. * `[.grouped_df` now regroups to respect any grouping columns that have been removed (#4708). * `mutate()` and `summarise()` can now modify grouping variables (#4709). * `group_modify()` works with additional arguments (@billdenney and @cderv, #4509) * `group_by()` does not create an arbitrary NA group when grouping by factors with `drop = TRUE` (#4460). ## Lifecycle changes * All deprecations now use the [lifecycle](https://lifecycle.r-lib.org), that means by default you'll only see a deprecation warning once per session, and you can control with `options(lifecycle_verbosity = x)` where `x` is one of NULL, "quiet", "warning", and "error". ### Removed * `id()`, deprecated in dplyr 0.5.0, is now defunct. * `failwith()`, deprecated in dplyr 0.7.0, is now defunct. * `tbl_cube()` and `nasa` have been pulled out into a separate cubelyr package (#4429). * `rbind_all()` and `rbind_list()` have been removed (@bjungbogati, #4430). * `dr_dplyr()` has been removed as it is no longer needed (#4433, @smwindecker). ### Deprecated * Use of pkgconfig for setting `na_matches` argument to join functions is now deprecated (#4914). This was rarely used, and I'm now confident that the default is correct for R. * In `add_count()`, the `drop` argument has been deprecated because it didn't actually affect the output. * `add_rownames()`: please use `tibble::rownames_to_column()` instead. * `as.tbl()` and `tbl_df()`: please use `as_tibble()` instead. * `bench_tbls()`, `compare_tbls()`, `compare_tbls2()`, `eval_tbls()` and `eval_tbls2()` are now deprecated. 
That were only used in a handful of packages, and we now believe that you're
  better off performing comparisons more directly (#4675).

* `combine()`: please use `vctrs::vec_c()` instead.

* `funs()`: please use `list()` instead.

* `group_by(add = )`: please use `.add` instead.

* `group_by(.dots = )`/`group_by_prepare(.dots = )`: please use `!!!`
  instead (#4734).

* The use of zero-arg `group_indices()` to retrieve the group id for the
  "current" group is deprecated; instead use `cur_group_id()`.

* Passing arguments to `group_keys()` or `group_indices()` to change the
  grouping has been deprecated, instead do grouping first yourself.

* `location()` and `changes()`: please use `lobstr::ref()` instead.

* `progress_estimated()` is soft deprecated; it's not the responsibility of
  dplyr to provide progress bars (#4935).

* `src_local()` has been deprecated; it was part of an approach to testing
  dplyr backends that didn't pan out.

* `src_mysql()`, `src_postgres()`, and `src_sqlite()` have been deprecated.
  We've recommended against them for some time. Instead please use the approach
  described at <https://db.rstudio.com>.

* `select_vars()`, `rename_vars()`, `select_var()`, `current_vars()` are now
  deprecated (@perezp44, #4432)

### Superseded

* The scoped helpers (all functions ending in `_if`, `_at`, or `_all`) have
  been superseded by `across()`. This dramatically reduces the API surface for
  dplyr, while at the same time providing a more flexible and less error-prone
  interface (#4769).

  `rename_*()` and `select_*()` have been superseded by `rename_with()`.

* `do()` is superseded in favour of `summarise()`.

* `sample_n()` and `sample_frac()` have been superseded by `slice_sample()`.
  See `?sample_n` for details about why, and for examples converting from old
  to new usage.

* `top_n()` has been superseded by `slice_min()`/`slice_max()`. See `?top_n`
  for details about why, and how to convert old to new usage (#4494).
### Questioning * `all_equal()` is questioning; it solves a problem that no longer seems important. ### Stable * `rowwise()` is no longer questioning. ## Documentation improvements * New `vignette("base")` which describes how dplyr verbs relate to the base R equivalents (@sastoudt, #4755) * New `vignette("grouping")` gives more details about how dplyr verbs change when applied to grouped data frames (#4779, @MikeKSmith). * `vignette("programming")` has been completely rewritten to reflect our latest vocabulary, the most recent rlang features, and our current recommendations. It should now be substantially easier to program with dplyr. ## Minor improvements and bug fixes * dplyr now has a rudimentary, experimental, and stop-gap, extension mechanism documented in `?dplyr_extending` * dplyr no longer provides a `all.equal.tbl_df()` method. It never should have done so in the first place because it owns neither the generic nor the class. It also provided a problematic implementation because, by default, it ignored the order of the rows and the columns which is usually important. This is likely to cause new test failures in downstream packages; but on the whole we believe those failures to either reflect unexpected behaviour or tests that need to be strengthened (#2751). * `coalesce()` now uses vctrs recycling and common type coercion rules (#5186). * `count()` and `add_count()` do a better job of preserving input class and attributes (#4086). * `distinct()` errors if you request it use variables that don't exist (this was previously a warning) (#4656). * `filter()`, `mutate()` and `summarise()` get better error messages. * `filter()` handles data frame results when all columns are logical vectors by reducing them with `&` (#4678). In particular this means `across()` can be used in `filter()`. * `left_join()`, `right_join()`, and `full_join()` gain a `keep` argument so that you can optionally choose to keep both sets of join keys (#4589). 
This is useful when you want to figure out which rows were missing from either side. * Join functions can now perform a cross-join by specifying `by = character()` (#4206.) * `groups()` now returns `list()` for ungrouped data; previously it returned `NULL` which was type-unstable (when there are groups it returns a list of symbols). * The first argument of `group_map()`, `group_modify()` and `group_walk()` has been changed to `.data` for consistency with other generics. * `group_keys.rowwise_df()` gives a 0 column data frame with `n()` rows. * `group_map()` is now a generic (#4576). * `group_by(..., .add = TRUE)` replaces `group_by(..., add = TRUE)`, with a deprecation message. The old argument name was a mistake because it prevents you from creating a new grouping var called `add` and it violates our naming conventions (#4137). * `intersect()`, `union()`, `setdiff()` and `setequal()` generics are now imported from the generics package. This reduces a conflict with lubridate. * `order_by()` gives an informative hint if you accidentally call it instead of `arrange()` #3357. * `tally()` and `count()` now message if the default output `name` (n), already exists in the data frame. To quiet the message, you'll need to supply an explicit `name` (#4284). You can override the default weighting to using a constant by setting `wt = 1`. * `starwars` dataset now does a better job of separating biological sex from gender identity. The previous `gender` column has been renamed to `sex`, since it actually describes the individual's biological sex. A new `gender` column encodes the actual gender identity using other information about the Star Wars universe (@MeganBeckett, #4456). * `src_tbls()` accepts `...` arguments (#4485, @ianmcook). This could be a breaking change for some dplyr backend packages that implement `src_tbls()`. * Better performance for extracting slices of factors and ordered factors (#4501). 
* `rename_at()` and `rename_all()` call the function with a simple character vector, not a `dplyr_sel_vars` (#4459). * `ntile()` is now more consistent with database implementations if the buckets have irregular size (#4495). # dplyr 0.8.5 (2020-03-07) * Maintenance release for compatibility with R-devel. # dplyr 0.8.4 (2020-01-30) * Adapt tests to changes in dependent packages. # dplyr 0.8.3 (2019-07-04) * Fixed performance regression introduced in version 0.8.2 (#4458). # dplyr 0.8.2 (2019-06-28) ## New functions * `top_frac(data, proportion)` is a shorthand for `top_n(data, proportion * n())` (#4017). ## colwise changes * Using quosures in colwise verbs is deprecated (#4330). * Updated `distinct_if()`, `distinct_at()` and `distinct_all()` to include `.keep_all` argument (@beansrowning, #4343). * `rename_at()` handles empty selection (#4324). * `*_if()` functions correctly handle columns with special names (#4380). * colwise functions support constants in formulas (#4374). ## Hybrid evaluation changes * hybrid rank functions correctly handle NA (#4427). * `first()`, `last()` and `nth()` hybrid version handles factors (#4295). ## Minor changes * `top_n()` quotes its `n` argument, `n` no longer needs to be constant for all groups (#4017). * `tbl_vars()` keeps information on grouping columns by returning a `dplyr_sel_vars` object (#4106). * `group_split()` always sets the `ptype` attribute, which make it more robust in the case where there are 0 groups. * `group_map()` and `group_modify()` work in the 0 group edge case (#4421) * `select.list()` method added so that `select()` does not dispatch on lists (#4279). * `view()` is reexported from tibble (#4423). * `group_by()` puts NA groups last in character vectors (#4227). * `arrange()` handles integer64 objects (#4366). * `summarise()` correctly resolves summarised list columns (#4349). 
# dplyr 0.8.1 (2019-05-14) ## Breaking changes * `group_modify()` is the new name of the function previously known as `group_map()` ## New functions * `group_map()` now only calls the function on each group and return a list. * `group_by_drop_default()`, previously known as `dplyr:::group_drops()` is exported (#4245). ## Minor changes * Lists of formulas passed to colwise verbs are now automatically named. * `group_by()` does a shallow copy even in the no groups case (#4221). * Fixed `mutate()` on rowwise data frames with 0 rows (#4224). * Fixed handling of bare formulas in colwise verbs (#4183). * Fixed performance of `n_distinct()` (#4202). * `group_indices()` now ignores empty groups by default for `data.frame`, which is consistent with the default of `group_by()` (@yutannihilation, #4208). * Fixed integer overflow in hybrid `ntile()` (#4186). * colwise functions `summarise_at()` ... can rename vars in the case of multiple functions (#4180). * `select_if()` and `rename_if()` handle logical vector predicate (#4213). * hybrid `min()` and `max()` cast to integer when possible (#4258). * `bind_rows()` correctly handles the cases where there are multiple consecutive `NULL` (#4296). * Support for R 3.1.* has been dropped. The minimal R version supported is now 3.2.0. https://www.tidyverse.org/articles/2019/04/r-version-support/ * `rename_at()` handles empty selection (#4324). # dplyr 0.8.0.1 (2019-02-15) * Fixed integer C/C++ division, forced released by CRAN (#4185). # dplyr 0.8.0 (2019-02-14) ## Breaking changes * The error `could not find function "n"` or the warning ```Calling `n()` without importing or prefixing it is deprecated, use `dplyr::n()` ``` indicates when functions like `n()`, `row_number()`, ... are not imported or prefixed. 
The easiest fix is to import dplyr with `import(dplyr)` in your `NAMESPACE` or `#' @import dplyr` in a roxygen comment, alternatively such functions can be imported selectively as any other function with `importFrom(dplyr, n)` in the `NAMESPACE` or `#' @importFrom dplyr n` in a roxygen comment. The third option is to prefix them, i.e. use `dplyr::n()` * If you see `checking S3 generic/method consistency` in R CMD check for your package, note that : - `sample_n()` and `sample_frac()` have gained `...` - `filter()` and `slice()` have gained `.preserve` - `group_by()` has gained `.drop` * ```Error: `.data` is a corrupt grouped_df, ...``` signals code that makes wrong assumptions about the internals of a grouped data frame. ## New functions * New selection helpers `group_cols()`. It can be called in selection contexts such as `select()` and matches the grouping variables of grouped tibbles. * `last_col()` is re-exported from tidyselect (#3584). * `group_trim()` drops unused levels of factors that are used as grouping variables. * `nest_join()` creates a list column of the matching rows. `nest_join()` + `tidyr::unnest()` is equivalent to `inner_join` (#3570). ```r band_members %>% nest_join(band_instruments) ``` * `group_nest()` is similar to `tidyr::nest()` but focusing on the variables to nest by instead of the nested columns. ```r starwars %>% group_by(species, homeworld) %>% group_nest() starwars %>% group_nest(species, homeworld) ``` * `group_split()` is similar to `base::split()` but operating on existing groups when applied to a grouped data frame, or subject to the data mask on ungrouped data frames ```r starwars %>% group_by(species, homeworld) %>% group_split() starwars %>% group_split(species, homeworld) ``` * `group_map()` and `group_walk()` are purrr-like functions to iterate on groups of a grouped data frame, jointly identified by the data subset (exposed as `.x`) and the data key (a one row tibble, exposed as `.y`). 
`group_map()` returns a grouped data frame that combines the results of the function, `group_walk()` is only used for side effects and returns its input invisibly. ```r mtcars %>% group_by(cyl) %>% group_map(~ head(.x, 2L)) ``` * `distinct_prepare()`, previously known as `distinct_vars()` is exported. This is mostly useful for alternative backends (e.g. `dbplyr`). ## Major changes * `group_by()` gains the `.drop` argument. When set to `FALSE` the groups are generated based on factor levels, hence some groups may be empty (#341). ```r # 3 groups tibble( x = 1:2, f = factor(c("a", "b"), levels = c("a", "b", "c")) ) %>% group_by(f, .drop = FALSE) # the order of the grouping variables matter df <- tibble( x = c(1,2,1,2), f = factor(c("a", "b", "a", "b"), levels = c("a", "b", "c")) ) df %>% group_by(f, x, .drop = FALSE) df %>% group_by(x, f, .drop = FALSE) ``` The default behaviour drops the empty groups as in the previous versions. ```r tibble( x = 1:2, f = factor(c("a", "b"), levels = c("a", "b", "c")) ) %>% group_by(f) ``` * `filter()` and `slice()` gain a `.preserve` argument to control which groups it should keep. The default `filter(.preserve = FALSE)` recalculates the grouping structure based on the resulting data, otherwise it is kept as is. ```r df <- tibble( x = c(1,2,1,2), f = factor(c("a", "b", "a", "b"), levels = c("a", "b", "c")) ) %>% group_by(x, f, .drop = FALSE) df %>% filter(x == 1) df %>% filter(x == 1, .preserve = TRUE) ``` * The notion of lazily grouped data frames have disappeared. All dplyr verbs now recalculate immediately the grouping structure, and respect the levels of factors. * Subsets of columns now properly dispatch to the `[` or `[[` method when the column is an object (a vector with a class) instead of making assumptions on how the column should be handled. The `[` method must handle integer indices, including `NA_integer_`, i.e. `x[NA_integer_]` should produce a vector of the same class as `x` with whatever represents a missing value. 
## Minor changes * `tally()` works correctly on non-data frame table sources such as `tbl_sql` (#3075). * `sample_n()` and `sample_frac()` can use `n()` (#3527) * `distinct()` respects the order of the variables provided (#3195, @foo-bar-baz-qux) and handles the 0 rows and 0 columns special case (#2954). * `combine()` uses tidy dots (#3407). * `group_indices()` can be used without argument in expressions in verbs (#1185). * Using `mutate_all()`, `transmute_all()`, `mutate_if()` and `transmute_if()` with grouped tibbles now informs you that the grouping variables are ignored. In the case of the `_all()` verbs, the message invites you to use `mutate_at(df, vars(-group_cols()))` (or the equivalent `transmute_at()` call) instead if you'd like to make it explicit in your code that the operation is not applied on the grouping variables. * Scoped variants of `arrange()` respect the `.by_group` argument (#3504). * `first()` and `last()` hybrid functions fall back to R evaluation when given no arguments (#3589). * `mutate()` removes a column when the expression evaluates to `NULL` for all groups (#2945). * grouped data frames support `[, drop = TRUE]` (#3714). * New low-level constructor `new_grouped_df()` and validator `validate_grouped_df` (#3837). * `glimpse()` prints group information on grouped tibbles (#3384). * `sample_n()` and `sample_frac()` gain `...` (#2888). * Scoped filter variants now support functions and purrr-like lambdas: ```r mtcars %>% filter_at(vars(hp, vs), ~ . %% 2 == 0) ``` ## Lifecycle * `do()`, `rowwise()` and `combine()` are questioning (#3494). * `funs()` is soft-deprecated and will start issuing warnings in a future version. ## Changes to column wise functions * Scoped variants for `distinct()`: `distinct_at()`, `distinct_if()`, `distinct_all()` (#2948). * `summarise_at()` excludes the grouping variables (#3613). * `mutate_all()`, `mutate_at()`, `summarise_all()` and `summarise_at()` handle utf-8 names (#2967). 
## Performance * R expressions that cannot be handled with native code are now evaluated with unwind-protection when available (on R 3.5 and later). This improves the performance of dplyr on data frames with many groups (and hence many expressions to evaluate). We benchmarked that computing a grouped average is consistently twice as fast with unwind-protection enabled. Unwind-protection also makes dplyr more robust in corner cases because it ensures the C++ destructors are correctly called in all circumstances (debugger exit, captured condition, restart invokation). * `sample_n()` and `sample_frac()` gain `...` (#2888). * Improved performance for wide tibbles (#3335). * Faster hybrid `sum()`, `mean()`, `var()` and `sd()` for logical vectors (#3189). * Hybrid version of `sum(na.rm = FALSE)` exits early when there are missing values. This considerably improves performance when there are missing values early in the vector (#3288). * `group_by()` does not trigger the additional `mutate()` on simple uses of the `.data` pronoun (#3533). ## Internal * The grouping metadata of grouped data frame has been reorganized in a single tidy tibble, that can be accessed with the new `group_data()` function. The grouping tibble consists of one column per grouping variable, followed by a list column of the (1-based) indices of the groups. The new `group_rows()` function retrieves that list of indices (#3489). ```r # the grouping metadata, as a tibble group_by(starwars, homeworld) %>% group_data() # the indices group_by(starwars, homeworld) %>% group_data() %>% pull(.rows) group_by(starwars, homeworld) %>% group_rows() ``` * Hybrid evaluation has been completely redesigned for better performance and stability. ## Documentation * Add documentation example for moving variable to back in `?select` (#3051). * column wise functions are better documented, in particular explaining when grouping variables are included as part of the selection. 
### Deprecated and defunct functions * `mutate_each()` and `summarise_each()` are deprecated. # dplyr 0.7.6 * `exprs()` is no longer exported to avoid conflicts with `Biobase::exprs()` (#3638). * The MASS package is explicitly suggested to fix CRAN warnings on R-devel (#3657). * Set operations like `intersect()` and `setdiff()` reconstruct groups metadata (#3587) and keep the order of the rows (#3839). * Using namespaced calls to `base::sort()` and `base::unique()` from C++ code to avoid ambiguities when these functions are overridden (#3644). * Fix rchk errors (#3693). # dplyr 0.7.5 (2018-04-14) ## Breaking changes for package developers * The major change in this version is that dplyr now depends on the selecting backend of the tidyselect package. If you have been linking to `dplyr::select_helpers` documentation topic, you should update the link to point to `tidyselect::select_helpers`. * Another change that causes warnings in packages is that dplyr now exports the `exprs()` function. This causes a collision with `Biobase::exprs()`. Either import functions from dplyr selectively rather than in bulk, or do not import `Biobase::exprs()` and refer to it with a namespace qualifier. ## Bug fixes * `distinct(data, "string")` now returns a one-row data frame again. (The previous behavior was to return the data unchanged.) * `do()` operations with more than one named argument can access `.` (#2998). * Reindexing grouped data frames (e.g. after `filter()` or `..._join()`) never updates the `"class"` attribute. This also avoids unintended updates to the original object (#3438). * Fixed rare column name clash in `..._join()` with non-join columns of the same name in both tables (#3266). * Fix `ntile()` and `row_number()` ordering to use the locale-dependent ordering functions in R when dealing with character vectors, rather than always using the C-locale ordering function in C (#2792, @foo-bar-baz-qux). 
* Summaries of summaries (such as `summarise(b = sum(a), c = sum(b))`) are now computed using standard evaluation for simplicity and correctness, but slightly slower (#3233). * Fixed `summarise()` for empty data frames with zero columns (#3071). ## Major changes * `enexpr()`, `expr()`, `exprs()`, `sym()` and `syms()` are now exported. `sym()` and `syms()` construct symbols from strings or character vectors. The `expr()` variants are equivalent to `quo()`, `quos()` and `enquo()` but return simple expressions rather than quosures. They support quasiquotation. * dplyr now depends on the new tidyselect package to power `select()`, `rename()`, `pull()` and their variants (#2896). Consequently `select_vars()`, `select_var()` and `rename_vars()` are soft-deprecated and will start issuing warnings in a future version. Following the switch to tidyselect, `select()` and `rename()` fully support character vectors. You can now unquote variables like this: ``` vars <- c("disp", "cyl") select(mtcars, !! vars) select(mtcars, -(!! vars)) ``` Note that this only works in selecting functions because in other contexts strings and character vectors are ambiguous. For instance strings are a valid input in mutating operations and `mutate(df, "foo")` creates a new column by recycling "foo" to the number of rows. ## Minor changes * Support for raw vector columns in `arrange()`, `group_by()`, `mutate()`, `summarise()` and `..._join()` (minimal `raw` x `raw` support initially) (#1803). * `bind_cols()` handles unnamed list (#3402). * `bind_rows()` works around corrupt columns that have the object bit set while having no class attribute (#3349). * `combine()` returns `logical()` when all inputs are `NULL` (or when there are no inputs) (#3365, @zeehio). * `distinct()` now supports renaming columns (#3234). * Hybrid evaluation simplifies `dplyr::foo()` to `foo()` (#3309). Hybrid functions can now be masked by regular R functions to turn off hybrid evaluation (#3255). 
The hybrid evaluator finds functions from dplyr even if dplyr is not attached (#3456). * In `mutate()` it is now illegal to use `data.frame` in the rhs (#3298). * Support `!!!` in `recode_factor()` (#3390). * `row_number()` works on empty subsets (#3454). * `select()` and `vars()` now treat `NULL` as empty inputs (#3023). * Scoped select and rename functions (`select_all()`, `rename_if()` etc.) now work with grouped data frames, adapting the grouping as necessary (#2947, #3410). `group_by_at()` can group by an existing grouping variable (#3351). `arrange_at()` can use grouping variables (#3332). * `slice()` no longer enforce tibble classes when input is a simple `data.frame`, and ignores 0 (#3297, #3313). * `transmute()` no longer prints a message when including a group variable. ## Documentation * Improved documentation for `funs()` (#3094) and set operations (e.g. `union()`) (#3238, @edublancas). ## Error messages * Better error message if dbplyr is not installed when accessing database backends (#3225). * `arrange()` fails gracefully on `data.frame` columns (#3153). * Corrected error message when calling `cbind()` with an object of wrong length (#3085). * Add warning with explanation to `distinct()` if any of the selected columns are of type `list` (#3088, @foo-bar-baz-qux), or when used on unknown columns (#2867, @foo-bar-baz-qux). * Show clear error message for bad arguments to `funs()` (#3368). * Better error message in `..._join()` when joining data frames with duplicate or `NA` column names. Joining such data frames with a semi- or anti-join now gives a warning, which may be converted to an error in future versions (#3243, #3417). * Dedicated error message when trying to use columns of the `Interval` or `Period` classes (#2568). * Added an `.onDetach()` hook that allows for plyr to be loaded and attached without the warning message that says functions in dplyr will be masked, since dplyr is no longer attached (#3359, @jwnorman). 
## Performance * `sample_n()` and `sample_frac()` on grouped data frame are now faster especially for those with large number of groups (#3193, @saurfang). ## Internal * Compute variable names for joins in R (#3430). * Bumped Rcpp dependency to 0.12.15 to avoid imperfect detection of `NA` values in hybrid evaluation fixed in RcppCore/Rcpp#790 (#2919). * Avoid cleaning the data mask, a temporary environment used to evaluate expressions. If the environment, in which e.g. a `mutate()` expression is evaluated, is preserved until after the operation, accessing variables from that environment now gives a warning but still returns `NULL` (#3318). # dplyr 0.7.4 * Fix recent Fedora and ASAN check errors (#3098). * Avoid dependency on Rcpp 0.12.10 (#3106). # dplyr 0.7.3 * Fixed protection error that occurred when creating a character column using grouped `mutate()` (#2971). * Fixed a rare problem with accessing variable values in `summarise()` when all groups have size one (#3050). * `distinct()` now throws an error when used on unknown columns (#2867, @foo-bar-baz-qux). * Fixed rare out-of-bounds memory write in `slice()` when negative indices beyond the number of rows were involved (#3073). * `select()`, `rename()` and `summarise()` no longer change the grouped vars of the original data (#3038). * `nth(default = var)`, `first(default = var)` and `last(default = var)` fall back to standard evaluation in a grouped operation instead of triggering an error (#3045). * `case_when()` now works if all LHS are atomic (#2909), or when LHS or RHS values are zero-length vectors (#3048). * `case_when()` accepts `NA` on the LHS (#2927). * Semi- and anti-joins now preserve the order of left-hand-side data frame (#3089). * Improved error message for invalid list arguments to `bind_rows()` (#3068). * Grouping by character vectors is now faster (#2204). * Fixed a crash that occurred when an unexpected input was supplied to the `call` argument of `order_by()` (#3065). 
# dplyr 0.7.2 * Move build-time vs. run-time checks out of `.onLoad()` and into `dr_dplyr()`. # dplyr 0.7.1 * Use new versions of bindrcpp and glue to avoid protection problems. Avoid wrapping arguments to internal error functions (#2877). Fix two protection mistakes found by rchk (#2868). * Fix C++ error that caused compilation to fail on mac cran (#2862) * Fix undefined behaviour in `between()`, where `NA_REAL` were assigned instead of `NA_LOGICAL`. (#2855, @zeehio) * `top_n()` now executes operations lazily for compatibility with database backends (#2848). * Reuse of new variables created in ungrouped `mutate()` possible again, regression introduced in dplyr 0.7.0 (#2869). * Quosured symbols do not prevent hybrid handling anymore. This should fix many performance issues introduced with tidyeval (#2822). # dplyr 0.7.0 ## New data, functions, and features * Five new datasets provide some interesting built-in datasets to demonstrate dplyr verbs (#2094): * `starwars` dataset about starwars characters; has list columns * `storms` has the trajectories of ~200 tropical storms * `band_members`, `band_instruments` and `band_instruments2` has some simple data to demonstrate joins. * New `add_count()` and `add_tally()` for adding an `n` column within groups (#2078, @dgrtwo). * `arrange()` for grouped data frames gains a `.by_group` argument so you can choose to sort by groups if you want to (defaults to `FALSE`) (#2318) * New `pull()` generic for extracting a single column either by name or position (either from the left or the right). Thanks to @paulponcet for the idea (#2054). This verb is powered with the new `select_var()` internal helper, which is exported as well. It is like `select_vars()` but returns a single variable. * `as_tibble()` is re-exported from tibble. This is the recommend way to create tibbles from existing data frames. `tbl_df()` has been softly deprecated. 
`tribble()` is now imported from tibble (#2336, @chrMongeau); this is now preferred to `frame_data()`. ## Deprecated and defunct * dplyr no longer messages that you need dtplyr to work with data.table (#2489). * Long deprecated `regroup()`, `mutate_each_q()` and `summarise_each_q()` functions have been removed. * Deprecated `failwith()`. I'm not even sure why it was here. * Soft-deprecated `mutate_each()` and `summarise_each()`, these functions print a message which will be changed to a warning in the next release. * The `.env` argument to `sample_n()` and `sample_frac()` is defunct, passing a value to this argument print a message which will be changed to a warning in the next release. ## Databases This version of dplyr includes some major changes to how database connections work. By and large, you should be able to continue using your existing dplyr database code without modification, but there are two big changes that you should be aware of: * Almost all database related code has been moved out of dplyr and into a new package, [dbplyr](https://github.com/tidyverse/dbplyr/). This makes dplyr simpler, and will make it easier to release fixes for bugs that only affect databases. `src_mysql()`, `src_postgres()`, and `src_sqlite()` will still live dplyr so your existing code continues to work. * It is no longer necessary to create a remote "src". Instead you can work directly with the database connection returned by DBI. This reflects the maturity of the DBI ecosystem. Thanks largely to the work of Kirill Muller (funded by the R Consortium) DBI backends are now much more consistent, comprehensive, and easier to use. That means that there's no longer a need for a layer in between you and DBI. 
You can continue to use `src_mysql()`, `src_postgres()`, and `src_sqlite()`, but I recommend a new style that makes the connection to DBI more clear: ```R library(dplyr) con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") DBI::dbWriteTable(con, "mtcars", mtcars) mtcars2 <- tbl(con, "mtcars") mtcars2 ``` This is particularly useful if you want to perform non-SELECT queries as you can do whatever you want with `DBI::dbGetQuery()` and `DBI::dbExecute()`. If you've implemented a database backend for dplyr, please read the [backend news](https://github.com/tidyverse/dbplyr/blob/main/NEWS.md#backends) to see what's changed from your perspective (not much). If you want to ensure your package works with both the current and previous version of dplyr, see `wrap_dbplyr_obj()` for helpers. ## UTF-8 * Internally, column names are always represented as character vectors, and not as language symbols, to avoid encoding problems on Windows (#1950, #2387, #2388). * Error messages and explanations of data frame inequality are now encoded in UTF-8, also on Windows (#2441). * Joins now always reencode character columns to UTF-8 if necessary. This gives a nice speedup, because now pointer comparison can be used instead of string comparison, but relies on a proper encoding tag for all strings (#2514). * Fixed problems when joining factor or character encodings with a mix of native and UTF-8 encoded values (#1885, #2118, #2271, #2451). * Fix `group_by()` for data frames that have UTF-8 encoded names (#2284, #2382). * New `group_vars()` generic that returns the grouping as character vector, to avoid the potentially lossy conversion to language symbols. The list returned by `group_by_prepare()` now has a new `group_names` component (#1950, #2384). ## Colwise functions * `rename()`, `select()`, `group_by()`, `filter()`, `arrange()` and `transmute()` now have scoped variants (verbs suffixed with `_if()`, `_at()` and `_all()`). 
Like `mutate_all()`, `summarise_if()`, etc, these variants apply an operation to a selection of variables. * The scoped verbs taking predicates (`mutate_if()`, `summarise_if()`, etc) now support S3 objects and lazy tables. S3 objects should implement methods for `length()`, `[[` and `tbl_vars()`. For lazy tables, the first 100 rows are collected and the predicate is applied on this subset of the data. This is robust for the common case of checking the type of a column (#2129). * Summarise and mutate colwise functions pass `...` on the the manipulation functions. * The performance of colwise verbs like `mutate_all()` is now back to where it was in `mutate_each()`. * `funs()` has better handling of namespaced functions (#2089). * Fix issue with `mutate_if()` and `summarise_if()` when a predicate function returns a vector of `FALSE` (#1989, #2009, #2011). ## Tidyeval dplyr has a new approach to non-standard evaluation (NSE) called tidyeval. It is described in detail in `vignette("programming")` but, in brief, gives you the ability to interpolate values in contexts where dplyr usually works with expressions: ```{r} my_var <- quo(homeworld) starwars %>% group_by(!!my_var) %>% summarise_at(vars(height:mass), mean, na.rm = TRUE) ``` This means that the underscored version of each main verb is no longer needed, and so these functions have been deprecated (but remain around for backward compatibility). * `order_by()`, `top_n()`, `sample_n()` and `sample_frac()` now use tidyeval to capture their arguments by expression. This makes it possible to use unquoting idioms (see `vignette("programming")`) and fixes scoping issues (#2297). * Most verbs taking dots now ignore the last argument if empty. This makes it easier to copy lines of code without having to worry about deleting trailing commas (#1039). 
* [API] The new `.data` and `.env` environments can be used inside all verbs that operate on data: `.data$column_name` accesses the column `column_name`, whereas `.env$var` accesses the external variable `var`. Columns or external variables named `.data` or `.env` are shadowed, use `.data$...` and/or `.env$...` to access them. (`.data` implements strict matching also for the `$` operator (#2591).) The `column()` and `global()` functions have been removed. They were never documented officially. Use the new `.data` and `.env` environments instead. * Expressions in verbs are now interpreted correctly in many cases that failed before (e.g., use of `$`, `case_when()`, nonstandard evaluation, ...). These expressions are now evaluated in a specially constructed temporary environment that retrieves column data on demand with the help of the `bindrcpp` package (#2190). This temporary environment poses restrictions on assignments using `<-` inside verbs. To prevent leaking of broken bindings, the temporary environment is cleared after the evaluation (#2435). ## Verbs ### Joins * [API] `xxx_join.tbl_df(na_matches = "never")` treats all `NA` values as different from each other (and from any other value), so that they never match. This corresponds to the behavior of joins for database sources, and of database joins in general. To match `NA` values, pass `na_matches = "na"` to the join verbs; this is only supported for data frames. The default is `na_matches = "na"`, kept for the sake of compatibility to v0.5.0. It can be tweaked by calling `pkgconfig::set_config("dplyr::na_matches", "na")` (#2033). * `common_by()` gets a better error message for unexpected inputs (#2091) * Fix groups when joining grouped data frames with duplicate columns (#2330, #2334, @davidkretch). * One of the two join suffixes can now be an empty string, dplyr no longer hangs (#2228, #2445). * Anti- and semi-joins warn if factor levels are inconsistent (#2741). 
* Warnings about join column inconsistencies now contain the column names (#2728). ### Select * For selecting variables, the first selector decides if it's an inclusive selection (i.e., the initial column list is empty), or an exclusive selection (i.e., the initial column list contains all columns). This means that `select(mtcars, contains("am"), contains("FOO"), contains("vs"))` now returns again both `am` and `vs` columns like in dplyr 0.4.3 (#2275, #2289, @r2evans). * Select helpers now throw an error if called when no variables have been set (#2452) * Helper functions in `select()` (and related verbs) are now evaluated in a context where column names do not exist (#2184). * `select()` (and the internal function `select_vars()`) now support column names in addition to column positions. As a result, expressions like `select(mtcars, "cyl")` are now allowed. ### Other * `recode()`, `case_when()` and `coalesce()` now support splicing of arguments with rlang's `!!!` operator. * `count()` now preserves the grouping of its input (#2021). * `distinct()` no longer duplicates variables (#2001). * Empty `distinct()` with a grouped data frame works the same way as an empty `distinct()` on an ungrouped data frame, namely it uses all variables (#2476). * `copy_to()` now returns it's output invisibly (since you're often just calling for the side-effect). * `filter()` and `lag()` throw informative error if used with ts objects (#2219) * `mutate()` recycles list columns of length 1 (#2171). * `mutate()` gives better error message when attempting to add a non-vector column (#2319), or attempting to remove a column with `NULL` (#2187, #2439). * `summarise()` now correctly evaluates newly created factors (#2217), and can create ordered factors (#2200). * Ungrouped `summarise()` uses summary variables correctly (#2404, #2453). * Grouped `summarise()` no longer converts character `NA` to empty strings (#1839). 
## Combining and comparing * `all_equal()` now reports multiple problems as a character vector (#1819, #2442). * `all_equal()` checks that factor levels are equal (#2440, #2442). * `bind_rows()` and `bind_cols()` give an error for database tables (#2373). * `bind_rows()` works correctly with `NULL` arguments and an `.id` argument (#2056), and also for zero-column data frames (#2175). * Breaking change: `bind_rows()` and `combine()` are more strict when coercing. Logical values are no longer coerced to integer and numeric. Date, POSIXct and other integer or double-based classes are no longer coerced to integer or double as there is chance of attributes or information being lost (#2209, @zeehio). * `bind_cols()` now calls `tibble::repair_names()` to ensure that all names are unique (#2248). * `bind_cols()` handles empty argument list (#2048). * `bind_cols()` better handles `NULL` inputs (#2303, #2443). * `bind_rows()` explicitly rejects columns containing data frames (#2015, #2446). * `bind_rows()` and `bind_cols()` now accept vectors. They are treated as rows by the former and columns by the latter. Rows require inner names like `c(col1 = 1, col2 = 2)`, while columns require outer names: `col1 = c(1, 2)`. Lists are still treated as data frames but can be spliced explicitly with `!!!`, e.g. `bind_rows(!!! x)` (#1676). * `rbind_list()` and `rbind_all()` now call `.Deprecated()`, they will be removed in the next CRAN release. Please use `bind_rows()` instead. * `combine()` accepts `NA` values (#2203, @zeehio) * `combine()` and `bind_rows()` with character and factor types now always warn about the coercion to character (#2317, @zeehio) * `combine()` and `bind_rows()` accept `difftime` objects. * `mutate` coerces results from grouped dataframes accepting combinable data types (such as `integer` and `numeric`). (#1892, @zeehio) ## Vector functions * `%in%` gets new hybrid handler (#126). * `between()` returns NA if `left` or `right` is `NA` (fixes #2562). 
* `case_when()` supports `NA` values (#2000, @tjmahr). * `first()`, `last()`, and `nth()` have better default values for factor, Dates, POSIXct, and data frame inputs (#2029). * Fixed segmentation faults in hybrid evaluation of `first()`, `last()`, `nth()`, `lead()`, and `lag()`. These functions now always fall back to the R implementation if called with arguments that the hybrid evaluator cannot handle (#948, #1980). * `n_distinct()` gets larger hash tables given slightly better performance (#977). * `nth()` and `ntile()` are more careful about proper data types of their return values (#2306). * `ntile()` ignores `NA` when computing group membership (#2564). * `lag()` enforces integer `n` (#2162, @kevinushey). * hybrid `min()` and `max()` now always return a `numeric` and work correctly in edge cases (empty input, all `NA`, ...) (#2305, #2436). * `min_rank("string")` no longer segfaults in hybrid evaluation (#2279, #2444). * `recode()` can now recode a factor to other types (#2268) * `recode()` gains `.dots` argument to support passing replacements as list (#2110, @jlegewie). ## Other minor changes and bug fixes * Many error messages are more helpful by referring to a column name or a position in the argument list (#2448). * New `is_grouped_df()` alias to `is.grouped_df()`. * `tbl_vars()` now has a `group_vars` argument set to `TRUE` by default. If `FALSE`, group variables are not returned. * Fixed segmentation fault after calling `rename()` on an invalid grouped data frame (#2031). * `rename_vars()` gains a `strict` argument to control if an error is thrown when you try and rename a variable that doesn't exist. * Fixed undefined behavior for `slice()` on a zero-column data frame (#2490). * Fixed very rare case of false match during join (#2515). * Restricted workaround for `match()` to R 3.3.0. (#1858). * dplyr now warns on load when the version of R or Rcpp during installation is different to the currently installed version (#2514). 
* Fixed improper reuse of attributes when creating a list column in `summarise()` and perhaps `mutate()` (#2231). * `mutate()` and `summarise()` always strip the `names` attribute from new or updated columns, even for ungrouped operations (#1689). * Fixed rare error that could lead to a segmentation fault in `all_equal(ignore_col_order = FALSE)` (#2502). * The "dim" and "dimnames" attributes are always stripped when copying a vector (#1918, #2049). * `grouped_df` and `rowwise` are registered officially as S3 classes. This makes them easier to use with S4 (#2276, @joranE, #2789). * All operations that return tibbles now include the `"tbl"` class. This is important for correct printing with tibble 1.3.1 (#2789). * Makeflags uses PKG_CPPFLAGS for defining preprocessor macros. * astyle formatting for C++ code, tested but not changed as part of the tests (#2086, #2103). * Update RStudio project settings to install tests (#1952). * Using `Rcpp::interfaces()` to register C callable interfaces, and registering all native exported functions via `R_registerRoutines()` and `useDynLib(.registration = TRUE)` (#2146). * Formatting of grouped data frames now works by overriding the `tbl_sum()` generic instead of `print()`. This means that the output is more consistent with tibble, and that `format()` is now supported also for SQL sources (#2781). # dplyr 0.5.0 ## Breaking changes ### Existing functions * `arrange()` once again ignores grouping (#1206). * `distinct()` now only keeps the distinct variables. If you want to return all variables (using the first row for non-distinct values) use `.keep_all = TRUE` (#1110). For SQL sources, `.keep_all = FALSE` is implemented using `GROUP BY`, and `.keep_all = TRUE` raises an error (#1937, #1942, @krlmlr). (The default behaviour of using all variables when none are specified remains - this note only applies if you select some variables). * The select helper functions `starts_with()`, `ends_with()` etc are now real exported functions. 
This means that you'll need to import those functions if you're using from a package where dplyr is not attached. i.e. `dplyr::select(mtcars, starts_with("m"))` used to work, but now you'll need `dplyr::select(mtcars, dplyr::starts_with("m"))`. ### Deprecated and defunct functions * The long deprecated `chain()`, `chain_q()` and `%.%` have been removed. Please use `%>%` instead. * `id()` has been deprecated. Please use `group_indices()` instead (#808). * `rbind_all()` and `rbind_list()` are formally deprecated. Please use `bind_rows()` instead (#803). * Outdated benchmarking demos have been removed (#1487). * Code related to starting and signalling clusters has been moved out to [multidplyr](https://github.com/tidyverse/multidplyr). ## New functions * `coalesce()` finds the first non-missing value from a set of vectors. (#1666, thanks to @krlmlr for initial implementation). * `case_when()` is a general vectorised if + else if (#631). * `if_else()` is a vectorised if statement: it's a stricter (type-safe), faster, and more predictable version of `ifelse()`. In SQL it is translated to a `CASE` statement. * `na_if()` makes it easy to replace a certain value with an `NA` (#1707). In SQL it is translated to `NULL_IF`. * `near(x, y)` is a helper for `abs(x - y) < tol` (#1607). * `recode()` is vectorised equivalent to `switch()` (#1710). * `union_all()` method. Maps to `UNION ALL` for SQL sources, `bind_rows()` for data frames/tbl\_dfs, and `combine()` for vectors (#1045). * A new family of functions replace `summarise_each()` and `mutate_each()` (which will thus be deprecated in a future release). `summarise_all()` and `mutate_all()` apply a function to all columns while `summarise_at()` and `mutate_at()` operate on a subset of columns. These columns are selected with either a character vector of columns names, a numeric vector of column positions, or a column specification with `select()` semantics generated by the new `columns()` helper. 
In addition, `summarise_if()` and `mutate_if()` take a predicate function or a logical vector (these verbs currently require local sources). All these functions can now take ordinary functions instead of a list of functions generated by `funs()` (though this is only useful for local sources). (#1845, @lionel-) * `select_if()` lets you select columns with a predicate function. Only compatible with local sources. (#497, #1569, @lionel-) ## Local backends ### dtplyr All data table related code has been separated out into a new dtplyr package. This decouples the development of the data.table interface from the development of the dplyr package. If both data.table and dplyr are loaded, you'll get a message reminding you to load dtplyr. ### Tibble Functions related to the creation and coercion of `tbl_df`s now live in their own package: [tibble](https://www.rstudio.com/blog/tibble-1-0-0/). See `vignette("tibble")` for more details. * `$` and `[[` methods that never do partial matching (#1504), and throw an error if the variable does not exist. * `all_equal()` allows you to compare data frames ignoring row and column order, and optionally ignoring minor differences in type (e.g. int vs. double) (#821). The test handles the case where the df has 0 columns (#1506). The test fails when convert is `FALSE` and types don't match (#1484). * `all_equal()` shows better error message when comparing raw values or when types are incompatible and `convert = TRUE` (#1820, @krlmlr). * `add_row()` makes it easy to add a new row to a data frame (#1021). * `as_data_frame()` is now an S3 generic with methods for lists (the old `as_data_frame()`), data frames (trivial), and matrices (with efficient C++ implementation) (#876). It no longer strips subclasses. * The internals of `data_frame()` and `as_data_frame()` have been aligned, so `as_data_frame()` will now automatically recycle length-1 vectors. 
Both functions give more informative error messages if you attempt to create an invalid data frame. You can no longer create a data frame with duplicated names (#820). Both check for `POSIXlt` columns, and tell you to use `POSIXct` instead (#813). * `frame_data()` properly constructs rectangular tables (#1377, @kevinushey), and supports list-cols. * `glimpse()` is now a generic. The default method dispatches to `str()` (#1325). It now (invisibly) returns its first argument (#1570). * `lst()` and `lst_()` which create lists in the same way that `data_frame()` and `data_frame_()` create data frames (#1290). * `print.tbl_df()` is considerably faster if you have very wide data frames. It will now also only list the first 100 additional variables not already on screen - control this with the new `n_extra` parameter to `print()` (#1161). When printing a grouped data frame the number of groups is now printed with thousands separators (#1398). The type of list columns is correctly printed (#1379). * Package includes `setOldClass(c("tbl_df", "tbl", "data.frame"))` to help with S4 dispatch (#969). * `tbl_df` automatically generates column names (#1606). ### tbl_cube * new `as_data_frame.tbl_cube()` (#1563, @krlmlr). * `tbl_cube`s are now constructed correctly from data frames, duplicate dimension values are detected, missing dimension values are filled with `NA`. The construction from data frames now guesses the measure variables by default, and allows specification of dimension and/or measure variables (#1568, @krlmlr). * Swap order of `dim_names` and `met_name` arguments in `as.tbl_cube` (for `array`, `table` and `matrix`) for consistency with `tbl_cube` and `as.tbl_cube.data.frame`. Also, the `met_name` argument to `as.tbl_cube.table` now defaults to `"Freq"` for consistency with `as.data.frame.table` (@krlmlr, #1374). ## Remote backends * `as_data_frame()` on SQL sources now returns all rows (#1752, #1821, @krlmlr). 
* `compute()` gets new parameters `indexes` and `unique_indexes` that make it easier to add indexes (#1499, @krlmlr). * `db_explain()` gains a default method for DBIConnections (#1177). * The backend testing system has been improved. This led to the removal of `temp_srcs()`. In the unlikely event that you were using this function, you can instead use `test_register_src()`, `test_load()`, and `test_frame()`. * You can now use `right_join()` and `full_join()` with remote tables (#1172). ### SQLite * `src_memdb()` is a session-local in-memory SQLite database. `memdb_frame()` works like `data_frame()`, but creates a new table in that database. * `src_sqlite()` now uses a stricter quoting character, `` ` ``, instead of `"`. SQLite "helpfully" will convert `"x"` into a string if there is no identifier called x in the current scope (#1426). * `src_sqlite()` throws errors if you try to use it with window functions (#907). ### SQL translation * `filter.tbl_sql()` now puts parens around each argument (#934). * Unary `-` is better translated (#1002). * `escape.POSIXt()` method makes it easier to use date times. The date is rendered in ISO 8601 format in UTC, which should work in most databases (#857). * `is.na()` gets a missing space (#1695). * `if`, `is.na()`, and `is.null()` get extra parens to make precedence more clear (#1695). * `pmin()` and `pmax()` are translated to `MIN()` and `MAX()` (#1711). * Window functions: * Work on ungrouped data (#1061). * Warning if order is not set on cumulative window functions. * Multiple partitions or ordering variables in windowed functions no longer generate extra parentheses, so should work for more databases (#1060) ### Internals This version includes an almost total rewrite of how dplyr verbs are translated into SQL. Previously, I used a rather ad-hoc approach, which tried to guess when a new subquery was needed. 
Unfortunately this approach was fraught with bugs, so in this version I've implemented a much richer internal data model. Now there is a three step process: 1. When applied to a `tbl_lazy`, each dplyr verb captures its inputs and stores in a `op` (short for operation) object. 2. `sql_build()` iterates through the operations to build up an object that represents a SQL query. These objects are convenient for testing as they are lists, and are backend agnostic. 3. `sql_render()` iterates through the queries and generates the SQL, using generics (like `sql_select()`) that can vary based on the backend. In the short-term, this increased abstraction is likely to lead to some minor performance decreases, but the chance of dplyr generating correct SQL is much much higher. In the long-term, these abstractions will make it possible to write a query optimiser/compiler in dplyr, which would make it possible to generate much more succinct queries. If you have written a dplyr backend, you'll need to make some minor changes to your package: * `sql_join()` has been considerably simplified - it is now only responsible for generating the join query, not for generating the intermediate selects that rename the variable. Similarly for `sql_semi_join()`. If you've provided new methods in your backend, you'll need to rewrite. * `select_query()` gains a distinct argument which is used for generating queries for `distinct()`. It loses the `offset` argument which was never used (and hence never tested). * `src_translate_env()` has been replaced by `sql_translate_env()` which should have methods for the connection object. There were two other tweaks to the exported API, but these are less likely to affect anyone. * `translate_sql()` and `partial_eval()` got a new API: now use connection + variable names, rather than a `tbl`. This makes testing considerably easier. `translate_sql_q()` has been renamed to `translate_sql_()`. 
* Also note that the sql generation generics now have a default method, instead of methods for DBIConnection and NULL. ## Minor improvements and bug fixes ### Single table verbs * Avoiding segfaults in presence of `raw` columns (#1803, #1817, @krlmlr). * `arrange()` fails gracefully on list columns (#1489) and matrices (#1870, #1945, @krlmlr). * `count()` now adds additional grouping variables, rather than overriding existing (#1703). `tally()` and `count()` can now count a variable called `n` (#1633). Weighted `count()`/`tally()` ignore `NA`s (#1145). * The progress bar in `do()` is now updated at most 20 times per second, avoiding unnecessary redraws (#1734, @mkuhn) * `distinct()` doesn't crash when given a 0-column data frame (#1437). * `filter()` throws an error if you supply a named argument. This is usually a typo: `filter(df, x = 1)` instead of `filter(df, x == 1)` (#1529). * `summarise()` correctly coerces factors with different levels (#1678), handles min/max of already summarised variable (#1622), and supports data frames as columns (#1425). * `select()` now informs you that it adds missing grouping variables (#1511). It works even if the grouping variable has a non-syntactic name (#1138). Negating a failed match (e.g. `select(mtcars, -contains("x"))`) returns all columns, instead of no columns (#1176). The `select()` helpers are now exported and have their own documentation (#1410). `one_of()` gives a useful error message if variable names are not found in data frame (#1407). * The naming behaviour of `summarise_each()` and `mutate_each()` has been tweaked so that you can force inclusion of both the function and the variable name: `summarise_each(mtcars, funs(mean = mean), everything())` (#442). * `mutate()` handles factors that are all `NA` (#1645), or have different levels in different groups (#1414). It disambiguates `NA` and `NaN` (#1448), and silently promotes groups that only contain `NA` (#1463). 
It deep copies data in list columns (#1643), and correctly fails on incompatible columns (#1641). `mutate()` on grouped data no longer groups grouping attributes (#1120). `rowwise()` mutate gives expected results (#1381). * `one_of()` tolerates unknown variables in `vars`, but warns (#1848, @jennybc). * `print.grouped_df()` passes on `...` to `print()` (#1893). * `slice()` correctly handles grouped attributes (#1405). * `ungroup()` generic gains `...` (#922). ### Dual table verbs * `bind_cols()` matches the behaviour of `bind_rows()` and ignores `NULL` inputs (#1148). It also handles `POSIXct`s with integer base type (#1402). * `bind_rows()` handles 0-length named lists (#1515), promotes factors to characters (#1538), and warns when binding factor and character (#1485). `bind_rows()` is more flexible in the way it can accept data frames, lists, list of data frames, and list of lists (#1389). * `bind_rows()` rejects `POSIXlt` columns (#1875, @krlmlr). * Both `bind_cols()` and `bind_rows()` infer classes and grouping information from the first data frame (#1692). * `rbind()` and `cbind()` get `grouped_df()` methods that make it harder to create corrupt data frames (#1385). You should still prefer `bind_rows()` and `bind_cols()`. * Joins now use correct class when joining on `POSIXct` columns (#1582, @joel23888), and consider time zones (#819). Joins handle a `by` that is empty (#1496), or has duplicates (#1192). Suffixes grow progressively to avoid creating repeated column names (#1460). Joins on string columns should be substantially faster (#1386). Extra attributes are ok if they are identical (#1636). Joins work correctly when factor levels are not equal (#1712, #1559). Anti- and semi-joins give correct result when by variable is a factor (#1571), but warn if factor levels are inconsistent (#2741). A clear error message is given for joins where an explicit `by` contains unavailable columns (#1928, #1932). 
Warnings about join column inconsistencies now contain the column names (#2728). * `inner_join()`, `left_join()`, `right_join()`, and `full_join()` gain a `suffix` argument which allows you to control what suffix duplicated variable names receive (#1296). * Set operations (`intersect()`, `union()` etc) respect coercion rules (#799). `setdiff()` handles factors with `NA` levels (#1526). * There were a number of fixes to enable joining of data frames that don't have the same encoding of column names (#1513), including working around bug 16885 regarding `match()` in R 3.3.0 (#1806, #1810, @krlmlr). ### Vector functions * `combine()` silently drops `NULL` inputs (#1596). * Hybrid `cummean()` is more stable against floating point errors (#1387). * Hybrid `lead()` and `lag()` received a considerable overhaul. They are more careful about more complicated expressions (#1588), and fall back more readily to pure R evaluation (#1411). They behave correctly in `summarise()` (#1434), and handle default values for string columns. * Hybrid `min()` and `max()` handle empty sets (#1481). * `n_distinct()` uses multiple arguments for data frames (#1084), falls back to R evaluation when needed (#1657), reverting decision made in (#567). Passing no arguments gives an error (#1957, #1959, @krlmlr). * `nth()` now supports negative indices to select from end, e.g. `nth(x, -2)` selects the 2nd value from the end of `x` (#1584). * `top_n()` can now also select bottom `n` values by passing a negative value to `n` (#1008, #1352). * Hybrid evaluation leaves formulas untouched (#1447). # dplyr 0.4.3 ## Improved encoding support Until now, dplyr's support for non-UTF8 encodings has been rather shaky. This release brings a number of improvements to fix these problems: it's probably not perfect, but should be a lot better than the previous version. This includes fixes to `arrange()` (#1280), `bind_rows()` (#1265), `distinct()` (#1179), and joins (#1315). 
`print.tbl_df()` also received a fix for strings with invalid encodings (#851). ## Other minor improvements and bug fixes * `frame_data()` provides a means for constructing `data_frame`s using a simple row-wise language. (#1358, @kevinushey) * `all.equal()` no longer runs all outputs together (#1130). * `as_data_frame()` gives better error message with NA column names (#1101). * `[.tbl_df` is more careful about subsetting column names (#1245). * `arrange()` and `mutate()` work on empty data frames (#1142). * `arrange()`, `filter()`, `slice()`, and `summarise()` preserve data frame meta attributes (#1064). * `bind_rows()` and `bind_cols()` accept lists (#1104): during initial data cleaning you no longer need to convert lists to data frames, but can instead feed them to `bind_rows()` directly. * `bind_rows()` gains a `.id` argument. When supplied, it creates a new column that gives the name of each data frame (#1337, @lionel-). * `bind_rows()` respects the `ordered` attribute of factors (#1112), and does better at comparing `POSIXct`s (#1125). The `tz` attribute is ignored when determining if two `POSIXct` vectors are comparable. If the `tz` of all inputs is the same, it's used, otherwise its set to `UTC`. * `data_frame()` always produces a `tbl_df` (#1151, @kevinushey) * `filter(x, TRUE, TRUE)` now just returns `x` (#1210), it doesn't internally modify the first argument (#971), and it now works with rowwise data (#1099). It once again works with data tables (#906). * `glimpse()` also prints out the number of variables in addition to the number of observations (@ilarischeinin, #988). * Joins handles matrix columns better (#1230), and can join `Date` objects with heterogeneous representations (some `Date`s are integers, while others are numeric). This also improves `all.equal()` (#1204). * Fixed `percent_rank()` and `cume_dist()` so that missing values no longer affect denominator (#1132). 
* `print.tbl_df()` now displays the class for all variables, not just those that don't fit on the screen (#1276). It also displays duplicated column names correctly (#1159). * `print.grouped_df()` now tells you how many groups there are. * `mutate()` can set to `NULL` the first column (used to segfault, #1329) and it better protects intermediary results (avoiding random segfaults, #1231). * `mutate()` on grouped data handles the special case where for the first few groups, the result consists of a `logical` vector with only `NA`. This can happen when the condition of an `ifelse` is an all `NA` logical vector (#958). * `mutate.rowwise_df()` handles factors (#886) and correctly handles 0-row inputs (#1300). * `n_distinct()` gains an `na_rm` argument (#1052). * The `Progress` bar used by `do()` now respects global option `dplyr.show_progress` (default is TRUE) so you can turn it off globally (@jimhester #1264, #1226). * `summarise()` handles expressions returning heterogeneous outputs, e.g. `median()`, which sometimes returns an integer, and other times a numeric (#893). * `slice()` silently drops columns corresponding to an NA (#1235). * `ungroup.rowwise_df()` gives a `tbl_df` (#936). * More explicit duplicated column name error message (#996). * When "," is already being used as the decimal point (`getOption("OutDec")`), use "." as the thousands separator when printing out formatted numbers (@ilarischeinin, #988). ## Databases * `db_query_fields.SQLiteConnection` uses `build_sql` rather than `paste0` (#926, @NikNakk) * Improved handling of `log()` (#1330). * `n_distinct(x)` is translated to `COUNT(DISTINCT(x))` (@skparkes, #873). * `print(n = Inf)` now works for remote sources (#1310). ## Hybrid evaluation * Hybrid evaluation does not take place for objects with a class (#1237). * Improved `$` handling (#1134). * Simplified code for `lead()` and `lag()` and make sure they work properly on factors (#955). Both respect the `default` argument (#915). 
* `mutate` can set to `NULL` the first column (used to segfault, #1329). * `filter` on grouped data handles indices correctly (#880). * `sum()` issues a warning about integer overflow (#1108). # dplyr 0.4.2 This is a minor release containing fixes for a number of crashes and issues identified by R CMD CHECK. There is one new "feature": dplyr no longer complains about unrecognised attributes, and instead just copies them over to the output. * `lag()` and `lead()` for grouped data were confused about indices and therefore produced wrong results (#925, #937). `lag()` once again overrides `lag()` instead of just the default method `lag.default()`. This is necessary due to changes in R CMD check. To use the lag function provided by another package, use `pkg::lag`. * Fixed a number of memory issues identified by valgrind. * Improved performance when working with large number of columns (#879). * List-cols that contain data frames now print a slightly nicer summary (#1147) * Set operations give more useful error message on incompatible data frames (#903). * `all.equal()` gives the correct result when `ignore_row_order` is `TRUE` (#1065) and `all.equal()` correctly handles character missing values (#1095). * `bind_cols()` always produces a `tbl_df` (#779). * `bind_rows()` gains a test for a form of data frame corruption (#1074). * `bind_rows()` and `summarise()` now handles complex columns (#933). * Workaround for using the constructor of `DataFrame` on an unprotected object (#998) * Improved performance when working with large number of columns (#879). # dplyr 0.4.1 * Don't assume that RPostgreSQL is available. # dplyr 0.4.0 ## New features * `add_rownames()` turns row names into an explicit variable (#639). * `as_data_frame()` efficiently coerces a list into a data frame (#749). * `bind_rows()` and `bind_cols()` efficiently bind a list of data frames by row or column. 
`combine()` applies the same coercion rules to vectors (it works like `c()` or `unlist()` but is consistent with the `bind_rows()` rules). * `right_join()` (include all rows in `y`, and matching rows in `x`) and `full_join()` (include all rows in `x` and `y`) complete the family of mutating joins (#96). * `group_indices()` computes a unique integer id for each group (#771). It can be called on a grouped_df without any arguments or on a data frame with same arguments as `group_by()`. ## New vignettes * `vignette("data_frames")` describes dplyr functions that make it easier and faster to create and coerce data frames. It subsumes the old `memory` vignette. * `vignette("two-table")` describes how two-table verbs work in dplyr. ## Minor improvements * `data_frame()` (and `as_data_frame()` & `tbl_df()`) now explicitly forbid columns that are data frames or matrices (#775). All columns must be either a 1d atomic vector or a 1d list. * `do()` uses lazyeval to correctly evaluate its arguments in the correct environment (#744), and new `do_()` is the SE equivalent of `do()` (#718). You can modify grouped data in place: this is probably a bad idea but it's sometimes convenient (#737). `do()` on grouped data tables now passes in all columns (not all columns except grouping vars) (#735, thanks to @kismsu). `do()` with database tables no longer potentially includes grouping variables twice (#673). Finally, `do()` gives more consistent outputs when there are no rows or no groups (#625). * `first()` and `last()` preserve factors, dates and times (#509). * Overhaul of single table verbs for data.table backend. They now all use a consistent (and simpler) code base. This ensures that (e.g.) `n()` now works in all verbs (#579). * In `*_join()`, you can now name only those variables that are different between the two tables, e.g. `inner_join(x, y, c("a", "b", "c" = "d"))` (#682). If non-join columns are the same, dplyr will add `.x` and `.y` suffixes to distinguish the source (#655). 
* `mutate()` handles complex vectors (#436) and forbids `POSIXlt` results (instead of crashing) (#670). * `select()` now implements a more sophisticated algorithm so if you're doing multiples includes and excludes with and without names, you're more likely to get what you expect (#644). You'll also get a better error message if you supply an input that doesn't resolve to an integer column position (#643). * Printing has received a number of small tweaks. All `print()` methods invisibly return their input so you can interleave `print()` statements into a pipeline to see interim results. `print()` will print column names of 0-row data frames (#652), and will never print more than 20 rows (i.e. `options(dplyr.print_max)` is now 20), not 100 (#710). Row names are now never printed since no dplyr method is guaranteed to preserve them (#669). `glimpse()` prints the number of observations (#692) `type_sum()` gains a data frame method. * `summarise()` handles list output columns (#832) * `slice()` works for data tables (#717). Documentation clarifies that slice can't work with relational databases, and the examples show how to achieve the same results using `filter()` (#720). * dplyr now requires RSQLite >= 1.0. This shouldn't affect your code in any way (except that RSQLite now doesn't need to be attached) but does simplify the internals (#622). * Functions that need to combine multiple results into a single column (e.g. `join()`, `bind_rows()` and `summarise()`) are more careful about coercion. Joining factors with the same levels in the same order preserves the original levels (#675). Joining factors with non-identical levels generates a warning and coerces to character (#684). Joining a character to a factor (or vice versa) generates a warning and coerces to character. Avoid these warnings by ensuring your data is compatible before joining. `rbind_list()` will throw an error if you attempt to combine an integer and factor (#751). 
`rbind()`ing a column full of `NA`s is allowed and just collects the appropriate missing value for the column type being collected (#493). `summarise()` is more careful about `NA`, e.g. the decision on the result type will be delayed until the first non NA value is returned (#599). It will complain about loss of precision coercions, which can happen for expressions that return integers for some groups and doubles for others (#599). * A number of functions gained new or improved hybrid handlers: `first()`, `last()`, `nth()` (#626), `lead()` & `lag()` (#683), `%in%` (#126). That means when you use these functions in a dplyr verb, we handle them in C++, rather than calling back to R, and hence improving performance. Hybrid `min_rank()` correctly handles `NaN` values (#726). Hybrid implementation of `nth()` falls back to R evaluation when `n` is not a length one integer or numeric, e.g. when it's an expression (#734). Hybrid `dense_rank()`, `min_rank()`, `cume_dist()`, `ntile()`, `row_number()` and `percent_rank()` now preserve NAs (#774) * `filter` returns its input when it has no rows or no columns (#782). * Join functions keep attributes (e.g. time zone information) from the left argument for `POSIXct` and `Date` objects (#819), and only warn once about each incompatibility (#798). ## Bug fixes * `[.tbl_df` correctly computes row names for 0-column data frames, avoiding problems with xtable (#656). `[.grouped_df` will silently drop grouping if you don't include the grouping columns (#733). * `data_frame()` now acts correctly if the first argument is a vector to be recycled. (#680 thanks @jimhester) * `filter.data.table()` works if the table has a variable called "V1" (#615). * `*_join()` keeps columns in original order (#684). Joining a factor to a character vector doesn't segfault (#688). `*_join` functions can now deal with multiple encodings (#769), and correctly name results (#855). * `*_join.data.table()` works when data.table isn't attached (#786). 
* `group_by()` on a data table preserves original order of the rows (#623). `group_by()` supports variables with more than 39 characters thanks to a fix in lazyeval (#705). It gives meaningful error message when a variable is not found in the data frame (#716). * `grouped_df()` requires `vars` to be a list of symbols (#665). * `min(.,na.rm = TRUE)` works with `Date`s built on numeric vectors (#755). * `rename_()` generic gets missing `.dots` argument (#708). * `row_number()`, `min_rank()`, `percent_rank()`, `dense_rank()`, `ntile()` and `cume_dist()` handle data frames with 0 rows (#762). They all preserve missing values (#774). `row_number()` doesn't segfault when giving an external variable with the wrong number of variables (#781). * `group_indices` handles the edge case when there are no variables (#867). * Removed bogus `NAs introduced by coercion to integer range` on 32-bit Windows (#2708). # dplyr 0.3.0.1 * Fixed problem with test script on Windows. # dplyr 0.3 ## New functions * `between()` vector function efficiently determines if numeric values fall in a range, and is translated to special form for SQL (#503). * `count()` makes it even easier to do (weighted) counts (#358). * `data_frame()` by @kevinushey is a nicer way of creating data frames. It never coerces column types (no more `stringsAsFactors = FALSE`!), never munges column names, and never adds row names. You can use previously defined columns to compute new columns (#376). * `distinct()` returns distinct (unique) rows of a tbl (#97). Supply additional variables to return the first row for each unique combination of variables. * Set operations, `intersect()`, `union()` and `setdiff()` now have methods for data frames, data tables and SQL database tables (#93). They pass their arguments down to the base functions, which will ensure they raise errors if you pass in too many arguments. * Joins (e.g. 
`left_join()`, `inner_join()`, `semi_join()`, `anti_join()`) now allow you to join on different variables in `x` and `y` tables by supplying a named vector to `by`. For example, `by = c("a" = "b")` joins `x.a` to `y.b`. * `n_groups()` function tells you how many groups are in a tbl. It returns 1 for ungrouped data. (#477) * `transmute()` works like `mutate()` but drops all variables that you didn't explicitly refer to (#302). * `rename()` makes it easy to rename variables - it works similarly to `select()` but it preserves columns that you didn't otherwise touch. * `slice()` allows you to select rows by position (#226). It includes positive integers, drops negative integers and you can use expression like `n()`. ## Programming with dplyr (non-standard evaluation) * You can now program with dplyr - every function that does non-standard evaluation (NSE) has a standard evaluation (SE) version ending in `_`. This is powered by the new lazyeval package which provides all the tools needed to implement NSE consistently and correctly. * See `vignette("nse")` for full details. * `regroup()` is deprecated. Please use the more flexible `group_by_()` instead. * `summarise_each_q()` and `mutate_each_q()` are deprecated. Please use `summarise_each_()` and `mutate_each_()` instead. * `funs_q` has been replaced with `funs_`. ## Removed and deprecated features * `%.%` has been deprecated: please use `%>%` instead. `chain()` is defunct. (#518) * `filter.numeric()` removed. Need to figure out how to reimplement with new lazy eval system. * The `Progress` refclass is no longer exported to avoid conflicts with shiny. Instead use `progress_estimated()` (#535). * `src_monetdb()` is now implemented in MonetDB.R, not dplyr. * `show_sql()` and `explain_sql()` and matching global options `dplyr.show_sql` and `dplyr.explain_sql` have been removed. Instead use `show_query()` and `explain()`. ## Minor improvements and bug fixes * Main verbs now have individual documentation pages (#519). 
* `%>%` is simply re-exported from magrittr, instead of creating a local copy (#496, thanks to @jimhester) * Examples now use `nycflights13` instead of `hflights` because the variables have better names and there are a few interlinked tables (#562). `Lahman` and `nycflights13` are (once again) suggested packages. This means many examples will not work unless you explicitly install them with `install.packages(c("Lahman", "nycflights13"))` (#508). dplyr now depends on Lahman 3.0.1. A number of examples have been updated to reflect modified field names (#586). * `do()` now displays the progress bar only when used in interactive prompts and not when knitting (#428, @jimhester). * `glimpse()` now prints a trailing new line (#590). * `group_by()` has more consistent behaviour when grouping by constants: it creates a new column with that value (#410). It renames grouping variables (#410). The first argument is now `.data` so you can create new groups with name x (#534). * Now instead of overriding `lag()`, dplyr overrides `lag.default()`, which should avoid clobbering lag methods added by other packages. (#277). * `mutate(data, a = NULL)` removes the variable `a` from the returned dataset (#462). * `trunc_mat()` and hence `print.tbl_df()` and friends gets a `width` argument to control the default output width. Set `options(dplyr.width = Inf)` to always show all columns (#589). * `select()` gains `one_of()` selector: this allows you to select variables provided by a character vector (#396). It fails immediately if you give an empty pattern to `starts_with()`, `ends_with()`, `contains()` or `matches()` (#481, @leondutoit). Fixed buglet in `select()` so that you can now create variables called `val` (#564). * Switched from RC to R6. * `tally()` and `top_n()` work consistently: neither accidentally evaluates the `wt` param. (#426, @mnel) * `rename` handles grouped data (#640). 
## Minor improvements and bug fixes by backend ### Databases * Correct SQL generation for `paste()` when used with the collapse parameter targeting a Postgres database. (@rbdixon, #1357) * The db backend system has been completely overhauled in order to make it possible to add backends in other packages, and to support a much wider range of databases. See `vignette("new-sql-backend")` for instruction on how to create your own (#568). * `src_mysql()` gains a method for `explain()`. * When `mutate()` creates a new variable that uses a window function, automatically wrap the result in a subquery (#484). * Correct SQL generation for `first()` and `last()` (#531). * `order_by()` now works in conjunction with window functions in databases that support them. ### Data frames/`tbl_df` * All verbs now understand how to work with `difftime()` (#390) and `AsIs` (#453) objects. They all check that colnames are unique (#483), and are more robust when columns are not present (#348, #569, #600). * Hybrid evaluation bugs fixed: * Call substitution stopped too early when a sub expression contained a `$` (#502). * Handle `::` and `:::` (#412). * `cumany()` and `cumall()` properly handle `NA` (#408). * `nth()` now correctly preserve the class when using dates, times and factors (#509). * no longer substitutes within `order_by()` because `order_by()` needs to do its own NSE (#169). * `[.tbl_df` always returns a tbl_df (i.e. `drop = FALSE` is the default) (#587, #610). `[.grouped_df` preserves important output attributes (#398). * `arrange()` keeps the grouping structure of grouped data (#491, #605), and preserves input classes (#563). * `contains()` accidentally matched regular expressions, now it passes `fixed = TRUE` to `grep()` (#608). * `filter()` asserts all variables are white listed (#566). * `mutate()` makes a `rowwise_df` when given a `rowwise_df` (#463). * `rbind_all()` creates `tbl_df` objects instead of raw `data.frame`s. 
* If `select()` doesn't match any variables, it returns a 0-column data frame, instead of the original (#498). It no longer fails when some columns are not named (#492) * `sample_n()` and `sample_frac()` methods for data.frames exported. (#405, @alyst) * A grouped data frame may have 0 groups (#486). Grouped df objects gain some basic validity checking, which should prevent some crashes related to corrupt `grouped_df` objects made by `rbind()` (#606). * More coherence when joining columns of compatible but different types, e.g. when joining a character vector and a factor (#455), or a numeric and integer (#450) * `mutate()` works on a zero-row grouped data frame, and with list columns (#555). * `LazySubset` was confused about input data size (#452). * Internal `n_distinct()` is stricter about its inputs: it requires one symbol which must be from the data frame (#567). * `rbind_*()` handle data frames with 0 rows (#597). They fill character vector columns with `NA` instead of blanks (#595). They work with list columns (#463). * Improved handling of encoding for column names (#636). * Improved handling of hybrid evaluation re $ and @ (#645). ### Data tables * Fix major omission in `tbl_dt()` and `grouped_dt()` methods - I was accidentally doing a deep copy on every result :( * `summarise()` and `group_by()` now retain over-allocation when working with data.tables (#475, @arunsrinivasan). * joining two data.tables now correctly dispatches to data table methods, and result is a data table (#470) ### Cubes * `summarise.tbl_cube()` works with single grouping variable (#480). # dplyr 0.2 ## Piping dplyr now imports `%>%` from magrittr (#330). I recommend that you use this instead of `%.%` because it is easier to type (since you can hold down the shift key) and is more flexible. With `%>%`, you can control which argument on the RHS receives the LHS by using the pronoun `.`. 
This makes `%>%` more useful with base R functions because they don't always take the data frame as the first argument. For example you could pipe `mtcars` to `xtabs()` with: mtcars %>% xtabs( ~ cyl + vs, data = .) Thanks to @smbache for the excellent magrittr package. dplyr only provides `%>%` from magrittr, but it contains many other useful functions. To use them, load `magrittr` explicitly: `library(magrittr)`. For more details, see `vignette("magrittr")`. `%.%` will be deprecated in a future version of dplyr, but it won't happen for a while. I've also deprecated `chain()` to encourage a single style of dplyr usage: please use `%>%` instead. ## Do `do()` has been completely overhauled. There are now two ways to use it, either with multiple named arguments or a single unnamed argument. `group_by()` + `do()` is equivalent to `plyr::dlply`, except it always returns a data frame. If you use named arguments, each argument becomes a list-variable in the output. A list-variable can contain any arbitrary R object so it's particularly well suited for storing models. library(dplyr) models <- mtcars %>% group_by(cyl) %>% do(lm = lm(mpg ~ wt, data = .)) models %>% summarise(rsq = summary(lm)$r.squared) If you use an unnamed argument, the result should be a data frame. This allows you to apply arbitrary functions to each group. mtcars %>% group_by(cyl) %>% do(head(., 1)) Note the use of the `.` pronoun to refer to the data in the current group. `do()` also has an automatic progress bar. It appears if the computation takes longer than 5 seconds and lets you know (approximately) how much longer the job will take to complete. ## New verbs dplyr 0.2 adds three new verbs: * `glimpse()` makes it possible to see all the columns in a tbl, displaying as much data for each variable as can be fit on a single line. * `sample_n()` randomly samples a fixed number of rows from a tbl; `sample_frac()` randomly samples a fixed fraction of rows. 
Only works for local data frames and data tables (#202). * `summarise_each()` and `mutate_each()` make it easy to apply one or more functions to multiple columns in a tbl (#178). ## Minor improvements * If you load plyr after dplyr, you'll get a message suggesting that you load plyr first (#347). * `as.tbl_cube()` gains a method for matrices (#359, @paulstaab) * `compute()` gains `temporary` argument so you can control whether the results are temporary or permanent (#382, @cpsievert) * `group_by()` now defaults to `add = FALSE` so that it sets the grouping variables rather than adding to the existing list. I think this is how most people expected `group_by` to work anyway, so it's unlikely to cause problems (#385). * Support for [MonetDB](http://www.monetdb.org) tables with `src_monetdb()` (#8, thanks to @hannesmuehleisen). * New vignettes: * `memory` vignette which discusses how dplyr minimises memory usage for local data frames (#198). * `new-sql-backend` vignette which discusses how to add a new SQL backend/source to dplyr. * `changes()` output more clearly distinguishes which columns were added or deleted. * `explain()` is now generic. * dplyr is more careful when setting the keys of data tables, so it never accidentally modifies an object that it doesn't own. It also avoids unnecessary key setting which negatively affected performance. (#193, #255). * `print()` methods for `tbl_df`, `tbl_dt` and `tbl_sql` gain `n` argument to control the number of rows printed (#362). They also work better when you have columns containing lists of complex objects. * `row_number()` can be called without arguments, in which case it returns the same as `1:n()` (#303). * `"comment"` attribute is allowed (white listed) as well as names (#346). * hybrid versions of `min`, `max`, `mean`, `var`, `sd` and `sum` handle the `na.rm` argument (#168). This should yield substantial performance improvements for those functions. 
* Special case for call to `arrange()` on a grouped data frame with no arguments. (#369) ## Bug fixes * Code adapted to Rcpp > 0.11.1 * internal `DataDots` class protects against missing variables in verbs (#314), including the case where `...` is missing. (#338) * `all.equal.data.frame` from base is no longer bypassed. we now have `all.equal.tbl_df` and `all.equal.tbl_dt` methods (#332). * `arrange()` correctly handles NA in numeric vectors (#331) and 0 row data frames (#289). * `copy_to.src_mysql()` now works on windows (#323) * `*_join()` doesn't reorder column names (#324). * `rbind_all()` is stricter and only accepts list of data frames (#288) * `rbind_*` propagates time zone information for `POSIXct` columns (#298). * `rbind_*` is less strict about type promotion. The numeric `Collecter` allows collection of integer and logical vectors. The integer `Collecter` also collects logical values (#321). * internal `sum` correctly handles integer (under/over)flow (#308). * `summarise()` checks consistency of outputs (#300) and drops `names` attribute of output columns (#357). * join functions throw error instead of crashing when there are no common variables between the data frames, and also give a better error message when only one data frame has a by variable (#371). * `top_n()` returns `n` rows instead of `n - 1` (@leondutoit, #367). * SQL translation always evaluates subsetting operators (`$`, `[`, `[[`) locally. (#318). * `select()` now renames variables in remote sql tbls (#317) and implicitly adds grouping variables (#170). * internal `grouped_df_impl` function errors if there are no variables to group by (#398). * `n_distinct` did not treat NA correctly in the numeric case #384. * Some compiler warnings triggered by -Wall or -pedantic have been eliminated. * `group_by` only creates one group for NA (#401). * Hybrid evaluator did not evaluate expression in correct environment (#403). 
# dplyr 0.1.3 ## Bug fixes * `select()` actually renames columns in a data table (#284). * `rbind_all()` and `rbind_list()` now handle missing values in factors (#279). * SQL joins now work better if names duplicated in both x and y tables (#310). * Builds against Rcpp 0.11.1 * `select()` correctly works with the vars attribute (#309). * Internal code is stricter when deciding if a data frame is grouped (#308): this avoids a number of situations which previously caused problems. * More data frame joins work with missing values in keys (#306). # dplyr 0.1.2 ## New features * `select()` is substantially more powerful. You can use named arguments to rename existing variables, and new functions `starts_with()`, `ends_with()`, `contains()`, `matches()` and `num_range()` to select variables based on their names. It now also makes a shallow copy, substantially reducing its memory impact (#158, #172, #192, #232). * `summarize()` added as alias for `summarise()` for people from countries that don't spell things correctly ;) (#245) ## Bug fixes * `filter()` now fails when given anything other than a logical vector, and correctly handles missing values (#249). `filter.numeric()` proxies `stats::filter()` so you can continue to use the `filter()` function with numeric inputs (#264). * `summarise()` correctly uses newly created variables (#259). * `mutate()` correctly propagates attributes (#265) and `mutate.data.frame()` correctly mutates the same variable repeatedly (#243). * `lead()` and `lag()` preserve attributes, so they now work with dates, times and factors (#166). * `n()` never accepts arguments (#223). * `row_number()` gives correct results (#227). * `rbind_all()` silently ignores data frames with 0 rows or 0 columns (#274). * `group_by()` orders the result (#242). It also checks that columns are of supported types (#233, #276). 
* The hybrid evaluator did not handle some expressions correctly, for example in `if(n() > 5) 1 else 2` the subexpression `n()` was not substituted correctly. It also correctly processes `$` (#278). * `arrange()` checks that all columns are of supported types (#266). It also handles list columns (#282). * Working towards Solaris compatibility. * Benchmarking vignette temporarily disabled due to microbenchmark problems reported by BDR. # dplyr 0.1.1 ## Improvements * new `location()` and `changes()` functions which provide more information about how data frames are stored in memory so that you can see what gets copied. * renamed `explain_tbl()` to `explain()` (#182). * `tally()` gains `sort` argument to sort output so highest counts come first (#173). * `ungroup.grouped_df()`, `tbl_df()`, `as.data.frame.tbl_df()` now only make shallow copies of their inputs (#191). * The `benchmark-baseball` vignette now contains fairer (including grouping times) comparisons with `data.table`. (#222) ## Bug fixes * `filter()` (#221) and `summarise()` (#194) correctly propagate attributes. * `summarise()` throws an error when asked to summarise an unknown variable instead of crashing (#208). * `group_by()` handles factors with missing values (#183). * `filter()` handles scalar results (#217) and better handles scoping, e.g. `filter(., variable)` where `variable` is defined in the function that calls `filter`. It also handles `T` and `F` as aliases to `TRUE` and `FALSE` if there are no `T` or `F` variables in the data or in the scope. * `select.grouped_df` fails when the grouping variables are not included in the selected variables (#170) * `all.equal.data.frame()` handles a corner case where the data frame has `NULL` names (#217) * `mutate()` gives informative error message on unsupported types (#179) * dplyr source package no longer includes pandas benchmark, reducing download size from 2.8 MB to 0.5 MB. 
dplyr/MD50000644000176200001440000005301314200434032011706 0ustar liggesuserscdb5269b4482c9232f5d440b89141e79 *DESCRIPTION 3b44dfe91f4912ca3b742aee84e32745 *LICENSE 375f69a73b9ae97a242ae7c58e2d4fee *NAMESPACE 07814fa72f5a8c2ba480d02e37feacd1 *NEWS.md cce961007b26bc93962516231d2f3d1f *R/across.R 034e8f5e7329cca3bba055eaa493c75e *R/all-equal.r 805b87a9017de72f1a972ed912c80b03 *R/arrange.R f3144e17c32cf7ce5bfdd9ce1a8aef88 *R/bind.r 4512992cbeda84029979c92a3764efbc *R/case_when.R aa10abcb34012f8ed957cf2651c68ed0 *R/coalesce.R 4d63fb733e60580f5dc0c7246fe24e60 *R/colwise-arrange.R ecc8d2af0818739d7abe55726d41baf6 *R/colwise-distinct.R a161974c93039516ff17af095885a286 *R/colwise-filter.R fb2c8858078c764954dd97ec67a67c94 *R/colwise-funs.R b002417db71d63af119c637505d47a93 *R/colwise-group-by.R baf90097418998ff38db172ae8faa398 *R/colwise-mutate.R 667f3485690d97b8c51e93afcd6a0708 *R/colwise-select.R 6e81c210c9d69a41380c74ff00386258 *R/colwise.R 689d6a8d8b0cdec20ba7d803ca87a746 *R/compat-dbplyr.R 4e2534cf903b0aa7d817bb6d999304b9 *R/compat-future-group_by.R 51816ff76ce4ba84e0aad37cbd628f44 *R/compat-lazyeval.R 16164284d5edf6cf1f91760d794e206f *R/compat-name-repair.R 583ed028787fec291089e8ac9b519345 *R/compat-purrr.R 026594aae1e07b2c8eed857fe7839990 *R/compute-collect.r 66c31e3e7e8e3057812bc28a3c89a218 *R/conditions.R ebb99b889e6399129c5423bfb013e024 *R/context.R 610fd2723f30c96c436a716d92247297 *R/copy-to.r 6982156d5c98fa41a60de4a92ee54ebd *R/count-tally.R 5b876ab8c3207fd6b7d12c64085d3f74 *R/data-bands.R 810473d7b799dd6be95f1917ffe29478 *R/data-mask.R 2426abe00f322d0d5a1c4057626c2344 *R/data-starwars.R d40a7f03afb14ee1a1f4bd5bb0fe5bff *R/data-storms.R 15ed50b9e7d6f0db6881250facc997cf *R/dbplyr.R 4e170880485850ef167d3716a5ed8b47 *R/defunct.r 7582328a896764331de6553314253d58 *R/deprec-bench-compare.r 0ddf0f876b3e0587703809f4a7fbf5bd *R/deprec-combine.R 27eae51ade8755127a60aae50868c82e *R/deprec-dbi.R 1f2a6dd3877cb887618f9304e9eddc3b *R/deprec-do.r aed71f55da3b2af96c48abeb9e73863c 
*R/deprec-funs.R 1985ebe4a6e32d44887753707c86349b *R/deprec-lazyeval.R f4c0af84105e96af7478526629bef27e *R/deprec-location.R 71870acceef8cb58de3d5acbe05fcee7 *R/deprec-src-local.r 0a85aad0d2067a527c9faa8b6a611811 *R/deprec-tibble.R fe3ff3de29a611d88247d49e40a156e5 *R/deprec-tidyselect.R 4a41a291a9396a2e2692a05eb8a08982 *R/desc.r ae3cfcc5c722c67da2191f2bc9878e54 *R/distinct.R c81d32d0455381b12c8405d9d9edcf88 *R/doc-methods.R c1a345332ed754ed6514baec83cab11d *R/doc-params.R 929b3e9bb72f501ae3690573b399565f *R/dplyr.r d3859f4e2de5309abf85f62b42497826 *R/error.R d7e2741ed506693fd37e32e0c5a61925 *R/explain.r 3373d0376ede033d6808f7b9bce06213 *R/filter.R 4bab3448dac78bd5aae3fbba53db65f9 *R/funs.R 82fbc4ae5e509deb5eadf304e2ec37d6 *R/generics.R 3e48e1a0f3ce529df44478c8d63768cc *R/group-by.r 5938c69fb0de8e2a6efca5c4f9d4be4f *R/group_data.R 35ddfe74fa70d49414f3210575f8cef4 *R/group_map.R 6a1a3f773b050dd3a28a32ec085e0c68 *R/group_nest.R d82e2491b739b6f9571bafcea8e9035b *R/group_split.R d78c2c28598c28cb83f819ca27a29820 *R/group_trim.R b6526ce6112769ccd37b08292f1e2f65 *R/grouped-df.r e5b756cf70307df43ea9a14e83badbad *R/groups-with.R 8be6e6e5e5d0cba0895c8af5f2c62ac9 *R/helper-error-msg.R 9394449d40f81d87b483d33cf1e81309 *R/if_else.R 7cd4d92edf226d20342625bca8caec2f *R/join-cols.R 03aeb79e4304155b9a48587f88d8f745 *R/join-common-by.R b72a5a367541bc50d8f49c04783f8e62 *R/join-rows.R 919036539eed042ade49768a3c7fcd3a *R/join.r 0b912cee18eaa612021b36bc32414d71 *R/lead-lag.R 7c86e39c4d76aea40d87abfdd61bf517 *R/mutate.R 44c4a6617effafb1006998f91025700a *R/na_if.R 9f92869c84547da11884ad5523fc4697 *R/near.R b9b44328f1e9c75b2746aa7bc36a6d49 *R/nest_by.R a76c894fd97399ef0d794c0f568da555 *R/nth-value.R 97a2766be54d9e768e4be9a9c111594d *R/order-by.R 59bad28006b061ba9ea0e569e7e99420 *R/progress.R 8b9360dafe4fbb505ce6fd7e66018e7c *R/pull.R 10eb48c527720ba2283438e5da751634 *R/rank.R b61763e7a119c3d03cab49beb58e1bb1 *R/rbind.R 956bfcbe2423ce959f4f401b2ff97f22 *R/recode.R 
84f25da525b41c98063ef06874186c78 *R/reexport-tibble.r bf1d4ceacfe852b7c1c3f7c27018a423 *R/relocate.R c1bc72389d7ce918eff02b8005c21e82 *R/rename.R c3468db2aad9ec826bf0982424745278 *R/rows.R 8a170080aaeaad337b0d2b137b7e5847 *R/rowwise.r 94486736e51357cbcfde7c46b79018af *R/sample.R aaf14fe325404a99b1abb7f9ffec5f91 *R/select-helpers.R c83c2bba5239f10df37752ff2c0410c8 *R/select.R c93c91ade5dccb8e6ad528712e5580e2 *R/sets.r 7fcbdd411b6cf9e583e3f04f122f0c32 *R/slice.R c3b80d897d87e20f702dc2b1ed49f9b0 *R/src.r 77c8ec30f341182605264f405c61f5e1 *R/src_dbi.R 71586abd4e9a822d94cd86d3eb386f06 *R/summarise.R ca9e68b79c6990768de55a0ad3deae25 *R/tbl.r 483f245e38cefd9051aa5d12b4b21899 *R/top-n.R 482c17e9bda3631c27dcd7d0b5282b1d *R/ts.R 439135d606aa0988556d2534de69f124 *R/utils-format.r 0b7f51a5ddb93c2e7dbbdb0bf50b1b63 *R/utils-replace-with.R 05d88dc0e20740e47f32aa6c23815f41 *R/utils-tidy-eval.R 46f5418c0c1fb953e53eec2e11df2e7e *R/utils.r 6c63c2c466fb1417d0a332f87aef8ffa *R/vctrs.R ebcc0cf06d623f14d59aa8621ca5c3df *R/zzz.r c948407e521c015fbc61b2b5cdf88384 *README.md 18550e8dea071632c573454fe5b5c26c *build/dplyr.pdf f7bc7f6339f71dff80a1a57b189a3a01 *build/vignette.rds a79561c8013e7a7f3c23d509f4918bf8 *data/band_instruments.rda 3aa4b1478fc31219480e88c876c3aeed *data/band_instruments2.rda 4d44ad5e4198daccbd4227dca895750b *data/band_members.rda a1c7281e62ffbd808fda4e3496ca5387 *data/starwars.rda ae93456ff19247d81216167acc527c49 *data/storms.rda 13f3c2cfa4fbce48cc9375bb3d2f8861 *inst/doc/base.R fe0c50e26f14b9a7bb164b447b198f63 *inst/doc/base.Rmd 91c9dd885ad00e67543c9a8956d23bff *inst/doc/base.html c70fd7fbd7c109e42b2db77290f1d6d7 *inst/doc/colwise.R 3537606fbea2276e4a12e5ff550d9f2a *inst/doc/colwise.Rmd b1656c9a7783eb1600bac350fe41a69b *inst/doc/colwise.html 6c6c76cde452e1ac97aa6ef918e40cd2 *inst/doc/compatibility.R 1ec4360571817f9a76202dd8bfd16e58 *inst/doc/compatibility.Rmd 25c70cd4cc89139424e311216efa4adc *inst/doc/compatibility.html 3d543dc283c60689d6a8ec2467af2c69 *inst/doc/dplyr.R 
41b199c8315e8bec9d18778f6db24a89 *inst/doc/dplyr.Rmd 0d38b83db8a18c552c06cc137017b98d *inst/doc/dplyr.html 84dda277746e568a08df17ed52431668 *inst/doc/grouping.R 02c513eb8ed22a9adf294c18d3646b21 *inst/doc/grouping.Rmd 5325c4157aab2f90d7cc7963d7549890 *inst/doc/grouping.html 5c1eee29b98acca7801fab5711d7273e *inst/doc/programming.R 932de3025e0fd16e482df2247ff820b7 *inst/doc/programming.Rmd 8e060757b89caf7dda1bb85fcbfb804f *inst/doc/programming.html d3deeeefe0358ac9b743289068b4f529 *inst/doc/rowwise.R 6407982c474c9b9273d0963025772d92 *inst/doc/rowwise.Rmd 3641c014840d7d4c5b0c433308a4a34b *inst/doc/rowwise.html 7ae954caf22fa7e13ce2f3bd75d98145 *inst/doc/two-table.R fdb7682a8f09c1976c19f30eaabc8133 *inst/doc/two-table.Rmd 667bf268d9607e47650ac91172402a1a *inst/doc/two-table.html a62a733e53e4e014dee303ee037b1c32 *inst/doc/window-functions.R 976537d75de5855dbab47dc63d638545 *inst/doc/window-functions.Rmd 3c92399d7bc8fb9a392281f27569bee2 *inst/doc/window-functions.html c692267479959dda91a343d57bff3d23 *man/across.Rd 640fa7c9a5c2a5d89513ec3f0014a948 *man/add_rownames.Rd 06b454338ef22de3ab553d609c3cab58 *man/all_equal.Rd 0626402ad649d62c8be6c1c8b98c8152 *man/all_vars.Rd 6deb33bf484393641bdbb919719d551a *man/arrange.Rd f15cd2d316c50df02758c919197e5364 *man/arrange_all.Rd 137be2eff7b7ad639f186670f6d93a00 *man/auto_copy.Rd 1fe1b4696a46635cf0566543c167fcea *man/backend_dbplyr.Rd 95f8bf5158b02ad31c35c25101661326 *man/band_members.Rd 038cef2e1f04a0fa8362a689b3a4cc3f *man/bench_compare.Rd f1a7c622873355bfba4c641bcc668763 *man/between.Rd 9737b6cfd226cbc76e615e725c4881e4 *man/bind.Rd 3128dcc58be98c49e2ec8145394372bc *man/c_across.Rd a3e7c2e1dc53393256604c08cf1d5179 *man/case_when.Rd fd4543fb2f91b2d10c3f1c456c8928fe *man/check_dbplyr.Rd aa6f1fff2841437025b8bbf242f12442 *man/coalesce.Rd b5605cf1ee65b932b89dda836ab643c2 *man/combine.Rd dde6e8ebe2d80016fa0e555593df4736 *man/common_by.Rd a676edddce965df2c8ae41b3b2c4a821 *man/compute.Rd 7a5ceb32175e349d760596c191ec5368 *man/context.Rd 
cbe3e88cf9b11546a7e0f3134ddf477c *man/copy_to.Rd 1c871c8c5cd47ab13ffb3a67b36e9e7c *man/count.Rd 3aeecc5b0d5e319adc921604e3c5b364 *man/cumall.Rd aacf775fa037f50a7744cbbc38c885d6 *man/defunct.Rd f0c7978518fbd44832836a74288b6bca *man/desc.Rd 88537f8e714f9460fbce7cd0952dd552 *man/dim_desc.Rd d5070912c50d7bb1107f3e947aba098e *man/distinct.Rd 1b16d3fd4561e6f5eff0044ed56c6dff *man/distinct_all.Rd 49e416ff5b3ebcdfefbbbce076d7e432 *man/do.Rd 52c4c0c69c4bb0c3a672fd44379e06ef *man/dplyr-package.Rd 95d294bb191051e5d23f0a3792084d73 *man/dplyr_data_masking.Rd 2ab1222005f131c85aaf659ecb5dfefc *man/dplyr_extending.Rd 07293d669063b24fc0a91d34e65f4859 *man/dplyr_tidy_select.Rd eb3c2c72321f810ddc3ecdb66c8c27d3 *man/explain.Rd cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 46de21252239c5a23d400eae83ec6b2d *man/figures/lifecycle-retired.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg 85095a2a3fbf23db19a67ff3362abb48 *man/figures/logo.png 9fece08698d815e9c34d9aa576111925 *man/filter-joins.Rd 40bb0bfa9f3376620ad13a1bc4174746 *man/filter.Rd 1d569df863aa82ec65f83e7151edb205 *man/filter_all.Rd 537509963c5e955f0f4aec69ccc146ec *man/funs.Rd 11e9ef3d17c3391661a46d5adc84a839 *man/glimpse.Rd 449daebb0d8cad3474eed2131715ad98 *man/group_by.Rd a341eb3c12202dfe27d8fed12826d6ff *man/group_by_all.Rd 482b49eafff031e9325ac02d914c9dea *man/group_by_drop_default.Rd ebfbffb26c6e9aab88fb6d5419a0b77b *man/group_by_prepare.Rd 2063c4e6693f1df7a595a033dbd67061 
*man/group_cols.Rd b6a5e51145621dbb3b141ca033e42838 *man/group_data.Rd 43a2df18e6fb186415c4031b5190250e *man/group_map.Rd 6a7ec005c199987b882ab83864c3237e *man/group_nest.Rd 9612ef6d9b02609f83c41c7b1f9a8f80 *man/group_split.Rd a6b314b87472d88fbf5b180686c82c4e *man/group_trim.Rd 0034aaf2276f36e3e8a6d52938d87cc2 *man/grouped_df.Rd 948f5f439a9af91215bb7b20cc0fbd9b *man/ident.Rd ac526ee481cb8465b06783bacaa9d5ea *man/if_else.Rd 7268b08c918d8c21b3e49ed8d3319f9c *man/lead-lag.Rd 0459078b837b73947835b65d3c6b125a *man/location.Rd 2af74a929602385a9c12b984b2dcbbc3 *man/make_tbl.Rd 3ef92cd15109014488c2e3953c27d939 *man/mutate-joins.Rd f88eeaccf5c9ea05ebaae1c78215143e *man/mutate.Rd 12ee3071020e2b0844e8a9b287224715 *man/mutate_all.Rd 2a32bf3f0fd8bd440bcdeff5019050b0 *man/n_distinct.Rd 116a9113e646790782614f6995f0683c *man/na_if.Rd 0c99aebdb1662bb9e9fc97334212cc4d *man/near.Rd a8e6de4d58b18dc4251ec0761d838a9e *man/nest_by.Rd f6cd6243f37c140c10ef5586bd45597c *man/nest_join.Rd 6b712ed5d61b43f7e0d07d8f914b2bd8 *man/new_grouped_df.Rd d268e8876260e69f49a6c88114840b56 *man/nth.Rd 535b4dbee16f84764a5a889687d83159 *man/order_by.Rd 771a1b83656a2c73444c14f67231fc8b *man/progress_estimated.Rd ca30bc76cdcce294551cb69cbeb3c123 *man/pull.Rd 48e4d502bb7f505d2265f7c5b181b7ce *man/ranking.Rd 4977a27bf23c68be812987b18fa3f098 *man/recode.Rd 53a2e8168f6d050574b667f2df14b954 *man/reexports.Rd 8954d710e648f84c56e19c81d01ccb52 *man/relocate.Rd e04804150ad80a63d69e4c023878194e *man/rename.Rd aeda6121b91c598bc0f15410bf9f894c *man/rmd/overview.Rmd 4fa370dea568ce4c9aff95b54e6f8122 *man/rmd/setup.Rmd 44986760ac990861810e5ed7b0e651a0 *man/rows.Rd 2516b977792648fad60c77dde0019301 *man/rowwise.Rd f7b4ab90ebcaec811366c956b4d1401a *man/same_src.Rd 1ce41dfb3b562fa0c16bff596d0c121f *man/sample_n.Rd 662dae2439b2202ddce172f88a987729 *man/scoped.Rd e24cf195b0b0f0054c4fa4720f99bc71 *man/se-deprecated.Rd bfcf78e8808dfb2544dd9506ff84c0cf *man/select.Rd a70159a7e6d4bee3052cd5c09f5fc009 *man/select_all.Rd 
bac1278b0e85b9152961810623ee4c83 *man/select_vars.Rd f147adbdbf7768e4edb175d3696c1371 *man/setops.Rd f60aaea596341d299f0ec6699a55e500 *man/slice.Rd 58452b35baf6118d7567b5cc0ce5aa9d *man/sql.Rd bdd76a3de94d66b4778780368fb9a8c0 *man/src.Rd 40d73d3f5218395dd9d11fcc6b620369 *man/src_dbi.Rd aaac736de1afcecf9b39c259eb085a7c *man/src_local.Rd 5ab1f15087cb44691875b8fecfdd400f *man/src_tbls.Rd acd1bfbd14cbdfbf7b86647c4c6a8f0a *man/starwars.Rd e2a0bf066998e6862321097e71f2364c *man/storms.Rd 08756aa6aab223a3cf79303d1488622e *man/summarise.Rd 4af8423e860b36cd908e3a7b38bdb520 *man/summarise_all.Rd f39c50448dc8884049f5c2e0c3fe29c9 *man/summarise_each.Rd 5187b161ff224c05041721db82431503 *man/tbl.Rd 12a9390dc42e26bf24c697a7fef007b2 *man/tbl_df.Rd ff0579184a0570e6393a9487639a9bc8 *man/tbl_ptype.Rd 8f5ecb0ada84315a04fe57b85e683bda *man/tbl_vars.Rd 3ffcb874b6f316412b101910861174e7 *man/tidyeval-compat.Rd bd7658ac2bdd0fad530681f681e3859b *man/top_n.Rd baf461b7aacc027ebb7587acf30ffa8a *man/vars.Rd ed158c996778d3f8f0bd7b13d78cdfde *man/with_groups.Rd 0f716a00c40a985424f71afd4d758a80 *man/with_order.Rd 871250a726900a88ca84bdd0dc659f54 *src/chop.cpp 6287b412dc0ffa56806226fad0f881ff *src/dplyr.h 2446dd44f4ecc04f9a523aa763c7e235 *src/filter.cpp bc63f5d92697970049036928635f71ba *src/funs.cpp 84c7d9b44e5b6738094b1b8d83edc0de *src/group_by.cpp 67ccfa80646c08b438ebf67a38667549 *src/group_data.cpp 3e518030122e01c5ea91fd8d9cc6e04d *src/imports.cpp d78af3e8a96a7664959285c3048bbcca *src/init.cpp ff088d78d7e9567e87a17e9c2972cf34 *src/mask.cpp bf40c8d3bc084815ca9d69021bad1e70 *src/mutate.cpp a8f65b4abe8a13ed2154c3f0bc58261c *src/slice.cpp 6ada5a33d476ae9869703272def65f23 *src/summarise.cpp 60c24a9c9c03f728e0d81d86fa6ca4d0 *tests/testthat.R 441160365337e6f3f61702ce7cc39598 *tests/testthat/_snaps/across.md 37f636edccdafd95b4083f9c4ae2650b *tests/testthat/_snaps/all-equal.md 5c49d135cd865ed068ae56640c84437e *tests/testthat/_snaps/arrange.md 0c9c3a51b1ddc4c93c8f56a4138582d6 *tests/testthat/_snaps/bind.md 
759b73ad14006f1b87182bb62bcff35e *tests/testthat/_snaps/case-when.md abae98923f4bd9e5277fb9a46ff55b85 *tests/testthat/_snaps/coalesce.md a3be2f2082973753d912e3d4417e4360 *tests/testthat/_snaps/colwise-filter.md 2cb996a93e33c4a996a3228a9c517222 *tests/testthat/_snaps/colwise-mutate.md 1874d9aa2bd2dcd9091c9e92c78da0be *tests/testthat/_snaps/colwise-select.md 344c594226a02e072933b23638c3f9b0 *tests/testthat/_snaps/colwise.md 322d52d515096cffc6dd847d898db0ac *tests/testthat/_snaps/conditions.md 7f02b67bb3e0b17ae1abd5cfef6fb227 *tests/testthat/_snaps/context.md 76d60359e5021037a7f8c00902ad0f49 *tests/testthat/_snaps/count-tally.md c4ee06dbb4c3c6eab85d44d30d40ff7c *tests/testthat/_snaps/deprec-combine.md 7ecc1a26663c4757024940bbe8db475f *tests/testthat/_snaps/deprec-dbi.md a09c55d7849612c738339b939aa20369 *tests/testthat/_snaps/deprec-do.md bd0b334777f9f7a5974417cb33cfe9f9 *tests/testthat/_snaps/deprec-funs.md d5cd7d205f2cef83c7fd43d9435ad021 *tests/testthat/_snaps/deprec-src-local.md 908469ac80758db86a9e804c5722070f *tests/testthat/_snaps/distinct.md 10a608bbe2cc9d13eb6f2eff66d1c7bb *tests/testthat/_snaps/filter.md 5a5959f97580d5dca112df5a3380b868 *tests/testthat/_snaps/group-by.md 82d81578ca3344e88a0250f1f2912dbe *tests/testthat/_snaps/group_map.md 12da86c055ea0f307a28ddc53c112fc6 *tests/testthat/_snaps/grouped-df.md 1d0324f649562b7c9efcc5885dea1036 *tests/testthat/_snaps/if-else.md 219b9f0307d2ea0b31ccc2921f20d0f3 *tests/testthat/_snaps/join-cols.md 76824b64f86d734f29aa1099a959f136 *tests/testthat/_snaps/join-rows.md 1d02ccbb3f0ecd57c92de473f6e78060 *tests/testthat/_snaps/lead-lag.md 4f39a3de3223b4a790024111da046fb9 *tests/testthat/_snaps/mutate.md 9ed72781d432c444f5b611e1ea5241cd *tests/testthat/_snaps/na-if.md 889e2b5dab63bb17f9054b516577eb70 *tests/testthat/_snaps/nth-value.md 70325c9054ecdf62d21a2d769350a1a0 *tests/testthat/_snaps/order-by.md 42c0f06f90606b881ca1059575e41d87 *tests/testthat/_snaps/recode.md ef8d8caf5a4c0acabffb1236ba8107af 
*tests/testthat/_snaps/rows.md 0a1c93a8aa925beaa18f076afb911e70 *tests/testthat/_snaps/rowwise.md 172f5acdfdc072ea1cf1c1bd03cfd722 *tests/testthat/_snaps/sample.md a36058f03ea3402ca4f5602c3fe21b0a *tests/testthat/_snaps/select.md 4aa5cf6fa5037ade6f73ebf5c5038261 *tests/testthat/_snaps/sets.md 90f7fb82103c4d8d193d437f61f037cf *tests/testthat/_snaps/slice.md cf64f3f6fb5bb4d976b2e55e64165713 *tests/testthat/_snaps/summarise.md 72b2f086d5f8154b91f7817d7ac2c51d *tests/testthat/_snaps/top-n.md 07e61ca097e0511318a0a59f689fa458 *tests/testthat/_snaps/transmute.md a81342d23db8955e9ee56bda3723ff7a *tests/testthat/_snaps/window.md fd997824c68b7d8cd4ea75c79619de18 *tests/testthat/helper-dplyr.R 25fa968b367f6994e9c4db2caffb9ebb *tests/testthat/helper-encoding.R 8b590c78c107292f9159e0cecae923c7 *tests/testthat/helper-s3.R 5c40bc3557d7e68c3f963b4f428f5c20 *tests/testthat/helper-torture.R db565b0d20e096ffb506dd27c7d20f55 *tests/testthat/test-DBI.R 185396ed0bed6e31efe3bef25eb9776f *tests/testthat/test-across.R 6ebc80288b964fba66aa302558b684c5 *tests/testthat/test-all-equal.r 92f1bec73945d715b61e818ba3325309 *tests/testthat/test-arrange.r b87ceb8e4c33cfccfa26d635bee14e09 *tests/testthat/test-bind.R 9353dc4abbad63800adc32a9e1fbfa31 *tests/testthat/test-case-when.R fac58347fb254baa34ca60d289e4be05 *tests/testthat/test-coalesce.R 838d7eb1336243f8ec91dc10c63412c2 *tests/testthat/test-colwise-arrange.R 60402bc767fae94d4af51b836e2245fa *tests/testthat/test-colwise-distinct.R c70d3ac7e849a3855540b3d7cb0126a7 *tests/testthat/test-colwise-filter.R 989caa75d2c5938cdb53e2cbae96e8cb *tests/testthat/test-colwise-funs.R 0db080a8efd026edb15b03c658ac5eb6 *tests/testthat/test-colwise-group-by.R ec9b12a20898f3a931ee6e4fa180535d *tests/testthat/test-colwise-mutate.R 095feb2735687c93e92460ed244eaee9 *tests/testthat/test-colwise-select.R 913282791a28f971ce50ef73bcd18b52 *tests/testthat/test-colwise.R bf40c9189656f075c0cbee8dd89c7805 *tests/testthat/test-conditions.R 7d810566c8acfbab5ff2742b3575d859 
*tests/testthat/test-context.R 99debd8c76f12e14ab212c395b81e72d *tests/testthat/test-count-tally.r 5f35f675bd363584996be52ecaa420d9 *tests/testthat/test-deprec-combine.R 0c920c856e78563e8d68b5dea5cd8cd2 *tests/testthat/test-deprec-dbi.R 935112ec657e2ed3fe220025ccb6ff70 *tests/testthat/test-deprec-do.R da8653b018211850ecbfbe2441a2264f *tests/testthat/test-deprec-funs.R a6812f7b2d2daa5b2085f81ef43bf141 *tests/testthat/test-deprec-lazyeval.R de87f0d053dc3defe5217638a3d4448c *tests/testthat/test-deprec-src-local.r f82f247adb2819171a667ace2d2990bb *tests/testthat/test-deprec-tibble.R feb3c0043432caab00cfabb098011aba *tests/testthat/test-distinct.R 5b5ce89dd22ad866c3ff848e66b44ae9 *tests/testthat/test-empty-groups.R 7d0d23ef141921caaeed7958a6bde223 *tests/testthat/test-filter.r 0b97c2a9d5a3442a6465a10f9d3989d2 *tests/testthat/test-funs-predicates.R f8b82c8beb67813f81be860e243c662b *tests/testthat/test-funs.R c1c8906907dc4ff62cbc984923e9ad66 *tests/testthat/test-generics.R 7c379734c180fb2aa3140558d5b85076 *tests/testthat/test-group-by.r a1cfa724a0723f1205bac2a08917a5b0 *tests/testthat/test-group_data.R a6b33b7229f1049f1c62fecf29214e86 *tests/testthat/test-group_map.R 9608f77c3ef96e279cbdb0fae9b576df *tests/testthat/test-group_nest.R ceea0873e618109dd9c5f52c6865b10a *tests/testthat/test-group_split.R 2d736d0b17b973cd95e969a8bb9749b2 *tests/testthat/test-group_trim.R c7bef6b3bea8c473f128ce403b33c915 *tests/testthat/test-grouped-df.r 33822455c18b2d6e1bf09776dfb56b6e *tests/testthat/test-groups-with.R 2f30c09a1b9913961af7d7f9e290d4fe *tests/testthat/test-if-else.R 72bad17e9efac4ea6e3bfc0e0e012ea0 *tests/testthat/test-join-cols.R 32f8b00d97dda8c165af3294b3297dd0 *tests/testthat/test-join-rows.R 9df877760325d043f05b8e37c9174208 *tests/testthat/test-join.r ea0c9b5221740b9626c824f9020a8572 *tests/testthat/test-lead-lag.R 0ded5da6c2019005cb6fade90f05fd02 *tests/testthat/test-mutate-windowed.R 0757daa524e25f962809305af1cf4d6e *tests/testthat/test-mutate.r 
4c8403c6fa09c85c1f8dce01b7c98143 *tests/testthat/test-n_distinct.R b5a603ffb9de7a33216f2a3f0a8c59ca *tests/testthat/test-na-if.R 5ea93280062e3dc5258f73736989706d *tests/testthat/test-near.R 40670af2057737b27c986c0a2e9ff058 *tests/testthat/test-nest_by.R e0989975df8054e3e48172caa9907e2a *tests/testthat/test-nth-value.R cc23847fcd406b63908309a2e429ba93 *tests/testthat/test-order-by.R 65c13e813a1ba14107e7b5e72e09ee0a *tests/testthat/test-pull.R 180560bbaf2e3d7ce1cb13416f11d146 *tests/testthat/test-rank.R 77a2fcdd6cb7039ede7f4647148189c5 *tests/testthat/test-rbind.R ff23d3fa4597eba920e8b76aba0de066 *tests/testthat/test-recode.R cd0073ae29d527621302b0f2761d3cb1 *tests/testthat/test-relocate.R 9a868a250fbb8ca79eead7c825d3f7ce *tests/testthat/test-rename.R 1b2b6f47b7645320e4c46f138683bbf6 *tests/testthat/test-rows.R 0a93e54fc977b848c8eadc430a87e990 *tests/testthat/test-rowwise.r d87af6d52df718c0fca203d6909295ef *tests/testthat/test-sample.R 1af7ca923551e1a810bd110200f7881d *tests/testthat/test-select-helpers.R 4e83676b725488d0852be0f61832cd78 *tests/testthat/test-select.r 3964c2e81db84790c55b894858e598b1 *tests/testthat/test-sets.R 36ba1855be4d8aa501b2051d3d8b2e63 *tests/testthat/test-slice.r 8ab5b8040f7f579a628e3eb5a70f9c3f *tests/testthat/test-summarise.r f97a14a5701b40eba1580202c66cad3b *tests/testthat/test-tbl.R faf2a4bdd26756be03ef12b98d55e45a *tests/testthat/test-top-n.R 83884b34f732168f5334d6634bf64cc5 *tests/testthat/test-transmute.R a530e9db69bcdb8dbd954187316da28b *tests/testthat/test-union-all.R ca5f2571e4332c691e29f14ed0b32496 *tests/testthat/test-utils.R bdd3dee6dcfbf991c1bcc79d712594e2 *tests/testthat/test-window.R 86eea74dcbab50d52c262a4cb256a352 *tests/testthat/utf-8.txt fe0c50e26f14b9a7bb164b447b198f63 *vignettes/base.Rmd 3537606fbea2276e4a12e5ff550d9f2a *vignettes/colwise.Rmd 1ec4360571817f9a76202dd8bfd16e58 *vignettes/compatibility.Rmd 41b199c8315e8bec9d18778f6db24a89 *vignettes/dplyr.Rmd 02c513eb8ed22a9adf294c18d3646b21 *vignettes/grouping.Rmd 
932de3025e0fd16e482df2247ff820b7 *vignettes/programming.Rmd 6407982c474c9b9273d0963025772d92 *vignettes/rowwise.Rmd fdb7682a8f09c1976c19f30eaabc8133 *vignettes/two-table.Rmd 976537d75de5855dbab47dc63d638545 *vignettes/window-functions.Rmd dplyr/inst/0000755000176200001440000000000014200154115012352 5ustar liggesusersdplyr/inst/doc/0000755000176200001440000000000014200154115013117 5ustar liggesusersdplyr/inst/doc/colwise.R0000644000176200001440000001277214200154102014714 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ## ---- eval = FALSE------------------------------------------------------------ # df %>% # group_by(g1, g2) %>% # summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d)) ## ---- eval = FALSE------------------------------------------------------------ # df %>% # group_by(g1, g2) %>% # summarise(across(a:d, mean)) ## ----setup-------------------------------------------------------------------- library(dplyr, warn.conflicts = FALSE) ## ----------------------------------------------------------------------------- starwars %>% summarise(across(where(is.character), n_distinct)) starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) ## ----------------------------------------------------------------------------- df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) ## ----------------------------------------------------------------------------- min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) starwars %>% summarise(across(c(height, mass, 
birth_year), min_max)) ## ----------------------------------------------------------------------------- starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}")) ## ----------------------------------------------------------------------------- starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ## ----------------------------------------------------------------------------- starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ) ## ----------------------------------------------------------------------------- starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% relocate(starts_with("min")) ## ----------------------------------------------------------------------------- df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) ## ----------------------------------------------------------------------------- df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) ## ----------------------------------------------------------------------------- df %>% summarise(across(where(is.numeric), sd), n = n()) ## ----------------------------------------------------------------------------- df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) ## ----------------------------------------------------------------------------- df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) ## ----------------------------------------------------------------------------- rescale01 <- function(x) { rng <- range(x, na.rm = 
TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) ## ----------------------------------------------------------------------------- starwars %>% distinct(across(contains("color"))) ## ----------------------------------------------------------------------------- starwars %>% count(across(contains("color")), sort = TRUE) ## ----------------------------------------------------------------------------- starwars %>% filter(if_any(everything(), ~ !is.na(.x))) ## ----------------------------------------------------------------------------- starwars %>% filter(if_all(everything(), ~ !is.na(.x))) ## ----------------------------------------------------------------------------- starwars %>% filter(across(everything(), ~ !is.na(.x))) ## ---- eval = FALSE------------------------------------------------------------ # df %>% # group_by(g1, g2) %>% # summarise( # across(where(is.numeric), mean), # across(where(is.factor), nlevels), # n = n(), # ) ## ---- results = FALSE--------------------------------------------------------- df %>% mutate_if(is.numeric, mean, na.rm = TRUE) # -> df %>% mutate(across(where(is.numeric), mean, na.rm = TRUE)) df %>% mutate_at(vars(c(x, starts_with("y"))), mean) # -> df %>% mutate(across(c(x, starts_with("y")), mean, na.rm = TRUE)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) ## ----------------------------------------------------------------------------- df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1)) # Find all rows where EVERY numeric variable is greater than zero df %>% filter(if_all(where(is.numeric), ~ .x > 0)) # Find all rows where ANY numeric variable is greater than zero df %>% filter(if_any(where(is.numeric), ~ .x > 0)) ## ----------------------------------------------------------------------------- df <- tibble(x = 2, y = 4, z = 8) df %>% mutate_all(~ .x / y) df %>% mutate(across(everything(), ~ .x / y)) 
dplyr/inst/doc/rowwise.Rmd0000644000176200001440000003312314121112104015256 0ustar liggesusers--- title: "Row-wise operations" description: > In R, it's usually easier to do something for each column than for each row. In this vignette you will learn how to use the `rowwise()` function to perform operations by row. Along the way, you'll learn about list-columns, and see how you might perform simulations and modelling within dplyr verbs. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{rowwise} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` dplyr, and R in general, are particularly well suited to performing operations over columns, and performing operations over rows is much harder. In this vignette, you'll learn dplyr's approach centred around the row-wise data frame created by `rowwise()`. There are three common use cases that we discuss in this vignette: * Row-wise aggregates (e.g. compute the mean of x, y, z). * Calling a function multiple times with varying arguments. * Working with list-columns. These types of problems are often easily solved with a for loop, but it's nice to have a solution that fits naturally into a pipeline. > Of course, someone has to write loops. It doesn't have to be you. > --- Jenny Bryan ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ```{r include = FALSE} nest_by <- function(df, ...) { df %>% group_by(...) %>% summarise(data = list(across())) %>% rowwise(...) } # mtcars %>% nest_by(cyl) ``` ## Creating Row-wise operations require a special type of grouping where each group consists of a single row. You create this with `rowwise()`: ```{r} df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() ``` Like `group_by()`, `rowwise()` doesn't really do anything itself; it just changes how the other verbs work. 
For example, compare the results of `mutate()` in the following code: ```{r} df %>% mutate(m = mean(c(x, y, z))) df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) ``` If you use `mutate()` with a regular data frame, it computes the mean of `x`, `y`, and `z` across all rows. If you apply it to a row-wise data frame, it computes the mean for each row. You can optionally supply "identifier" variables in your call to `rowwise()`. These variables are preserved when you call `summarise()`, so they behave somewhat similarly to the grouping variables passed to `group_by()`: ```{r} df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) ``` `rowwise()` is just a special form of grouping, so if you want to remove it from a data frame, just call `ungroup()`. ## Per row summary statistics `dplyr::summarise()` makes it really easy to summarise values across rows within one column. When combined with `rowwise()` it also makes it easy to summarise values across columns within one row. To see how, we'll start by making a little dataset: ```{r} df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df ``` Let's say we want compute the sum of `w`, `x`, `y`, and `z` for each row. We start by making a row-wise data frame: ```{r} rf <- df %>% rowwise(id) ``` We can then use `mutate()` to add a new column to each row, or `summarise()` to return just that one summary: ```{r} rf %>% mutate(total = sum(c(w, x, y, z))) rf %>% summarise(total = sum(c(w, x, y, z))) ``` Of course, if you have a lot of variables, it's going to be tedious to type in every variable name. 
Instead, you can use `c_across()` which uses tidy selection syntax so you can to succinctly select many variables: ```{r} rf %>% mutate(total = sum(c_across(w:z))) rf %>% mutate(total = sum(c_across(where(is.numeric)))) ``` You could combine this with column-wise operations (see `vignette("colwise")` for more details) to compute the proportion of the total for each column: ```{r} rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . / total)) ``` ### Row-wise summary functions The `rowwise()` approach will work for any summary function. But if you need greater speed, it's worth looking for a built-in row-wise variant of your summary function. These are more efficient because they operate on the data frame as whole; they don't split it into rows, compute the summary, and then join the results back together again. ```{r} df %>% mutate(total = rowSums(across(where(is.numeric)))) df %>% mutate(mean = rowMeans(across(where(is.numeric)))) ``` **NB**: I use `df` (not `rf`) and `across()` (not `c_across()`) here because `rowMeans()` and `rowSums()` take a multi-row data frame as input. ```{r, eval = FALSE, include = FALSE} bench::mark( df %>% mutate(m = rowSums(across(x:z))), df %>% mutate(m = apply(across(x:z), 1, sum)), df %>% rowwise() %>% mutate(m = sum(pick(x:z))), check = FALSE ) ``` ## List-columns `rowwise()` operations are a natural pairing when you have list-columns. They allow you to avoid explicit loops and/or functions from the `apply()` or `purrr::map()` families. ### Motivation Imagine you have this data frame, and you want to count the lengths of each element: ```{r} df <- tibble( x = list(1, 2:3, 4:6) ) ``` You might try calling `length()`: ```{r} df %>% mutate(l = length(x)) ``` But that returns the length of the column, not the length of the individual values. 
If you're an R documentation aficionado, you might know there's already a base R function just for this purpose: ```{r} df %>% mutate(l = lengths(x)) ``` Or if you're an experienced R programmer, you might know how to apply a function to each element of a list using `sapply()`, `vapply()`, or one of the purrr `map()` functions: ```{r} df %>% mutate(l = sapply(x, length)) df %>% mutate(l = purrr::map_int(x, length)) ``` But wouldn't it be nice if you could just write `length(x)` and dplyr would figure out that you wanted to compute the length of the element inside of `x`? Since you're here, you might already be guessing at the answer: this is just another application of the row-wise pattern. ```{r} df %>% rowwise() %>% mutate(l = length(x)) ``` ### Subsetting Before we continue on, I wanted to briefly mention the magic that makes this work. This isn't something you'll generally need to think about (it'll just work), but it's useful to know about when something goes wrong. There's an important difference between a grouped data frame where each group happens to have one row, and a row-wise data frame where every group always has one row. Take these two data frames: ```{r} df <- tibble(g = 1:2, y = list(1:3, "a")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) ``` If we compute some properties of `y`, you'll notice the results look different: ```{r} gf %>% mutate(type = typeof(y), length = length(y)) rf %>% mutate(type = typeof(y), length = length(y)) ``` They key difference is that when `mutate()` slices up the columns to pass to `length(y)` the grouped mutate uses `[` and the row-wise mutate uses `[[`. The following code gives a flavour of the differences if you used a for loop: ```{r} # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 ``` Note that this magic only applies when you're referring to existing columns, not when you're creating new rows. 
This is potentially confusing, but we're fairly confident it's the least worst solution, particularly given the hint in the error message. ```{r, error = TRUE} gf %>% mutate(y2 = y) rf %>% mutate(y2 = y) rf %>% mutate(y2 = list(y)) ``` ### Modelling `rowwise()` data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. We'll start by creating a nested data frame: ```{r} by_cyl <- mtcars %>% nest_by(cyl) by_cyl ``` This is a little different to the usual `group_by()` output: we have visibly changed the structure of the data. Now we have three rows (one for each group), and we have a list-col, `data`, that stores the data for that group. Also note that the output is `rowwise()`; this is important because it's going to make working with that list of data frames much easier. Once we have one data frame per row, it's straightforward to make one model per row: ```{r} mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods ``` And supplement that with one set of predictions per row: ```{r} mods <- mods %>% mutate(pred = list(predict(mod, data))) mods ``` You could then summarise the model in a variety of ways: ```{r} mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) mods %>% summarise(rsq = summary(mod)$r.squared) mods %>% summarise(broom::glance(mod)) ``` Or easily access the parameters of each model: ```{r} mods %>% summarise(broom::tidy(mod)) ``` ## Repeated function calls `rowwise()` doesn't just work with functions that return a length-1 vector (aka summary functions); it can work with any function if the result is a list. This means that `rowwise()` and `mutate()` provide an elegant way to call a function many times with varying arguments, storing the outputs alongside the inputs. ### Simulations I think this is a particularly elegant way to perform simulations, because it lets you store simulated values along with the parameters that generated them. 
For example, imagine you have the following data frame that describes the properties of 3 samples from the uniform distribution: ```{r} df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) ``` You can supply these parameters to `runif()` by using `rowwise()` and `mutate()`: ```{r} df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) ``` Note the use of `list()` here - `runif()` returns multiple values and a `mutate()` expression has to return something of length 1. `list()` means that we'll get a list column where each row is a list containing multiple values. If you forget to use `list()`, dplyr will give you a hint: ```{r, error = TRUE} df %>% rowwise() %>% mutate(data = runif(n, min, max)) ``` ### Multiple combinations What if you want to call a function for every combination of inputs? You can use `expand.grid()` (or `tidyr::expand_grid()`) to generate the data frame and then repeat the same pattern as above: ```{r} df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) ``` ### Varying functions In more complicated problems, you might also want to vary the function being called. This tends to be a bit more of an awkward fit with this approach because the columns in the input tibble will be less regular. 
But it's still possible, and it's a natural place to use `do.call()`: ```{r} df <- tribble( ~rng, ~params, "runif", list(n = 10), "rnorm", list(n = 20), "rpois", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) ``` ```{r, include = FALSE, eval = FALSE} df <- rowwise(tribble( ~rng, ~params, "runif", list(min = -1, max = 1), "rnorm", list(), "rpois", list(lambda = 5), )) # Has to happen in separate function to avoid eager unquoting f <- function(rng, params) purrr::exec(rng, n = 10, !!!params) df %>% mutate(data = list(f(rng, params))) ``` ## Previously ### `rowwise()` `rowwise()` was also questioning for quite some time, partly because I didn't appreciate how many people needed the native ability to compute summaries across multiple variables for each row. As an alternative, we recommended performing row-wise operations with the purrr `map()` functions. However, this was challenging because you needed to pick a map function based on the number of arguments that were varying and the type of result, which required quite some knowledge of purrr functions. I was also resistant to `rowwise()` because I felt like automatically switching between `[` to `[[` was too magical in the same way that automatically `list()`-ing results made `do()` too magical. I've now persuaded myself that the row-wise magic is good magic partly because most people find the distinction between `[` and `[[` mystifying and `rowwise()` means that you don't need to think about it. Since `rowwise()` clearly is useful it is not longer questioning, and we expect it to be around for the long term. ### `do()` We've questioned the need for `do()` for quite some time, because it never felt very similar to the other dplyr verbs. It had two main modes of operation: * Without argument names: you could call functions that input and output data frames using `.` to refer to the "current" group. 
For example, the following code gets the first row of each group: ```{r} mtcars %>% group_by(cyl) %>% do(head(., 1)) ``` This has been superseded `cur_data()` plus the more permissive `summarise()` which can now create multiple columns and multiple rows. ```{r} mtcars %>% group_by(cyl) %>% summarise(head(cur_data(), 1)) ``` * With arguments: it worked like `mutate()` but automatically wrapped every element in a list: ```{r} mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) ``` I now believe that behaviour is both too magical and not very useful, and it can be replaced by `summarise()` and `cur_data()` . ```{r} mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(cur_data())) ``` If needed (unlike here), you can wrap the results in a list yourself. The addition of `cur_data()`/`across()` and the increased scope of `summarise()` means that `do()` is no longer needed, so it is now superseded. dplyr/inst/doc/window-functions.Rmd0000644000176200001440000002233614121112104017100 0ustar liggesusers--- title: "Window functions" description: > Window functions are a useful family of functions that work with vectors (returning an output the same size as the input), and combine naturally with `mutate()` and `filter()`. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Window functions} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) if (!rlang::is_installed("Lahman")) { knitr::opts_chunk$set(eval = FALSE) } ``` A __window function__ is a variation on an aggregation function. Where an aggregation function, like `sum()` and `mean()`, takes n inputs and return a single value, a window function returns n values. The output of a window function depends on all its input values, so window functions don't include functions that work element-wise, like `+` or `round()`. 
Window functions include variations on aggregate functions, like `cumsum()` and `cummean()`, functions for ranking and ordering, like `rank()`, and functions for taking offsets, like `lead()` and `lag()`. In this vignette, we'll use a small sample of the Lahman batting dataset, including the players that have won an award. ```{r} library(Lahman) batting <- Lahman::Batting %>% as_tibble() %>% select(playerID, yearID, teamID, G, AB:H) %>% arrange(playerID, yearID, teamID) %>% semi_join(Lahman::AwardsPlayers, by = "playerID") players <- batting %>% group_by(playerID) ``` Window functions are used in conjunction with `mutate()` and `filter()` to solve a wide range of problems. Here's a selection: ```{r, eval = FALSE} # For each player, find the two years with most hits filter(players, min_rank(desc(H)) <= 2 & H > 0) # Within each player, rank each year by the number of games played mutate(players, G_rank = min_rank(G)) # For each player, find every year that was better than the previous year filter(players, G > lag(G)) # For each player, compute avg change in games played per year mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID))) # For each player, find all years where they played more games than they did on average filter(players, G > mean(G)) # For each, player compute a z score based on number of games played mutate(players, G_z = (G - mean(G)) / sd(G)) ``` Before reading this vignette, you should be familiar with `mutate()` and `filter()`. ## Types of window functions There are five main families of window functions. Two families are unrelated to aggregation functions: * Ranking and ordering functions: `row_number()`, `min_rank()`, `dense_rank()`, `cume_dist()`, `percent_rank()`, and `ntile()`. These functions all take a vector to order by, and return various types of ranks. * Offsets `lead()` and `lag()` allow you to access the previous and next values in a vector, making it easy to compute differences and trends. 
The other three families are variations on familiar aggregate functions: * Cumulative aggregates: `cumsum()`, `cummin()`, `cummax()` (from base R), and `cumall()`, `cumany()`, and `cummean()` (from dplyr). * Rolling aggregates operate in a fixed width window. You won't find them in base R or in dplyr, but there are many implementations in other packages, such as [RcppRoll](https://cran.r-project.org/package=RcppRoll). * Recycled aggregates, where an aggregate is repeated to match the length of the input. These are not needed in R because vector recycling automatically recycles aggregates where needed. They are important in SQL, because the presence of an aggregation function usually tells the database to return only one row per group. Each family is described in more detail below, focussing on the general goals and how to use them with dplyr. For more details, refer to the individual function documentation. ## Ranking functions The ranking functions are variations on a theme, differing in how they handle ties: ```{r} x <- c(1, 1, 2, 2, 2) row_number(x) min_rank(x) dense_rank(x) ``` If you're familiar with R, you may recognise that `row_number()` and `min_rank()` can be computed with the base `rank()` function and various values of the `ties.method` argument. These functions are provided to save a little typing, and to make it easier to convert between R and SQL. Two other ranking functions return numbers between 0 and 1. `percent_rank()` gives the percentage of the rank; `cume_dist()` gives the proportion of values less than or equal to the current value. ```{r} cume_dist(x) percent_rank(x) ``` These are useful if you want to select (for example) the top 10% of records within each group. For example: ```{r} filter(players, cume_dist(desc(G)) < 0.1) ``` Finally, `ntile()` divides the data up into `n` evenly sized buckets. It's a coarse ranking, and it can be used in with `mutate()` to divide the data into buckets for further summary. 
For example, we could use `ntile()` to divide the players within a team into four ranked groups, and calculate the average number of games within each group. ```{r} by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) ``` All ranking functions rank from lowest to highest so that small input values get small ranks. Use `desc()` to rank from highest to lowest. ## Lead and lag `lead()` and `lag()` produce offset versions of a input vector that is either ahead of or behind the original vector. ```{r} x <- 1:5 lead(x) lag(x) ``` You can use them to: * Compute differences or percent changes. ```{r, results = "hide"} # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) ``` Using `lag()` is more convenient than `diff()` because for `n` inputs `diff()` returns `n - 1` outputs. * Find out when a value changes. ```{r, results = "hide"} # Find when a player changed teams filter(players, teamID != lag(teamID)) ``` `lead()` and `lag()` have an optional argument `order_by`. If set, instead of using the row order to determine which value comes before another, they will use another variable. This is important if you have not already sorted the data, or you want to sort one way and lag another. Here's a simple example of what happens if you don't specify `order_by` when you need it: ```{r} df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) ``` ## Cumulative aggregates Base R provides cumulative sum (`cumsum()`), cumulative min (`cummin()`), and cumulative max (`cummax()`). (It also provides `cumprod()` but that is rarely useful). 
Other common accumulating functions are `cumany()` and `cumall()`, cumulative versions of `||` and `&&`, and `cummean()`, a cumulative mean. These are not included in base R, but efficient versions are provided by `dplyr`. `cumany()` and `cumall()` are useful for selecting all rows up to, or all rows after, a condition is true for the first (or last) time. For example, we can use `cumany()` to find all records for a player after they played a year with 150 games: ```{r, eval = FALSE} filter(players, cumany(G > 150)) ``` Like lead and lag, you may want to control the order in which the accumulation occurs. None of the built in functions have an `order_by` argument so `dplyr` provides a helper: `order_by()`. You give it the variable you want to order by, and then the call to the window function: ```{r} x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) ``` This function uses a bit of non-standard evaluation, so I wouldn't recommend using it inside another function; use the simpler but less concise `with_order()` instead. ## Recycled aggregates R's vector recycling makes it easy to select values that are higher or lower than a summary. I call this a recycled aggregate because the value of the aggregate is recycled to be the same length as the original vector. Recycled aggregates are useful if you want to find all records greater than the mean or less than the median: ```{r, eval = FALSE} filter(players, G > mean(G)) filter(players, G < median(G)) ``` While most SQL databases don't have an equivalent of `median()` or `quantile()`, when filtering you can achieve the same effect with `ntile()`. For example, `x > median(x)` is equivalent to `ntile(x, 2) == 2`; `x > quantile(x, 75)` is equivalent to `ntile(x, 100) > 75` or `ntile(x, 4) > 3`. 
```{r, eval = FALSE} filter(players, ntile(G, 2) == 2) ``` You can also use this idea to select the records with the highest (`x == max(x)`) or lowest value (`x == min(x)`) for a field, but the ranking functions give you more control over ties, and allow you to select any number of records. Recycled aggregates are also useful in conjunction with `mutate()`. For example, with the batting data, we could compute the "career year", the number of years a player has played since they entered the league: ```{r} mutate(players, career_year = yearID - min(yearID) + 1) ``` Or, as in the introductory example, we could compute a z-score: ```{r} mutate(players, G_z = (G - mean(G)) / sd(G)) ``` dplyr/inst/doc/rowwise.html0000644000176200001440000025044214200154111015507 0ustar liggesusers Row-wise operations

Row-wise operations

dplyr, and R in general, are particularly well suited to performing operations over columns, and performing operations over rows is much harder. In this vignette, you’ll learn dplyr’s approach centred around the row-wise data frame created by rowwise().

There are three common use cases that we discuss in this vignette:

  • Row-wise aggregates (e.g. compute the mean of x, y, z).
  • Calling a function multiple times with varying arguments.
  • Working with list-columns.

These types of problems are often easily solved with a for loop, but it’s nice to have a solution that fits naturally into a pipeline.

Of course, someone has to write loops. It doesn’t have to be you. — Jenny Bryan

library(dplyr, warn.conflicts = FALSE)

Creating

Row-wise operations require a special type of grouping where each group consists of a single row. You create this with rowwise():

df <- tibble(x = 1:2, y = 3:4, z = 5:6)
df %>% rowwise()
#> # A tibble: 2 × 3
#> # Rowwise: 
#>       x     y     z
#>   <int> <int> <int>
#> 1     1     3     5
#> 2     2     4     6

Like group_by(), rowwise() doesn’t really do anything itself; it just changes how the other verbs work. For example, compare the results of mutate() in the following code:

df %>% mutate(m = mean(c(x, y, z)))
#> # A tibble: 2 × 4
#>       x     y     z     m
#>   <int> <int> <int> <dbl>
#> 1     1     3     5   3.5
#> 2     2     4     6   3.5
df %>% rowwise() %>% mutate(m = mean(c(x, y, z)))
#> # A tibble: 2 × 4
#> # Rowwise: 
#>       x     y     z     m
#>   <int> <int> <int> <dbl>
#> 1     1     3     5     3
#> 2     2     4     6     4

If you use mutate() with a regular data frame, it computes the mean of x, y, and z across all rows. If you apply it to a row-wise data frame, it computes the mean for each row.

You can optionally supply “identifier” variables in your call to rowwise(). These variables are preserved when you call summarise(), so they behave somewhat similarly to the grouping variables passed to group_by():

df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6)

df %>% 
  rowwise() %>% 
  summarise(m = mean(c(x, y, z)))
#> # A tibble: 2 × 1
#>       m
#>   <dbl>
#> 1     3
#> 2     4

df %>% 
  rowwise(name) %>% 
  summarise(m = mean(c(x, y, z)))
#> `summarise()` has grouped output by 'name'. You can override using the
#> `.groups` argument.
#> # A tibble: 2 × 2
#> # Groups:   name [2]
#>   name       m
#>   <chr>  <dbl>
#> 1 Mara       3
#> 2 Hadley     4

rowwise() is just a special form of grouping, so if you want to remove it from a data frame, just call ungroup().

Per row summary statistics

dplyr::summarise() makes it really easy to summarise values across rows within one column. When combined with rowwise() it also makes it easy to summarise values across columns within one row. To see how, we’ll start by making a little dataset:

df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45)
df
#> # A tibble: 6 × 5
#>      id     w     x     y     z
#>   <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40
#> 2     2    11    21    31    41
#> 3     3    12    22    32    42
#> 4     4    13    23    33    43
#> # … with 2 more rows

Let’s say we want to compute the sum of w, x, y, and z for each row. We start by making a row-wise data frame:

rf <- df %>% rowwise(id)

We can then use mutate() to add a new column to each row, or summarise() to return just that one summary:

rf %>% mutate(total = sum(c(w, x, y, z)))
#> # A tibble: 6 × 6
#> # Rowwise:  id
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40   100
#> 2     2    11    21    31    41   104
#> 3     3    12    22    32    42   108
#> 4     4    13    23    33    43   112
#> # … with 2 more rows
rf %>% summarise(total = sum(c(w, x, y, z)))
#> `summarise()` has grouped output by 'id'. You can override using the `.groups`
#> argument.
#> # A tibble: 6 × 2
#> # Groups:   id [6]
#>      id total
#>   <int> <int>
#> 1     1   100
#> 2     2   104
#> 3     3   108
#> 4     4   112
#> # … with 2 more rows

Of course, if you have a lot of variables, it’s going to be tedious to type in every variable name. Instead, you can use c_across() which uses tidy selection syntax so you can succinctly select many variables:

rf %>% mutate(total = sum(c_across(w:z)))
#> # A tibble: 6 × 6
#> # Rowwise:  id
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40   100
#> 2     2    11    21    31    41   104
#> 3     3    12    22    32    42   108
#> 4     4    13    23    33    43   112
#> # … with 2 more rows
rf %>% mutate(total = sum(c_across(where(is.numeric))))
#> # A tibble: 6 × 6
#> # Rowwise:  id
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40   100
#> 2     2    11    21    31    41   104
#> 3     3    12    22    32    42   108
#> 4     4    13    23    33    43   112
#> # … with 2 more rows

You could combine this with column-wise operations (see vignette("colwise") for more details) to compute the proportion of the total for each column:

rf %>% 
  mutate(total = sum(c_across(w:z))) %>% 
  ungroup() %>% 
  mutate(across(w:z, ~ . / total))
#> # A tibble: 6 × 6
#>      id     w     x     y     z total
#>   <int> <dbl> <dbl> <dbl> <dbl> <int>
#> 1     1 0.1   0.2   0.3   0.4     100
#> 2     2 0.106 0.202 0.298 0.394   104
#> 3     3 0.111 0.204 0.296 0.389   108
#> 4     4 0.116 0.205 0.295 0.384   112
#> # … with 2 more rows

Row-wise summary functions

The rowwise() approach will work for any summary function. But if you need greater speed, it’s worth looking for a built-in row-wise variant of your summary function. These are more efficient because they operate on the data frame as a whole; they don’t split it into rows, compute the summary, and then join the results back together again.

df %>% mutate(total = rowSums(across(where(is.numeric))))
#> # A tibble: 6 × 6
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <dbl>
#> 1     1    10    20    30    40   101
#> 2     2    11    21    31    41   106
#> 3     3    12    22    32    42   111
#> 4     4    13    23    33    43   116
#> # … with 2 more rows
df %>% mutate(mean = rowMeans(across(where(is.numeric))))
#> # A tibble: 6 × 6
#>      id     w     x     y     z  mean
#>   <int> <int> <int> <int> <int> <dbl>
#> 1     1    10    20    30    40  20.2
#> 2     2    11    21    31    41  21.2
#> 3     3    12    22    32    42  22.2
#> 4     4    13    23    33    43  23.2
#> # … with 2 more rows

NB: I use df (not rf) and across() (not c_across()) here because rowMeans() and rowSums() take a multi-row data frame as input.

List-columns

rowwise() operations are a natural pairing when you have list-columns. They allow you to avoid explicit loops and/or functions from the apply() or purrr::map() families.

Motivation

Imagine you have this data frame, and you want to count the lengths of each element:

df <- tibble(
  x = list(1, 2:3, 4:6)
)

You might try calling length():

df %>% mutate(l = length(x))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     3
#> 2 <int [2]>     3
#> 3 <int [3]>     3

But that returns the length of the column, not the length of the individual values. If you’re an R documentation aficionado, you might know there’s already a base R function just for this purpose:

df %>% mutate(l = lengths(x))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3

Or if you’re an experienced R programmer, you might know how to apply a function to each element of a list using sapply(), vapply(), or one of the purrr map() functions:

df %>% mutate(l = sapply(x, length))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3
df %>% mutate(l = purrr::map_int(x, length))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3

But wouldn’t it be nice if you could just write length(x) and dplyr would figure out that you wanted to compute the length of the element inside of x? Since you’re here, you might already be guessing at the answer: this is just another application of the row-wise pattern.

df %>% 
  rowwise() %>% 
  mutate(l = length(x))
#> # A tibble: 3 × 2
#> # Rowwise: 
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3

Subsetting

Before we continue on, I wanted to briefly mention the magic that makes this work. This isn’t something you’ll generally need to think about (it’ll just work), but it’s useful to know about when something goes wrong.

There’s an important difference between a grouped data frame where each group happens to have one row, and a row-wise data frame where every group always has one row. Take these two data frames:

df <- tibble(g = 1:2, y = list(1:3, "a"))
gf <- df %>% group_by(g)
rf <- df %>% rowwise(g)

If we compute some properties of y, you’ll notice the results look different:

gf %>% mutate(type = typeof(y), length = length(y))
#> # A tibble: 2 × 4
#> # Groups:   g [2]
#>       g y         type  length
#>   <int> <list>    <chr>  <int>
#> 1     1 <int [3]> list       1
#> 2     2 <chr [1]> list       1
rf %>% mutate(type = typeof(y), length = length(y))
#> # A tibble: 2 × 4
#> # Rowwise:  g
#>       g y         type      length
#>   <int> <list>    <chr>      <int>
#> 1     1 <int [3]> integer        3
#> 2     2 <chr [1]> character      1

The key difference is that when mutate() slices up the columns to pass to length(y) the grouped mutate uses [ and the row-wise mutate uses [[. The following code gives a flavour of the differences if you used a for loop:

# grouped
out1 <- integer(2)
for (i in 1:2) {
  out1[[i]] <- length(df$y[i])
}
out1
#> [1] 1 1

# rowwise
out2 <- integer(2)
for (i in 1:2) {
  out2[[i]] <- length(df$y[[i]])
}
out2
#> [1] 3 1

Note that this magic only applies when you’re referring to existing columns, not when you’re creating new rows. This is potentially confusing, but we’re fairly confident it’s the least worst solution, particularly given the hint in the error message.

gf %>% mutate(y2 = y)
#> # A tibble: 2 × 3
#> # Groups:   g [2]
#>       g y         y2       
#>   <int> <list>    <list>   
#> 1     1 <int [3]> <int [3]>
#> 2     2 <chr [1]> <chr [1]>
rf %>% mutate(y2 = y)
#> Error in `mutate()`:
#> ! Problem while computing `y2 = y`.
#> x `y2` must be size 1, not 3.
#> ℹ Did you mean: `y2 = list(y)` ?
#> ℹ The error occurred in row 1.
rf %>% mutate(y2 = list(y))
#> # A tibble: 2 × 3
#> # Rowwise:  g
#>       g y         y2       
#>   <int> <list>    <list>   
#> 1     1 <int [3]> <int [3]>
#> 2     2 <chr [1]> <chr [1]>

Modelling

rowwise() data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. We’ll start by creating a nested data frame:

by_cyl <- mtcars %>% nest_by(cyl)
by_cyl
#> # A tibble: 3 × 2
#> # Rowwise:  cyl
#>     cyl data              
#>   <dbl> <list>            
#> 1     4 <tibble [11 × 12]>
#> 2     6 <tibble [7 × 12]> 
#> 3     8 <tibble [14 × 12]>

This is a little different to the usual group_by() output: we have visibly changed the structure of the data. Now we have three rows (one for each group), and we have a list-col, data, that stores the data for that group. Also note that the output is rowwise(); this is important because it’s going to make working with that list of data frames much easier.

Once we have one data frame per row, it’s straightforward to make one model per row:

mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data)))
mods
#> # A tibble: 3 × 3
#> # Rowwise:  cyl
#>     cyl data               mod   
#>   <dbl> <list>             <list>
#> 1     4 <tibble [11 × 12]> <lm>  
#> 2     6 <tibble [7 × 12]>  <lm>  
#> 3     8 <tibble [14 × 12]> <lm>

And supplement that with one set of predictions per row:

mods <- mods %>% mutate(pred = list(predict(mod, data)))
mods
#> # A tibble: 3 × 4
#> # Rowwise:  cyl
#>     cyl data               mod    pred      
#>   <dbl> <list>             <list> <list>    
#> 1     4 <tibble [11 × 12]> <lm>   <dbl [11]>
#> 2     6 <tibble [7 × 12]>  <lm>   <dbl [7]> 
#> 3     8 <tibble [14 × 12]> <lm>   <dbl [14]>

You could then summarise the model in a variety of ways:

mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2)))
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
#> argument.
#> # A tibble: 3 × 2
#> # Groups:   cyl [3]
#>     cyl  rmse
#>   <dbl> <dbl>
#> 1     4 3.01 
#> 2     6 0.985
#> 3     8 1.87
mods %>% summarise(rsq = summary(mod)$r.squared)
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
#> argument.
#> # A tibble: 3 × 2
#> # Groups:   cyl [3]
#>     cyl   rsq
#>   <dbl> <dbl>
#> 1     4 0.509
#> 2     6 0.465
#> 3     8 0.423
mods %>% summarise(broom::glance(mod))
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
#> argument.
#> # A tibble: 3 × 13
#> # Groups:   cyl [3]
#>     cyl r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
#>   <dbl>     <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
#> 1     4     0.509         0.454  3.33      9.32  0.0137     1 -27.7   61.5  62.7
#> 2     6     0.465         0.357  1.17      4.34  0.0918     1  -9.83  25.7  25.5
#> 3     8     0.423         0.375  2.02      8.80  0.0118     1 -28.7   63.3  65.2
#> # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Or easily access the parameters of each model:

mods %>% summarise(broom::tidy(mod))
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
#> argument.
#> # A tibble: 6 × 6
#> # Groups:   cyl [3]
#>     cyl term        estimate std.error statistic    p.value
#>   <dbl> <chr>          <dbl>     <dbl>     <dbl>      <dbl>
#> 1     4 (Intercept)    39.6       4.35      9.10 0.00000777
#> 2     4 wt             -5.65      1.85     -3.05 0.0137    
#> 3     6 (Intercept)    28.4       4.18      6.79 0.00105   
#> 4     6 wt             -2.78      1.33     -2.08 0.0918    
#> # … with 2 more rows

Repeated function calls

rowwise() doesn’t just work with functions that return a length-1 vector (aka summary functions); it can work with any function if the result is a list. This means that rowwise() and mutate() provide an elegant way to call a function many times with varying arguments, storing the outputs alongside the inputs.

Simulations

I think this is a particularly elegant way to perform simulations, because it lets you store simulated values along with the parameters that generated them. For example, imagine you have the following data frame that describes the properties of 3 samples from the uniform distribution:

df <- tribble(
  ~ n, ~ min, ~ max,
    1,     0,     1,
    2,    10,   100,
    3,   100,  1000,
)

You can supply these parameters to runif() by using rowwise() and mutate():

df %>% 
  rowwise() %>% 
  mutate(data = list(runif(n, min, max)))
#> # A tibble: 3 × 4
#> # Rowwise: 
#>       n   min   max data     
#>   <dbl> <dbl> <dbl> <list>   
#> 1     1     0     1 <dbl [1]>
#> 2     2    10   100 <dbl [2]>
#> 3     3   100  1000 <dbl [3]>

Note the use of list() here - runif() returns multiple values and a mutate() expression has to return something of length 1. list() means that we’ll get a list column where each row is a list containing multiple values. If you forget to use list(), dplyr will give you a hint:

df %>% 
  rowwise() %>% 
  mutate(data = runif(n, min, max))
#> Error in `mutate()`:
#> ! Problem while computing `data = runif(n, min, max)`.
#> x `data` must be size 1, not 2.
#> ℹ Did you mean: `data = list(runif(n, min, max))` ?
#> ℹ The error occurred in row 2.

Multiple combinations

What if you want to call a function for every combination of inputs? You can use expand.grid() (or tidyr::expand_grid()) to generate the data frame and then repeat the same pattern as above:

df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100))

df %>% 
  rowwise() %>% 
  mutate(data = list(rnorm(10, mean, sd)))
#> # A tibble: 9 × 3
#> # Rowwise: 
#>    mean    sd data      
#>   <dbl> <dbl> <list>    
#> 1    -1     1 <dbl [10]>
#> 2     0     1 <dbl [10]>
#> 3     1     1 <dbl [10]>
#> 4    -1    10 <dbl [10]>
#> # … with 5 more rows

Varying functions

In more complicated problems, you might also want to vary the function being called. This tends to be a bit more of an awkward fit with this approach because the columns in the input tibble will be less regular. But it’s still possible, and it’s a natural place to use do.call():

df <- tribble(
   ~rng,     ~params,
   "runif",  list(n = 10), 
   "rnorm",  list(n = 20),
   "rpois",  list(n = 10, lambda = 5),
) %>%
  rowwise()

df %>% 
  mutate(data = list(do.call(rng, params)))
#> # A tibble: 3 × 3
#> # Rowwise: 
#>   rng   params           data      
#>   <chr> <list>           <list>    
#> 1 runif <named list [1]> <dbl [10]>
#> 2 rnorm <named list [1]> <dbl [20]>
#> 3 rpois <named list [2]> <int [10]>

Previously

rowwise()

rowwise() was also questioning for quite some time, partly because I didn’t appreciate how many people needed the native ability to compute summaries across multiple variables for each row. As an alternative, we recommended performing row-wise operations with the purrr map() functions. However, this was challenging because you needed to pick a map function based on the number of arguments that were varying and the type of result, which required quite some knowledge of purrr functions.

I was also resistant to rowwise() because I felt like automatically switching between [ and [[ was too magical in the same way that automatically list()-ing results made do() too magical. I’ve now persuaded myself that the row-wise magic is good magic partly because most people find the distinction between [ and [[ mystifying and rowwise() means that you don’t need to think about it.

Since rowwise() clearly is useful it is no longer questioning, and we expect it to be around for the long term.

do()

We’ve questioned the need for do() for quite some time, because it never felt very similar to the other dplyr verbs. It had two main modes of operation:

  • Without argument names: you could call functions that input and output data frames using . to refer to the “current” group. For example, the following code gets the first row of each group:

    mtcars %>% 
      group_by(cyl) %>% 
      do(head(., 1))
    #> # A tibble: 3 × 13
    #> # Groups:   cyl [3]
    #>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
    #>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
    #> 1  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1     8    16
    #> 2  21       6   160   110  3.9   2.62  16.5     0     1     4     4    12    24
    #> 3  18.7     8   360   175  3.15  3.44  17.0     0     0     3     2    16    32

    This has been superseded by cur_data() plus the more permissive summarise() which can now create multiple columns and multiple rows.

    mtcars %>% 
      group_by(cyl) %>% 
      summarise(head(cur_data(), 1))
    #> # A tibble: 3 × 13
    #>     cyl   mpg  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
    #>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
    #> 1     4  22.8   108    93  3.85  2.32  18.6     1     1     4     1     8    16
    #> 2     6  21     160   110  3.9   2.62  16.5     0     1     4     4    12    24
    #> 3     8  18.7   360   175  3.15  3.44  17.0     0     0     3     2    16    32
  • With arguments: it worked like mutate() but automatically wrapped every element in a list:

    mtcars %>% 
      group_by(cyl) %>% 
      do(nrows = nrow(.))
    #> # A tibble: 3 × 2
    #> # Rowwise: 
    #>     cyl nrows    
    #>   <dbl> <list>   
    #> 1     4 <int [1]>
    #> 2     6 <int [1]>
    #> 3     8 <int [1]>

    I now believe that behaviour is both too magical and not very useful, and it can be replaced by summarise() and cur_data().

    mtcars %>% 
      group_by(cyl) %>% 
      summarise(nrows = nrow(cur_data()))
    #> # A tibble: 3 × 2
    #>     cyl nrows
    #>   <dbl> <int>
    #> 1     4    11
    #> 2     6     7
    #> 3     8    14

    If needed (unlike here), you can wrap the results in a list yourself.

The addition of cur_data()/across() and the increased scope of summarise() means that do() is no longer needed, so it is now superseded.

dplyr/inst/doc/base.Rmd0000644000176200001440000002746214144435746014533 0ustar liggesusers--- title: "dplyr <-> base R" output: rmarkdown::html_vignette description: > How does dplyr compare to base R? This vignette describes the main differences in philosophy, and shows the base R code most closely equivalent to each dplyr verb. vignette: > %\VignetteIndexEntry{From base R to dplyr} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4, tibble.print_max = 4) ``` This vignette compares dplyr functions to their base R equivalents. This helps those familiar with base R understand better what dplyr does, and shows dplyr users how you might express the same ideas in base R code. We'll start with a rough overview of the major differences, then discuss the one table verbs in more detail, followed by the two table verbs. # Overview 1. The code dplyr verbs input and output data frames. This contrasts with base R functions which more frequently work with individual vectors. 1. dplyr relies heavily on "non-standard evaluation" so that you don't need to use `$` to refer to columns in the "current" data frame. This behaviour is inspired by the base functions `subset()` and `transform()`. 1. dplyr solutions tend to use a variety of single purpose verbs, while base R solutions typically tend to use `[` in a variety of ways, depending on the task at hand. 1. Multiple dplyr verbs are often strung together into a pipeline by `%>%`. In base R, you'll typically save intermediate results to a variable that you either discard, or repeatedly overwrite. 1. All dplyr verbs handle "grouped" data frames so that the code to perform a computation per-group looks very similar to code that works on a whole data frame. In base R, per-group operations tend to have varied forms. 
# One table verbs The following table shows a condensed translation between dplyr verbs and their base R equivalents. The following sections describe each operation in more detail. You can learn more about the dplyr verbs in their documentation and in `vignette("one-table")`. | dplyr | base | |------------------------------- |--------------------------------------------------| | `arrange(df, x)` | `df[order(x), , drop = FALSE]` | | `distinct(df, x)` | `df[!duplicated(x), , drop = FALSE]`, `unique()` | | `filter(df, x)` | `df[which(x), , drop = FALSE]`, `subset()` | | `mutate(df, z = x + y)` | `df$z <- df$x + df$y`, `transform()` | | `pull(df, 1)` | `df[[1]]` | | `pull(df, x)` | `df$x` | | `rename(df, y = x)` | `names(df)[names(df) == "x"] <- "y"` | | `relocate(df, y)` | `df[union("y", names(df))]` | | `select(df, x, y)` | `df[c("x", "y")]`, `subset()` | | `select(df, starts_with("x"))` | `df[grepl("^x", names(df))]` | | `summarise(df, mean(x))` | `mean(df$x)`, `tapply()`, `aggregate()`, `by()` | | `slice(df, c(1, 2, 5))` | `df[c(1, 2, 5), , drop = FALSE]` | To begin, we'll load dplyr and convert `mtcars` and `iris` to tibbles so that we can easily show only abbreviated output for each operation. ```{r setup, message = FALSE} library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris) ``` ## `arrange()`: Arrange rows by variables `dplyr::arrange()` orders the rows of a data frame by the values of one or more columns: ```{r} mtcars %>% arrange(cyl, disp) ``` The `desc()` helper allows you to order selected variables in descending order: ```{r} mtcars %>% arrange(desc(cyl), desc(disp)) ``` We can replicate in base R by using `[` with `order()`: ```{r} mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] ``` Note the use of `drop = FALSE`. If you forget this, and the input is a data frame with a single column, the output will be a vector, not a data frame. This is a source of subtle bugs. 
Base R does not provide a convenient and general way to sort individual variables in descending order, so you have two options: * For numeric variables, you can use `-x`. * You can request `order()` to sort all variables in descending order. ```{r, results = FALSE} mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE] ``` ## `distinct()`: Select distinct/unique rows `dplyr::distinct()` selects unique rows: ```{r} df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns df %>% distinct(x, .keep_all = TRUE) # whole data frame ``` There are two equivalents in base R, depending on whether you want the whole data frame, or just selected variables: ```{r} unique(df["x"]) # selected columns df[!duplicated(df$x), , drop = FALSE] # whole data frame ``` ## `filter()`: Return rows with matching conditions `dplyr::filter()` selects rows where an expression is `TRUE`: ```{r} starwars %>% filter(species == "Human") starwars %>% filter(mass > 1000) starwars %>% filter(hair_color == "none" & eye_color == "black") ``` The closest base equivalent (and the inspiration for `filter()`) is `subset()`: ```{r} subset(starwars, species == "Human") subset(starwars, mass > 1000) subset(starwars, hair_color == "none" & eye_color == "black") ``` You can also use `[` but this also requires the use of `which()` to remove `NA`s: ```{r} starwars[which(starwars$species == "Human"), , drop = FALSE] starwars[which(starwars$mass > 1000), , drop = FALSE] starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE] ``` ## `mutate()`: Create or transform variables `dplyr::mutate()` creates new variables from existing variables: ```{r} df %>% mutate(z = x + y, z2 = z ^ 2) ``` The closest base equivalent is `transform()`, but note that it cannot use freshly created variables: ```{r} head(transform(df, z = x + y, z2 = (x + y) ^ 2)) ``` 
Alternatively, you can use `$<-`: ```{r} mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 ``` When applied to a grouped data frame, `dplyr::mutate()` computes a new variable once per group: ```{r} gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% mutate(x_mean = mean(x), x_rank = rank(x)) ``` To replicate this in base R, you can use `ave()`: ```{r} transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) ``` ## `pull()`: Pull out a single variable `dplyr::pull()` extracts a variable either by name or position: ```{r} mtcars %>% pull(1) mtcars %>% pull(cyl) ``` This is equivalent to `[[` for positions and `$` for names: ```{r} mtcars[[1]] mtcars$cyl ``` ## `relocate()`: Change column order `dplyr::relocate()` makes it easy to move a set of columns to a new position (by default, the front): ```{r} # to front mtcars %>% relocate(gear, carb) # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) ``` We can replicate this in base R with a little set manipulation: ```{r} mtcars[union(c("gear", "carb"), names(mtcars))] to_back <- c("mpg", "cyl") mtcars[c(setdiff(names(mtcars), to_back), to_back)] ``` Moving columns to somewhere in the middle requires a little more set twiddling. 
## `rename()`: Rename variables by name `dplyr::rename()` allows you to rename variables by name or position: ```{r} iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) ``` Renaming variables by position is straightforward in base R: ```{r} iris2 <- iris names(iris2)[2] <- "sepal_width" ``` Renaming variables by name requires a bit more work: ```{r} names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length" ``` ## `rename_with()`: Rename variables with a function `dplyr::rename_with()` transforms column names with a function: ```{r} iris %>% rename_with(toupper) ``` A similar effect can be achieved with `setNames()` in base R: ```{r} setNames(iris, toupper(names(iris))) ``` ## `select()`: Select variables by name `dplyr::select()` subsets columns by position, name, function of name, or other property: ```{r} iris %>% select(1:3) iris %>% select(Species, Sepal.Length) iris %>% select(starts_with("Petal")) iris %>% select(where(is.factor)) ``` Subsetting variables by position is straightforward in base R: ```{r} iris[1:3] # single argument selects columns; never drops iris[1:3, , drop = FALSE] ``` You have two options to subset by name: ```{r} iris[c("Species", "Sepal.Length")] subset(iris, select = c(Species, Sepal.Length)) ``` Subsetting by function of name requires a bit of work with `grep()`: ```{r} iris[grep("^Petal", names(iris))] ``` And you can use `Filter()` to subset by type: ```{r} Filter(is.factor, iris) ``` ## `summarise()`: Reduce multiple values down to a single value `dplyr::summarise()` computes one or more summaries for each group: ```{r} mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) ``` I think the closest base R equivalent uses `by()`. 
Unfortunately `by()` returns a list of data frames, but you can combine them back together again with `do.call()` and `rbind()`: ```{r} mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) ``` `aggregate()` comes very close to providing an elegant answer: ```{r} agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg ``` But unfortunately while it looks like there are `disp.mean` and `disp.n` columns, it's actually a single matrix column: ```{r} str(agg) ``` You can see a variety of other options at . ## `slice()`: Choose rows by position `slice()` selects rows with their location: ```{r} slice(mtcars, 25:n()) ``` This is straightforward to replicate with `[`: ```{r} mtcars[25:nrow(mtcars), , drop = FALSE] ``` # Two-table verbs When we want to merge two data frames (`x` and `y`), we have a variety of different ways to bring them together. Various base R `merge()` calls are replaced by a variety of dplyr `join()` functions. | dplyr | base | |------------------------|-----------------------------------------| | `inner_join(df1, df2)` |`merge(df1, df2)` | | `left_join(df1, df2) ` |`merge(df1, df2, all.x = TRUE)` | | `right_join(df1, df2)` |`merge(df1, df2, all.y = TRUE)` | | `full_join(df1, df2)` |`merge(df1, df2, all = TRUE)` | | `semi_join(df1, df2)` |`df1[df1$x %in% df2$x, , drop = FALSE]` | | `anti_join(df1, df2)` |`df1[!df1$x %in% df2$x, , drop = FALSE]` | For more information about two-table verbs, see `vignette("two-table")`. ### Mutating joins dplyr's `inner_join()`, `left_join()`, `right_join()`, and `full_join()` add new columns from `y` to `x`, matching rows based on a set of "keys", and differ only in how missing matches are handled. They are equivalent to calls to `merge()` with various settings of the `all`, `all.x`, and `all.y` arguments. 
The main difference is the order of the rows: * dplyr preserves the order of the `x` data frame. * `merge()` sorts the key columns. ### Filtering joins dplyr's `semi_join()` and `anti_join()` affect only the rows, not the columns: ```{r} band_members %>% semi_join(band_instruments) band_members %>% anti_join(band_instruments) ``` They can be replicated in base R with `[` and `%in%`: ```{r} band_members[band_members$name %in% band_instruments$name, , drop = FALSE] band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] ``` Semi and anti joins with multiple key variables are considerably more challenging to implement. dplyr/inst/doc/compatibility.html0000644000176200001440000011231614200154103016657 0ustar liggesusers dplyr compatibility

dplyr compatibility

This vignette is aimed at package authors who need to update their code because of a backward incompatible change to dplyr. We do try and minimise backward incompatible changes as much as possible, but sometimes they are necessary in order to radically simplify existing code, or unlock a lot of potential value in the future.

This vignette starts with some general advice on writing package code that works with multiple versions of dplyr, then continues to discuss specific changes in dplyr versions.

Working with multiple dplyr versions

Ideally, you want to make sure that your package works with both the released version and the development version of dplyr. This is typically a little bit more work, but has two big advantages:

  1. It’s more convenient for your users, since they’re not forced to update dplyr if they don’t want to.

  2. It’s easier on CRAN since it doesn’t require a massive coordinated release of multiple packages.

To make code work with multiple versions of a package, your first tool is the simple if statement:

if (utils::packageVersion("dplyr") > "0.5.0") {
  # code for new version
} else {
  # code for old version
}

Always condition on > current-version, not >= next-version because this will ensure that this branch is also used for the development version of the package. For example, if the current release is version “0.5.0”, the development version will be “0.5.0.9000”.

Occasionally, you’ll run into a situation where the NAMESPACE has changed and you need to conditionally import different functions. This typically occurs when functions are moved from one package to another. We try our best to provide automatic fallbacks, but this is not always possible. Often you can work around the problem by avoiding importFrom and using :: instead. Do this where possible:

if (utils::packageVersion("dplyr") > "0.5.0") {
  dbplyr::build_sql(...)
} else {
  dplyr::build_sql(...)
}

This will generate an R CMD check NOTE (because one of the functions will always be missing), but this is ok. Simply explain that you get the note because you have written a wrapper to make sure your code is backward compatible.

Sometimes it’s not possible to avoid importFrom(). For example you might be importing a generic so that you can define a method for it. In this case, you can take advantage of a little-known feature in the NAMESPACE file: you can include if statements.

#' @rawNamespace
#' if (utils::packageVersion("dplyr") > "0.5.0") {
#'   importFrom("dbplyr", "build_sql")
#' } else {
#'   importFrom("dplyr", "build_sql")
#' }

dplyr 0.6.0

Database code moves to dbplyr

Almost all database related code has been moved out of dplyr and into a new package, dbplyr. This makes dplyr simpler, and will make it easier to release fixes for bugs that only affect databases. If you’ve implemented a database backend for dplyr, please read the backend news.

Depending on what generics you use, and what generics you provide methods for, you may need to write some conditional code. To help make this easier we’ve written wrap_dbplyr_obj() which will write the helper code for you:

wrap_dbplyr_obj("build_sql")

wrap_dbplyr_obj("base_agg")

Simply copy the results of this function in your package.

These will generate R CMD check NOTES, so make sure to tell CRAN that this is to ensure backward compatibility.

Deprecation of underscored verbs_()

Because the tidyeval framework allows us to combine SE and NSE semantics within the same functions, the underscored verbs have been softly deprecated.

For users of SE_ verbs

The legacy underscored versions take objects for which a lazyeval::as.lazy() method is defined. This includes symbols and calls, strings, and formulas. All of these objects have been replaced with quosures and you can call tidyeval verbs with unquoted quosures:

quo <- quo(cyl)
select(mtcars, !! quo)

Symbolic expressions are also supported, but note that bare symbols and calls do not carry scope information. If you’re referring to objects in the data frame, it’s safe to omit specifying an enclosure:

sym <- quote(cyl)
select(mtcars, !! sym)

call <- quote(mean(cyl))
summarise(mtcars, cyl = !! call)

Transforming objects into quosures is generally straightforward. To enclose with the current environment, you can unquote directly in quo() or you can use as_quosure():

quo(!! sym)
#> <quosure>
#> expr: ^cyl
#> env:  global
quo(!! call)
#> <quosure>
#> expr: ^mean(cyl)
#> env:  global

rlang::as_quosure(sym)
#> Warning: `as_quosure()` requires an explicit environment as of rlang 0.3.0.
#> Please supply `env`.
#> This warning is displayed once per session.
#> <quosure>
#> expr: ^cyl
#> env:  global
rlang::as_quosure(call)
#> <quosure>
#> expr: ^mean(cyl)
#> env:  global

Note that while formulas and quosures are very similar objects (and in the most general sense, formulas are quosures), they can’t be used interchangeably in tidyeval functions. Early implementations did treat bare formulas as quosures, but this created compatibility issues with modelling functions of the stats package. Fortunately, it’s easy to transform formulas to quosures that will self-evaluate in tidyeval functions:

f <- ~cyl
f
#> ~cyl
rlang::as_quosure(f)
#> <quosure>
#> expr: ^cyl
#> env:  global

Finally, and perhaps most importantly, strings are not and should not be parsed. As developers, it is tempting to try and solve problems using strings because we have been trained to work with strings rather than quoted expressions. However it’s almost always the wrong way to approach the problem. The exception is for creating symbols. In that case it is perfectly legitimate to use strings:

rlang::sym("cyl")
#> cyl
rlang::syms(letters[1:3])
#> [[1]]
#> a
#> 
#> [[2]]
#> b
#> 
#> [[3]]
#> c

But you should never use strings to create calls. Instead you can use quasiquotation:

syms <- rlang::syms(c("foo", "bar", "baz"))
quo(my_call(!!! syms))
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

fun <- rlang::sym("my_call")
quo((!!fun)(!!! syms))
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

Or create the call with call2():

call <- rlang::call2("my_call", !!! syms)
call
#> my_call(foo, bar, baz)

rlang::as_quosure(call)
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

# Or equivalently:
quo(!! rlang::call2("my_call", !!! syms))
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

Note that idioms based on interp() should now generally be avoided and replaced with quasiquotation. Where you used to interpolate:

lazyeval::interp(~ mean(var), var = rlang::sym("mpg"))

You would now unquote:

var <- "mpg"
quo(mean(!! rlang::sym(var)))

See also vignette("programming") for more about quasiquotation and quosures.

For package authors

For package authors, rlang provides a compatibility file that you can copy to your package. compat_lazy() and compat_lazy_dots() turn lazy-able objects into proper quosures. This helps providing an underscored version to your users for backward compatibility. For instance, here is how we defined the underscored version of filter() in dplyr 0.6:

filter_.tbl_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  filter(.data, !!! dots)
}

With tidyeval, S3 dispatch to the correct method might be an issue. In the past, the genericity of dplyr verbs was accomplished by dispatching in the underscored versions. Now that those are deprecated, we’ve turned the non-underscored verbs into S3 generics.

We maintain backward compatibility by redispatching to old underscored verbs in the default methods of the new S3 generics. For example, here is how we redispatch filter():

filter.default <- function(.data, ...) {
  filter_(.data, .dots = compat_as_lazy_dots(...))
}

This gets the job done in most cases. However, the default method will not be called for objects inheriting from one of the classes for which we provide non-underscored methods: data.frame, tbl_df, tbl_cube and grouped_df. An example of this is the sf package whose objects have classes c("sf", "data.frame"). Authors of such packages should provide a method for the non-underscored generic in order to be compatible with dplyr:

filter.sf <- function(.data, ...) {
  st_as_sf(NextMethod())
}

If you need help with this, please let us know!

Deprecation of mutate_each() and summarise_each()

These functions have been replaced by a more complete family of functions. This family has suffixes _if, _at and _all and includes more verbs than just mutate and summarise.

If you need to update your code to the new family, there are two relevant functions depending on which variables you apply funs() to. If you called mutate_each() without supplying a selection of variables, funs is applied to all variables. In this case, you should update your code to use mutate_all() instead:

mutate_each(starwars, funs(as.character))
mutate_all(starwars, funs(as.character))

Note that the new verbs support bare functions as well, so you don’t necessarily need to wrap with funs():

mutate_all(starwars, as.character)

On the other hand, if you supplied a variable selection, you should use mutate_at(). The variable selection should be wrapped with vars().

mutate_each(starwars, funs(as.character), height, mass)
mutate_at(starwars, vars(height, mass), as.character)

vars() supports all the selection helpers that you usually use with select():

summarise_at(mtcars, vars(starts_with("d")), mean)

Note that instead of a vars() selection, you can also supply character vectors of column names:

mutate_at(starwars, c("height", "mass"), as.character)
dplyr/inst/doc/programming.R0000644000176200001440000001170214200154107015566 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ## ----setup, message = FALSE--------------------------------------------------- library(dplyr) ## ---- results = FALSE--------------------------------------------------------- starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,] ## ---- results = FALSE--------------------------------------------------------- starwars %>% filter(homeworld == "Naboo", species == "Human") ## ----------------------------------------------------------------------------- df <- data.frame(x = runif(3), y = runif(3)) df$x ## ---- results = FALSE--------------------------------------------------------- var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) ## ---- results = FALSE--------------------------------------------------------- for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ## ---- results = FALSE--------------------------------------------------------- summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) ## ---- results = FALSE--------------------------------------------------------- vars <- c("mpg", "vs") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars)) ## ----------------------------------------------------------------------------- mutate_y <- function(data) { mutate(data, y = a + x) } ## ----------------------------------------------------------------------------- my_summary_function <- function(data) { data %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } ## 
----------------------------------------------------------------------------- #' @importFrom rlang .data my_summary_function <- function(data) { data %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) } ## ----------------------------------------------------------------------------- my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } ## ----------------------------------------------------------------------------- my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } ## ----------------------------------------------------------------------------- my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } ## ----------------------------------------------------------------------------- my_summarise4 <- function(data, expr) { data %>% summarise( "mean_{{expr}}" := mean({{ expr }}), "sum_{{expr}}" := sum({{ expr }}), "n_{{expr}}" := n() ) } my_summarise5 <- function(data, mean_var, sd_var) { data %>% summarise( "mean_{{mean_var}}" := mean({{ mean_var }}), "sd_{{sd_var}}" := sd({{ sd_var }}) ) } ## ----------------------------------------------------------------------------- my_summarise <- function(.data, ...) { .data %>% group_by(...) 
%>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) starwars %>% my_summarise(sex, gender) ## ----------------------------------------------------------------------------- my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) ## ----------------------------------------------------------------------------- my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(across({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } ## ----------------------------------------------------------------------------- my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(across({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}")) } ## ---- results = FALSE--------------------------------------------------------- for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ## ---- results = FALSE--------------------------------------------------------- mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]])) ## ---- eval = FALSE------------------------------------------------------------ # library(shiny) # ui <- fluidPage( # selectInput("var", "Variable", choices = names(diamonds)), # tableOutput("output") # ) # server <- function(input, output, session) { # data <- reactive(filter(diamonds, .data[[input$var]] > 0)) # output$output <- renderTable(head(data())) # } dplyr/inst/doc/two-table.Rmd0000644000176200001440000001617414125606753015512 0ustar liggesusers--- title: "Two-table verbs" description: > Most dplyr verbs work with a single data set, but most data analyses involve multiple datasets. This vignette introduces you to the dplyr verbs that work with more one than data set, and introduces to the mutating joins, filtering joins, and the set operations. 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Two-table verbs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 5) library(dplyr) ``` It's rare that a data analysis involves only a single table of data. In practice, you'll normally have many tables that contribute to an analysis, and you need flexible tools to combine them. In dplyr, there are three families of verbs that work with two tables at a time: * Mutating joins, which add new variables to one table from matching rows in another. * Filtering joins, which filter observations from one table based on whether or not they match an observation in the other table. * Set operations, which combine the observations in the data sets as if they were set elements. (This discussion assumes that you have [tidy data](https://www.jstatsoft.org/v59/i10/), where the rows are observations and the columns are variables. If you're not familiar with that framework, I'd recommend reading up on it first.) All two-table verbs work similarly. The first two arguments are `x` and `y`, and provide the tables to combine. The output is always a new table with the same type as `x`. ## Mutating joins Mutating joins allow you to combine variables from multiple tables. For example, consider the flights and airlines data from the nycflights13 package. In one table we have flight information with an abbreviation for carrier, and in another we have a mapping between abbreviations and full names. You can use a join to add the carrier names to the flight data: ```{r, warning = FALSE} library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. 
flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) ``` ### Controlling how the tables are matched As well as `x` and `y`, each mutating join takes an argument `by` that controls which variables are used to match observations in the two tables. There are a few ways to specify it, as I illustrate below with various tables from nycflights13: * `NULL`, the default. dplyr will will use all variables that appear in both tables, a __natural__ join. For example, the flights and weather tables match on their common variables: year, month, day, hour and origin. ```{r} flights2 %>% left_join(weather) ``` * A character vector, `by = "x"`. Like a natural join, but uses only some of the common variables. For example, `flights` and `planes` have `year` columns, but they mean different things so we only want to join by `tailnum`. ```{r} flights2 %>% left_join(planes, by = "tailnum") ``` Note that the year columns in the output are disambiguated with a suffix. * A named character vector: `by = c("x" = "a")`. This will match variable `x` in table `x` to variable `a` in table `y`. The variables from use will be used in the output. Each flight has an origin and destination `airport`, so we need to specify which one we want to join to: ```{r} flights2 %>% left_join(airports, c("dest" = "faa")) flights2 %>% left_join(airports, c("origin" = "faa")) ``` ### Types of join There are four types of mutating join, which differ in their behaviour when a match is not found. We'll illustrate each with a simple example: ```{r} df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = "a") ``` * `inner_join(x, y)` only includes observations that match in both `x` and `y`. ```{r} df1 %>% inner_join(df2) %>% knitr::kable() ``` * `left_join(x, y)` includes all observations in `x`, regardless of whether they match or not. 
This is the most commonly used join because it ensures that you don't lose observations from your primary table. ```{r} df1 %>% left_join(df2) ``` * `right_join(x, y)` includes all observations in `y`. It's equivalent to `left_join(y, x)`, but the columns and rows will be ordered differently. ```{r} df1 %>% right_join(df2) df2 %>% left_join(df1) ``` * `full_join()` includes all observations from `x` and `y`. ```{r} df1 %>% full_join(df2) ``` The left, right and full joins are collectively know as __outer joins__. When a row doesn't match in an outer join, the new variables are filled in with missing values. ### Observations While mutating joins are primarily used to add new variables, they can also generate new observations. If a match is not unique, a join will add all possible combinations (the Cartesian product) of the matching observations: ```{r} df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) df1 %>% left_join(df2) ``` ## Filtering joins Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. There are two types: * `semi_join(x, y)` __keeps__ all observations in `x` that have a match in `y`. * `anti_join(x, y)` __drops__ all observations in `x` that have a match in `y`. These are most useful for diagnosing join mismatches. For example, there are many flights in the nycflights13 dataset that don't have a matching tail number in the planes table: ```{r} library("nycflights13") flights %>% anti_join(planes, by = "tailnum") %>% count(tailnum, sort = TRUE) ``` If you're worried about what observations your joins will match, start with a `semi_join()` or `anti_join()`. `semi_join()` and `anti_join()` never duplicate; they only ever remove observations. 
```{r} df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) # Four rows to start with: df1 %>% nrow() # And we get four rows after the join df1 %>% inner_join(df2, by = "x") %>% nrow() # But only two rows actually match df1 %>% semi_join(df2, by = "x") %>% nrow() ``` ## Set operations The final type of two-table verb is set operations. These expect the `x` and `y` inputs to have the same variables, and treat the observations like sets: * `intersect(x, y)`: return only observations in both `x` and `y` * `union(x, y)`: return unique observations in `x` and `y` * `setdiff(x, y)`: return observations in `x`, but not in `y`. Given this simple data: ```{r} (df1 <- tibble(x = 1:2, y = c(1L, 1L))) (df2 <- tibble(x = 1:2, y = 1:2)) ``` The four possibilities are: ```{r} intersect(df1, df2) # Note that we get 3 rows, not 4 union(df1, df2) setdiff(df1, df2) setdiff(df2, df1) ``` ## Multiple-table verbs dplyr does not provide any functions for working with three or more tables. Instead use `purrr::reduce()` or `Reduce()`, as described in [Advanced R](http://adv-r.had.co.nz/Functionals.html#functionals-fp), to iteratively combine the two-table verbs to handle as many tables as you need. 
dplyr/inst/doc/base.R0000644000176200001440000001403614200154101014153 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4, tibble.print_max = 4) ## ----setup, message = FALSE--------------------------------------------------- library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris) ## ----------------------------------------------------------------------------- mtcars %>% arrange(cyl, disp) ## ----------------------------------------------------------------------------- mtcars %>% arrange(desc(cyl), desc(disp)) ## ----------------------------------------------------------------------------- mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] ## ---- results = FALSE--------------------------------------------------------- mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE] ## ----------------------------------------------------------------------------- df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns df %>% distinct(x, .keep_all = TRUE) # whole data frame ## ----------------------------------------------------------------------------- unique(df["x"]) # selected columns df[!duplicated(df$x), , drop = FALSE] # whole data frame ## ----------------------------------------------------------------------------- starwars %>% filter(species == "Human") starwars %>% filter(mass > 1000) starwars %>% filter(hair_color == "none" & eye_color == "black") ## ----------------------------------------------------------------------------- subset(starwars, species == "Human") subset(starwars, mass > 1000) subset(starwars, hair_color == "none" & eye_color == "black") ## ----------------------------------------------------------------------------- starwars[which(starwars$species == "Human"), , drop = 
FALSE] starwars[which(starwars$mass > 1000), , drop = FALSE] starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE] ## ----------------------------------------------------------------------------- df %>% mutate(z = x + y, z2 = z ^ 2) ## ----------------------------------------------------------------------------- head(transform(df, z = x + y, z2 = (x + y) ^ 2)) ## ----------------------------------------------------------------------------- mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 ## ----------------------------------------------------------------------------- gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% mutate(x_mean = mean(x), x_rank = rank(x)) ## ----------------------------------------------------------------------------- transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) ## ----------------------------------------------------------------------------- mtcars %>% pull(1) mtcars %>% pull(cyl) ## ----------------------------------------------------------------------------- mtcars[[1]] mtcars$cyl ## ----------------------------------------------------------------------------- # to front mtcars %>% relocate(gear, carb) # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) ## ----------------------------------------------------------------------------- mtcars[union(c("gear", "carb"), names(mtcars))] to_back <- c("mpg", "cyl") mtcars[c(setdiff(names(mtcars), to_back), to_back)] ## ----------------------------------------------------------------------------- iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) ## ----------------------------------------------------------------------------- iris2 <- iris names(iris2)[2] <- "sepal_width" ## ----------------------------------------------------------------------------- names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length" ## 
----------------------------------------------------------------------------- iris %>% rename_with(toupper) ## ----------------------------------------------------------------------------- setNames(iris, toupper(names(iris))) ## ----------------------------------------------------------------------------- iris %>% select(1:3) iris %>% select(Species, Sepal.Length) iris %>% select(starts_with("Petal")) iris %>% select(where(is.factor)) ## ----------------------------------------------------------------------------- iris[1:3] # single argument selects columns; never drops iris[1:3, , drop = FALSE] ## ----------------------------------------------------------------------------- iris[c("Species", "Sepal.Length")] subset(iris, select = c(Species, Sepal.Length)) ## ----------------------------------------------------------------------------- iris[grep("^Petal", names(iris))] ## ----------------------------------------------------------------------------- Filter(is.factor, iris) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) ## ----------------------------------------------------------------------------- mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) ## ----------------------------------------------------------------------------- agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg ## ----------------------------------------------------------------------------- str(agg) ## ----------------------------------------------------------------------------- slice(mtcars, 25:n()) ## ----------------------------------------------------------------------------- mtcars[25:nrow(mtcars), , drop = FALSE] ## ----------------------------------------------------------------------------- band_members %>% semi_join(band_instruments) band_members %>% 
anti_join(band_instruments) ## ----------------------------------------------------------------------------- band_members[band_members$name %in% band_instruments$name, , drop = FALSE] band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] dplyr/inst/doc/dplyr.html0000644000176200001440000026327314200154104015152 0ustar liggesusers Introduction to dplyr

Introduction to dplyr

When working with data you must:

  • Figure out what you want to do.

  • Describe those tasks in the form of a computer program.

  • Execute the program.

The dplyr package makes these steps fast and easy:

  • By constraining your options, it helps you think about your data manipulation challenges.

  • It provides simple “verbs”, functions that correspond to the most common data manipulation tasks, to help you translate your thoughts into code.

  • It uses efficient backends, so you spend less time waiting for the computer.

This document introduces you to dplyr’s basic set of tools, and shows you how to apply them to data frames. dplyr also supports databases via the dbplyr package; once you’ve installed dbplyr, read vignette("dbplyr") to learn more.

Data: starwars

To explore the basic data manipulation verbs of dplyr, we’ll use the dataset starwars. This dataset contains 87 characters and comes from the Star Wars API, and is documented in ?starwars.

dim(starwars)
#> [1] 87 14
starwars
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Note that starwars is a tibble, a modern reimagining of the data frame. It’s particularly useful for large datasets because it only prints the first few rows. You can learn more about tibbles at https://tibble.tidyverse.org; in particular you can convert data frames to tibbles with as_tibble().

Single table verbs

dplyr aims to provide a function for each basic verb of data manipulation. These verbs can be organised into three categories based on the component of the dataset that they work with:

  • Rows:
    • filter() chooses rows based on column values.
    • slice() chooses rows based on location.
    • arrange() changes the order of the rows.
  • Columns:
    • select() changes whether or not a column is included.
    • rename() changes the name of columns.
    • mutate() changes the values of columns and creates new columns.
    • relocate() changes the order of the columns.
  • Groups of rows:
    • summarise() collapses a group into a single row.

The pipe

All of the dplyr functions take a data frame (or tibble) as the first argument. Rather than forcing the user to either save intermediate objects or nest functions, dplyr provides the %>% operator from magrittr. x %>% f(y) turns into f(x, y) so the result from one step is then “piped” into the next step. You can use the pipe to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as “then”).

Filter rows with filter()

filter() allows you to select a subset of rows in a data frame. Like all single verbs, the first argument is the tibble (or data frame). The second and subsequent arguments refer to variables within that data frame, selecting rows where the expression is TRUE.

For example, we can select all characters with light skin color and brown eyes with:

starwars %>% filter(skin_color == "light", eye_color == "brown")
#> # A tibble: 7 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Leia Org…    150    49 brown      light      brown             19 fema… femin…
#> 2 Biggs Da…    183    84 black      light      brown             24 male  mascu…
#> 3 Cordé        157    NA brown      light      brown             NA fema… femin…
#> 4 Dormé        165    NA brown      light      brown             NA fema… femin…
#> # … with 3 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

This is roughly equivalent to this base R code:

starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ]

Arrange rows with arrange()

arrange() works similarly to filter() except that instead of filtering or selecting rows, it reorders them. It takes a data frame, and a set of column names (or more complicated expressions) to order by. If you provide more than one column name, each additional column will be used to break ties in the values of preceding columns:

starwars %>% arrange(height, mass)
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Yoda          66    17 white      green      brown            896 male  mascu…
#> 2 Ratts Ty…     79    15 none       grey, blue unknown           NA male  mascu…
#> 3 Wicket S…     88    20 brown      brown      brown              8 male  mascu…
#> 4 Dud Bolt      94    45 none       blue, grey yellow            NA male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Use desc() to order a column in descending order:

starwars %>% arrange(desc(height))
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Yarael P…    264    NA none       white      yellow            NA male  mascu…
#> 2 Tarfful      234   136 brown      brown      blue              NA male  mascu…
#> 3 Lama Su      229    88 none       grey       black             NA male  mascu…
#> 4 Chewbacca    228   112 brown      unknown    blue             200 male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Choose rows using their position with slice()

slice() lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows.

We can get characters from row numbers 5 through 10.

starwars %>% slice(5:10)
#> # A tibble: 6 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Leia Org…    150    49 brown      light      brown             19 fema… femin…
#> 2 Owen Lars    178   120 brown, gr… light      blue              52 male  mascu…
#> 3 Beru Whi…    165    75 brown      light      blue              47 fema… femin…
#> 4 R5-D4         97    32 <NA>       white, red red               NA none  mascu…
#> # … with 2 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

It is accompanied by a number of helpers for common use cases:

  • slice_head() and slice_tail() select the first or last rows.
starwars %>% slice_head(n = 3)
#> # A tibble: 3 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue              19 male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow           112 none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red               33 none  mascu…
#> # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
  • slice_sample() randomly selects rows. Use the option prop to choose a certain proportion of the cases.
starwars %>% slice_sample(n = 5)
#> # A tibble: 5 × 14
#>   name     height  mass hair_color skin_color  eye_color birth_year sex   gender
#>   <chr>     <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> <chr> 
#> 1 Dud Bolt     94    45 none       blue, grey  yellow            NA male  mascu…
#> 2 Bossk       190   113 none       green       red               53 male  mascu…
#> 3 Shaak Ti    178    57 none       red, blue,… black             NA fema… femin…
#> 4 Dormé       165    NA brown      light       brown             NA fema… femin…
#> # … with 1 more row, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
starwars %>% slice_sample(prop = 0.1)
#> # A tibble: 8 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Qui-Gon …    193    89 brown      fair       blue              92 male  mascu…
#> 2 Dexter J…    198   102 none       brown      yellow            NA male  mascu…
#> 3 R4-P17        96    NA none       silver, r… red, blue         NA none  femin…
#> 4 Lama Su      229    88 none       grey       black             NA male  mascu…
#> # … with 4 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Use replace = TRUE to perform a bootstrap sample. If needed, you can weight the sample with the weight argument.

  • slice_min() and slice_max() select rows with highest or lowest values of a variable. Note that we first must choose only the values which are not NA.
starwars %>%
  filter(!is.na(height)) %>%
  slice_max(height, n = 3)
#> # A tibble: 3 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Yarael P…    264    NA none       white      yellow            NA male  mascu…
#> 2 Tarfful      234   136 brown      brown      blue              NA male  mascu…
#> 3 Lama Su      229    88 none       grey       black             NA male  mascu…
#> # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Select columns with select()

Often you work with large datasets with many columns but only a few are actually of interest to you. select() allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions:

# Select columns by name
starwars %>% select(hair_color, skin_color, eye_color)
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # … with 83 more rows
# Select all columns between hair_color and eye_color (inclusive)
starwars %>% select(hair_color:eye_color)
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # … with 83 more rows
# Select all columns except those from hair_color to eye_color (inclusive)
starwars %>% select(!(hair_color:eye_color))
#> # A tibble: 87 × 11
#>   name     height  mass birth_year sex   gender homeworld species films vehicles
#>   <chr>     <int> <dbl>      <dbl> <chr> <chr>  <chr>     <chr>   <lis> <list>  
#> 1 Luke Sk…    172    77       19   male  mascu… Tatooine  Human   <chr> <chr>   
#> 2 C-3PO       167    75      112   none  mascu… Tatooine  Droid   <chr> <chr>   
#> 3 R2-D2        96    32       33   none  mascu… Naboo     Droid   <chr> <chr>   
#> 4 Darth V…    202   136       41.9 male  mascu… Tatooine  Human   <chr> <chr>   
#> # … with 83 more rows, and 1 more variable: starships <list>
# Select all columns ending with color
starwars %>% select(ends_with("color"))
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # … with 83 more rows

There are a number of helper functions you can use within select(), like starts_with(), ends_with(), matches() and contains(). These let you quickly match larger blocks of variables that meet some criterion. See ?select for more details.

You can rename variables with select() by using named arguments:

starwars %>% select(home_world = homeworld)
#> # A tibble: 87 × 1
#>   home_world
#>   <chr>     
#> 1 Tatooine  
#> 2 Tatooine  
#> 3 Naboo     
#> 4 Tatooine  
#> # … with 83 more rows

But because select() drops all the variables not explicitly mentioned, it’s not that useful. Instead, use rename():

starwars %>% rename(home_world = homeworld)
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 5 more variables: home_world <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Add new columns with mutate()

Besides selecting sets of existing columns, it’s often useful to add new columns that are functions of existing columns. This is the job of mutate():

starwars %>% mutate(height_m = height / 100)
#> # A tibble: 87 × 15
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 6 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>, height_m <dbl>

We can’t see the height in meters we just calculated, but we can fix that using a select command.

starwars %>%
  mutate(height_m = height / 100) %>%
  select(height_m, height, everything())
#> # A tibble: 87 × 15
#>   height_m height name     mass hair_color skin_color eye_color birth_year sex  
#>      <dbl>  <int> <chr>   <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1     1.72    172 Luke S…    77 blond      fair       blue            19   male 
#> 2     1.67    167 C-3PO      75 <NA>       gold       yellow         112   none 
#> 3     0.96     96 R2-D2      32 <NA>       white, bl… red             33   none 
#> 4     2.02    202 Darth …   136 none       white      yellow          41.9 male 
#> # … with 83 more rows, and 6 more variables: gender <chr>, homeworld <chr>,
#> #   species <chr>, films <list>, vehicles <list>, starships <list>

dplyr::mutate() is similar to the base transform(), but allows you to refer to columns that you’ve just created:

starwars %>%
  mutate(
    height_m = height / 100,
    BMI = mass / (height_m^2)
  ) %>%
  select(BMI, everything())
#> # A tibble: 87 × 16
#>     BMI name       height  mass hair_color skin_color eye_color birth_year sex  
#>   <dbl> <chr>       <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1  26.0 Luke Skyw…    172    77 blond      fair       blue            19   male 
#> 2  26.9 C-3PO         167    75 <NA>       gold       yellow         112   none 
#> 3  34.7 R2-D2          96    32 <NA>       white, bl… red             33   none 
#> 4  33.3 Darth Vad…    202   136 none       white      yellow          41.9 male 
#> # … with 83 more rows, and 7 more variables: gender <chr>, homeworld <chr>,
#> #   species <chr>, films <list>, vehicles <list>, starships <list>,
#> #   height_m <dbl>

If you only want to keep the new variables, use transmute():

starwars %>%
  transmute(
    height_m = height / 100,
    BMI = mass / (height_m^2)
  )
#> # A tibble: 87 × 2
#>   height_m   BMI
#>      <dbl> <dbl>
#> 1     1.72  26.0
#> 2     1.67  26.9
#> 3     0.96  34.7
#> 4     2.02  33.3
#> # … with 83 more rows

Change column order with relocate()

Use a similar syntax as select() to move blocks of columns at once

starwars %>% relocate(sex:homeworld, .before = height)
#> # A tibble: 87 × 14
#>   name       sex   gender homeworld height  mass hair_color skin_color eye_color
#>   <chr>      <chr> <chr>  <chr>      <int> <dbl> <chr>      <chr>      <chr>    
#> 1 Luke Skyw… male  mascu… Tatooine     172    77 blond      fair       blue     
#> 2 C-3PO      none  mascu… Tatooine     167    75 <NA>       gold       yellow   
#> 3 R2-D2      none  mascu… Naboo         96    32 <NA>       white, bl… red      
#> 4 Darth Vad… male  mascu… Tatooine     202   136 none       white      yellow   
#> # … with 83 more rows, and 5 more variables: birth_year <dbl>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Summarise values with summarise()

The last verb is summarise(). It collapses a data frame to a single row.

starwars %>% summarise(height = mean(height, na.rm = TRUE))
#> # A tibble: 1 × 1
#>   height
#>    <dbl>
#> 1   174.

It’s not that useful until we learn the group_by() verb below.

Commonalities

You may have noticed that the syntax and function of all these verbs are very similar:

  • The first argument is a data frame.

  • The subsequent arguments describe what to do with the data frame. You can refer to columns in the data frame directly without using $.

  • The result is a new data frame.

Together these properties make it easy to chain together multiple simple steps to achieve a complex result.

These five functions provide the basis of a language of data manipulation. At the most basic level, you can only alter a tidy data frame in five useful ways: you can reorder the rows (arrange()), pick observations and variables of interest (filter() and select()), add new variables that are functions of existing variables (mutate()), or collapse many values to a summary (summarise()).

Combining functions with %>%

The dplyr API is functional in the sense that function calls don’t have side-effects. You must always save their results. This doesn’t lead to particularly elegant code, especially if you want to do many operations at once. You either have to do it step-by-step:

a1 <- group_by(starwars, species, sex)
a2 <- select(a1, height, mass)
a3 <- summarise(a2,
  height = mean(height, na.rm = TRUE),
  mass = mean(mass, na.rm = TRUE)
)

Or if you don’t want to name the intermediate results, you need to wrap the function calls inside each other:

summarise(
  select(
    group_by(starwars, species, sex),
    height, mass
  ),
  height = mean(height, na.rm = TRUE),
  mass = mean(mass, na.rm = TRUE)
)
#> Adding missing grouping variables: `species`, `sex`
#> `summarise()` has grouped output by 'species'. You can override using the
#> `.groups` argument.
#> # A tibble: 41 × 4
#> # Groups:   species [38]
#>   species  sex   height  mass
#>   <chr>    <chr>  <dbl> <dbl>
#> 1 Aleena   male      79    15
#> 2 Besalisk male     198   102
#> 3 Cerean   male     198    82
#> 4 Chagrian male     196   NaN
#> # … with 37 more rows

This is difficult to read because the order of the operations is from inside to out. Thus, the arguments are a long way away from the function. To get around this problem, dplyr provides the %>% operator from magrittr. x %>% f(y) turns into f(x, y) so you can use it to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as “then”):

starwars %>%
  group_by(species, sex) %>%
  select(height, mass) %>%
  summarise(
    height = mean(height, na.rm = TRUE),
    mass = mean(mass, na.rm = TRUE)
  )

Patterns of operations

The dplyr verbs can be classified by the type of operations they accomplish (we sometimes speak of their semantics, i.e., their meaning). It’s helpful to have a good grasp of the difference between select and mutate operations.

Selecting operations

One of the appealing features of dplyr is that you can refer to columns from the tibble as if they were regular variables. However, the syntactic uniformity of referring to bare column names hides semantical differences across the verbs. A column symbol supplied to select() does not have the same meaning as the same symbol supplied to mutate().

Selecting operations expect column names and positions. Hence, when you call select() with bare variable names, they actually represent their own positions in the tibble. The following calls are completely equivalent from dplyr’s point of view:

# `name` represents the integer 1
select(starwars, name)
#> # A tibble: 87 × 1
#>   name          
#>   <chr>         
#> 1 Luke Skywalker
#> 2 C-3PO         
#> 3 R2-D2         
#> 4 Darth Vader   
#> # … with 83 more rows
select(starwars, 1)
#> # A tibble: 87 × 1
#>   name          
#>   <chr>         
#> 1 Luke Skywalker
#> 2 C-3PO         
#> 3 R2-D2         
#> 4 Darth Vader   
#> # … with 83 more rows

By the same token, this means that you cannot refer to variables from the surrounding context if they have the same name as one of the columns. In the following example, height still represents 2, not 5:

height <- 5
select(starwars, height)
#> # A tibble: 87 × 1
#>   height
#>    <int>
#> 1    172
#> 2    167
#> 3     96
#> 4    202
#> # … with 83 more rows

One useful subtlety is that this only applies to bare names and to selecting calls like c(height, mass) or height:mass. In all other cases, the columns of the data frame are not put in scope. This allows you to refer to contextual variables in selection helpers:

name <- "color"
select(starwars, ends_with(name))
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # … with 83 more rows

These semantics are usually intuitive. But note the subtle difference:

name <- 5
select(starwars, name, identity(name))
#> # A tibble: 87 × 2
#>   name           skin_color 
#>   <chr>          <chr>      
#> 1 Luke Skywalker fair       
#> 2 C-3PO          gold       
#> 3 R2-D2          white, blue
#> 4 Darth Vader    white      
#> # … with 83 more rows

In the first argument, name represents its own position 1. In the second argument, name is evaluated in the surrounding context and represents the fifth column.

For a long time, select() used to only understand column positions. Counting from dplyr 0.6, it now understands column names as well. This makes it a bit easier to program with select():

vars <- c("name", "height")
select(starwars, all_of(vars), "mass")
#> # A tibble: 87 × 3
#>   name           height  mass
#>   <chr>           <int> <dbl>
#> 1 Luke Skywalker    172    77
#> 2 C-3PO             167    75
#> 3 R2-D2              96    32
#> 4 Darth Vader       202   136
#> # … with 83 more rows

Mutating operations

Mutate semantics are quite different from selection semantics. Whereas select() expects column names or positions, mutate() expects column vectors. We will set up a smaller tibble to use for our examples.

df <- starwars %>% select(name, height, mass)

When we use select(), the bare column names stand for their own positions in the tibble. For mutate() on the other hand, column symbols represent the actual column vectors stored in the tibble. Consider what happens if we give a string or a number to mutate():

mutate(df, "height", 2)
#> # A tibble: 87 × 5
#>   name           height  mass `"height"`   `2`
#>   <chr>           <int> <dbl> <chr>      <dbl>
#> 1 Luke Skywalker    172    77 height         2
#> 2 C-3PO             167    75 height         2
#> 3 R2-D2              96    32 height         2
#> 4 Darth Vader       202   136 height         2
#> # … with 83 more rows

mutate() gets length-1 vectors that it interprets as new columns in the data frame. These vectors are recycled so they match the number of rows. That’s why it doesn’t make sense to supply expressions like "height" + 10 to mutate(). This amounts to adding 10 to a string! The correct expression is:

mutate(df, height + 10)
#> # A tibble: 87 × 4
#>   name           height  mass `height + 10`
#>   <chr>           <int> <dbl>         <dbl>
#> 1 Luke Skywalker    172    77           182
#> 2 C-3PO             167    75           177
#> 3 R2-D2              96    32           106
#> 4 Darth Vader       202   136           212
#> # … with 83 more rows

In the same way, you can unquote values from the context if these values represent a valid column. They must be either length 1 (they then get recycled) or have the same length as the number of rows. In the following example we create a new vector that we add to the data frame:

var <- seq(1, nrow(df))
mutate(df, new = var)
#> # A tibble: 87 × 4
#>   name           height  mass   new
#>   <chr>           <int> <dbl> <int>
#> 1 Luke Skywalker    172    77     1
#> 2 C-3PO             167    75     2
#> 3 R2-D2              96    32     3
#> 4 Darth Vader       202   136     4
#> # … with 83 more rows

A case in point is group_by(). While you might think it has select semantics, it actually has mutate semantics. This is quite handy as it allows to group by a modified column:

group_by(starwars, sex)
#> # A tibble: 87 × 14
#> # Groups:   sex [5]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
group_by(starwars, sex = as.factor(sex))
#> # A tibble: 87 × 14
#> # Groups:   sex [5]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <fct> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
group_by(starwars, height_binned = cut(height, 3))
#> # A tibble: 87 × 15
#> # Groups:   height_binned [4]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 6 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>, height_binned <fct>

This is why you can’t supply a column name to group_by(). This amounts to creating a new column containing the string recycled to the number of rows:

group_by(df, "month")
#> # A tibble: 87 × 4
#> # Groups:   "month" [1]
#>   name           height  mass `"month"`
#>   <chr>           <int> <dbl> <chr>    
#> 1 Luke Skywalker    172    77 month    
#> 2 C-3PO             167    75 month    
#> 3 R2-D2              96    32 month    
#> 4 Darth Vader       202   136 month    
#> # … with 83 more rows
Grouped data

Grouped data

dplyr verbs are particularly powerful when you apply them to grouped data frames (grouped_df objects). This vignette shows you:

  • How to group, inspect, and ungroup with group_by() and friends.

  • How individual dplyr verbs change their behaviour when applied to a grouped data frame.

  • How to access data about the “current” group from within a verb.

We’ll start by loading dplyr:

library(dplyr)

group_by()

The most important grouping verb is group_by(): it takes a data frame and one or more variables to group by:

by_species <- starwars %>% group_by(species)
by_sex_gender <- starwars %>% group_by(sex, gender)

You can see the grouping when you print the data:

by_species
#> # A tibble: 87 × 14
#> # Groups:   species [38]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
by_sex_gender
#> # A tibble: 87 × 14
#> # Groups:   sex, gender [6]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Or use tally() to count the number of rows in each group. The sort argument is useful if you want to see the largest groups up front.

by_species %>% tally()
#> # A tibble: 38 × 2
#>   species      n
#>   <chr>    <int>
#> 1 Aleena       1
#> 2 Besalisk     1
#> 3 Cerean       1
#> 4 Chagrian     1
#> # … with 34 more rows

by_sex_gender %>% tally(sort = TRUE)
#> # A tibble: 6 × 3
#> # Groups:   sex [5]
#>   sex    gender        n
#>   <chr>  <chr>     <int>
#> 1 male   masculine    60
#> 2 female feminine     16
#> 3 none   masculine     5
#> 4 <NA>   <NA>          4
#> # … with 2 more rows

As well as grouping by existing variables, you can group by any function of existing variables. This is equivalent to performing a mutate() before the group_by():

bmi_breaks <- c(0, 18.5, 25, 30, Inf)

starwars %>%
  group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>%
  tally()
#> # A tibble: 5 × 2
#>   bmi_cat       n
#>   <fct>     <int>
#> 1 (0,18.5]     10
#> 2 (18.5,25]    24
#> 3 (25,30]      13
#> 4 (30,Inf]     12
#> # … with 1 more row

Group metadata

You can see underlying group data with group_keys(). It has one row for each group and one column for each grouping variable:

by_species %>% group_keys()
#> # A tibble: 38 × 1
#>   species 
#>   <chr>   
#> 1 Aleena  
#> 2 Besalisk
#> 3 Cerean  
#> 4 Chagrian
#> # … with 34 more rows

by_sex_gender %>% group_keys()
#> # A tibble: 6 × 2
#>   sex            gender   
#>   <chr>          <chr>    
#> 1 female         feminine 
#> 2 hermaphroditic masculine
#> 3 male           masculine
#> 4 none           feminine 
#> # … with 2 more rows

You can see which group each row belongs to with group_indices():

by_species %>% group_indices()
#>  [1] 11  6  6 11 11 11 11  6 11 11 11 11 34 11 24 12 11 11 36 11 11  6 31 11 11
#> [26] 18 11 11  8 26 11 21 11 10 10 10 38 30  7 38 11 37 32 32 33 35 29 11  3 20
#> [51] 37 27 13 23 16  4 11 11 11  9 17 17 11 11 11 11  5  2 15 15 11  1  6 25 19
#> [76] 28 14 34 11 38 22 11 11 11  6 38 11

And which rows each group contains with group_rows():

by_species %>% group_rows() %>% head()
#> <list_of<integer>[6]>
#> [[1]]
#> [1] 72
#> 
#> [[2]]
#> [1] 68
#> 
#> [[3]]
#> [1] 49
#> 
#> [[4]]
#> [1] 56
#> 
#> [[5]]
#> [1] 67
#> 
#> [[6]]
#> [1]  2  3  8 22 73 85

Use group_vars() if you just want the names of the grouping variables:

by_species %>% group_vars()
#> [1] "species"
by_sex_gender %>% group_vars()
#> [1] "sex"    "gender"

Changing and adding to grouping variables

If you apply group_by() to an already grouped dataset, it will overwrite the existing grouping variables. For example, the following code groups by homeworld instead of species:

by_species %>%
  group_by(homeworld) %>%
  tally()
#> # A tibble: 49 × 2
#>   homeworld       n
#>   <chr>       <int>
#> 1 Alderaan        3
#> 2 Aleen Minor     1
#> 3 Bespin          1
#> 4 Bestine IV      1
#> # … with 45 more rows

To augment the grouping, use .add = TRUE1. For example, the following code groups by species and homeworld:

by_species %>%
  group_by(homeworld, .add = TRUE) %>%
  tally()
#> # A tibble: 58 × 3
#> # Groups:   species [38]
#>   species  homeworld       n
#>   <chr>    <chr>       <int>
#> 1 Aleena   Aleen Minor     1
#> 2 Besalisk Ojom            1
#> 3 Cerean   Cerea           1
#> 4 Chagrian Champala        1
#> # … with 54 more rows

Removing grouping variables

To remove all grouping variables, use ungroup():

by_species %>%
  ungroup() %>%
  tally()
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1    87

You can also choose to selectively ungroup by listing the variables you want to remove:

by_sex_gender %>% 
  ungroup(sex) %>% 
  tally()
#> # A tibble: 3 × 2
#>   gender        n
#>   <chr>     <int>
#> 1 feminine     17
#> 2 masculine    66
#> 3 <NA>          4

Verbs

The following sections describe how grouping affects the main dplyr verbs.

summarise()

summarise() computes a summary for each group. This means that it starts from group_keys(), adding summary variables to the right hand side:

by_species %>%
  summarise(
    n = n(),
    height = mean(height, na.rm = TRUE)
  )
#> # A tibble: 38 × 3
#>   species      n height
#>   <chr>    <int>  <dbl>
#> 1 Aleena       1     79
#> 2 Besalisk     1    198
#> 3 Cerean       1    198
#> 4 Chagrian     1    196
#> # … with 34 more rows

The .groups= argument controls the grouping structure of the output. The historical behaviour of removing the right hand side grouping variable corresponds to .groups = "drop_last" without a message or .groups = NULL with a message (the default).

by_sex_gender %>% 
  summarise(n = n()) %>% 
  group_vars()
#> `summarise()` has grouped output by 'sex'. You can override using the `.groups`
#> argument.
#> [1] "sex"

by_sex_gender %>% 
  summarise(n = n(), .groups = "drop_last") %>% 
  group_vars()
#> [1] "sex"

Since version 1.0.0 the groups may also be kept (.groups = "keep") or dropped (.groups = "drop").

by_sex_gender %>% 
  summarise(n = n(), .groups = "keep") %>% 
  group_vars()
#> [1] "sex"    "gender"

by_sex_gender %>% 
  summarise(n = n(), .groups = "drop") %>% 
  group_vars()
#> character(0)

When the output no longer has any grouping variables, it becomes ungrouped (i.e. a regular tibble).

select(), rename(), and relocate()

rename() and relocate() behave identically with grouped and ungrouped data because they only affect the name or position of existing columns. Grouped select() is almost identical to ungrouped select, except that it always includes the grouping variables:

by_species %>% select(mass)
#> Adding missing grouping variables: `species`
#> # A tibble: 87 × 2
#> # Groups:   species [38]
#>   species  mass
#>   <chr>   <dbl>
#> 1 Human      77
#> 2 Droid      75
#> 3 Droid      32
#> 4 Human     136
#> # … with 83 more rows

If you don’t want the grouping variables, you’ll have to first ungroup(). (This design is possibly a mistake, but we’re stuck with it for now.)

arrange()

Grouped arrange() is the same as ungrouped arrange(), unless you set .by_group = TRUE, in which case it will order first by the grouping variables.

by_species %>%
  arrange(desc(mass)) %>%
  relocate(species, mass)
#> # A tibble: 87 × 14
#> # Groups:   species [38]
#>   species  mass name     height hair_color skin_color eye_color birth_year sex  
#>   <chr>   <dbl> <chr>     <int> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1 Hutt     1358 Jabba D…    175 <NA>       green-tan… orange         600   herm…
#> 2 Kaleesh   159 Grievous    216 none       brown, wh… green, y…       NA   male 
#> 3 Droid     140 IG-88       200 none       metal      red             15   none 
#> 4 Human     136 Darth V…    202 none       white      yellow          41.9 male 
#> # … with 83 more rows, and 5 more variables: gender <chr>, homeworld <chr>,
#> #   films <list>, vehicles <list>, starships <list>

by_species %>%
  arrange(desc(mass), .by_group = TRUE) %>%
  relocate(species, mass)
#> # A tibble: 87 × 14
#> # Groups:   species [38]
#>   species   mass name    height hair_color skin_color eye_color birth_year sex  
#>   <chr>    <dbl> <chr>    <int> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1 Aleena      15 Ratts …     79 none       grey, blue unknown           NA male 
#> 2 Besalisk   102 Dexter…    198 none       brown      yellow            NA male 
#> 3 Cerean      82 Ki-Adi…    198 white      pale       yellow            92 male 
#> 4 Chagrian    NA Mas Am…    196 none       blue       blue              NA male 
#> # … with 83 more rows, and 5 more variables: gender <chr>, homeworld <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Note that second example is sorted by species (from the group_by() statement) and then by mass (within species).

mutate() and transmute()

In simple cases with vectorised functions, grouped and ungrouped mutate() give the same results. They differ when used with summary functions:

# Subtract off global mean
starwars %>% 
  select(name, homeworld, mass) %>% 
  mutate(standard_mass = mass - mean(mass, na.rm = TRUE))
#> # A tibble: 87 × 4
#>   name           homeworld  mass standard_mass
#>   <chr>          <chr>     <dbl>         <dbl>
#> 1 Luke Skywalker Tatooine     77         -20.3
#> 2 C-3PO          Tatooine     75         -22.3
#> 3 R2-D2          Naboo        32         -65.3
#> 4 Darth Vader    Tatooine    136          38.7
#> # … with 83 more rows

# Subtract off homeworld mean
starwars %>% 
  select(name, homeworld, mass) %>% 
  group_by(homeworld) %>% 
  mutate(standard_mass = mass - mean(mass, na.rm = TRUE))
#> # A tibble: 87 × 4
#> # Groups:   homeworld [49]
#>   name           homeworld  mass standard_mass
#>   <chr>          <chr>     <dbl>         <dbl>
#> 1 Luke Skywalker Tatooine     77         -8.38
#> 2 C-3PO          Tatooine     75        -10.4 
#> 3 R2-D2          Naboo        32        -32.2 
#> 4 Darth Vader    Tatooine    136         50.6 
#> # … with 83 more rows

Or with window functions like min_rank():

# Overall rank
starwars %>% 
  select(name, homeworld, height) %>% 
  mutate(rank = min_rank(height))
#> # A tibble: 87 × 4
#>   name           homeworld height  rank
#>   <chr>          <chr>      <int> <int>
#> 1 Luke Skywalker Tatooine     172    29
#> 2 C-3PO          Tatooine     167    21
#> 3 R2-D2          Naboo         96     5
#> 4 Darth Vader    Tatooine     202    72
#> # … with 83 more rows

# Rank per homeworld
starwars %>% 
  select(name, homeworld, height) %>% 
  group_by(homeworld) %>% 
  mutate(rank = min_rank(height))
#> # A tibble: 87 × 4
#> # Groups:   homeworld [49]
#>   name           homeworld height  rank
#>   <chr>          <chr>      <int> <int>
#> 1 Luke Skywalker Tatooine     172     5
#> 2 C-3PO          Tatooine     167     4
#> 3 R2-D2          Naboo         96     1
#> 4 Darth Vader    Tatooine     202    10
#> # … with 83 more rows

filter()

A grouped filter() effectively does a mutate() to generate a logical variable, and then only keeps the rows where the variable is TRUE. This means that grouped filters can be used with summary functions. For example, we can find the tallest character of each species:

by_species %>%
  select(name, species, height) %>% 
  filter(height == max(height))
#> # A tibble: 35 × 3
#> # Groups:   species [35]
#>   name                  species        height
#>   <chr>                 <chr>           <int>
#> 1 Greedo                Rodian            173
#> 2 Jabba Desilijic Tiure Hutt              175
#> 3 Yoda                  Yoda's species     66
#> 4 Bossk                 Trandoshan        190
#> # … with 31 more rows

You can also use filter() to remove entire groups. For example, the following code eliminates all groups that only have a single member:

by_species %>%
  filter(n() != 1) %>% 
  tally()
#> # A tibble: 9 × 2
#>   species      n
#>   <chr>    <int>
#> 1 Droid        6
#> 2 Gungan       3
#> 3 Human       35
#> 4 Kaminoan     2
#> # … with 5 more rows

slice() and friends

slice() and friends (slice_head(), slice_tail(), slice_sample(), slice_min() and slice_max()) select rows within a group. For example, we can select the first observation within each species:

by_species %>%
  relocate(species) %>% 
  slice(1)
#> # A tibble: 38 × 14
#> # Groups:   species [38]
#>   species  name    height  mass hair_color skin_color eye_color birth_year sex  
#>   <chr>    <chr>    <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1 Aleena   Ratts …     79    15 none       grey, blue unknown           NA male 
#> 2 Besalisk Dexter…    198   102 none       brown      yellow            NA male 
#> 3 Cerean   Ki-Adi…    198    82 white      pale       yellow            92 male 
#> 4 Chagrian Mas Am…    196    NA none       blue       blue              NA male 
#> # … with 34 more rows, and 5 more variables: gender <chr>, homeworld <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Similarly, we can use slice_min() to select the smallest n values of a variable:

by_species %>%
  filter(!is.na(height)) %>% 
  slice_min(height, n = 2)
#> # A tibble: 48 × 14
#> # Groups:   species [38]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Ratts Ty…     79    15 none       grey, blue unknown           NA male  mascu…
#> 2 Dexter J…    198   102 none       brown      yellow            NA male  mascu…
#> 3 Ki-Adi-M…    198    82 white      pale       yellow            92 male  mascu…
#> 4 Mas Amed…    196    NA none       blue       blue              NA male  mascu…
#> # … with 44 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

Computing on grouping information

Inside dplyr verbs, you can access various properties of the “current” group using a family of functions with the cur_ prefix. These functions are typically not needed for everyday usage of dplyr, but can be useful because they allow you to free yourself from some of the typical constraints of dplyr verbs.

cur_data()

cur_data() returns the current group, excluding grouping variables. It’s useful to feed to functions that take a whole data frame. For example, the following code fits a linear model of mass ~ height to each species:

by_species %>%
  filter(n() > 1) %>% 
  mutate(mod = list(lm(mass ~ height, data = cur_data())))
#> # A tibble: 58 × 15
#> # Groups:   species [9]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 54 more rows, and 6 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>, mod <list>

cur_group() and cur_group_id()

cur_group_id() gives a unique numeric identifier for the current group. This is sometimes useful if you want to index into an external data structure.

by_species %>%
  arrange(species) %>% 
  select(name, species, homeworld) %>% 
  mutate(id = cur_group_id())
#> # A tibble: 87 × 4
#> # Groups:   species [38]
#>   name            species  homeworld      id
#>   <chr>           <chr>    <chr>       <int>
#> 1 Ratts Tyerell   Aleena   Aleen Minor     1
#> 2 Dexter Jettster Besalisk Ojom            2
#> 3 Ki-Adi-Mundi    Cerean   Cerea           3
#> 4 Mas Amedda      Chagrian Champala        4
#> # … with 83 more rows

  1. Note that the argument changed from add = TRUE to .add = TRUE in dplyr 1.0.0.↩︎

dplyr/inst/doc/base.html0000644000176200001440000030226414200154101014721 0ustar liggesusers dplyr <-> base R

dplyr <-> base R

This vignette compares dplyr functions to their base R equivalents. This helps those familiar with base R understand better what dplyr does, and shows dplyr users how you might express the same ideas in base R code. We’ll start with a rough overview of the major differences, then discuss the one table verbs in more detail, followed by the two table verbs.

Overview

  1. dplyr verbs input and output data frames. This contrasts with base R functions which more frequently work with individual vectors.

  2. dplyr relies heavily on “non-standard evaluation” so that you don’t need to use $ to refer to columns in the “current” data frame. This behaviour is inspired by the base functions subset() and transform().

  3. dplyr solutions tend to use a variety of single purpose verbs, while base R solutions typically tend to use [ in a variety of ways, depending on the task at hand.

  4. Multiple dplyr verbs are often strung together into a pipeline by %>%. In base R, you’ll typically save intermediate results to a variable that you either discard, or repeatedly overwrite.

  5. All dplyr verbs handle “grouped” data frames so that the code to perform a computation per-group looks very similar to code that works on a whole data frame. In base R, per-group operations tend to have varied forms.

One table verbs

The following table shows a condensed translation between dplyr verbs and their base R equivalents. The following sections describe each operation in more detail. You can learn more about the dplyr verbs in their documentation and in vignette("one-table").

dplyr base
arrange(df, x) df[order(x), , drop = FALSE]
distinct(df, x) df[!duplicated(x), , drop = FALSE], unique()
filter(df, x) df[which(x), , drop = FALSE], subset()
mutate(df, z = x + y) df$z <- df$x + df$y, transform()
pull(df, 1) df[[1]]
pull(df, x) df$x
rename(df, y = x) names(df)[names(df) == "x"] <- "y"
relocate(df, y) df[union("y", names(df))]
select(df, x, y) df[c("x", "y")], subset()
select(df, starts_with("x")) df[grepl("^x", names(df))]
summarise(df, mean(x)) mean(df$x), tapply(), aggregate(), by()
slice(df, c(1, 2, 5)) df[c(1, 2, 5), , drop = FALSE]

To begin, we’ll load dplyr and convert mtcars and iris to tibbles so that we can easily show only abbreviated output for each operation.

library(dplyr)
mtcars <- as_tibble(mtcars)
iris <- as_tibble(iris)

arrange(): Arrange rows by variables

dplyr::arrange() orders the rows of a data frame by the values of one or more columns:

mtcars %>% arrange(cyl, disp)
#> # A tibble: 32 × 11
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
#> 2  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
#> 3  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
#> 4  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
#> # … with 28 more rows

The desc() helper allows you to order selected variables in descending order:

mtcars %>% arrange(desc(cyl), desc(disp))
#> # A tibble: 32 × 11
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  10.4     8   472   205  2.93  5.25  18.0     0     0     3     4
#> 2  10.4     8   460   215  3     5.42  17.8     0     0     3     4
#> 3  14.7     8   440   230  3.23  5.34  17.4     0     0     3     4
#> 4  19.2     8   400   175  3.08  3.84  17.0     0     0     3     2
#> # … with 28 more rows

We can replicate in base R by using [ with order():

mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE]
#> # A tibble: 32 × 11
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
#> 2  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
#> 3  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
#> 4  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
#> # … with 28 more rows

Note the use of drop = FALSE. If you forget this, and the input is a data frame with a single column, the output will be a vector, not a data frame. This is a source of subtle bugs.

Base R does not provide a convenient and general way to sort individual variables in descending order, so you have two options:

  • For numeric variables, you can use -x.
  • You can request order() to sort all variables in descending order.
mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE]
mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE]

distinct(): Select distinct/unique rows

dplyr::distinct() selects unique rows:

df <- tibble(
  x = sample(10, 100, rep = TRUE),
  y = sample(10, 100, rep = TRUE)
)

df %>% distinct(x) # selected columns
#> # A tibble: 10 × 1
#>       x
#>   <int>
#> 1     7
#> 2     1
#> 3     9
#> 4     3
#> # … with 6 more rows
df %>% distinct(x, .keep_all = TRUE) # whole data frame
#> # A tibble: 10 × 2
#>       x     y
#>   <int> <int>
#> 1     7     7
#> 2     1     9
#> 3     9     9
#> 4     3     4
#> # … with 6 more rows

There are two equivalents in base R, depending on whether you want the whole data frame, or just selected variables:

unique(df["x"]) # selected columns
#> # A tibble: 10 × 1
#>       x
#>   <int>
#> 1     7
#> 2     1
#> 3     9
#> 4     3
#> # … with 6 more rows
df[!duplicated(df$x), , drop = FALSE] # whole data frame
#> # A tibble: 10 × 2
#>       x     y
#>   <int> <int>
#> 1     7     7
#> 2     1     9
#> 3     9     9
#> 4     3     4
#> # … with 6 more rows

filter(): Return rows with matching conditions

dplyr::filter() selects rows where an expression is TRUE:

starwars %>% filter(species == "Human")
#> # A tibble: 35 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # … with 31 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
starwars %>% filter(mass > 1000)
#> # A tibble: 1 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Jabba De…    175  1358 <NA>       green-tan… orange           600 herm… mascu…
#> # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
starwars %>% filter(hair_color == "none" & eye_color == "black")
#> # A tibble: 9 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Nien Nunb    160    68 none       grey       black             NA male  mascu…
#> 2 Gasgano      122    NA none       white, bl… black             NA male  mascu…
#> 3 Kit Fisto    196    87 none       green      black             NA male  mascu…
#> 4 Plo Koon     188    80 none       orange     black             22 male  mascu…
#> # … with 5 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

The closest base equivalent (and the inspiration for filter()) is subset():

subset(starwars, species == "Human")
#> # A tibble: 35 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # … with 31 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
subset(starwars, mass > 1000)
#> # A tibble: 1 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Jabba De…    175  1358 <NA>       green-tan… orange           600 herm… mascu…
#> # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
subset(starwars, hair_color == "none" & eye_color == "black")
#> # A tibble: 9 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Nien Nunb    160    68 none       grey       black             NA male  mascu…
#> 2 Gasgano      122    NA none       white, bl… black             NA male  mascu…
#> 3 Kit Fisto    196    87 none       green      black             NA male  mascu…
#> 4 Plo Koon     188    80 none       orange     black             22 male  mascu…
#> # … with 5 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

You can also use [ but this also requires the use of which() to remove NAs:

starwars[which(starwars$species == "Human"), , drop = FALSE]
#> # A tibble: 35 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # … with 31 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
starwars[which(starwars$mass > 1000), , drop = FALSE]
#> # A tibble: 1 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Jabba De…    175  1358 <NA>       green-tan… orange           600 herm… mascu…
#> # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE]
#> # A tibble: 9 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Nien Nunb    160    68 none       grey       black             NA male  mascu…
#> 2 Gasgano      122    NA none       white, bl… black             NA male  mascu…
#> 3 Kit Fisto    196    87 none       green      black             NA male  mascu…
#> 4 Plo Koon     188    80 none       orange     black             22 male  mascu…
#> # … with 5 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

mutate(): Create or transform variables

dplyr::mutate() creates new variables from existing variables:

df %>% mutate(z = x + y, z2 = z ^ 2)
#> # A tibble: 100 × 4
#>       x     y     z    z2
#>   <int> <int> <int> <dbl>
#> 1     7     7    14   196
#> 2     1     9    10   100
#> 3     9     9    18   324
#> 4     9     3    12   144
#> # … with 96 more rows

The closest base equivalent is transform(), but note that it cannot use freshly created variables:

head(transform(df, z = x + y, z2 = (x + y) ^ 2))
#>   x y  z  z2
#> 1 7 7 14 196
#> 2 1 9 10 100
#> 3 9 9 18 324
#> 4 9 3 12 144
#> 5 7 4 11 121
#> 6 3 4  7  49

Alternatively, you can use $<-:

mtcars$cyl2 <- mtcars$cyl * 2
mtcars$cyl4 <- mtcars$cyl2 * 2

When applied to a grouped data frame, dplyr::mutate() computes new variables once per group:

gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5))
gf %>% 
  group_by(g) %>% 
  mutate(x_mean = mean(x), x_rank = rank(x))
#> # A tibble: 4 × 4
#> # Groups:   g [2]
#>       g     x x_mean x_rank
#>   <dbl> <dbl>  <dbl>  <dbl>
#> 1     1   0.5      1      1
#> 2     1   1.5      1      2
#> 3     2   2.5      3      1
#> 4     2   3.5      3      2

To replicate this in base R, you can use ave():

transform(gf, 
  x_mean = ave(x, g, FUN = mean), 
  x_rank = ave(x, g, FUN = rank)
)
#>   g   x x_mean x_rank
#> 1 1 0.5      1      1
#> 2 1 1.5      1      2
#> 3 2 2.5      3      1
#> 4 2 3.5      3      2

pull(): Pull out a single variable

dplyr::pull() extracts a variable either by name or position:

mtcars %>% pull(1)
#>  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
#> [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
#> [31] 15.0 21.4
mtcars %>% pull(cyl)
#>  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4

This is equivalent to [[ for positions and $ for names:

mtcars[[1]]
#>  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
#> [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
#> [31] 15.0 21.4
mtcars$cyl
#>  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4

relocate(): Change column order

dplyr::relocate() makes it easy to move a set of columns to a new position (by default, the front):

# to front
mtcars %>% relocate(gear, carb) 
#> # A tibble: 32 × 13
#>    gear  carb   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1     4     4  21       6   160   110  3.9   2.62  16.5     0     1    12    24
#> 2     4     4  21       6   160   110  3.9   2.88  17.0     0     1    12    24
#> 3     4     1  22.8     4   108    93  3.85  2.32  18.6     1     1     8    16
#> 4     3     1  21.4     6   258   110  3.08  3.22  19.4     1     0    12    24
#> # … with 28 more rows

# to back
mtcars %>% relocate(mpg, cyl, .after = last_col()) 
#> # A tibble: 32 × 13
#>    disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4   mpg   cyl
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1   160   110  3.9   2.62  16.5     0     1     4     4    12    24  21       6
#> 2   160   110  3.9   2.88  17.0     0     1     4     4    12    24  21       6
#> 3   108    93  3.85  2.32  18.6     1     1     4     1     8    16  22.8     4
#> 4   258   110  3.08  3.22  19.4     1     0     3     1    12    24  21.4     6
#> # … with 28 more rows

We can replicate this in base R with a little set manipulation:

mtcars[union(c("gear", "carb"), names(mtcars))]
#> # A tibble: 32 × 13
#>    gear  carb   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1     4     4  21       6   160   110  3.9   2.62  16.5     0     1    12    24
#> 2     4     4  21       6   160   110  3.9   2.88  17.0     0     1    12    24
#> 3     4     1  22.8     4   108    93  3.85  2.32  18.6     1     1     8    16
#> 4     3     1  21.4     6   258   110  3.08  3.22  19.4     1     0    12    24
#> # … with 28 more rows

to_back <- c("mpg", "cyl")
mtcars[c(setdiff(names(mtcars), to_back), to_back)]
#> # A tibble: 32 × 13
#>    disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4   mpg   cyl
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1   160   110  3.9   2.62  16.5     0     1     4     4    12    24  21       6
#> 2   160   110  3.9   2.88  17.0     0     1     4     4    12    24  21       6
#> 3   108    93  3.85  2.32  18.6     1     1     4     1     8    16  22.8     4
#> 4   258   110  3.08  3.22  19.4     1     0     3     1    12    24  21.4     6
#> # … with 28 more rows

Moving columns to somewhere in the middle requires a little more set twiddling.

rename(): Rename variables by name

dplyr::rename() allows you to rename variables by name or position:

iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2)
#> # A tibble: 150 × 5
#>   sepal_length sepal_width Petal.Length Petal.Width Species
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa 
#> 4          4.6         3.1          1.5         0.2 setosa 
#> # … with 146 more rows

Renaming variables by position is straight forward in base R:

iris2 <- iris
names(iris2)[2] <- "sepal_width"

Renaming variables by name requires a bit more work:

names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length"

rename_with(): Rename variables with a function

dplyr::rename_with() transforms column names with a function:

iris %>% rename_with(toupper)
#> # A tibble: 150 × 5
#>   SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa 
#> 4          4.6         3.1          1.5         0.2 setosa 
#> # … with 146 more rows

A similar effect can be achieved with setNames() in base R:

setNames(iris, toupper(names(iris)))
#> # A tibble: 150 × 5
#>   SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa 
#> 4          4.6         3.1          1.5         0.2 setosa 
#> # … with 146 more rows

select(): Select variables by name

dplyr::select() subsets columns by position, name, function of name, or other property:

iris %>% select(1:3)
#> # A tibble: 150 × 3
#>   Sepal.Length Sepal.Width Petal.Length
#>          <dbl>       <dbl>        <dbl>
#> 1          5.1         3.5          1.4
#> 2          4.9         3            1.4
#> 3          4.7         3.2          1.3
#> 4          4.6         3.1          1.5
#> # … with 146 more rows
iris %>% select(Species, Sepal.Length)
#> # A tibble: 150 × 2
#>   Species Sepal.Length
#>   <fct>          <dbl>
#> 1 setosa           5.1
#> 2 setosa           4.9
#> 3 setosa           4.7
#> 4 setosa           4.6
#> # … with 146 more rows
iris %>% select(starts_with("Petal"))
#> # A tibble: 150 × 2
#>   Petal.Length Petal.Width
#>          <dbl>       <dbl>
#> 1          1.4         0.2
#> 2          1.4         0.2
#> 3          1.3         0.2
#> 4          1.5         0.2
#> # … with 146 more rows
iris %>% select(where(is.factor))
#> # A tibble: 150 × 1
#>   Species
#>   <fct>  
#> 1 setosa 
#> 2 setosa 
#> 3 setosa 
#> 4 setosa 
#> # … with 146 more rows

Subsetting variables by position is straightforward in base R:

iris[1:3] # single argument selects columns; never drops
#> # A tibble: 150 × 3
#>   Sepal.Length Sepal.Width Petal.Length
#>          <dbl>       <dbl>        <dbl>
#> 1          5.1         3.5          1.4
#> 2          4.9         3            1.4
#> 3          4.7         3.2          1.3
#> 4          4.6         3.1          1.5
#> # … with 146 more rows
iris[1:3, , drop = FALSE]
#> # A tibble: 3 × 5
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa

You have two options to subset by name:

iris[c("Species", "Sepal.Length")]
#> # A tibble: 150 × 2
#>   Species Sepal.Length
#>   <fct>          <dbl>
#> 1 setosa           5.1
#> 2 setosa           4.9
#> 3 setosa           4.7
#> 4 setosa           4.6
#> # … with 146 more rows
subset(iris, select = c(Species, Sepal.Length))
#> # A tibble: 150 × 2
#>   Species Sepal.Length
#>   <fct>          <dbl>
#> 1 setosa           5.1
#> 2 setosa           4.9
#> 3 setosa           4.7
#> 4 setosa           4.6
#> # … with 146 more rows

Subsetting by function of name requires a bit of work with grep():

iris[grep("^Petal", names(iris))]
#> # A tibble: 150 × 2
#>   Petal.Length Petal.Width
#>          <dbl>       <dbl>
#> 1          1.4         0.2
#> 2          1.4         0.2
#> 3          1.3         0.2
#> 4          1.5         0.2
#> # … with 146 more rows

And you can use Filter() to subset by type:

Filter(is.factor, iris)
#> # A tibble: 150 × 1
#>   Species
#>   <fct>  
#> 1 setosa 
#> 2 setosa 
#> 3 setosa 
#> 4 setosa 
#> # … with 146 more rows

summarise(): Reduce multiple values down to a single value

dplyr::summarise() computes one or more summaries for each group:

mtcars %>% 
  group_by(cyl) %>% 
  summarise(mean = mean(disp), n = n())
#> # A tibble: 3 × 3
#>     cyl  mean     n
#>   <dbl> <dbl> <int>
#> 1     4  105.    11
#> 2     6  183.     7
#> 3     8  353.    14

I think the closest base R equivalent uses by(). Unfortunately by() returns a list of data frames, but you can combine them back together again with do.call() and rbind():

mtcars_by <- by(mtcars, mtcars$cyl, function(df) {
  with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df)))
})
do.call(rbind, mtcars_by)
#>   cyl     mean  n
#> 4   4 105.1364 11
#> 6   6 183.3143  7
#> 8   8 353.1000 14

aggregate() comes very close to providing an elegant answer:

agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x)))
agg
#>   cyl disp.mean   disp.n
#> 1   4  105.1364  11.0000
#> 2   6  183.3143   7.0000
#> 3   8  353.1000  14.0000

But unfortunately while it looks like there are disp.mean and disp.n columns, it’s actually a single matrix column:

str(agg)
#> 'data.frame':    3 obs. of  2 variables:
#>  $ cyl : num  4 6 8
#>  $ disp: num [1:3, 1:2] 105 183 353 11 7 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : NULL
#>   .. ..$ : chr [1:2] "mean" "n"

You can see a variety of other options at https://gist.github.com/hadley/c430501804349d382ce90754936ab8ec.

slice(): Choose rows by position

slice() selects rows by their location:

slice(mtcars, 25:n())
#> # A tibble: 8 × 13
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  19.2     8 400     175  3.08  3.84  17.0     0     0     3     2    16    32
#> 2  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1     8    16
#> 3  26       4 120.     91  4.43  2.14  16.7     0     1     5     2     8    16
#> 4  30.4     4  95.1   113  3.77  1.51  16.9     1     1     5     2     8    16
#> # … with 4 more rows

This is straightforward to replicate with [:

mtcars[25:nrow(mtcars), , drop = FALSE]
#> # A tibble: 8 × 13
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  19.2     8 400     175  3.08  3.84  17.0     0     0     3     2    16    32
#> 2  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1     8    16
#> 3  26       4 120.     91  4.43  2.14  16.7     0     1     5     2     8    16
#> 4  30.4     4  95.1   113  3.77  1.51  16.9     1     1     5     2     8    16
#> # … with 4 more rows

Two-table verbs

When we want to merge two data frames, x and y, we have a variety of different ways to bring them together. Various base R merge() calls are replaced by a variety of dplyr join() functions.

dplyr base
inner_join(df1, df2) merge(df1, df2)
left_join(df1, df2) merge(df1, df2, all.x = TRUE)
right_join(df1, df2) merge(df1, df2, all.y = TRUE)
full_join(df1, df2) merge(df1, df2, all = TRUE)
semi_join(df1, df2) df1[df1$x %in% df2$x, , drop = FALSE]
anti_join(df1, df2) df1[!df1$x %in% df2$x, , drop = FALSE]

For more information about two-table verbs, see vignette("two-table").

Mutating joins

dplyr’s inner_join(), left_join(), right_join(), and full_join() add new columns from y to x, matching rows based on a set of “keys”, and differ only in how missing matches are handled. They are equivalent to calls to merge() with various settings of the all, all.x, and all.y arguments. The main difference is the order of the rows:

  • dplyr preserves the order of the x data frame.
  • merge() sorts the key columns.

Filtering joins

dplyr’s semi_join() and anti_join() affect only the rows, not the columns:

band_members %>% semi_join(band_instruments)
#> Joining, by = "name"
#> # A tibble: 2 × 2
#>   name  band   
#>   <chr> <chr>  
#> 1 John  Beatles
#> 2 Paul  Beatles
band_members %>% anti_join(band_instruments)
#> Joining, by = "name"
#> # A tibble: 1 × 2
#>   name  band  
#>   <chr> <chr> 
#> 1 Mick  Stones

They can be replicated in base R with [ and %in%:

band_members[band_members$name %in% band_instruments$name, , drop = FALSE]
#> # A tibble: 2 × 2
#>   name  band   
#>   <chr> <chr>  
#> 1 John  Beatles
#> 2 Paul  Beatles
band_members[!band_members$name %in% band_instruments$name, , drop = FALSE]
#> # A tibble: 1 × 2
#>   name  band  
#>   <chr> <chr> 
#> 1 Mick  Stones

Semi and anti joins with multiple key variables are considerably more challenging to implement.

dplyr/inst/doc/dplyr.Rmd0000644000176200001440000003301514121112104014711 0ustar liggesusers--- title: "Introduction to dplyr" output: rmarkdown::html_vignette description: > Start here if this is your first time using dplyr. You'll learn the basic philosophy, the most important data manipulation verbs, and the pipe, `%>%`, which allows you to combine multiple verbs together to solve real problems. vignette: > %\VignetteIndexEntry{Introduction to dplyr} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) ``` When working with data you must: * Figure out what you want to do. * Describe those tasks in the form of a computer program. * Execute the program. The dplyr package makes these steps fast and easy: * By constraining your options, it helps you think about your data manipulation challenges. * It provides simple "verbs", functions that correspond to the most common data manipulation tasks, to help you translate your thoughts into code. * It uses efficient backends, so you spend less time waiting for the computer. This document introduces you to dplyr's basic set of tools, and shows you how to apply them to data frames. dplyr also supports databases via the dbplyr package, once you've installed, read `vignette("dbplyr")` to learn more. ## Data: starwars To explore the basic data manipulation verbs of dplyr, we'll use the dataset `starwars`. This dataset contains `r nrow(starwars)` characters and comes from the [Star Wars API](https://swapi.dev), and is documented in `?starwars` ```{r} dim(starwars) starwars ``` Note that `starwars` is a tibble, a modern reimagining of the data frame. It's particularly useful for large datasets because it only prints the first few rows. You can learn more about tibbles at ; in particular you can convert data frames to tibbles with `as_tibble()`. 
## Single table verbs dplyr aims to provide a function for each basic verb of data manipulation. These verbs can be organised into three categories based on the component of the dataset that they work with: * Rows: * `filter()` chooses rows based on column values. * `slice()` chooses rows based on location. * `arrange()` changes the order of the rows. * Columns: * `select()` changes whether or not a column is included. * `rename()` changes the name of columns. * `mutate()` changes the values of columns and creates new columns. * `relocate()` changes the order of the columns. * Groups of rows: * `summarise()` collapses a group into a single row. ### The pipe All of the dplyr functions take a data frame (or tibble) as the first argument. Rather than forcing the user to either save intermediate objects or nest functions, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so the result from one step is then "piped" into the next step. You can use the pipe to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"). ### Filter rows with `filter()` `filter()` allows you to select a subset of rows in a data frame. Like all single verbs, the first argument is the tibble (or data frame). The second and subsequent arguments refer to variables within that data frame, selecting rows where the expression is `TRUE`. For example, we can select all character with light skin color and brown eyes with: ```{r} starwars %>% filter(skin_color == "light", eye_color == "brown") ``` This is roughly equivalent to this base R code: ```{r, eval = FALSE} starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ] ``` ### Arrange rows with `arrange()` `arrange()` works similarly to `filter()` except that instead of filtering or selecting rows, it reorders them. It takes a data frame, and a set of column names (or more complicated expressions) to order by. 
If you provide more than one column name, each additional column will be used to break ties in the values of preceding columns: ```{r} starwars %>% arrange(height, mass) ``` Use `desc()` to order a column in descending order: ```{r} starwars %>% arrange(desc(height)) ``` ### Choose rows using their position with `slice()` `slice()` lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows. We can get characters from row numbers 5 through 10. ```{r} starwars %>% slice(5:10) ``` It is accompanied by a number of helpers for common use cases: * `slice_head()` and `slice_tail()` select the first or last rows. ```{r} starwars %>% slice_head(n = 3) ``` * `slice_sample()` randomly selects rows. Use the option prop to choose a certain proportion of the cases. ```{r} starwars %>% slice_sample(n = 5) starwars %>% slice_sample(prop = 0.1) ``` Use `replace = TRUE` to perform a bootstrap sample. If needed, you can weight the sample with the `weight` argument. * `slice_min()` and `slice_max()` select rows with highest or lowest values of a variable. Note that we first must choose only the values which are not NA. ```{r} starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) ``` ### Select columns with `select()` Often you work with large datasets with many columns but only a few are actually of interest to you. 
`select()` allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions: ```{r} # Select columns by name starwars %>% select(hair_color, skin_color, eye_color) # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) # Select all columns ending with color starwars %>% select(ends_with("color")) ``` There are a number of helper functions you can use within `select()`, like `starts_with()`, `ends_with()`, `matches()` and `contains()`. These let you quickly match larger blocks of variables that meet some criterion. See `?select` for more details. You can rename variables with `select()` by using named arguments: ```{r} starwars %>% select(home_world = homeworld) ``` But because `select()` drops all the variables not explicitly mentioned, it's not that useful. Instead, use `rename()`: ```{r} starwars %>% rename(home_world = homeworld) ``` ### Add new columns with `mutate()` Besides selecting sets of existing columns, it's often useful to add new columns that are functions of existing columns. This is the job of `mutate()`: ```{r} starwars %>% mutate(height_m = height / 100) ``` We can't see the height in meters we just calculated, but we can fix that using a select command. 
```{r} starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) ``` `dplyr::mutate()` is similar to the base `transform()`, but allows you to refer to columns that you've just created: ```{r} starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) ``` If you only want to keep the new variables, use `transmute()`: ```{r} starwars %>% transmute( height_m = height / 100, BMI = mass / (height_m^2) ) ``` ### Change column order with `relocate()` Use a similar syntax as `select()` to move blocks of columns at once ```{r} starwars %>% relocate(sex:homeworld, .before = height) ``` ### Summarise values with `summarise()` The last verb is `summarise()`. It collapses a data frame to a single row. ```{r} starwars %>% summarise(height = mean(height, na.rm = TRUE)) ``` It's not that useful until we learn the `group_by()` verb below. ### Commonalities You may have noticed that the syntax and function of all these verbs are very similar: * The first argument is a data frame. * The subsequent arguments describe what to do with the data frame. You can refer to columns in the data frame directly without using `$`. * The result is a new data frame Together these properties make it easy to chain together multiple simple steps to achieve a complex result. These five functions provide the basis of a language of data manipulation. At the most basic level, you can only alter a tidy data frame in five useful ways: you can reorder the rows (`arrange()`), pick observations and variables of interest (`filter()` and `select()`), add new variables that are functions of existing variables (`mutate()`), or collapse many values to a summary (`summarise()`). ## Combining functions with `%>%` The dplyr API is functional in the sense that function calls don't have side-effects. You must always save their results. This doesn't lead to particularly elegant code, especially if you want to do many operations at once. 
You either have to do it step-by-step: ```{r, eval = FALSE} a1 <- group_by(starwars, species, sex) a2 <- select(a1, height, mass) a3 <- summarise(a2, height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` Or if you don't want to name the intermediate results, you need to wrap the function calls inside each other: ```{r} summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` This is difficult to read because the order of the operations is from inside to out. Thus, the arguments are a long way away from the function. To get around this problem, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"): ```{r, eval = FALSE} starwars %>% group_by(species, sex) %>% select(height, mass) %>% summarise( height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` ## Patterns of operations The dplyr verbs can be classified by the type of operations they accomplish (we sometimes speak of their **semantics**, i.e., their meaning). It's helpful to have a good grasp of the difference between select and mutate operations. ### Selecting operations One of the appealing features of dplyr is that you can refer to columns from the tibble as if they were regular variables. However, the syntactic uniformity of referring to bare column names hides semantical differences across the verbs. A column symbol supplied to `select()` does not have the same meaning as the same symbol supplied to `mutate()`. Selecting operations expect column names and positions. Hence, when you call `select()` with bare variable names, they actually represent their own positions in the tibble. 
The following calls are completely equivalent from dplyr's point of view: ```{r} # `name` represents the integer 1 select(starwars, name) select(starwars, 1) ``` By the same token, this means that you cannot refer to variables from the surrounding context if they have the same name as one of the columns. In the following example, `height` still represents 2, not 5: ```{r} height <- 5 select(starwars, height) ``` One useful subtlety is that this only applies to bare names and to selecting calls like `c(height, mass)` or `height:mass`. In all other cases, the columns of the data frame are not put in scope. This allows you to refer to contextual variables in selection helpers: ```{r} name <- "color" select(starwars, ends_with(name)) ``` These semantics are usually intuitive. But note the subtle difference: ```{r} name <- 5 select(starwars, name, identity(name)) ``` In the first argument, `name` represents its own position `1`. In the second argument, `name` is evaluated in the surrounding context and represents the fifth column. For a long time, `select()` used to only understand column positions. Counting from dplyr 0.6, it now understands column names as well. This makes it a bit easier to program with `select()`: ```{r} vars <- c("name", "height") select(starwars, all_of(vars), "mass") ``` ### Mutating operations Mutate semantics are quite different from selection semantics. Whereas `select()` expects column names or positions, `mutate()` expects *column vectors*. We will set up a smaller tibble to use for our examples. ```{r} df <- starwars %>% select(name, height, mass) ``` When we use `select()`, the bare column names stand for their own positions in the tibble. For `mutate()` on the other hand, column symbols represent the actual column vectors stored in the tibble. Consider what happens if we give a string or a number to `mutate()`: ```{r} mutate(df, "height", 2) ``` `mutate()` gets length-1 vectors that it interprets as new columns in the data frame. 
These vectors are recycled so they match the number of rows. That's why it doesn't make sense to supply expressions like `"height" + 10` to `mutate()`. This amounts to adding 10 to a string! The correct expression is: ```{r} mutate(df, height + 10) ``` In the same way, you can unquote values from the context if these values represent a valid column. They must be either length 1 (they then get recycled) or have the same length as the number of rows. In the following example we create a new vector that we add to the data frame: ```{r} var <- seq(1, nrow(df)) mutate(df, new = var) ``` A case in point is `group_by()`. While you might think it has select semantics, it actually has mutate semantics. This is quite handy as it allows to group by a modified column: ```{r} group_by(starwars, sex) group_by(starwars, sex = as.factor(sex)) group_by(starwars, height_binned = cut(height, 3)) ``` This is why you can't supply a column name to `group_by()`. This amounts to creating a new column containing the string recycled to the number of rows: ```{r} group_by(df, "month") ``` dplyr/inst/doc/colwise.html0000644000176200001440000020107414200154102015452 0ustar liggesusers Column-wise operations

Column-wise operations

It’s often useful to perform the same operation on multiple columns, but copying and pasting is both tedious and error prone:

df %>% 
  group_by(g1, g2) %>% 
  summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d))

(If you’re trying to compute mean(a, b, c, d) for each row, instead see vignette("rowwise"))

This vignette will introduce you to the across() function, which lets you rewrite the previous code more succinctly:

df %>% 
  group_by(g1, g2) %>% 
  summarise(across(a:d, mean))

We’ll start by discussing the basic usage of across(), particularly as it applies to summarise(), and show how to use it with multiple functions. We’ll then show a few uses with other verbs. We’ll finish off with a bit of history, showing why we prefer across() to our last approach (the _if(), _at() and _all() functions) and how to translate your old code to the new syntax.

library(dplyr, warn.conflicts = FALSE)

Basic usage

across() has two primary arguments:

  • The first argument, .cols, selects the columns you want to operate on. It uses tidy selection (like select()) so you can pick variables by position, name, and type.

  • The second argument, .fns, is a function or list of functions to apply to each column. This can also be a purrr style formula (or list of formulas) like ~ .x / 2. (This argument is optional, and you can omit it if you just want to get the underlying data; you’ll see that technique used in vignette("rowwise").)

Here are a couple of examples of across() in conjunction with its favourite verb, summarise(). But you can use across() with any dplyr verb, as you’ll see a little later.

starwars %>% 
  summarise(across(where(is.character), n_distinct))
#> # A tibble: 1 × 8
#>    name hair_color skin_color eye_color   sex gender homeworld species
#>   <int>      <int>      <int>     <int> <int>  <int>     <int>   <int>
#> 1    87         13         31        15     5      3        49      38

starwars %>% 
  group_by(species) %>% 
  filter(n() > 1) %>% 
  summarise(across(c(sex, gender, homeworld), n_distinct))
#> # A tibble: 9 × 4
#>   species    sex gender homeworld
#>   <chr>    <int>  <int>     <int>
#> 1 Droid        1      2         3
#> 2 Gungan       1      1         1
#> 3 Human        2      2        16
#> 4 Kaminoan     2      2         1
#> # … with 5 more rows

starwars %>% 
  group_by(homeworld) %>% 
  filter(n() > 1) %>% 
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))
#> # A tibble: 10 × 4
#>   homeworld height  mass birth_year
#>   <chr>      <dbl> <dbl>      <dbl>
#> 1 Alderaan    176.  64         43  
#> 2 Corellia    175   78.5       25  
#> 3 Coruscant   174.  50         91  
#> 4 Kamino      208.  83.1       31.5
#> # … with 6 more rows

Because across() is usually used in combination with summarise() and mutate(), it doesn’t select grouping variables in order to avoid accidentally modifying them:

df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9))
df %>% 
  group_by(g) %>% 
  summarise(across(where(is.numeric), sum))
#> # A tibble: 2 × 3
#>       g     x     y
#>   <dbl> <dbl> <dbl>
#> 1     1     0    -5
#> 2     2     3    -9

Multiple functions

You can transform each variable with more than one function by supplying a named list of functions or lambda functions in the second argument:

min_max <- list(
  min = ~min(.x, na.rm = TRUE), 
  max = ~max(.x, na.rm = TRUE)
)
starwars %>% summarise(across(where(is.numeric), min_max))
#> # A tibble: 1 × 6
#>   height_min height_max mass_min mass_max birth_year_min birth_year_max
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896
starwars %>% summarise(across(c(height, mass, birth_year), min_max))
#> # A tibble: 1 × 6
#>   height_min height_max mass_min mass_max birth_year_min birth_year_max
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896

Control how the names are created with the .names argument which takes a glue spec:

starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}"))
#> # A tibble: 1 × 6
#>   min.height max.height min.mass max.mass min.birth_year max.birth_year
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896
starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}"))
#> # A tibble: 1 × 6
#>   min.height max.height min.mass max.mass min.birth_year max.birth_year
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896

If you’d prefer all summaries with the same function to be grouped together, you’ll have to expand the calls yourself:

starwars %>% summarise(
  across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"),
  across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}")
)
#> # A tibble: 1 × 6
#>   min_height min_mass min_birth_year max_height max_mass max_birth_year
#>        <int>    <dbl>          <dbl>      <int>    <dbl>          <dbl>
#> 1         66       15              8        264     1358            896

(One day this might become an argument to across() but we’re not yet sure how it would work.)

We cannot however use where(is.numeric) in that last case because the second across() would pick up the variables that were newly created (“min_height”, “min_mass” and “min_birth_year”).

We can work around this by combining both calls to across() into a single expression that returns a tibble:

starwars %>% summarise(
  tibble(
    across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"),
    across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}")  
  )
)
#> # A tibble: 1 × 6
#>   min_height min_mass min_birth_year max_height max_mass max_birth_year
#>        <int>    <dbl>          <dbl>      <int>    <dbl>          <dbl>
#> 1         66       15              8        264     1358            896

Alternatively we could reorganize results with relocate():

starwars %>% 
  summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% 
  relocate(starts_with("min"))
#> # A tibble: 1 × 6
#>   min.height min.mass min.birth_year max.height max.mass max.birth_year
#>        <int>    <dbl>          <dbl>      <int>    <dbl>          <dbl>
#> 1         66       15              8        264     1358            896

Current column

If you need to, you can access the name of the “current” column inside by calling cur_column(). This can be useful if you want to perform some sort of context dependent transformation that’s already encoded in a vector:

df <- tibble(x = 1:3, y = 3:5, z = 5:7)
mult <- list(x = 1, y = 10, z = 100)

df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]]))
#> # A tibble: 3 × 3
#>       x     y     z
#>   <dbl> <dbl> <dbl>
#> 1     1    30   500
#> 2     2    40   600
#> 3     3    50   700

Gotchas

Be careful when combining numeric summaries with where(is.numeric):

df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9))

df %>% 
  summarise(n = n(), across(where(is.numeric), sd))
#>    n x        y
#> 1 NA 1 4.041452

Here n becomes NA because n is numeric, so the across() computes its standard deviation, and the standard deviation of 3 (a constant) is NA. You probably want to compute n() last to avoid this problem:

df %>% 
  summarise(across(where(is.numeric), sd), n = n())
#>   x        y n
#> 1 1 4.041452 3

Alternatively, you could explicitly exclude n from the columns to operate on:

df %>% 
  summarise(n = n(), across(where(is.numeric) & !n, sd))
#>   n x        y
#> 1 3 1 4.041452

Another approach is to combine both the call to n() and across() in a single expression that returns a tibble:

df %>% 
  summarise(
    tibble(n = n(), across(where(is.numeric), sd))
  )
#>   n x        y
#> 1 3 1 4.041452

Other verbs

So far we’ve focused on the use of across() with summarise(), but it works with any other dplyr verb that uses data masking:

  • Rescale all numeric variables to range 0-1:

    rescale01 <- function(x) {
      rng <- range(x, na.rm = TRUE)
      (x - rng[1]) / (rng[2] - rng[1])
    }
    df <- tibble(x = 1:4, y = rnorm(4))
    df %>% mutate(across(where(is.numeric), rescale01))
    #> # A tibble: 4 × 2
    #>       x     y
    #>   <dbl> <dbl>
    #> 1 0     0.385
    #> 2 0.333 1    
    #> 3 0.667 0    
    #> 4 1     0.903

For some verbs, like group_by(), count() and distinct(), you can omit the summary functions:

  • Find all distinct

    starwars %>% distinct(across(contains("color")))
    #> # A tibble: 67 × 3
    #>   hair_color skin_color  eye_color
    #>   <chr>      <chr>       <chr>    
    #> 1 blond      fair        blue     
    #> 2 <NA>       gold        yellow   
    #> 3 <NA>       white, blue red      
    #> 4 none       white       yellow   
    #> # … with 63 more rows
  • Count all combinations of variables with a given pattern:

    starwars %>% count(across(contains("color")), sort = TRUE)
    #> # A tibble: 67 × 4
    #>   hair_color skin_color eye_color     n
    #>   <chr>      <chr>      <chr>     <int>
    #> 1 brown      light      brown         6
    #> 2 brown      fair       blue          4
    #> 3 none       grey       black         4
    #> 4 black      dark       brown         3
    #> # … with 63 more rows

across() doesn’t work with select() or rename() because they already use tidy select syntax; if you want to transform column names with a function, you can use rename_with().

filter()

We cannot directly use across() in filter() because we need an extra step to combine the results. To that end, filter() has two special purpose companion functions:

  • if_any() keeps the rows where the predicate is true for at least one selected column:
starwars %>% 
  filter(if_any(everything(), ~ !is.na(.x)))
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
  • if_all() keeps the rows where the predicate is true for all selected columns:
starwars %>% 
  filter(if_all(everything(), ~ !is.na(.x)))
#> # A tibble: 29 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # … with 25 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>
  • Find all rows where no variable has missing values:

    starwars %>% filter(across(everything(), ~ !is.na(.x)))
    #> Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or
    #> `if_all()`.
    #> # A tibble: 29 × 14
    #>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
    #>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
    #> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
    #> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
    #> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
    #> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
    #> # … with 25 more rows, and 5 more variables: homeworld <chr>, species <chr>,
    #> #   films <list>, vehicles <list>, starships <list>

_if, _at, _all

Prior versions of dplyr allowed you to apply a function to multiple columns in a different way: using functions with _if(), _at(), and _all() suffixes. These functions solved a pressing need and are used by many people, but are now superseded. That means that they’ll stay around, but won’t receive any new features and will only get critical bug fixes.

Why do we like across()?

Why did we decide to move away from these functions in favour of across()?

  1. across() makes it possible to express useful summaries that were previously impossible:

    df %>%
      group_by(g1, g2) %>% 
      summarise(
        across(where(is.numeric), mean), 
        across(where(is.factor), nlevels),
        n = n(), 
      )
  2. across() reduces the number of functions that dplyr needs to provide. This makes dplyr easier for you to use (because there are fewer functions to remember) and easier for us to implement new verbs (since we only need to implement one function, not four).

  3. across() unifies _if and _at semantics so that you can select by position, name, and type, and you can now create compound selections that were previously impossible. For example, you can now transform all numeric columns whose name begins with “x”: across(where(is.numeric) & starts_with("x")).

  4. across() doesn’t need to use vars(). The _at() functions are the only place in dplyr where you have to manually quote variable names, which makes them a little weird and hence harder to remember.

Why did it take so long to discover across()?

It’s disappointing that we didn’t discover across() earlier, and instead worked through several false starts (first not realising that it was a common problem, then with the _each() functions, and most recently with the _if()/_at()/_all() functions). But across() couldn’t work without three recent discoveries:

  • You can have a column of a data frame that is itself a data frame. This is something provided by base R, but it’s not very well documented, and it took a while to see that it was useful, not just a theoretical curiosity.

  • We can use data frames to allow summary functions to return multiple columns.

  • We can use the absence of an outer name as a convention that you want to unpack a data frame column into individual columns.

How do you convert existing code?

Fortunately, it’s generally straightforward to translate your existing code to use across():

  • Strip the _if(), _at() and _all() suffix off the function.

  • Call across(). The first argument will be:

    1. For _if(), the old second argument wrapped in where().
    2. For _at(), the old second argument, with the call to vars() removed.
    3. For _all(), everything().

    The subsequent arguments can be copied as is.

For example:

df %>% mutate_if(is.numeric, mean, na.rm = TRUE)
# ->
df %>% mutate(across(where(is.numeric), mean, na.rm = TRUE))

df %>% mutate_at(vars(c(x, starts_with("y"))), mean)
# ->
df %>% mutate(across(c(x, starts_with("y")), mean))

df %>% mutate_all(mean)
# ->
df %>% mutate(across(everything(), mean))

There are a few exceptions to this rule:

  • rename_*() and select_*() follow a different pattern. They already have select semantics, so are generally used in a different way that doesn’t have a direct equivalent with across(); use the new rename_with() instead.

  • Previously, filter_*() were paired with the all_vars() and any_vars() helpers. The new helpers if_any() and if_all() can be used inside filter() to keep rows for which the predicate is true for at least one, or all selected columns:

    df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1))
    
    # Find all rows where EVERY numeric variable is greater than zero
    df %>% filter(if_all(where(is.numeric), ~ .x > 0))
    #> # A tibble: 1 × 3
    #>   x         y     z
    #>   <chr> <dbl> <dbl>
    #> 1 b         1     1
    
    # Find all rows where ANY numeric variable is greater than zero
    df %>% filter(if_any(where(is.numeric), ~ .x > 0))
    #> # A tibble: 2 × 3
    #>   x         y     z
    #>   <chr> <dbl> <dbl>
    #> 1 a         1    -1
    #> 2 b         1     1
  • When used in a mutate(), all transformations performed by an across() are applied at once. This is different to the behaviour of mutate_if(), mutate_at(), and mutate_all(), which apply the transformations one at a time. We expect that you’ll generally find the new behaviour less surprising:

    df <- tibble(x = 2, y = 4, z = 8)
    df %>% mutate_all(~ .x / y)
    #> # A tibble: 1 × 3
    #>       x     y     z
    #>   <dbl> <dbl> <dbl>
    #> 1   0.5     1     8
    
    df %>% mutate(across(everything(), ~ .x / y))
    #> # A tibble: 1 × 3
    #>       x     y     z
    #>   <dbl> <dbl> <dbl>
    #> 1   0.5     1     2
dplyr/inst/doc/two-table.html0000644000176200001440000013442714200154113015714 0ustar liggesusers Two-table verbs

Two-table verbs

It’s rare that a data analysis involves only a single table of data. In practice, you’ll normally have many tables that contribute to an analysis, and you need flexible tools to combine them. In dplyr, there are three families of verbs that work with two tables at a time:

  • Mutating joins, which add new variables to one table from matching rows in another.

  • Filtering joins, which filter observations from one table based on whether or not they match an observation in the other table.

  • Set operations, which combine the observations in the data sets as if they were set elements.

(This discussion assumes that you have tidy data, where the rows are observations and the columns are variables. If you’re not familiar with that framework, I’d recommend reading up on it first.)

All two-table verbs work similarly. The first two arguments are x and y, and provide the tables to combine. The output is always a new table with the same type as x.

Mutating joins

Mutating joins allow you to combine variables from multiple tables. For example, consider the flights and airlines data from the nycflights13 package. In one table we have flight information with an abbreviation for carrier, and in another we have a mapping between abbreviations and full names. You can use a join to add the carrier names to the flight data:

library(nycflights13)
# Drop unimportant variables so it's easier to understand the join results.
flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier)

flights2 %>% 
  left_join(airlines)
#> Joining, by = "carrier"
#> # A tibble: 336,776 × 9
#>    year month   day  hour origin dest  tailnum carrier name                  
#>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>                 
#> 1  2013     1     1     5 EWR    IAH   N14228  UA      United Air Lines Inc. 
#> 2  2013     1     1     5 LGA    IAH   N24211  UA      United Air Lines Inc. 
#> 3  2013     1     1     5 JFK    MIA   N619AA  AA      American Airlines Inc.
#> 4  2013     1     1     5 JFK    BQN   N804JB  B6      JetBlue Airways       
#> 5  2013     1     1     6 LGA    ATL   N668DN  DL      Delta Air Lines Inc.  
#> # … with 336,771 more rows

Controlling how the tables are matched

As well as x and y, each mutating join takes an argument by that controls which variables are used to match observations in the two tables. There are a few ways to specify it, as I illustrate below with various tables from nycflights13:

  • NULL, the default. dplyr will use all variables that appear in both tables, a natural join. For example, the flights and weather tables match on their common variables: year, month, day, hour and origin.

    flights2 %>% left_join(weather)
    #> Joining, by = c("year", "month", "day", "hour", "origin")
    #> # A tibble: 336,776 × 18
    #>    year month   day  hour origin dest  tailnum carrier  temp  dewp humid
    #>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <dbl> <dbl> <dbl>
    #> 1  2013     1     1     5 EWR    IAH   N14228  UA       39.0  28.0  64.4
    #> 2  2013     1     1     5 LGA    IAH   N24211  UA       39.9  25.0  54.8
    #> 3  2013     1     1     5 JFK    MIA   N619AA  AA       39.0  27.0  61.6
    #> 4  2013     1     1     5 JFK    BQN   N804JB  B6       39.0  27.0  61.6
    #> 5  2013     1     1     6 LGA    ATL   N668DN  DL       39.9  25.0  54.8
    #> # … with 336,771 more rows, and 7 more variables: wind_dir <dbl>,
    #> #   wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
    #> #   visib <dbl>, time_hour <dttm>
  • A character vector, by = "x". Like a natural join, but uses only some of the common variables. For example, flights and planes have year columns, but they mean different things so we only want to join by tailnum.

    flights2 %>% left_join(planes, by = "tailnum")
    #> # A tibble: 336,776 × 16
    #>   year.x month   day  hour origin dest  tailnum carrier year.y type             
    #>    <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>    <int> <chr>            
    #> 1   2013     1     1     5 EWR    IAH   N14228  UA        1999 Fixed wing multi…
    #> 2   2013     1     1     5 LGA    IAH   N24211  UA        1998 Fixed wing multi…
    #> 3   2013     1     1     5 JFK    MIA   N619AA  AA        1990 Fixed wing multi…
    #> 4   2013     1     1     5 JFK    BQN   N804JB  B6        2012 Fixed wing multi…
    #> 5   2013     1     1     6 LGA    ATL   N668DN  DL        1991 Fixed wing multi…
    #> # … with 336,771 more rows, and 6 more variables: manufacturer <chr>,
    #> #   model <chr>, engines <int>, seats <int>, speed <int>, engine <chr>

    Note that the year columns in the output are disambiguated with a suffix.

  • A named character vector: by = c("x" = "a"). This will match variable x in table x to variable a in table y. The variables from x will be used in the output.

    Each flight has an origin and destination airport, so we need to specify which one we want to join to:

    flights2 %>% left_join(airports, c("dest" = "faa"))
    #> # A tibble: 336,776 × 15
    #>    year month   day  hour origin dest  tailnum carrier name      lat   lon   alt
    #>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>   <dbl> <dbl> <dbl>
    #> 1  2013     1     1     5 EWR    IAH   N14228  UA      George…  30.0 -95.3    97
    #> 2  2013     1     1     5 LGA    IAH   N24211  UA      George…  30.0 -95.3    97
    #> 3  2013     1     1     5 JFK    MIA   N619AA  AA      Miami …  25.8 -80.3     8
    #> 4  2013     1     1     5 JFK    BQN   N804JB  B6      <NA>     NA    NA      NA
    #> 5  2013     1     1     6 LGA    ATL   N668DN  DL      Hartsf…  33.6 -84.4  1026
    #> # … with 336,771 more rows, and 3 more variables: tz <dbl>, dst <chr>,
    #> #   tzone <chr>
    flights2 %>% left_join(airports, c("origin" = "faa"))
    #> # A tibble: 336,776 × 15
    #>    year month   day  hour origin dest  tailnum carrier name      lat   lon   alt
    #>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>   <dbl> <dbl> <dbl>
    #> 1  2013     1     1     5 EWR    IAH   N14228  UA      Newark…  40.7 -74.2    18
    #> 2  2013     1     1     5 LGA    IAH   N24211  UA      La Gua…  40.8 -73.9    22
    #> 3  2013     1     1     5 JFK    MIA   N619AA  AA      John F…  40.6 -73.8    13
    #> 4  2013     1     1     5 JFK    BQN   N804JB  B6      John F…  40.6 -73.8    13
    #> 5  2013     1     1     6 LGA    ATL   N668DN  DL      La Gua…  40.8 -73.9    22
    #> # … with 336,771 more rows, and 3 more variables: tz <dbl>, dst <chr>,
    #> #   tzone <chr>

Types of join

There are four types of mutating join, which differ in their behaviour when a match is not found. We’ll illustrate each with a simple example:

df1 <- tibble(x = c(1, 2), y = 2:1)
df2 <- tibble(x = c(3, 1), a = 10, b = "a")
  • inner_join(x, y) only includes observations that match in both x and y.

    df1 %>% inner_join(df2) %>% knitr::kable()
    #> Joining, by = "x"
    x y a b
    1 2 10 a
  • left_join(x, y) includes all observations in x, regardless of whether they match or not. This is the most commonly used join because it ensures that you don’t lose observations from your primary table.

    df1 %>% left_join(df2)
    #> Joining, by = "x"
    #> # A tibble: 2 × 4
    #>       x     y     a b    
    #>   <dbl> <int> <dbl> <chr>
    #> 1     1     2    10 a    
    #> 2     2     1    NA <NA>
  • right_join(x, y) includes all observations in y. It’s equivalent to left_join(y, x), but the columns and rows will be ordered differently.

    df1 %>% right_join(df2)
    #> Joining, by = "x"
    #> # A tibble: 2 × 4
    #>       x     y     a b    
    #>   <dbl> <int> <dbl> <chr>
    #> 1     1     2    10 a    
    #> 2     3    NA    10 a
    df2 %>% left_join(df1)
    #> Joining, by = "x"
    #> # A tibble: 2 × 4
    #>       x     a b         y
    #>   <dbl> <dbl> <chr> <int>
    #> 1     3    10 a        NA
    #> 2     1    10 a         2
  • full_join() includes all observations from x and y.

    df1 %>% full_join(df2)
    #> Joining, by = "x"
    #> # A tibble: 3 × 4
    #>       x     y     a b    
    #>   <dbl> <int> <dbl> <chr>
    #> 1     1     2    10 a    
    #> 2     2     1    NA <NA> 
    #> 3     3    NA    10 a

The left, right and full joins are collectively known as outer joins. When a row doesn’t match in an outer join, the new variables are filled in with missing values.

Observations

While mutating joins are primarily used to add new variables, they can also generate new observations. If a match is not unique, a join will add all possible combinations (the Cartesian product) of the matching observations:

df1 <- tibble(x = c(1, 1, 2), y = 1:3)
df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a"))

df1 %>% left_join(df2)
#> Joining, by = "x"
#> # A tibble: 5 × 3
#>       x     y z    
#>   <dbl> <int> <chr>
#> 1     1     1 a    
#> 2     1     1 b    
#> 3     1     2 a    
#> 4     1     2 b    
#> 5     2     3 a

Filtering joins

Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. There are two types:

  • semi_join(x, y) keeps all observations in x that have a match in y.
  • anti_join(x, y) drops all observations in x that have a match in y.

These are most useful for diagnosing join mismatches. For example, there are many flights in the nycflights13 dataset that don’t have a matching tail number in the planes table:

library("nycflights13")
flights %>% 
  anti_join(planes, by = "tailnum") %>% 
  count(tailnum, sort = TRUE)
#> # A tibble: 722 × 2
#>   tailnum     n
#>   <chr>   <int>
#> 1 <NA>     2512
#> 2 N725MQ    575
#> 3 N722MQ    513
#> 4 N723MQ    507
#> 5 N713MQ    483
#> # … with 717 more rows

If you’re worried about what observations your joins will match, start with a semi_join() or anti_join(). semi_join() and anti_join() never duplicate; they only ever remove observations.

df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4)
df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a"))

# Four rows to start with:
df1 %>% nrow()
#> [1] 4
# And we get four rows after the join
df1 %>% inner_join(df2, by = "x") %>% nrow()
#> [1] 4
# But only two rows actually match
df1 %>% semi_join(df2, by = "x") %>% nrow()
#> [1] 2

Set operations

The final type of two-table verb is set operations. These expect the x and y inputs to have the same variables, and treat the observations like sets:

  • intersect(x, y): return only observations in both x and y
  • union(x, y): return unique observations in x and y
  • setdiff(x, y): return observations in x, but not in y.

Given this simple data:

(df1 <- tibble(x = 1:2, y = c(1L, 1L)))
#> # A tibble: 2 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
#> 2     2     1
(df2 <- tibble(x = 1:2, y = 1:2))
#> # A tibble: 2 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
#> 2     2     2

The four possibilities are:

intersect(df1, df2)
#> # A tibble: 1 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
# Note that we get 3 rows, not 4
union(df1, df2)
#> # A tibble: 3 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
#> 2     2     1
#> 3     2     2
setdiff(df1, df2)
#> # A tibble: 1 × 2
#>       x     y
#>   <int> <int>
#> 1     2     1
setdiff(df2, df1)
#> # A tibble: 1 × 2
#>       x     y
#>   <int> <int>
#> 1     2     2

Multiple-table verbs

dplyr does not provide any functions for working with three or more tables. Instead use purrr::reduce() or Reduce(), as described in Advanced R, to iteratively combine the two-table verbs to handle as many tables as you need.

dplyr/inst/doc/window-functions.html0000644000176200001440000011553714200154114017335 0ustar liggesusers Window functions

Window functions

A window function is a variation on an aggregation function. Where an aggregation function, like sum() and mean(), takes n inputs and returns a single value, a window function returns n values. The output of a window function depends on all its input values, so window functions don’t include functions that work element-wise, like + or round(). Window functions include variations on aggregate functions, like cumsum() and cummean(), functions for ranking and ordering, like rank(), and functions for taking offsets, like lead() and lag().

In this vignette, we’ll use a small sample of the Lahman batting dataset, including the players that have won an award.

library(Lahman)

batting <- Lahman::Batting %>%
  as_tibble() %>%
  select(playerID, yearID, teamID, G, AB:H) %>%
  arrange(playerID, yearID, teamID) %>%
  semi_join(Lahman::AwardsPlayers, by = "playerID")

players <- batting %>% group_by(playerID)

Window functions are used in conjunction with mutate() and filter() to solve a wide range of problems. Here’s a selection:

# For each player, find the two years with most hits
filter(players, min_rank(desc(H)) <= 2 & H > 0)
# Within each player, rank each year by the number of games played
mutate(players, G_rank = min_rank(G))

# For each player, find every year that was better than the previous year
filter(players, G > lag(G))
# For each player, compute avg change in games played per year
mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID)))

# For each player, find all years where they played more games than they did on average
filter(players, G > mean(G))
# For each player, compute a z score based on number of games played
mutate(players, G_z = (G - mean(G)) / sd(G))

Before reading this vignette, you should be familiar with mutate() and filter().

Types of window functions

There are five main families of window functions. Two families are unrelated to aggregation functions:

  • Ranking and ordering functions: row_number(), min_rank(), dense_rank(), cume_dist(), percent_rank(), and ntile(). These functions all take a vector to order by, and return various types of ranks.

  • Offsets lead() and lag() allow you to access the previous and next values in a vector, making it easy to compute differences and trends.

The other three families are variations on familiar aggregate functions:

  • Cumulative aggregates: cumsum(), cummin(), cummax() (from base R), and cumall(), cumany(), and cummean() (from dplyr).

  • Rolling aggregates operate in a fixed width window. You won’t find them in base R or in dplyr, but there are many implementations in other packages, such as RcppRoll.

  • Recycled aggregates, where an aggregate is repeated to match the length of the input. These are not needed in R because vector recycling automatically recycles aggregates where needed. They are important in SQL, because the presence of an aggregation function usually tells the database to return only one row per group.

Each family is described in more detail below, focussing on the general goals and how to use them with dplyr. For more details, refer to the individual function documentation.

Ranking functions

The ranking functions are variations on a theme, differing in how they handle ties:

x <- c(1, 1, 2, 2, 2)

row_number(x)
#> [1] 1 2 3 4 5
min_rank(x)
#> [1] 1 1 3 3 3
dense_rank(x)
#> [1] 1 1 2 2 2

If you’re familiar with R, you may recognise that row_number() and min_rank() can be computed with the base rank() function and various values of the ties.method argument. These functions are provided to save a little typing, and to make it easier to convert between R and SQL.

Two other ranking functions return numbers between 0 and 1. percent_rank() gives the percentage of the rank; cume_dist() gives the proportion of values less than or equal to the current value.

cume_dist(x)
#> [1] 0.4 0.4 1.0 1.0 1.0
percent_rank(x)
#> [1] 0.0 0.0 0.5 0.5 0.5

These are useful if you want to select (for example) the top 10% of records within each group. For example:

filter(players, cume_dist(desc(G)) < 0.1)
#> # A tibble: 1,057 × 7
#> # Groups:   playerID [964]
#>   playerID  yearID teamID     G    AB     R     H
#>   <chr>      <int> <fct>  <int> <int> <int> <int>
#> 1 aaronha01   1963 ML1      161   631   121   201
#> 2 aaronha01   1968 ATL      160   606    84   174
#> 3 abbotji01   1991 CAL       34     0     0     0
#> 4 abernte02   1965 CHN       84    18     1     3
#> # … with 1,053 more rows

Finally, ntile() divides the data up into n evenly sized buckets. It’s a coarse ranking, and it can be used with mutate() to divide the data into buckets for further summary. For example, we could use ntile() to divide the players within a team into four ranked groups, and calculate the average number of games within each group.

by_team_player <- group_by(batting, teamID, playerID)
by_team <- summarise(by_team_player, G = sum(G))
#> `summarise()` has grouped output by 'teamID'. You can override using the
#> `.groups` argument.
by_team_quartile <- group_by(by_team, quartile = ntile(G, 4))
summarise(by_team_quartile, mean(G))
#> # A tibble: 4 × 2
#>   quartile `mean(G)`
#>      <int>     <dbl>
#> 1        1      22.9
#> 2        2      92.3
#> 3        3     256. 
#> 4        4     974.

All ranking functions rank from lowest to highest so that small input values get small ranks. Use desc() to rank from highest to lowest.

Lead and lag

lead() and lag() produce offset versions of an input vector that is either ahead of or behind the original vector.

x <- 1:5
lead(x)
#> [1]  2  3  4  5 NA
lag(x)
#> [1] NA  1  2  3  4

You can use them to:

  • Compute differences or percent changes.

    # Compute the relative change in games played
    mutate(players, G_delta = G - lag(G))

    Using lag() is more convenient than diff() because for n inputs diff() returns n - 1 outputs.

  • Find out when a value changes.

    # Find when a player changed teams
    filter(players, teamID != lag(teamID))

lead() and lag() have an optional argument order_by. If set, instead of using the row order to determine which value comes before another, they will use another variable. This is important if you have not already sorted the data, or you want to sort one way and lag another.

Here’s a simple example of what happens if you don’t specify order_by when you need it:

df <- data.frame(year = 2000:2005, value = (0:5) ^ 2)
scrambled <- df[sample(nrow(df)), ]

wrong <- mutate(scrambled, prev_value = lag(value))
arrange(wrong, year)
#>   year value prev_value
#> 1 2000     0          4
#> 2 2001     1          0
#> 3 2002     4          9
#> 4 2003     9         16
#> 5 2004    16         NA
#> 6 2005    25          1

right <- mutate(scrambled, prev_value = lag(value, order_by = year))
arrange(right, year)
#>   year value prev_value
#> 1 2000     0         NA
#> 2 2001     1          0
#> 3 2002     4          1
#> 4 2003     9          4
#> 5 2004    16          9
#> 6 2005    25         16

Cumulative aggregates

Base R provides cumulative sum (cumsum()), cumulative min (cummin()), and cumulative max (cummax()). (It also provides cumprod() but that is rarely useful). Other common accumulating functions are cumany() and cumall(), cumulative versions of || and &&, and cummean(), a cumulative mean. These are not included in base R, but efficient versions are provided by dplyr.

cumany() and cumall() are useful for selecting all rows up to, or all rows after, a condition is true for the first (or last) time. For example, we can use cumany() to find all records for a player after they played a year with 150 games:

filter(players, cumany(G > 150))

Like lead and lag, you may want to control the order in which the accumulation occurs. None of the built in functions have an order_by argument so dplyr provides a helper: order_by(). You give it the variable you want to order by, and then the call to the window function:

x <- 1:10
y <- 10:1
order_by(y, cumsum(x))
#>  [1] 55 54 52 49 45 40 34 27 19 10

This function uses a bit of non-standard evaluation, so I wouldn’t recommend using it inside another function; use the simpler but less concise with_order() instead.

Recycled aggregates

R’s vector recycling makes it easy to select values that are higher or lower than a summary. I call this a recycled aggregate because the value of the aggregate is recycled to be the same length as the original vector. Recycled aggregates are useful if you want to find all records greater than the mean or less than the median:

filter(players, G > mean(G))
filter(players, G < median(G))

While most SQL databases don’t have an equivalent of median() or quantile(), when filtering you can achieve the same effect with ntile(). For example, x > median(x) is equivalent to ntile(x, 2) == 2; x > quantile(x, 0.75) is equivalent to ntile(x, 100) > 75 or ntile(x, 4) > 3.

filter(players, ntile(G, 2) == 2)

You can also use this idea to select the records with the highest (x == max(x)) or lowest value (x == min(x)) for a field, but the ranking functions give you more control over ties, and allow you to select any number of records.

Recycled aggregates are also useful in conjunction with mutate(). For example, with the batting data, we could compute the “career year”, the number of years a player has played since they entered the league:

mutate(players, career_year = yearID - min(yearID) + 1)
#> # A tibble: 20,087 × 8
#> # Groups:   playerID [1,359]
#>   playerID  yearID teamID     G    AB     R     H career_year
#>   <chr>      <int> <fct>  <int> <int> <int> <int>       <dbl>
#> 1 aaronha01   1954 ML1      122   468    58   131           1
#> 2 aaronha01   1955 ML1      153   602   105   189           2
#> 3 aaronha01   1956 ML1      153   609   106   200           3
#> 4 aaronha01   1957 ML1      151   615   118   198           4
#> # … with 20,083 more rows

Or, as in the introductory example, we could compute a z-score:

mutate(players, G_z = (G - mean(G)) / sd(G))
#> # A tibble: 20,087 × 8
#> # Groups:   playerID [1,359]
#>   playerID  yearID teamID     G    AB     R     H    G_z
#>   <chr>      <int> <fct>  <int> <int> <int> <int>  <dbl>
#> 1 aaronha01   1954 ML1      122   468    58   131 -1.16 
#> 2 aaronha01   1955 ML1      153   602   105   189  0.519
#> 3 aaronha01   1956 ML1      153   609   106   200  0.519
#> 4 aaronha01   1957 ML1      151   615   118   198  0.411
#> # … with 20,083 more rows
dplyr/inst/doc/two-table.R0000644000176200001440000000512314200154113015137 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 5) library(dplyr) ## ---- warning = FALSE--------------------------------------------------------- library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) ## ----------------------------------------------------------------------------- flights2 %>% left_join(weather) ## ----------------------------------------------------------------------------- flights2 %>% left_join(planes, by = "tailnum") ## ----------------------------------------------------------------------------- flights2 %>% left_join(airports, c("dest" = "faa")) flights2 %>% left_join(airports, c("origin" = "faa")) ## ----------------------------------------------------------------------------- df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = "a") ## ----------------------------------------------------------------------------- df1 %>% inner_join(df2) %>% knitr::kable() ## ----------------------------------------------------------------------------- df1 %>% left_join(df2) ## ----------------------------------------------------------------------------- df1 %>% right_join(df2) df2 %>% left_join(df1) ## ----------------------------------------------------------------------------- df1 %>% full_join(df2) ## ----------------------------------------------------------------------------- df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) df1 %>% left_join(df2) ## ----------------------------------------------------------------------------- library("nycflights13") flights %>% anti_join(planes, by = "tailnum") %>% count(tailnum, sort = TRUE) ## 
----------------------------------------------------------------------------- df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) # Four rows to start with: df1 %>% nrow() # And we get four rows after the join df1 %>% inner_join(df2, by = "x") %>% nrow() # But only two rows actually match df1 %>% semi_join(df2, by = "x") %>% nrow() ## ----------------------------------------------------------------------------- (df1 <- tibble(x = 1:2, y = c(1L, 1L))) (df2 <- tibble(x = 1:2, y = 1:2)) ## ----------------------------------------------------------------------------- intersect(df1, df2) # Note that we get 3 rows, not 4 union(df1, df2) setdiff(df1, df2) setdiff(df2, df1) dplyr/inst/doc/programming.html0000644000176200001440000013565214200154107016344 0ustar liggesusers Programming with dplyr

Programming with dplyr

Introduction

Most dplyr verbs use tidy evaluation in some way. Tidy evaluation is a special type of non-standard evaluation used throughout the tidyverse. There are two basic forms found in dplyr:

  • arrange(), count(), filter(), group_by(), mutate(), and summarise() use data masking so that you can use data variables as if they were variables in the environment (i.e. you write my_variable not df$my_variable).

  • across(), relocate(), rename(), select(), and pull() use tidy selection so you can easily choose variables based on their position, name, or type (e.g. starts_with("x") or is.numeric).

To determine whether a function argument uses data masking or tidy selection, look at the documentation: in the arguments list, you’ll see <data-masking> or <tidy-select>.

Data masking and tidy selection make interactive data exploration fast and fluid, but they add some new challenges when you attempt to use them indirectly such as in a for loop or a function. This vignette shows you how to overcome those challenges. We’ll first go over the basics of data masking and tidy selection, talk about how to use them indirectly, and then show you a number of recipes to solve common problems.

This vignette will give you the minimum knowledge you need to be an effective programmer with tidy evaluation. If you’d like to learn more about the underlying theory, or precisely how it’s different from non-standard evaluation, we recommend that you read the Metaprogramming chapters in Advanced R.

library(dplyr)

Data masking

Data masking makes data manipulation faster because it requires less typing. In most (but not all¹) base R functions you need to refer to variables with $, leading to code that repeats the name of the data frame many times:

starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,]

The dplyr equivalent of this code is more concise because with data masking you only need to type starwars once:

starwars %>% filter(homeworld == "Naboo", species == "Human")

Data- and env-variables

The key idea behind data masking is that it blurs the line between the two different meanings of the word “variable”:

  • env-variables are “programming” variables that live in an environment. They are usually created with <-.

  • data-variables are “statistical” variables that live in a data frame. They usually come from data files (e.g. .csv, .xls), or are created manipulating existing variables.

To make those definitions a little more concrete, take this piece of code:

df <- data.frame(x = runif(3), y = runif(3))
df$x
#> [1] 0.08075014 0.83433304 0.60076089

It creates a env-variable, df, that contains two data-variables, x and y. Then it extracts the data-variable x out of the env-variable df using $.

I think this blurring of the meaning of “variable” is a really nice feature for interactive data analysis because it allows you to refer to data-vars as is, without any prefix. And this seems to be fairly intuitive since many newer R users will attempt to write diamonds[x == 0 | y == 0, ].

Unfortunately, this benefit does not come for free. When you start to program with these tools, you’re going to have to grapple with the distinction. This will be hard because you’ve never had to think about it before, so it’ll take a while for your brain to learn these new concepts and categories. However, once you’ve teased apart the idea of “variable” into data-variable and env-variable, I think you’ll find it fairly straightforward to use.

Indirection

The main challenge of programming with functions that use data masking arises when you introduce some indirection, i.e. when you want to get the data-variable from an env-variable instead of directly typing the data-variable’s name. There are two main cases:

  • When you have the data-variable in a function argument (i.e. an env-variable that holds a promise²), you need to embrace the argument by surrounding it in doubled braces, like filter(df, {{ var }}).

    The following function uses embracing to create a wrapper around summarise() that computes the minimum and maximum values of a variable, as well as the number of observations that were summarised:

    var_summary <- function(data, var) {
      data %>%
        summarise(n = n(), min = min({{ var }}), max = max({{ var }}))
    }
    mtcars %>% 
      group_by(cyl) %>% 
      var_summary(mpg)
  • When you have an env-variable that is a character vector, you need to index into the .data pronoun with [[, like summarise(df, mean = mean(.data[[var]])).

    The following example uses .data to count the number of unique values in each variable of mtcars:

    for (var in names(mtcars)) {
      mtcars %>% count(.data[[var]]) %>% print()
    }

    Note that .data is not a data frame; it’s a special construct, a pronoun, that allows you to access the current variables either directly, with .data$x or indirectly with .data[[var]]. Don’t expect other functions to work with it.

Tidy selection

Data masking makes it easy to compute on values within a dataset. Tidy selection is a complementary tool that makes it easy to work with the columns of a dataset.

The tidyselect DSL

Underneath all functions that use tidy selection is the tidyselect package. It provides a miniature domain specific language that makes it easy to select columns by name, position, or type. For example:

  • select(df, 1) selects the first column; select(df, last_col()) selects the last column.

  • select(df, c(a, b, c)) selects columns a, b, and c.

  • select(df, starts_with("a")) selects all columns whose name starts with “a”; select(df, ends_with("z")) selects all columns whose name ends with “z”.

  • select(df, where(is.numeric)) selects all numeric columns.

You can see more details in ?dplyr_tidy_select.

Indirection

As with data masking, tidy selection makes a common task easier at the cost of making a less common task harder. When you want to use tidy select indirectly with the column specification stored in an intermediate variable, you’ll need to learn some new tools. Again, there are two forms of indirection:

  • When you have the data-variable in an env-variable that is a function argument, you use the same technique as data masking: you embrace the argument by surrounding it in doubled braces.

    The following function summarises a data frame by computing the mean of all variables selected by the user:

    summarise_mean <- function(data, vars) {
      data %>% summarise(n = n(), across({{ vars }}, mean))
    }
    mtcars %>% 
      group_by(cyl) %>% 
      summarise_mean(where(is.numeric))
  • When you have an env-variable that is a character vector, you need to use all_of() or any_of() depending on whether you want the function to error if a variable is not found.

    The following code uses all_of() to select all of the variables found in a character vector; then ! plus all_of() to select all of the variables not found in a character vector:

    vars <- c("mpg", "vs")
    mtcars %>% select(all_of(vars))
    mtcars %>% select(!all_of(vars))

How-tos

The following examples solve a grab bag of common problems. We show you the minimum amount of code so that you can get the basic idea; most real problems will require more code or combining multiple techniques.

User-supplied data

If you check the documentation, you’ll see that .data never uses data masking or tidy select. That means you don’t need to do anything special in your function:

mutate_y <- function(data) {
  mutate(data, y = a + x)
}

Eliminating R CMD check NOTEs

If you’re writing a package and you have a function that uses data-variables:

my_summary_function <- function(data) {
  data %>% 
    filter(x > 0) %>% 
    group_by(grp) %>% 
    summarise(y = mean(y), n = n())
}

You’ll get an R CMD check NOTE:

N  checking R code for possible problems
   my_summary_function: no visible binding for global variable ‘x’, ‘grp’, ‘y’
   Undefined global functions or variables:
     x grp y

You can eliminate this by using .data$var and importing .data from its source in the rlang package (the underlying package that implements tidy evaluation):

#' @importFrom rlang .data
my_summary_function <- function(data) {
  data %>% 
    filter(.data$x > 0) %>% 
    group_by(.data$grp) %>% 
    summarise(y = mean(.data$y), n = n())
}

One or more user-supplied expressions

If you want the user to supply an expression that’s passed onto an argument which uses data masking or tidy select, embrace the argument:

my_summarise <- function(data, group_var) {
  data %>%
    group_by({{ group_var }}) %>%
    summarise(mean = mean(mass))
}

This generalises in a straightforward way if you want to use one user-supplied expression in multiple places:

my_summarise2 <- function(data, expr) {
  data %>% summarise(
    mean = mean({{ expr }}),
    sum = sum({{ expr }}),
    n = n()
  )
}

If you want the user to provide multiple expressions, embrace each of them:

my_summarise3 <- function(data, mean_var, sd_var) {
  data %>% 
    summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }}))
}

If you want to use the names of variables in the output, you can use glue syntax in conjunction with :=:

my_summarise4 <- function(data, expr) {
  data %>% summarise(
    "mean_{{expr}}" := mean({{ expr }}),
    "sum_{{expr}}" := sum({{ expr }}),
    "n_{{expr}}" := n()
  )
}
my_summarise5 <- function(data, mean_var, sd_var) {
  data %>% 
    summarise(
      "mean_{{mean_var}}" := mean({{ mean_var }}), 
      "sd_{{sd_var}}" := sd({{ sd_var }})
    )
}

Any number of user-supplied expressions

If you want to take an arbitrary number of user supplied expressions, use .... This is most often useful when you want to give the user full control over a single part of the pipeline, like a group_by() or a mutate().

my_summarise <- function(.data, ...) {
  .data %>%
    group_by(...) %>%
    summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE))
}

starwars %>% my_summarise(homeworld)
#> # A tibble: 49 × 3
#>   homeworld    mass height
#>   <chr>       <dbl>  <dbl>
#> 1 Alderaan       64   176.
#> 2 Aleen Minor    15    79 
#> 3 Bespin         79   175 
#> 4 Bestine IV    110   180 
#> # … with 45 more rows
starwars %>% my_summarise(sex, gender)
#> `summarise()` has grouped output by 'sex'. You can override using the `.groups`
#> argument.
#> # A tibble: 6 × 4
#> # Groups:   sex [5]
#>   sex            gender      mass height
#>   <chr>          <chr>      <dbl>  <dbl>
#> 1 female         feminine    54.7   169.
#> 2 hermaphroditic masculine 1358     175 
#> 3 male           masculine   81.0   179.
#> 4 none           feminine   NaN      96 
#> # … with 2 more rows

When you use ... in this way, make sure that any other arguments start with . to reduce the chances of argument clashes; see https://design.tidyverse.org/dots-prefix.html for more details.

Transforming user-supplied variables

If you want the user to provide a set of data-variables that are then transformed, use across():

my_summarise <- function(data, summary_vars) {
  data %>%
    summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE)))
}
starwars %>% 
  group_by(species) %>% 
  my_summarise(c(mass, height))
#> # A tibble: 38 × 3
#>   species   mass height
#>   <chr>    <dbl>  <dbl>
#> 1 Aleena      15     79
#> 2 Besalisk   102    198
#> 3 Cerean      82    198
#> 4 Chagrian   NaN    196
#> # … with 34 more rows

You can use this same idea for multiple sets of input data-variables:

my_summarise <- function(data, group_var, summarise_var) {
  data %>%
    group_by(across({{ group_var }})) %>% 
    summarise(across({{ summarise_var }}, mean))
}

Use the .names argument to across() to control the names of the output.

my_summarise <- function(data, group_var, summarise_var) {
  data %>%
    group_by(across({{ group_var }})) %>% 
    summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}"))
}

Loop over multiple variables

If you have a character vector of variable names, and want to operate on them with a for loop, index into the special .data pronoun:

for (var in names(mtcars)) {
  mtcars %>% count(.data[[var]]) %>% print()
}

This same technique works with for loop alternatives like the base R apply() family and the purrr map() family:

mtcars %>% 
  names() %>% 
  purrr::map(~ count(mtcars, .data[[.x]]))

Use a variable from a Shiny input

Many Shiny input controls return character vectors, so you can use the same approach as above: .data[[input$var]].

library(shiny)
ui <- fluidPage(
  selectInput("var", "Variable", choices = names(diamonds)),
  tableOutput("output")
)
server <- function(input, output, session) {
  data <- reactive(filter(diamonds, .data[[input$var]] > 0))
  output$output <- renderTable(head(data()))
}

See https://mastering-shiny.org/action-tidy.html for more details and case studies.


  1. dplyr’s filter() is inspired by base R’s subset(). subset() provides data masking, but not with tidy evaluation, so the techniques described in this chapter don’t apply to it.↩︎

  2. In R, arguments are lazily evaluated, which means that until you attempt to use them, they don’t hold a value, just a promise that describes how to compute the value. You can learn more at https://adv-r.hadley.nz/functions.html#lazy-evaluation↩︎

dplyr/inst/doc/dplyr.R0000644000176200001440000001225114200154104014373 0ustar liggesusers## ---- echo = FALSE, message = FALSE------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) ## ----------------------------------------------------------------------------- dim(starwars) starwars ## ----------------------------------------------------------------------------- starwars %>% filter(skin_color == "light", eye_color == "brown") ## ---- eval = FALSE------------------------------------------------------------ # starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ] ## ----------------------------------------------------------------------------- starwars %>% arrange(height, mass) ## ----------------------------------------------------------------------------- starwars %>% arrange(desc(height)) ## ----------------------------------------------------------------------------- starwars %>% slice(5:10) ## ----------------------------------------------------------------------------- starwars %>% slice_head(n = 3) ## ----------------------------------------------------------------------------- starwars %>% slice_sample(n = 5) starwars %>% slice_sample(prop = 0.1) ## ----------------------------------------------------------------------------- starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) ## ----------------------------------------------------------------------------- # Select columns by name starwars %>% select(hair_color, skin_color, eye_color) # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) # Select all columns ending with color starwars %>% select(ends_with("color")) ## 
----------------------------------------------------------------------------- starwars %>% select(home_world = homeworld) ## ----------------------------------------------------------------------------- starwars %>% rename(home_world = homeworld) ## ----------------------------------------------------------------------------- starwars %>% mutate(height_m = height / 100) ## ----------------------------------------------------------------------------- starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) ## ----------------------------------------------------------------------------- starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) ## ----------------------------------------------------------------------------- starwars %>% transmute( height_m = height / 100, BMI = mass / (height_m^2) ) ## ----------------------------------------------------------------------------- starwars %>% relocate(sex:homeworld, .before = height) ## ----------------------------------------------------------------------------- starwars %>% summarise(height = mean(height, na.rm = TRUE)) ## ---- eval = FALSE------------------------------------------------------------ # a1 <- group_by(starwars, species, sex) # a2 <- select(a1, height, mass) # a3 <- summarise(a2, # height = mean(height, na.rm = TRUE), # mass = mean(mass, na.rm = TRUE) # ) ## ----------------------------------------------------------------------------- summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ## ---- eval = FALSE------------------------------------------------------------ # starwars %>% # group_by(species, sex) %>% # select(height, mass) %>% # summarise( # height = mean(height, na.rm = TRUE), # mass = mean(mass, na.rm = TRUE) # ) ## ----------------------------------------------------------------------------- # `name` represents the integer 1 
select(starwars, name) select(starwars, 1) ## ----------------------------------------------------------------------------- height <- 5 select(starwars, height) ## ----------------------------------------------------------------------------- name <- "color" select(starwars, ends_with(name)) ## ----------------------------------------------------------------------------- name <- 5 select(starwars, name, identity(name)) ## ----------------------------------------------------------------------------- vars <- c("name", "height") select(starwars, all_of(vars), "mass") ## ----------------------------------------------------------------------------- df <- starwars %>% select(name, height, mass) ## ----------------------------------------------------------------------------- mutate(df, "height", 2) ## ----------------------------------------------------------------------------- mutate(df, height + 10) ## ----------------------------------------------------------------------------- var <- seq(1, nrow(df)) mutate(df, new = var) ## ----------------------------------------------------------------------------- group_by(starwars, sex) group_by(starwars, sex = as.factor(sex)) group_by(starwars, height_binned = cut(height, 3)) ## ----------------------------------------------------------------------------- group_by(df, "month") dplyr/inst/doc/window-functions.R0000644000176200001440000000705214200154114016562 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) if (!rlang::is_installed("Lahman")) { knitr::opts_chunk$set(eval = FALSE) } ## ----------------------------------------------------------------------------- library(Lahman) batting <- Lahman::Batting %>% as_tibble() %>% select(playerID, yearID, teamID, G, AB:H) %>% arrange(playerID, yearID, teamID) %>% semi_join(Lahman::AwardsPlayers, by = 
"playerID") players <- batting %>% group_by(playerID) ## ---- eval = FALSE------------------------------------------------------------ # # For each player, find the two years with most hits # filter(players, min_rank(desc(H)) <= 2 & H > 0) # # Within each player, rank each year by the number of games played # mutate(players, G_rank = min_rank(G)) # # # For each player, find every year that was better than the previous year # filter(players, G > lag(G)) # # For each player, compute avg change in games played per year # mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID))) # # # For each player, find all years where they played more games than they did on average # filter(players, G > mean(G)) # # For each, player compute a z score based on number of games played # mutate(players, G_z = (G - mean(G)) / sd(G)) ## ----------------------------------------------------------------------------- x <- c(1, 1, 2, 2, 2) row_number(x) min_rank(x) dense_rank(x) ## ----------------------------------------------------------------------------- cume_dist(x) percent_rank(x) ## ----------------------------------------------------------------------------- filter(players, cume_dist(desc(G)) < 0.1) ## ----------------------------------------------------------------------------- by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) ## ----------------------------------------------------------------------------- x <- 1:5 lead(x) lag(x) ## ---- results = "hide"-------------------------------------------------------- # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) ## ---- results = "hide"-------------------------------------------------------- # Find when a player changed teams filter(players, teamID != lag(teamID)) ## ----------------------------------------------------------------------------- df 
<- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) ## ---- eval = FALSE------------------------------------------------------------ # filter(players, cumany(G > 150)) ## ----------------------------------------------------------------------------- x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) ## ---- eval = FALSE------------------------------------------------------------ # filter(players, G > mean(G)) # filter(players, G < median(G)) ## ---- eval = FALSE------------------------------------------------------------ # filter(players, ntile(G, 2) == 2) ## ----------------------------------------------------------------------------- mutate(players, career_year = yearID - min(yearID) + 1) ## ----------------------------------------------------------------------------- mutate(players, G_z = (G - mean(G)) / sd(G)) dplyr/inst/doc/compatibility.Rmd0000644000176200001440000002352014151641776016461 0ustar liggesusers--- title: "dplyr compatibility" description: > A guide for package authors who need to work with multiple versions of dplyr. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{dplyr compatibility} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} library(dplyr) knitr::opts_chunk$set(collapse = T, comment = "#>") ``` This vignette is aimed at package authors who need to update their code because of a backward incompatible change to dplyr. We do try and minimise backward incompatible changes as much as possible, but sometimes they are necessary in order to radically simplify existing code, or unlock a lot of potential value in the future. This vignette starts with some general advice on writing package code that works with multiple version of dplyr, then continues to discuss specific changes in dplyr versions. 
## Working with multiple dplyr versions Ideally, you want to make sure that your package works with both the released version and the development version of dplyr. This is typically a little bit more work, but has two big advantages: 1. It's more convenient for your users, since they're not forced to update dplyr if they don't want to. 1. It's easier on CRAN since it doesn't require a massive coordinated release of multiple packages. To make code work with multiple versions of a package, your first tool is the simple if statement: ```{r, results = "hide"} if (utils::packageVersion("dplyr") > "0.5.0") { # code for new version } else { # code for old version } ``` Always condition on `> current-version`, not `>= next-version` because this will ensure that this branch is also used for the development version of the package. For example, if the current release is version "0.5.0", the development version will be "0.5.0.9000". Occasionally, you'll run into a situation where the `NAMESPACE` has changed and you need to conditionally import different functions. This typically occurs when functions are moved from one package to another. We try out best to provide automatic fallbacks, but this is not always possible. Often you can work around the problem by avoiding `importFrom` and using `::` instead. Do this where possible: ```{r, eval = FALSE} if (utils::packageVersion("dplyr") > "0.5.0") { dbplyr::build_sql(...) } else { dplyr::build_sql(...) } ``` This will generate an `R CMD check` NOTE (because the one of the functions will always be missing), but this is ok. Simply explain that you get the note because you have written a wrapper to make sure your code is backward compatible. Sometimes it's not possible to avoid `importFrom()`. For example you might be importing a generic so that you can define a method for it. In this case, you can take advantage of a little-known feature in the `NAMESPACE` file: you can include `if` statements. 
```{r} #' @rawNamespace #' if (utils::packageVersion("dplyr") > "0.5.0") { #' importFrom("dbplyr", "build_sql") #' } else { #' importFrom("dplyr", "build_sql") #' } ``` ## dplyr 0.6.0 ### Database code moves to dbplyr Almost all database related code has been moved out of dplyr and into a new package, [dbplyr](https://github.com/tidyverse/dbplyr/). This makes dplyr simpler, and will make it easier to release fixes for bugs that only affect databases. If you've implemented a database backend for dplyr, please read the [backend news](https://github.com/tidyverse/dbplyr/blob/main/NEWS.md#backends) on the backend. Depending on what generics you use, and what generics you provide methods for you, you may need to write some conditional code. To help make this easier we've written `wrap_dbplyr_obj()` which will write the helper code for you: ```{r, eval = FALSE} wrap_dbplyr_obj("build_sql") wrap_dbplyr_obj("base_agg") ``` Simply copy the results of this function in your package. These will generate `R CMD check` NOTES, so make sure to tell CRAN that this is to ensure backward compatibility. ### Deprecation of underscored `verbs_()` Because the tidyeval framework allows us to combine SE and NSE semantics within the same functions, the underscored verbs have been softly deprecated. #### For users of SE_ verbs The legacy underscored versions take objects for which a `lazyeval::as.lazy()` method is defined. This includes symbols and calls, strings, and formulas. All of these objects have been replaced with quosures and you can call tidyeval verbs with unquoted quosures: ```{r, eval = FALSE} quo <- quo(cyl) select(mtcars, !! quo) ``` Symbolic expressions are also supported, but note that bare symbols and calls do not carry scope information. If you're referring to objects in the data frame, it's safe to omit specifying an enclosure: ```{r, results = "hide"} sym <- quote(cyl) select(mtcars, !! sym) call <- quote(mean(cyl)) summarise(mtcars, cyl = !! 
call) ``` Transforming objects into quosures is generally straightforward. To enclose with the current environment, you can unquote directly in `quo()` or you can use `as_quosure()`: ```{r} quo(!! sym) quo(!! call) rlang::as_quosure(sym) rlang::as_quosure(call) ``` Note that while formulas and quosures are very similar objects (and in the most general sense, formulas are quosures), they can't be used interchangeably in tidyeval functions. Early implementations did treat bare formulas as quosures, but this created compatibility issues with modelling functions of the stats package. Fortunately, it's easy to transform formulas to quosures that will self-evaluate in tidyeval functions: ```{r} f <- ~cyl f rlang::as_quosure(f) ``` Finally, and perhaps most importantly, **strings are not and should not be parsed**. As developers, it is tempting to try and solve problems using strings because we have been trained to work with strings rather than quoted expressions. However it's almost always the wrong way to approach the problem. The exception is for creating symbols. In that case it is perfectly legitimate to use strings: ```{r} rlang::sym("cyl") rlang::syms(letters[1:3]) ``` But you should never use strings to create calls. Instead you can use quasiquotation: ```{r} syms <- rlang::syms(c("foo", "bar", "baz")) quo(my_call(!!! syms)) fun <- rlang::sym("my_call") quo((!!fun)(!!! syms)) ``` Or create the call with `call2()`: ```{r} call <- rlang::call2("my_call", !!! syms) call rlang::as_quosure(call) # Or equivalently: quo(!! rlang::call2("my_call", !!! syms)) ``` Note that idioms based on `interp()` should now generally be avoided and replaced with quasiquotation. Where you used to interpolate: ```{r, eval=FALSE} lazyeval::interp(~ mean(var), var = rlang::sym("mpg")) ``` You would now unquote: ```{r, eval=FALSE} var <- "mpg" quo(mean(!! rlang::sym(var))) ``` See also `vignette("programming")` for more about quasiquotation and quosures. 
#### For package authors For package authors, rlang provides a [compatibility file](https://github.com/r-lib/rlang/blob/main/R/compat-lazyeval.R) that you can copy to your package. `compat_lazy()` and `compat_lazy_dots()` turn lazy-able objects into proper quosures. This helps providing an underscored version to your users for backward compatibility. For instance, here is how we defined the underscored version of `filter()` in dplyr 0.6: ```{r, eval = FALSE} filter_.tbl_df <- function(.data, ..., .dots = list()) { dots <- compat_lazy_dots(.dots, caller_env(), ...) filter(.data, !!! dots) } ``` With tidyeval, S3 dispatch to the correct method might be an issue. In the past, the genericity of dplyr verbs was accomplished by dispatching in the underscored versions. Now that those are deprecated, we've turned the non-underscored verbs into S3 generics. We maintain backward compatibility by redispatching to old underscored verbs in the default methods of the new S3 generics. For example, here is how we redispatch `filter()`: ```{r, eval = FALSE} filter.default <- function(.data, ...) { filter_(.data, .dots = compat_as_lazy_dots(...)) } ``` This gets the job done in most cases. However, the default method will not be called for objects inheriting from one of the classes for which we provide non-underscored methods: `data.frame`, `tbl_df`, `tbl_cube` and `grouped_df`. An example of this is the `sf` package whose objects have classes `c("sf", "data.frame")`. Authors of such packages should provide a method for the non-underscored generic in order to be compatible with dplyr: ```{r, eval = FALSE} filter.sf <- function(.data, ...) { st_as_sf(NextMethod()) } ``` If you need help with this, please let us know! ### Deprecation of `mutate_each()` and `summarise_each()` These functions have been replaced by a more complete family of functions. This family has suffixes `_if`, `_at` and `_all` and includes more verbs than just `mutate` `summarise`. 
If you need to update your code to the new family, there are two relevant functions depending on which variables you apply `funs()` to. If you called `mutate_each()` without supplying a selection of variables, `funs` is applied to all variables. In this case, you should update your code to use `mutate_all()` instead: ```{r, eval = FALSE} mutate_each(starwars, funs(as.character)) mutate_all(starwars, funs(as.character)) ``` Note that the new verbs support bare functions as well, so you don't necessarily need to wrap with `funs()`: ```{r, eval = FALSE} mutate_all(starwars, as.character) ``` On the other hand, if you supplied a variable selection, you should use `mutate_at()`. The variable selection should be wrapped with `vars()`. ```{r, eval = FALSE} mutate_each(starwars, funs(as.character), height, mass) mutate_at(starwars, vars(height, mass), as.character) ``` `vars()` supports all the selection helpers that you usually use with `select()`: ```{r, eval = FALSE} summarise_at(mtcars, vars(starts_with("d")), mean) ``` Note that instead of a `vars()` selection, you can also supply character vectors of column names: ```{r, eval = FALSE} mutate_at(starwars, c("height", "mass"), as.character) ``` dplyr/inst/doc/rowwise.R0000644000176200001440000001457314200154110014746 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----setup-------------------------------------------------------------------- library(dplyr, warn.conflicts = FALSE) ## ----include = FALSE---------------------------------------------------------- nest_by <- function(df, ...) { df %>% group_by(...) %>% summarise(data = list(across())) %>% rowwise(...) 
} # mtcars %>% nest_by(cyl) ## ----------------------------------------------------------------------------- df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() ## ----------------------------------------------------------------------------- df %>% mutate(m = mean(c(x, y, z))) df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) ## ----------------------------------------------------------------------------- df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) ## ----------------------------------------------------------------------------- df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df ## ----------------------------------------------------------------------------- rf <- df %>% rowwise(id) ## ----------------------------------------------------------------------------- rf %>% mutate(total = sum(c(w, x, y, z))) rf %>% summarise(total = sum(c(w, x, y, z))) ## ----------------------------------------------------------------------------- rf %>% mutate(total = sum(c_across(w:z))) rf %>% mutate(total = sum(c_across(where(is.numeric)))) ## ----------------------------------------------------------------------------- rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . 
/ total)) ## ----------------------------------------------------------------------------- df %>% mutate(total = rowSums(across(where(is.numeric)))) df %>% mutate(mean = rowMeans(across(where(is.numeric)))) ## ---- eval = FALSE, include = FALSE------------------------------------------- # bench::mark( # df %>% mutate(m = rowSums(across(x:z))), # df %>% mutate(m = apply(across(x:z), 1, sum)), # df %>% rowwise() %>% mutate(m = sum(pick(x:z))), # check = FALSE # ) ## ----------------------------------------------------------------------------- df <- tibble( x = list(1, 2:3, 4:6) ) ## ----------------------------------------------------------------------------- df %>% mutate(l = length(x)) ## ----------------------------------------------------------------------------- df %>% mutate(l = lengths(x)) ## ----------------------------------------------------------------------------- df %>% mutate(l = sapply(x, length)) df %>% mutate(l = purrr::map_int(x, length)) ## ----------------------------------------------------------------------------- df %>% rowwise() %>% mutate(l = length(x)) ## ----------------------------------------------------------------------------- df <- tibble(g = 1:2, y = list(1:3, "a")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) ## ----------------------------------------------------------------------------- gf %>% mutate(type = typeof(y), length = length(y)) rf %>% mutate(type = typeof(y), length = length(y)) ## ----------------------------------------------------------------------------- # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 ## ---- error = TRUE------------------------------------------------------------ gf %>% mutate(y2 = y) rf %>% mutate(y2 = y) rf %>% mutate(y2 = list(y)) ## ----------------------------------------------------------------------------- by_cyl <- mtcars %>% nest_by(cyl) by_cyl ## 
----------------------------------------------------------------------------- mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods ## ----------------------------------------------------------------------------- mods <- mods %>% mutate(pred = list(predict(mod, data))) mods ## ----------------------------------------------------------------------------- mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) mods %>% summarise(rsq = summary(mod)$r.squared) mods %>% summarise(broom::glance(mod)) ## ----------------------------------------------------------------------------- mods %>% summarise(broom::tidy(mod)) ## ----------------------------------------------------------------------------- df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) ## ----------------------------------------------------------------------------- df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) ## ---- error = TRUE------------------------------------------------------------ df %>% rowwise() %>% mutate(data = runif(n, min, max)) ## ----------------------------------------------------------------------------- df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) ## ----------------------------------------------------------------------------- df <- tribble( ~rng, ~params, "runif", list(n = 10), "rnorm", list(n = 20), "rpois", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) ## ---- include = FALSE, eval = FALSE------------------------------------------- # df <- rowwise(tribble( # ~rng, ~params, # "runif", list(min = -1, max = 1), # "rnorm", list(), # "rpois", list(lambda = 5), # )) # # # Has to happen in separate function to avoid eager unquoting # f <- function(rng, params) purrr::exec(rng, n = 10, !!!params) # df %>% # mutate(data = list(f(rng, params))) ## ----------------------------------------------------------------------------- 
mtcars %>% group_by(cyl) %>% do(head(., 1)) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% summarise(head(cur_data(), 1)) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(cur_data())) dplyr/inst/doc/programming.Rmd0000644000176200001440000003321214176714602016125 0ustar liggesusers--- title: "Programming with dplyr" description: > Most dplyr verbs use "tidy evaluation", a special type of non-standard evaluation. In this vignette, you'll learn the two basic forms, data masking and tidy selection, and how you can program with them using either functions or for loops. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Programming with dplyr} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` ## Introduction Most dplyr verbs use **tidy evaluation** in some way. Tidy evaluation is a special type of non-standard evaluation used throughout the tidyverse. There are two basic forms found in dplyr: * `arrange()`, `count()`, `filter()`, `group_by()`, `mutate()`, and `summarise()` use **data masking** so that you can use data variables as if they were variables in the environment (i.e. you write `my_variable` not `df$myvariable`). * `across()`, `relocate()`, `rename()`, `select()`, and `pull()` use **tidy selection** so you can easily choose variables based on their position, name, or type (e.g. `starts_with("x")` or `is.numeric`). To determine whether a function argument uses data masking or tidy selection, look at the documentation: in the arguments list, you'll see `` or ``. 
Data masking and tidy selection make interactive data exploration fast and fluid, but they add some new challenges when you attempt to use them indirectly such as in a for loop or a function. This vignette shows you how to overcome those challenges. We'll first go over the basics of data masking and tidy selection, talk about how to use them indirectly, and then show you a number of recipes to solve common problems. This vignette will give you the minimum knowledge you need to be an effective programmer with tidy evaluation. If you'd like to learn more about the underlying theory, or precisely how it's different from non-standard evaluation, we recommend that you read the Metaprogramming chapters in [_Advanced R_](https://adv-r.hadley.nz). ```{r setup, message = FALSE} library(dplyr) ``` ## Data masking Data masking makes data manipulation faster because it requires less typing. In most (but not all[^subset]) base R functions you need to refer to variables with `$`, leading to code that repeats the name of the data frame many times: ```{r, results = FALSE} starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,] ``` [^subset]: dplyr's `filter()` is inspired by base R's `subset()`. `subset()` provides data masking, but not with tidy evaluation, so the techniques described in this chapter don't apply to it. The dplyr equivalent of this code is more concise because data masking allows you to need to type `starwars` once: ```{r, results = FALSE} starwars %>% filter(homeworld == "Naboo", species == "Human") ``` ### Data- and env-variables The key idea behind data masking is that it blurs the line between the two different meanings of the word "variable": * **env-variables** are "programming" variables that live in an environment. They are usually created with `<-`. * **data-variables** are "statistical" variables that live in a data frame. They usually come from data files (e.g. `.csv`, `.xls`), or are created manipulating existing variables. 
To make those definitions a little more concrete, take this piece of code: ```{r} df <- data.frame(x = runif(3), y = runif(3)) df$x ``` It creates an env-variable, `df`, that contains two data-variables, `x` and `y`. Then it extracts the data-variable `x` out of the env-variable `df` using `$`. I think this blurring of the meaning of "variable" is a really nice feature for interactive data analysis because it allows you to refer to data-vars as is, without any prefix. And this seems to be fairly intuitive since many newer R users will attempt to write `diamonds[x == 0 | y == 0, ]`. Unfortunately, this benefit does not come for free. When you start to program with these tools, you're going to have to grapple with the distinction. This will be hard because you've never had to think about it before, so it'll take a while for your brain to learn these new concepts and categories. However, once you've teased apart the idea of "variable" into data-variable and env-variable, I think you'll find it fairly straightforward to use. ### Indirection The main challenge of programming with functions that use data masking arises when you introduce some indirection, i.e. when you want to get the data-variable from an env-variable instead of directly typing the data-variable's name. There are two main cases: * When you have the data-variable in a function argument (i.e. an env-variable that holds a promise[^promise]), you need to **embrace** the argument by surrounding it in doubled braces, like `filter(df, {{ var }})`. 
The following function uses embracing to create a wrapper around `summarise()` that computes the minimum and maximum values of a variable, as well as the number of observations that were summarised: ```{r, results = FALSE} var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) ``` * When you have an env-variable that is a character vector, you need to index into the `.data` pronoun with `[[`, like `summarise(df, mean = mean(.data[[var]]))`. The following example uses `.data` to count the number of unique values in each variable of `mtcars`: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` Note that `.data` is not a data frame; it's a special construct, a pronoun, that allows you to access the current variables either directly, with `.data$x` or indirectly with `.data[[var]]`. Don't expect other functions to work with it. [^promise]: In R, arguments are lazily evaluated which means that until you attempt to use, they don't hold a value, just a __promise__ that describes how to compute the value. You can learn more at ## Tidy selection Data masking makes it easy to compute on values within a dataset. Tidy selection is a complementary tool that makes it easy to work with the columns of a dataset. ### The tidyselect DSL Underneath all functions that use tidy selection is the [tidyselect](https://tidyselect.r-lib.org/) package. It provides a miniature domain specific language that makes it easy to select columns by name, position, or type. For example: * `select(df, 1)` selects the first column; `select(df, last_col())` selects the last column. * `select(df, c(a, b, c))` selects columns `a`, `b`, and `c`. * `select(df, starts_with("a"))` selects all columns whose name starts with "a"; `select(df, ends_with("z"))` selects all columns whose name ends with "z". 
* `select(df, where(is.numeric))` selects all numeric columns. You can see more details in `?dplyr_tidy_select`. ### Indirection As with data masking, tidy selection makes a common task easier at the cost of making a less common task harder. When you want to use tidy select indirectly with the column specification stored in an intermediate variable, you'll need to learn some new tools. Again, there are two forms of indirection: * When you have the data-variable in an env-variable that is a function argument, you use the same technique as data masking: you **embrace** the argument by surrounding it in doubled braces. The following function summarises a data frame by computing the mean of all variables selected by the user: ```{r, results = FALSE} summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) ``` * When you have an env-variable that is a character vector, you need to use `all_of()` or `any_of()` depending on whether you want the function to error if a variable is not found. The following code uses `all_of()` to select all of the variables found in a character vector; then `!` plus `all_of()` to select all of the variables *not* found in a character vector: ```{r, results = FALSE} vars <- c("mpg", "vs") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars)) ``` ## How-tos The following examples solve a grab bag of common problems. We show you the minimum amount of code so that you can get the basic idea; most real problems will require more code or combining multiple techniques. ### User-supplied data If you check the documentation, you'll see that `.data` never uses data masking or tidy select. 
That means you don't need to do anything special in your function: ```{r} mutate_y <- function(data) { mutate(data, y = a + x) } ``` ### Eliminating `R CMD check` `NOTE`s If you're writing a package and you have a function that uses data-variables: ```{r} my_summary_function <- function(data) { data %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } ``` You'll get an `R CMD CHECK` `NOTE`: ``` N checking R code for possible problems my_summary_function: no visible binding for global variable ‘x’, ‘grp’, ‘y’ Undefined global functions or variables: x grp y ``` You can eliminate this by using `.data$var` and importing `.data` from its source in the [rlang](https://rlang.r-lib.org/) package (the underlying package that implements tidy evaluation): ```{r} #' @importFrom rlang .data my_summary_function <- function(data) { data %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) } ``` ### One or more user-supplied expressions If you want the user to supply an expression that's passed onto an argument which uses data masking or tidy select, embrace the argument: ```{r} my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } ``` This generalises in a straightforward way if you want to use one user-supplied expression in multiple places: ```{r} my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } ``` If you want the user to provide multiple expressions, embrace each of them: ```{r} my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } ``` If you want to use the names of variables in the output, you can use glue syntax in conjunction with `:=`: ```{r} my_summarise4 <- function(data, expr) { data %>% summarise( "mean_{{expr}}" := mean({{ expr }}), "sum_{{expr}}" := sum({{ expr }}), "n_{{expr}}" := n() ) } my_summarise5 <- 
function(data, mean_var, sd_var) { data %>% summarise( "mean_{{mean_var}}" := mean({{ mean_var }}), "sd_{{sd_var}}" := sd({{ sd_var }}) ) } ``` ### Any number of user-supplied expressions If you want to take an arbitrary number of user supplied expressions, use `...`. This is most often useful when you want to give the user full control over a single part of the pipeline, like a `group_by()` or a `mutate()`. ```{r} my_summarise <- function(.data, ...) { .data %>% group_by(...) %>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) starwars %>% my_summarise(sex, gender) ``` When you use `...` in this way, make sure that any other arguments start with `.` to reduce the chances of argument clashes; see for more details. ### Transforming user-supplied variables If you want the user to provide a set of data-variables that are then transformed, use `across()`: ```{r} my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) ``` You can use this same idea for multiple sets of input data-variables: ```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(across({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } ``` Use the `.names` argument to `across()` to control the names of the output. 
```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(across({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}")) } ``` ### Loop over multiple variables If you have a character vector of variable names, and want to operate on them with a for loop, index into the special `.data` pronoun: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` This same technique works with for loop alternatives like the base R `apply()` family and the purrr `map()` family: ```{r, results = FALSE} mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]])) ``` ### Use a variable from an Shiny input Many Shiny input controls return character vectors, so you can use the same approach as above: `.data[[input$var]]`. ```{r, eval = FALSE} library(shiny) ui <- fluidPage( selectInput("var", "Variable", choices = names(diamonds)), tableOutput("output") ) server <- function(input, output, session) { data <- reactive(filter(diamonds, .data[[input$var]] > 0)) output$output <- renderTable(head(data())) } ``` See for more details and case studies. dplyr/inst/doc/compatibility.R0000644000176200001440000000712414200154103016114 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- library(dplyr) knitr::opts_chunk$set(collapse = T, comment = "#>") ## ---- results = "hide"-------------------------------------------------------- if (utils::packageVersion("dplyr") > "0.5.0") { # code for new version } else { # code for old version } ## ---- eval = FALSE------------------------------------------------------------ # if (utils::packageVersion("dplyr") > "0.5.0") { # dbplyr::build_sql(...) # } else { # dplyr::build_sql(...) 
# } ## ----------------------------------------------------------------------------- #' @rawNamespace #' if (utils::packageVersion("dplyr") > "0.5.0") { #' importFrom("dbplyr", "build_sql") #' } else { #' importFrom("dplyr", "build_sql") #' } ## ---- eval = FALSE------------------------------------------------------------ # wrap_dbplyr_obj("build_sql") # # wrap_dbplyr_obj("base_agg") ## ---- eval = FALSE------------------------------------------------------------ # quo <- quo(cyl) # select(mtcars, !! quo) ## ---- results = "hide"-------------------------------------------------------- sym <- quote(cyl) select(mtcars, !! sym) call <- quote(mean(cyl)) summarise(mtcars, cyl = !! call) ## ----------------------------------------------------------------------------- quo(!! sym) quo(!! call) rlang::as_quosure(sym) rlang::as_quosure(call) ## ----------------------------------------------------------------------------- f <- ~cyl f rlang::as_quosure(f) ## ----------------------------------------------------------------------------- rlang::sym("cyl") rlang::syms(letters[1:3]) ## ----------------------------------------------------------------------------- syms <- rlang::syms(c("foo", "bar", "baz")) quo(my_call(!!! syms)) fun <- rlang::sym("my_call") quo((!!fun)(!!! syms)) ## ----------------------------------------------------------------------------- call <- rlang::call2("my_call", !!! syms) call rlang::as_quosure(call) # Or equivalently: quo(!! rlang::call2("my_call", !!! syms)) ## ---- eval=FALSE-------------------------------------------------------------- # lazyeval::interp(~ mean(var), var = rlang::sym("mpg")) ## ---- eval=FALSE-------------------------------------------------------------- # var <- "mpg" # quo(mean(!! rlang::sym(var))) ## ---- eval = FALSE------------------------------------------------------------ # filter_.tbl_df <- function(.data, ..., .dots = list()) { # dots <- compat_lazy_dots(.dots, caller_env(), ...) # filter(.data, !!! 
dots) # } ## ---- eval = FALSE------------------------------------------------------------ # filter.default <- function(.data, ...) { # filter_(.data, .dots = compat_as_lazy_dots(...)) # } ## ---- eval = FALSE------------------------------------------------------------ # filter.sf <- function(.data, ...) { # st_as_sf(NextMethod()) # } ## ---- eval = FALSE------------------------------------------------------------ # mutate_each(starwars, funs(as.character)) # mutate_all(starwars, funs(as.character)) ## ---- eval = FALSE------------------------------------------------------------ # mutate_all(starwars, as.character) ## ---- eval = FALSE------------------------------------------------------------ # mutate_each(starwars, funs(as.character), height, mass) # mutate_at(starwars, vars(height, mass), as.character) ## ---- eval = FALSE------------------------------------------------------------ # summarise_at(mtcars, vars(starts_with("d")), mean) ## ---- eval = FALSE------------------------------------------------------------ # mutate_at(starwars, c("height", "mass"), as.character) dplyr/inst/doc/grouping.Rmd0000644000176200001440000002041614121112104015412 0ustar liggesusers--- title: "Grouped data" description: > To unlock the full potential of dplyr, you need to understand how each verb interacts with grouping. This vignette shows you how to manipulate grouping, how each verb changes its behaviour when working with grouped data, and how you can access data about the "current" group from within a verb. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Grouped data} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE, warning = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` dplyr verbs are particularly powerful when you apply them to grouped data frames (`grouped_df` objects). 
This vignette shows you: * How to group, inspect, and ungroup with `group_by()` and friends. * How individual dplyr verbs change their behaviour when applied to grouped data frames. * How to access data about the "current" group from within a verb. We'll start by loading dplyr: ```{r, message = FALSE} library(dplyr) ``` ## `group_by()` The most important grouping verb is `group_by()`: it takes a data frame and one or more variables to group by: ```{r} by_species <- starwars %>% group_by(species) by_sex_gender <- starwars %>% group_by(sex, gender) ``` You can see the grouping when you print the data: ```{r} by_species by_sex_gender ``` Or use `tally()` to count the number of rows in each group. The `sort` argument is useful if you want to see the largest groups up front. ```{r} by_species %>% tally() by_sex_gender %>% tally(sort = TRUE) ``` As well as grouping by existing variables, you can group by any function of existing variables. This is equivalent to performing a `mutate()` **before** the `group_by()`: ```{r group_by_with_expression} bmi_breaks <- c(0, 18.5, 25, 30, Inf) starwars %>% group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>% tally() ``` ## Group metadata You can see underlying group data with `group_keys()`. It has one row for each group and one column for each grouping variable: ```{r group_vars} by_species %>% group_keys() by_sex_gender %>% group_keys() ``` You can see which group each row belongs to with `group_indices()`: ```{r} by_species %>% group_indices() ``` And which rows each group contains with `group_rows()`: ```{r} by_species %>% group_rows() %>% head() ``` Use `group_vars()` if you just want the names of the grouping variables: ```{r} by_species %>% group_vars() by_sex_gender %>% group_vars() ``` ### Changing and adding to grouping variables If you apply `group_by()` to an already grouped dataset, it will overwrite the existing grouping variables. 
For example, the following code groups by `homeworld` instead of `species`: ```{r} by_species %>% group_by(homeworld) %>% tally() ``` To **augment** the grouping, use `.add = TRUE`[^add]. For example, the following code groups by species and homeworld: ```{r} by_species %>% group_by(homeworld, .add = TRUE) %>% tally() ``` [^add]: Note that the argument changed from `add = TRUE` to `.add = TRUE` in dplyr 1.0.0. ### Removing grouping variables To remove all grouping variables, use `ungroup()`: ```{r} by_species %>% ungroup() %>% tally() ``` You can also choose to selectively ungroup by listing the variables you want to remove: ```{r} by_sex_gender %>% ungroup(sex) %>% tally() ``` ## Verbs The following sections describe how grouping affects the main dplyr verbs. ### `summarise()` `summarise()` computes a summary for each group. This means that it starts from `group_keys()`, adding summary variables to the right hand side: ```{r summarise} by_species %>% summarise( n = n(), height = mean(height, na.rm = TRUE) ) ``` The `.groups=` argument controls the grouping structure of the output. The historical behaviour of removing the right hand side grouping variable corresponds to `.groups = "drop_last"` without a message or `.groups = NULL` with a message (the default). ```{r} by_sex_gender %>% summarise(n = n()) %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop_last") %>% group_vars() ``` Since version 1.0.0 the groups may also be kept (`.groups = "keep"`) or dropped (`.groups = "drop"`). ```{r} by_sex_gender %>% summarise(n = n(), .groups = "keep") %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop") %>% group_vars() ``` When the output no longer has grouping variables, it becomes ungrouped (i.e. a regular tibble). ### `select()`, `rename()`, and `relocate()` `rename()` and `relocate()` behave identically with grouped and ungrouped data because they only affect the name or position of existing columns. 
Grouped `select()` is almost identical to ungrouped select, except that it always includes the grouping variables: ```{r select} by_species %>% select(mass) ``` If you don't want the grouping variables, you'll have to first `ungroup()`. (This design is possibly a mistake, but we're stuck with it for now.) ### `arrange()` Grouped `arrange()` is the same as ungrouped `arrange()`, unless you set `.by_group = TRUE`, in which case it will order first by the grouping variables. ```{r} by_species %>% arrange(desc(mass)) %>% relocate(species, mass) by_species %>% arrange(desc(mass), .by_group = TRUE) %>% relocate(species, mass) ``` Note that second example is sorted by `species` (from the `group_by()` statement) and then by `mass` (within species). ### `mutate()` and `transmute()` In simple cases with vectorised functions, grouped and ungrouped `mutate()` give the same results. They differ when used with summary functions: ```{r by_homeworld} # Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) ``` Or with window functions like `min_rank()`: ```{r} # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) ``` ### `filter()` A grouped `filter()` effectively does a `mutate()` to generate a logical variable, and then only keeps the rows where the variable is `TRUE`. This means that grouped filters can be used with summary functions. For example, we can find the tallest character of each species: ```{r filter} by_species %>% select(name, species, height) %>% filter(height == max(height)) ``` You can also use `filter()` to remove entire groups. 
For example, the following code eliminates all groups that only have a single member: ```{r filter_group} by_species %>% filter(n() != 1) %>% tally() ``` ### `slice()` and friends `slice()` and friends (`slice_head()`, `slice_tail()`, `slice_sample()`, `slice_min()` and `slice_max()`) select rows within a group. For example, we can select the first observation within each species: ```{r slice} by_species %>% relocate(species) %>% slice(1) ``` Similarly, we can use `slice_min()` to select the smallest `n` values of a variable: ```{r slice_min} by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) ``` ## Computing on grouping information Inside dplyr verbs, you can access various properties of the "current" group using a family of functions with the `cur_` prefix. These functions are typically needed for everyday usage of dplyr, but can be useful because they allow you to free from some of the typical constraints of dplyr verbs. ### `cur_data()` `cur_data()` returns the current group, excluding grouping variables. It's useful to feed to functions that take a whole data frame. For example, the following code fits a linear model of `mass ~ height` to each species: ```{r cur_data} by_species %>% filter(n() > 1) %>% mutate(mod = list(lm(mass ~ height, data = cur_data()))) ``` ### `cur_group()` and `cur_group_id()` `cur_group_id()` gives a unique numeric identifier for the current group. This is sometimes useful if you want to index into an external data structure. 
```{r cur_group_id} by_species %>% arrange(species) %>% select(name, species, homeworld) %>% mutate(id = cur_group_id()) ``` dplyr/inst/doc/grouping.R0000644000176200001440000001100614200154105015071 0ustar liggesusers## ---- echo = FALSE, message = FALSE, warning = FALSE-------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ---- message = FALSE--------------------------------------------------------- library(dplyr) ## ----------------------------------------------------------------------------- by_species <- starwars %>% group_by(species) by_sex_gender <- starwars %>% group_by(sex, gender) ## ----------------------------------------------------------------------------- by_species by_sex_gender ## ----------------------------------------------------------------------------- by_species %>% tally() by_sex_gender %>% tally(sort = TRUE) ## ----group_by_with_expression------------------------------------------------- bmi_breaks <- c(0, 18.5, 25, 30, Inf) starwars %>% group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>% tally() ## ----group_vars--------------------------------------------------------------- by_species %>% group_keys() by_sex_gender %>% group_keys() ## ----------------------------------------------------------------------------- by_species %>% group_indices() ## ----------------------------------------------------------------------------- by_species %>% group_rows() %>% head() ## ----------------------------------------------------------------------------- by_species %>% group_vars() by_sex_gender %>% group_vars() ## ----------------------------------------------------------------------------- by_species %>% group_by(homeworld) %>% tally() ## ----------------------------------------------------------------------------- by_species %>% group_by(homeworld, .add = TRUE) %>% tally() ## ----------------------------------------------------------------------------- 
by_species %>% ungroup() %>% tally() ## ----------------------------------------------------------------------------- by_sex_gender %>% ungroup(sex) %>% tally() ## ----summarise---------------------------------------------------------------- by_species %>% summarise( n = n(), height = mean(height, na.rm = TRUE) ) ## ----------------------------------------------------------------------------- by_sex_gender %>% summarise(n = n()) %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop_last") %>% group_vars() ## ----------------------------------------------------------------------------- by_sex_gender %>% summarise(n = n(), .groups = "keep") %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop") %>% group_vars() ## ----select------------------------------------------------------------------- by_species %>% select(mass) ## ----------------------------------------------------------------------------- by_species %>% arrange(desc(mass)) %>% relocate(species, mass) by_species %>% arrange(desc(mass), .by_group = TRUE) %>% relocate(species, mass) ## ----by_homeworld------------------------------------------------------------- # Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) ## ----------------------------------------------------------------------------- # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) ## ----filter------------------------------------------------------------------- by_species %>% select(name, species, height) %>% filter(height == max(height)) ## ----filter_group------------------------------------------------------------- 
by_species %>% filter(n() != 1) %>% tally() ## ----slice-------------------------------------------------------------------- by_species %>% relocate(species) %>% slice(1) ## ----slice_min---------------------------------------------------------------- by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) ## ----cur_data----------------------------------------------------------------- by_species %>% filter(n() > 1) %>% mutate(mod = list(lm(mass ~ height, data = cur_data()))) ## ----cur_group_id------------------------------------------------------------- by_species %>% arrange(species) %>% select(name, species, homeworld) %>% mutate(id = cur_group_id()) dplyr/inst/doc/colwise.Rmd0000644000176200001440000002763214144435746015265 0ustar liggesusers--- title: "Column-wise operations" description: > Learn how to easily repeat the same operation across multiple columns using `across()`. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{colwise} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` It's often useful to perform the same operation on multiple columns, but copying and pasting is both tedious and error prone: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d)) ``` (If you're trying to compute `mean(a, b, c, d)` for each row, instead see `vignette("rowwise")`) This vignette will introduce you to the `across()` function, which lets you rewrite the previous code more succinctly: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(across(a:d, mean)) ``` We'll start by discussing the basic usage of `across()`, particularly as it applies to `summarise()`, and show how to use it with multiple functions. We'll then show a few uses with other verbs. 
We'll finish off with a bit of history, showing why we prefer `across()` to our last approach (the `_if()`, `_at()` and `_all()` functions) and how to translate your old code to the new syntax. ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ## Basic usage `across()` has two primary arguments: * The first argument, `.cols`, selects the columns you want to operate on. It uses tidy selection (like `select()`) so you can pick variables by position, name, and type. * The second argument, `.fns`, is a function or list of functions to apply to each column. This can also be a purrr style formula (or list of formulas) like `~ .x / 2`. (This argument is optional, and you can omit it if you just want to get the underlying data; you'll see that technique used in `vignette("rowwise")`.) Here are a couple of examples of `across()` in conjunction with its favourite verb, `summarise()`. But you can use `across()` with any dplyr verb, as you'll see a little later. ```{r} starwars %>% summarise(across(where(is.character), n_distinct)) starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) ``` Because `across()` is usually used in combination with `summarise()` and `mutate()`, it doesn't select grouping variables in order to avoid accidentally modifying them: ```{r} df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) ``` ### Multiple functions You can transform each variable with more than one function by supplying a named list of functions or lambda functions in the second argument: ```{r} min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) starwars %>% summarise(across(c(height, mass, birth_year), min_max)) ``` Control how the names 
are created with the `.names` argument which takes a [glue](https://glue.tidyverse.org/) spec: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}")) ``` If you'd prefer all summaries with the same function to be grouped together, you'll have to expand the calls yourself: ```{r} starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ``` (One day this might become an argument to `across()` but we're not yet sure how it would work.) We cannot however use `where(is.numeric)` in that last case because the second `across()` would pick up the variables that were newly created ("min_height", "min_mass" and "min_birth_year"). We can work around this by combining both calls to `across()` into a single expression that returns a tibble: ```{r} starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ) ``` Alternatively we could reorganize results with `relocate()`: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% relocate(starts_with("min")) ``` ### Current column If you need to, you can access the name of the "current" column inside by calling `cur_column()`. 
This can be useful if you want to perform some sort of context dependent transformation that's already encoded in a vector: ```{r} df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) ``` ### Gotchas Be careful when combining numeric summaries with `where(is.numeric)`: ```{r} df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) ``` Here `n` becomes `NA` because `n` is numeric, so the `across()` computes its standard deviation, and the standard deviation of 3 (a constant) is `NA`. You probably want to compute `n()` last to avoid this problem: ```{r} df %>% summarise(across(where(is.numeric), sd), n = n()) ``` Alternatively, you could explicitly exclude `n` from the columns to operate on: ```{r} df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) ``` Another approach is to combine both the call to `n()` and `across()` in a single expression that returns a tibble: ```{r} df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) ``` ### Other verbs So far we've focused on the use of `across()` with `summarise()`, but it works with any other dplyr verb that uses data masking: * Rescale all numeric variables to range 0-1: ```{r} rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) ``` For some verbs, like `group_by()`, `count()` and `distinct()`, you can omit the summary functions: * Find all distinct ```{r} starwars %>% distinct(across(contains("color"))) ``` * Count all combinations of variables with a given pattern: ```{r} starwars %>% count(across(contains("color")), sort = TRUE) ``` `across()` doesn't work with `select()` or `rename()` because they already use tidy select syntax; if you want to transform column names with a function, you can use `rename_with()`. 
### filter() We cannot directly use `across()` in `filter()` because we need an extra step to combine the results. To that end, `filter()` has two special purpose companion functions: * `if_any()` keeps the rows where the predicate is true for *at least one* selected column: ```{r} starwars %>% filter(if_any(everything(), ~ !is.na(.x))) ``` * `if_all()` keeps the rows where the predicate is true for *all* selected columns: ```{r} starwars %>% filter(if_all(everything(), ~ !is.na(.x))) ``` * Find all rows where no variable has missing values: ```{r} starwars %>% filter(across(everything(), ~ !is.na(.x))) ``` ## `_if`, `_at`, `_all` Prior versions of dplyr allowed you to apply a function to multiple columns in a different way: using functions with `_if`, `_at`, and `_all()` suffixes. These functions solved a pressing need and are used by many people, but are now superseded. That means that they'll stay around, but won't receive any new features and will only get critical bug fixes. ### Why do we like `across()`? Why did we decide to move away from these functions in favour of `across()`? 1. `across()` makes it possible to express useful summaries that were previously impossible: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise( across(where(is.numeric), mean), across(where(is.factor), nlevels), n = n(), ) ``` 1. `across()` reduces the number of functions that dplyr needs to provide. This makes dplyr easier for you to use (because there are fewer functions to remember) and easier for us to implement new verbs (since we only need to implement one function, not four). 1. `across()` unifies `_if` and `_at` semantics so that you can select by position, name, and type, and you can now create compound selections that were previously impossible. For example, you can now transform all numeric columns whose name begins with "x": `across(where(is.numeric) & starts_with("x"))`. 1. `across()` doesn't need to use `vars()`. 
The `_at()` functions are the only place in dplyr where you have to manually quote variable names, which makes them a little weird and hence harder to remember. ### Why did it take so long to discover `across()`? It's disappointing that we didn't discover `across()` earlier, and instead worked through several false starts (first not realising that it was a common problem, then with the `_each()` functions, and most recently with the `_if()`/`_at()`/`_all()` functions). But `across()` couldn't work without three recent discoveries: * You can have a column of a data frame that is itself a data frame. This is something provided by base R, but it's not very well documented, and it took a while to see that it was useful, not just a theoretical curiosity. * We can use data frames to allow summary functions to return multiple columns. * We can use the absence of an outer name as a convention that you want to unpack a data frame column into individual columns. ### How do you convert existing code? Fortunately, it's generally straightforward to translate your existing code to use `across()`: * Strip the `_if()`, `_at()` and `_all()` suffix off the function. * Call `across()`. The first argument will be: 1. For `_if()`, the old second argument wrapped in `where()`. 1. For `_at()`, the old second argument, with the call to `vars()` removed. 1. For `_all()`, `everything()`. The subsequent arguments can be copied as is. For example: ```{r, results = FALSE} df %>% mutate_if(is.numeric, mean, na.rm = TRUE) # -> df %>% mutate(across(where(is.numeric), mean, na.rm = TRUE)) df %>% mutate_at(vars(c(x, starts_with("y"))), mean) # -> df %>% mutate(across(c(x, starts_with("y")), mean, na.rm = TRUE)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) ``` There are a few exceptions to this rule: * `rename_*()` and `select_*()` follow a different pattern. 
They already have select semantics, so are generally used in a different way that doesn't have a direct equivalent with `across()`; use the new `rename_with()` instead. * Previously, `filter_*()` were paired with the `all_vars()` and `any_vars()` helpers. The new helpers `if_any()` and `if_all()` can be used inside `filter()` to keep rows for which the predicate is true for at least one, or all selected columns: ```{r} df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1)) # Find all rows where EVERY numeric variable is greater than zero df %>% filter(if_all(where(is.numeric), ~ .x > 0)) # Find all rows where ANY numeric variable is greater than zero df %>% filter(if_any(where(is.numeric), ~ .x > 0)) ``` * When used in a `mutate()`, all transformations performed by an `across()` are applied at once. This is different to the behaviour of `mutate_if()`, `mutate_at()`, and `mutate_all()`, which apply the transformations one at a time. We expect that you'll generally find the new behaviour less surprising: ```{r} df <- tibble(x = 2, y = 4, z = 8) df %>% mutate_all(~ .x / y) df %>% mutate(across(everything(), ~ .x / y)) ```