vctrs/0000755000176200001440000000000014042554003011410 5ustar liggesusersvctrs/NAMESPACE0000644000176200001440000004356514042545721012653 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("!",vctrs_vctr) S3method("!=",vctrs_vctr) S3method("$",vctrs_list_of) S3method("$",vctrs_rcrd) S3method("$",vctrs_sclr) S3method("$",vctrs_vctr) S3method("$<-",vctrs_list_of) S3method("$<-",vctrs_rcrd) S3method("$<-",vctrs_sclr) S3method("$<-",vctrs_vctr) S3method("%%",vctrs_vctr) S3method("%/%",vctrs_vctr) S3method("&",vctrs_vctr) S3method("*",vctrs_vctr) S3method("+",vctrs_vctr) S3method("-",vctrs_vctr) S3method("/",vctrs_vctr) S3method("<",vctrs_vctr) S3method("<=",vctrs_vctr) S3method("==",vctrs_vctr) S3method(">",vctrs_vctr) S3method(">=",vctrs_vctr) S3method("[",vctrs_rcrd) S3method("[",vctrs_sclr) S3method("[",vctrs_unspecified) S3method("[",vctrs_vctr) S3method("[<-",vctrs_list_of) S3method("[<-",vctrs_rcrd) S3method("[<-",vctrs_sclr) S3method("[<-",vctrs_vctr) S3method("[[",vctrs_list_of) S3method("[[",vctrs_rcrd) S3method("[[",vctrs_sclr) S3method("[[",vctrs_vctr) S3method("[[<-",vctrs_list_of) S3method("[[<-",vctrs_rcrd) S3method("[[<-",vctrs_sclr) S3method("[[<-",vctrs_vctr) S3method("^",vctrs_vctr) S3method("dim<-",vctrs_sclr) S3method("dim<-",vctrs_vctr) S3method("dimnames<-",vctrs_sclr) S3method("dimnames<-",vctrs_vctr) S3method("is.na<-",vctrs_sclr) S3method("is.na<-",vctrs_vctr) S3method("length<-",vctrs_rcrd) S3method("length<-",vctrs_vctr) S3method("levels<-",vctrs_sclr) S3method("levels<-",vctrs_vctr) S3method("names<-",vctrs_sclr) S3method("names<-",vctrs_vctr) S3method("|",vctrs_vctr) S3method(Complex,vctrs_sclr) S3method(Math,vctrs_sclr) S3method(Math,vctrs_vctr) S3method(Ops,vctrs_sclr) S3method(Summary,vctrs_sclr) S3method(Summary,vctrs_vctr) S3method(anyDuplicated,vctrs_sclr) S3method(anyDuplicated,vctrs_vctr) S3method(anyNA,vctrs_vctr) S3method(as.Date,vctrs_sclr) S3method(as.Date,vctrs_vctr) S3method(as.POSIXct,vctrs_sclr) S3method(as.POSIXct,vctrs_vctr) S3method(as.POSIXlt,vctrs_vctr) S3method(as.character,vctrs_list_of) S3method(as.character,vctrs_sclr) S3method(as.character,vctrs_vctr) S3method(as.data.frame,vctrs_sclr) S3method(as.data.frame,vctrs_vctr) S3method(as.double,vctrs_sclr) S3method(as.double,vctrs_vctr) S3method(as.integer,vctrs_sclr) S3method(as.integer,vctrs_vctr) S3method(as.list,vctrs_list_of) S3method(as.list,vctrs_sclr) S3method(as.list,vctrs_vctr) S3method(as.logical,vctrs_sclr) S3method(as.logical,vctrs_vctr) S3method(as_list_of,list) S3method(as_list_of,vctrs_list_of) S3method(c,vctrs_sclr) S3method(c,vctrs_vctr) S3method(cnd_body,vctrs_error_cast_lossy) S3method(cnd_body,vctrs_error_incompatible_size) S3method(cnd_body,vctrs_error_names_cannot_be_dot_dot) S3method(cnd_body,vctrs_error_names_cannot_be_empty) S3method(cnd_body,vctrs_error_names_must_be_unique) S3method(cnd_body,vctrs_error_subscript_oob) S3method(cnd_body,vctrs_error_subscript_type) S3method(cnd_header,vctrs_error_cast_lossy) S3method(cnd_header,vctrs_error_incompatible_size) S3method(cnd_header,vctrs_error_names_cannot_be_dot_dot) S3method(cnd_header,vctrs_error_names_cannot_be_empty) S3method(cnd_header,vctrs_error_names_must_be_unique) S3method(cnd_header,vctrs_error_subscript_oob) S3method(cnd_header,vctrs_error_subscript_size) S3method(cnd_header,vctrs_error_subscript_type) S3method(conditionMessage,vctrs_error_cast_lossy) S3method(diff,vctrs_vctr) S3method(duplicated,vctrs_sclr) S3method(duplicated,vctrs_vctr) S3method(format,vctrs_bytes) S3method(format,vctrs_group_rle) S3method(format,vctrs_list_of) S3method(format,vctrs_rcrd) S3method(format,vctrs_vctr) S3method(is.finite,vctrs_vctr) S3method(is.infinite,vctrs_vctr) S3method(is.na,vctrs_vctr) S3method(is.nan,vctrs_vctr) S3method(length,vctrs_rcrd) S3method(levels,vctrs_sclr) S3method(levels,vctrs_vctr) S3method(max,vctrs_vctr) S3method(mean,vctrs_vctr) S3method(median,vctrs_vctr) S3method(min,vctrs_vctr) S3method(names,vctrs_rcrd) S3method(obj_print_data,default) S3method(obj_print_data,vctrs_list_of) S3method(obj_print_data,vctrs_partial) S3method(obj_print_footer,default) S3method(obj_print_header,default) S3method(obj_print_header,vctrs_group_rle) S3method(obj_print_header,vctrs_partial) S3method(obj_str_data,default) S3method(obj_str_data,vctrs_rcrd) S3method(obj_str_footer,default) S3method(obj_str_header,default) S3method(print,vctrs_bytes) S3method(print,vctrs_sclr) S3method(print,vctrs_unspecified) S3method(print,vctrs_vctr) S3method(quantile,vctrs_vctr) S3method(range,vctrs_vctr) S3method(rep,vctrs_rcrd) S3method(rep,vctrs_vctr) S3method(str,vctrs_vctr) S3method(summary,vctrs_sclr) S3method(summary,vctrs_vctr) S3method(t,vctrs_sclr) S3method(t,vctrs_vctr) S3method(unique,vctrs_sclr) S3method(unique,vctrs_vctr) S3method(vec_arith,Date) S3method(vec_arith,POSIXct) S3method(vec_arith,POSIXlt) S3method(vec_arith,default) S3method(vec_arith,difftime) S3method(vec_arith,factor) S3method(vec_arith,logical) S3method(vec_arith,numeric) S3method(vec_arith.Date,Date) S3method(vec_arith.Date,POSIXct) S3method(vec_arith.Date,POSIXlt) S3method(vec_arith.Date,default) S3method(vec_arith.Date,difftime) S3method(vec_arith.Date,numeric) S3method(vec_arith.POSIXct,Date) S3method(vec_arith.POSIXct,POSIXct) S3method(vec_arith.POSIXct,POSIXlt) S3method(vec_arith.POSIXct,default) S3method(vec_arith.POSIXct,difftime) S3method(vec_arith.POSIXct,numeric) S3method(vec_arith.POSIXlt,Date) S3method(vec_arith.POSIXlt,POSIXct) S3method(vec_arith.POSIXlt,POSIXlt) S3method(vec_arith.POSIXlt,default) S3method(vec_arith.POSIXlt,difftime) S3method(vec_arith.POSIXlt,numeric) S3method(vec_arith.difftime,Date) S3method(vec_arith.difftime,MISSING) S3method(vec_arith.difftime,POSIXct) S3method(vec_arith.difftime,POSIXlt) S3method(vec_arith.difftime,default) S3method(vec_arith.difftime,difftime) S3method(vec_arith.difftime,numeric) S3method(vec_arith.logical,default) S3method(vec_arith.logical,logical) S3method(vec_arith.logical,numeric) S3method(vec_arith.numeric,Date) S3method(vec_arith.numeric,POSIXct) S3method(vec_arith.numeric,POSIXlt) S3method(vec_arith.numeric,default) S3method(vec_arith.numeric,difftime) S3method(vec_arith.numeric,logical) S3method(vec_arith.numeric,numeric) S3method(vec_cast,Date) S3method(vec_cast,POSIXct) S3method(vec_cast,POSIXlt) S3method(vec_cast,character) S3method(vec_cast,character.factor) S3method(vec_cast,character.ordered) S3method(vec_cast,complex) S3method(vec_cast,data.frame) S3method(vec_cast,data.frame.data.table) S3method(vec_cast,data.table.data.frame) S3method(vec_cast,data.table.data.table) S3method(vec_cast,difftime) S3method(vec_cast,double) S3method(vec_cast,double.exclude) S3method(vec_cast,double.omit) S3method(vec_cast,exclude.double) S3method(vec_cast,exclude.exclude) S3method(vec_cast,exclude.integer) S3method(vec_cast,factor) S3method(vec_cast,factor.character) S3method(vec_cast,factor.factor) S3method(vec_cast,integer) S3method(vec_cast,integer.exclude) S3method(vec_cast,integer.omit) S3method(vec_cast,integer64) S3method(vec_cast,list) S3method(vec_cast,logical) S3method(vec_cast,omit.double) S3method(vec_cast,omit.integer) S3method(vec_cast,omit.omit) S3method(vec_cast,ordered) S3method(vec_cast,ordered.character) S3method(vec_cast,ordered.ordered) S3method(vec_cast,raw) S3method(vec_cast,table.table) S3method(vec_cast,vctrs_list_of) S3method(vec_cast,vctrs_rcrd) S3method(vec_cast,vctrs_rcrd.vctrs_rcrd) S3method(vec_cast,vctrs_vctr) S3method(vec_cast.Date,Date) S3method(vec_cast.Date,POSIXct) S3method(vec_cast.Date,POSIXlt) S3method(vec_cast.POSIXct,Date) S3method(vec_cast.POSIXct,POSIXct) S3method(vec_cast.POSIXct,POSIXlt) S3method(vec_cast.POSIXlt,Date) S3method(vec_cast.POSIXlt,POSIXct) S3method(vec_cast.POSIXlt,POSIXlt) S3method(vec_cast.character,character) S3method(vec_cast.complex,complex) S3method(vec_cast.complex,double) S3method(vec_cast.complex,integer) S3method(vec_cast.complex,logical) S3method(vec_cast.data.frame,data.frame) S3method(vec_cast.difftime,difftime) S3method(vec_cast.double,double) S3method(vec_cast.double,integer) S3method(vec_cast.double,integer64) S3method(vec_cast.double,logical) S3method(vec_cast.integer,double) S3method(vec_cast.integer,integer) S3method(vec_cast.integer,integer64) S3method(vec_cast.integer,logical) S3method(vec_cast.integer64,double) S3method(vec_cast.integer64,integer) S3method(vec_cast.integer64,integer64) S3method(vec_cast.integer64,logical) S3method(vec_cast.list,list) S3method(vec_cast.logical,double) S3method(vec_cast.logical,integer) S3method(vec_cast.logical,integer64) S3method(vec_cast.logical,logical) S3method(vec_cast.raw,raw) S3method(vec_cast.vctrs_list_of,vctrs_list_of) S3method(vec_cbind_frame_ptype,default) S3method(vec_cbind_frame_ptype,sf) S3method(vec_math,Date) S3method(vec_math,POSIXct) S3method(vec_math,POSIXlt) S3method(vec_math,default) S3method(vec_math,factor) S3method(vec_math,vctrs_rcrd) S3method(vec_proxy,"vctrs:::common_class_fallback") S3method(vec_proxy,AsIs) S3method(vec_proxy,Date) S3method(vec_proxy,POSIXct) S3method(vec_proxy,POSIXlt) S3method(vec_proxy,default) S3method(vec_proxy,exclude) S3method(vec_proxy,factor) S3method(vec_proxy,numeric_version) S3method(vec_proxy,omit) S3method(vec_proxy,ordered) S3method(vec_proxy,vctrs_list_of) S3method(vec_proxy,vctrs_rcrd) S3method(vec_proxy,vctrs_vctr) S3method(vec_proxy_compare,POSIXlt) S3method(vec_proxy_compare,array) S3method(vec_proxy_compare,data.frame) S3method(vec_proxy_compare,default) S3method(vec_proxy_compare,integer64) S3method(vec_proxy_compare,list) S3method(vec_proxy_compare,raw) S3method(vec_proxy_compare,vctrs_rcrd) S3method(vec_proxy_equal,POSIXlt) S3method(vec_proxy_equal,array) S3method(vec_proxy_equal,data.frame) S3method(vec_proxy_equal,default) S3method(vec_proxy_order,array) S3method(vec_proxy_order,data.frame) S3method(vec_proxy_order,default) S3method(vec_proxy_order,list) S3method(vec_proxy_order,raw) S3method(vec_ptype2,AsIs) S3method(vec_ptype2,Date) S3method(vec_ptype2,POSIXct) S3method(vec_ptype2,POSIXlt) S3method(vec_ptype2,character) S3method(vec_ptype2,character.factor) S3method(vec_ptype2,character.ordered) S3method(vec_ptype2,complex) S3method(vec_ptype2,data.frame) S3method(vec_ptype2,data.frame.data.table) S3method(vec_ptype2,data.table.data.frame) S3method(vec_ptype2,data.table.data.table) S3method(vec_ptype2,difftime) S3method(vec_ptype2,double) S3method(vec_ptype2,double.exclude) S3method(vec_ptype2,double.omit) S3method(vec_ptype2,exclude.double) S3method(vec_ptype2,exclude.exclude) S3method(vec_ptype2,exclude.integer) S3method(vec_ptype2,factor) S3method(vec_ptype2,factor.character) S3method(vec_ptype2,factor.factor) S3method(vec_ptype2,factor.ordered) S3method(vec_ptype2,integer) S3method(vec_ptype2,integer.exclude) S3method(vec_ptype2,integer.omit) S3method(vec_ptype2,integer64) S3method(vec_ptype2,list) S3method(vec_ptype2,logical) S3method(vec_ptype2,omit.double) S3method(vec_ptype2,omit.integer) S3method(vec_ptype2,omit.omit) S3method(vec_ptype2,ordered) S3method(vec_ptype2,ordered.character) S3method(vec_ptype2,ordered.factor) S3method(vec_ptype2,ordered.ordered) S3method(vec_ptype2,raw) S3method(vec_ptype2,table.table) S3method(vec_ptype2,vctrs_list_of) S3method(vec_ptype2,vctrs_partial_factor) S3method(vec_ptype2,vctrs_partial_frame) S3method(vec_ptype2.AsIs,AsIs) S3method(vec_ptype2.Date,Date) S3method(vec_ptype2.Date,POSIXct) S3method(vec_ptype2.Date,POSIXlt) S3method(vec_ptype2.POSIXct,Date) S3method(vec_ptype2.POSIXct,POSIXct) S3method(vec_ptype2.POSIXct,POSIXlt) S3method(vec_ptype2.POSIXlt,Date) S3method(vec_ptype2.POSIXlt,POSIXct) S3method(vec_ptype2.POSIXlt,POSIXlt) S3method(vec_ptype2.character,character) S3method(vec_ptype2.complex,complex) S3method(vec_ptype2.complex,double) S3method(vec_ptype2.complex,integer) S3method(vec_ptype2.data.frame,data.frame) S3method(vec_ptype2.data.frame,vctrs_partial_frame) S3method(vec_ptype2.difftime,difftime) S3method(vec_ptype2.double,complex) S3method(vec_ptype2.double,double) S3method(vec_ptype2.double,integer) S3method(vec_ptype2.double,logical) S3method(vec_ptype2.factor,vctrs_partial_factor) S3method(vec_ptype2.integer,complex) S3method(vec_ptype2.integer,double) S3method(vec_ptype2.integer,integer) S3method(vec_ptype2.integer,integer64) S3method(vec_ptype2.integer,logical) S3method(vec_ptype2.integer64,integer) S3method(vec_ptype2.integer64,integer64) S3method(vec_ptype2.integer64,logical) S3method(vec_ptype2.list,list) S3method(vec_ptype2.logical,double) S3method(vec_ptype2.logical,integer) S3method(vec_ptype2.logical,integer64) S3method(vec_ptype2.logical,logical) S3method(vec_ptype2.raw,raw) S3method(vec_ptype2.vctrs_list_of,vctrs_list_of) S3method(vec_ptype2.vctrs_partial_factor,factor) S3method(vec_ptype2.vctrs_partial_factor,vctrs_partial_factor) S3method(vec_ptype2.vctrs_partial_frame,data.frame) S3method(vec_ptype2.vctrs_partial_frame,vctrs_partial_frame) S3method(vec_ptype_abbr,"NULL") S3method(vec_ptype_abbr,AsIs) S3method(vec_ptype_abbr,Date) S3method(vec_ptype_abbr,POSIXt) S3method(vec_ptype_abbr,data.frame) S3method(vec_ptype_abbr,default) S3method(vec_ptype_abbr,difftime) S3method(vec_ptype_abbr,factor) S3method(vec_ptype_abbr,integer64) S3method(vec_ptype_abbr,ordered) S3method(vec_ptype_abbr,table) S3method(vec_ptype_abbr,vctrs_list_of) S3method(vec_ptype_abbr,vctrs_partial_factor) S3method(vec_ptype_abbr,vctrs_partial_frame) S3method(vec_ptype_abbr,vctrs_unspecified) S3method(vec_ptype_finalise,default) S3method(vec_ptype_finalise,vctrs_partial) S3method(vec_ptype_finalise,vctrs_partial_factor) S3method(vec_ptype_finalise,vctrs_partial_frame) S3method(vec_ptype_full,"NULL") S3method(vec_ptype_full,AsIs) S3method(vec_ptype_full,Date) S3method(vec_ptype_full,POSIXct) S3method(vec_ptype_full,POSIXlt) S3method(vec_ptype_full,data.frame) S3method(vec_ptype_full,default) S3method(vec_ptype_full,difftime) S3method(vec_ptype_full,factor) S3method(vec_ptype_full,integer64) S3method(vec_ptype_full,ordered) S3method(vec_ptype_full,table) S3method(vec_ptype_full,vctrs_list_of) S3method(vec_ptype_full,vctrs_partial_factor) S3method(vec_ptype_full,vctrs_partial_frame) S3method(vec_restore,AsIs) S3method(vec_restore,Date) S3method(vec_restore,POSIXct) S3method(vec_restore,POSIXlt) S3method(vec_restore,data.frame) S3method(vec_restore,default) S3method(vec_restore,exclude) S3method(vec_restore,factor) S3method(vec_restore,omit) S3method(vec_restore,ordered) S3method(vec_restore,table) S3method(vec_restore,vctrs_rcrd) S3method(vec_restore,vctrs_vctr) S3method(xtfrm,vctrs_sclr) S3method(xtfrm,vctrs_vctr) export("%0%") export("field<-") export("vec_slice<-") export(MISSING) export(allow_lossy_cast) export(as_list_of) export(data_frame) export(df_cast) export(df_list) export(df_ptype2) export(field) export(fields) export(is_list_of) export(is_partial) export(list_of) export(list_sizes) export(maybe_lossy_cast) export(n_fields) export(new_data_frame) export(new_date) export(new_datetime) export(new_duration) export(new_factor) export(new_list_of) export(new_ordered) export(new_partial) export(new_rcrd) export(new_vctr) export(num_as_location) export(num_as_location2) export(obj_print) export(obj_print_data) export(obj_print_footer) export(obj_print_header) export(obj_str) export(obj_str_data) export(obj_str_footer) export(obj_str_header) export(partial_factor) export(partial_frame) export(s3_register) export(stop_incompatible_cast) export(stop_incompatible_op) export(stop_incompatible_size) export(stop_incompatible_type) export(tib_cast) export(tib_ptype2) export(unspecified) export(validate_list_of) export(vec_arith) export(vec_arith.Date) export(vec_arith.POSIXct) export(vec_arith.POSIXlt) export(vec_arith.difftime) export(vec_arith.logical) export(vec_arith.numeric) export(vec_arith_base) export(vec_as_index) export(vec_as_location) export(vec_as_location2) export(vec_as_names) export(vec_as_names_legacy) export(vec_as_subscript) export(vec_as_subscript2) export(vec_assert) export(vec_assign) export(vec_c) export(vec_cast) export(vec_cast.Date) export(vec_cast.POSIXct) export(vec_cast.POSIXlt) export(vec_cast.character) export(vec_cast.complex) export(vec_cast.data.frame) export(vec_cast.difftime) export(vec_cast.double) export(vec_cast.factor) export(vec_cast.integer) export(vec_cast.integer64) export(vec_cast.list) export(vec_cast.logical) export(vec_cast.ordered) export(vec_cast.raw) export(vec_cast.vctrs_list_of) export(vec_cast_common) export(vec_cbind) export(vec_cbind_frame_ptype) export(vec_chop) export(vec_compare) export(vec_count) export(vec_data) export(vec_default_cast) export(vec_default_ptype2) export(vec_detect_complete) export(vec_duplicate_any) export(vec_duplicate_detect) export(vec_duplicate_id) export(vec_empty) export(vec_equal) export(vec_equal_na) export(vec_fill_missing) export(vec_group_id) export(vec_group_loc) export(vec_group_rle) export(vec_identify_runs) export(vec_in) export(vec_init) export(vec_init_along) export(vec_is) export(vec_is_empty) export(vec_is_list) export(vec_list_cast) export(vec_match) export(vec_math) export(vec_math_base) export(vec_names) export(vec_names2) export(vec_order) export(vec_proxy) export(vec_proxy_compare) export(vec_proxy_equal) export(vec_proxy_order) export(vec_ptype) export(vec_ptype2) export(vec_ptype2.AsIs) export(vec_ptype2.Date) export(vec_ptype2.POSIXct) export(vec_ptype2.POSIXlt) export(vec_ptype2.character) export(vec_ptype2.complex) export(vec_ptype2.data.frame) export(vec_ptype2.difftime) export(vec_ptype2.double) export(vec_ptype2.factor) export(vec_ptype2.integer) export(vec_ptype2.integer64) export(vec_ptype2.list) export(vec_ptype2.logical) export(vec_ptype2.ordered) export(vec_ptype2.raw) export(vec_ptype2.vctrs_list_of) export(vec_ptype_abbr) export(vec_ptype_common) export(vec_ptype_finalise) export(vec_ptype_full) export(vec_ptype_show) export(vec_rbind) export(vec_recycle) export(vec_recycle_common) export(vec_rep) export(vec_rep_each) export(vec_repeat) export(vec_restore) export(vec_seq_along) export(vec_set_names) export(vec_size) export(vec_size_common) export(vec_slice) export(vec_sort) export(vec_split) export(vec_type) export(vec_type2) export(vec_type_common) export(vec_unchop) export(vec_unique) export(vec_unique_count) export(vec_unique_loc) export(vec_unrep) import(rlang) importFrom(stats,median) importFrom(stats,quantile) useDynLib(vctrs, .registration = TRUE) vctrs/LICENSE.note0000644000176200001440000004136713753021253013400 0ustar liggesusersThe implementation of vec_order() is based on data.table’s forder() and their earlier contribution to R’s order(). This warrants placing specific files in the vctrs package under the MPL-2.0 license used by data.table. Files named with the pattern of `src/order-*.c` and `src/order-*.h` are additionally under the MPL-2.0 license. MPL-2.0 License ---------------------------------------------------------------- Mozilla Public License Version 2.0 ================================== 1. Definitions -------------- 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. "Modifications" means any of the following: (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or (b) any new file in Source Code Form that contains any Covered Software. 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions -------------------------------- 2.1. Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: (a) for any code that a Contributor has removed from Covered Software; or (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or (c) under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities ------------------- 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation --------------------------------------------------- If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination -------------- 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ************************************************************************ * * * 6. Disclaimer of Warranty * * ------------------------- * * * * Covered Software is provided under this License on an "as is" * * basis, without warranty of any kind, either expressed, implied, or * * statutory, including, without limitation, warranties that the * * Covered Software is free of defects, merchantable, fit for a * * particular purpose or non-infringing. The entire risk as to the * * quality and performance of the Covered Software is with You. * * Should any Covered Software prove defective in any respect, You * * (not any Contributor) assume the cost of any necessary servicing, * * repair, or correction. This disclaimer of warranty constitutes an * * essential part of this License. No use of any Covered Software is * * authorized under this License except under this disclaimer. * * * ************************************************************************ ************************************************************************ * * * 7. Limitation of Liability * * -------------------------- * * * * Under no circumstances and under no legal theory, whether tort * * (including negligence), contract, or otherwise, shall any * * Contributor, or anyone who distributes Covered Software as * * permitted above, be liable to You for any direct, indirect, * * special, incidental, or consequential damages of any character * * including, without limitation, damages for lost profits, loss of * * goodwill, work stoppage, computer failure or malfunction, or any * * and all other commercial damages or losses, even if such party * * shall have been informed of the possibility of such damages. This * * limitation of liability shall not apply to liability for death or * * personal injury resulting from such party's negligence to the * * extent applicable law prohibits such limitation. Some * * jurisdictions do not allow the exclusion or limitation of * * incidental or consequential damages, so this exclusion and * * limitation may not apply to You. * * * ************************************************************************ 8. Litigation ------------- Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. Miscellaneous ---------------- This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License --------------------------- 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice ------------------------------------------- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. Exhibit B - "Incompatible With Secondary Licenses" Notice --------------------------------------------------------- This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. vctrs/LICENSE0000644000176200001440000000005313753021253012417 0ustar liggesusersYEAR: 2020 COPYRIGHT HOLDER: vctrs authors vctrs/README.md0000644000176200001440000000737014030312432012671 0ustar liggesusers # vctrs [![Coverage status](https://codecov.io/gh/r-lib/vctrs/branch/master/graph/badge.svg)](https://codecov.io/github/r-lib/vctrs?branch=master) ![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg) [![R build status](https://github.com/r-lib/vctrs/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/vctrs/actions) There are three main goals to the vctrs package, each described in a vignette: - To propose `vec_size()` and `vec_ptype()` as alternatives to `length()` and `class()`; `vignette("type-size")`. These definitions are paired with a framework for size-recycling and type-coercion. `ptype` should evoke the notion of a prototype, i.e. the original or typical form of something. - To define size- and type-stability as desirable function properties, use them to analyse existing base functions, and to propose better alternatives; `vignette("stability")`. This work has been particularly motivated by thinking about the ideal properties of `c()`, `ifelse()`, and `rbind()`. - To provide a new `vctr` base class that makes it easy to create new S3 vectors; `vignette("s3-vector")`. vctrs provides methods for many base generics in terms of a few new vctrs generics, making implementation considerably simpler and more robust. vctrs is a developer-focussed package. Understanding and extending vctrs requires some effort from developers, but should be invisible to most users. It’s our hope that having an underlying theory will mean that users can build up an accurate mental model without explicitly learning the theory. vctrs will typically be used by other packages, making it easy for them to provide new classes of S3 vectors that are supported throughout the tidyverse (and beyond). For that reason, vctrs has few dependencies. ## Installation Install vctrs from CRAN with: ``` r install.packages("vctrs") ``` Alternatively, if you need the development version, install it with: ``` r # install.packages("devtools") devtools::install_github("r-lib/vctrs") ``` ## Usage ``` r library(vctrs) # Sizes str(vec_size_common(1, 1:10)) #> int 10 str(vec_recycle_common(1, 1:10)) #> List of 2 #> $ : num [1:10] 1 1 1 1 1 1 1 1 1 1 #> $ : int [1:10] 1 2 3 4 5 6 7 8 9 10 # Prototypes str(vec_ptype_common(FALSE, 1L, 2.5)) #> num(0) str(vec_cast_common(FALSE, 1L, 2.5)) #> List of 3 #> $ : num 0 #> $ : num 1 #> $ : num 2.5 ``` ## Motivation The original motivation for vctrs comes from two separate but related problems. The first problem is that `base::c()` has rather undesirable behaviour when you mix different S3 vectors: ``` r # combining factors makes integers c(factor("a"), factor("b")) #> [1] 1 1 # combining dates and date-times gives incorrect values; also, order matters dt <- as.Date("2020-01-01") dttm <- as.POSIXct(dt) c(dt, dttm) #> [1] "2020-01-01" "4321940-06-07" c(dttm, dt) #> [1] "2019-12-31 19:00:00 EST" "1970-01-01 00:04:22 EST" ``` This behaviour arises because `c()` has dual purposes: as well as its primary duty of combining vectors, it has a secondary duty of stripping attributes. For example, `?POSIXct` suggests that you should use `c()` if you want to reset the timezone. The second problem is that `dplyr::bind_rows()` is not extensible by others. Currently, it handles arbitrary S3 classes using heuristics, but these often fail, and it feels like we really need to think through the problem in order to build a principled solution. This intersects with the need to cleanly support more types of data frame columns, including lists of data frames, data frames, and matrices. vctrs/man/0000755000176200001440000000000014042546502012170 5ustar liggesusersvctrs/man/vctrs-data-frame.Rd0000644000176200001440000000075713712271424015631 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{vctrs-data-frame} \alias{vctrs-data-frame} \alias{vec_ptype2.data.frame} \alias{vec_cast.data.frame} \title{vctrs methods for data frames} \usage{ \method{vec_ptype2}{data.frame}(x, y, ...) \method{vec_cast}{data.frame}(x, to, ...) } \description{ These functions help the base data.frame class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/theory-faq-coercion.Rd0000644000176200001440000002575314027045462016353 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{theory-faq-coercion} \alias{theory-faq-coercion} \title{FAQ - How does coercion work in vctrs?} \description{ This is an overview of the usage of \code{vec_ptype2()} and \code{vec_cast()} and their role in the vctrs coercion mechanism. Related topics: \itemize{ \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } \subsection{Combination mechanism in vctrs}{ The coercion system in vctrs is designed to make combination of multiple inputs consistent and extensible. Combinations occur in many places, such as row-binding, joins, subset-assignment, or grouped summary functions that use the split-apply-combine strategy. For example:\if{html}{\out{
}}\preformatted{vec_c(TRUE, 1) #> [1] 1 1 vec_c("a", 1) #> Error: Can't combine `..1` and `..2` . vec_rbind( data.frame(x = TRUE), data.frame(x = 1, y = 2) ) #> x y #> 1 1 NA #> 2 1 2 vec_rbind( data.frame(x = "a"), data.frame(x = 1, y = 2) ) #> Error: Can't combine `..1$x` and `..2$x` . }\if{html}{\out{
}} One major goal of vctrs is to provide a central place for implementing the coercion methods that make generic combinations possible. The two relevant generics are \code{vec_ptype2()} and \code{vec_cast()}. They both take two arguments and perform \strong{double dispatch}, meaning that a method is selected based on the classes of both inputs. The general mechanism for combining multiple inputs is: \enumerate{ \item Find the common type of a set of inputs by reducing (as in \code{base::Reduce()} or \code{purrr::reduce()}) the \code{vec_ptype2()} binary function over the set. \item Convert all inputs to the common type with \code{vec_cast()}. \item Initialise the output vector as an instance of this common type with \code{vec_init()}. \item Fill the output vector with the elements of the inputs using \code{vec_assign()}. } The last two steps may require \code{vec_proxy()} and \code{vec_restore()} implementations, unless the attributes of your class are constant and do not depend on the contents of the vector. We focus here on the first two steps, which require \code{vec_ptype2()} and \code{vec_cast()} implementations. } \subsection{\code{vec_ptype2()}}{ Methods for \code{vec_ptype2()} are passed two \emph{prototypes}, i.e. two inputs emptied of their elements. They implement two behaviours: \itemize{ \item If the types of their inputs are compatible, indicate which of them is the richer type by returning it. If the types are of equal resolution, return any of the two. \item Throw an error with \code{stop_incompatible_type()} when it can be determined from the attributes that the types of the inputs are not compatible. } \subsection{Type compatibility}{ A type is \strong{compatible} with another type if the values it represents are a subset or a superset of the values of the other type. The notion of “value” is to be interpreted at a high level, in particular it is not the same as the memory representation. For example, factors are represented in memory with integers but their values are more related to character vectors than to round numbers:\if{html}{\out{
}}\preformatted{# Two factors are compatible vec_ptype2(factor("a"), factor("b")) #> factor(0) #> Levels: a b # Factors are compatible with a character vec_ptype2(factor("a"), "b") #> character(0) # But they are incompatible with integers vec_ptype2(factor("a"), 1L) #> Error: Can't combine > and . }\if{html}{\out{
}} } \subsection{Richness of type}{ Richness of type is not a very precise notion. It can be about richer data (for instance a \code{double} vector covers more values than an integer vector), richer behaviour (a \code{data.table} has richer behaviour than a \code{data.frame}), or both. If you have trouble determining which one of the two types is richer, it probably means they shouldn’t be automatically coercible. Let’s look again at what happens when we combine a factor and a character:\if{html}{\out{
}}\preformatted{vec_ptype2(factor("a"), "b") #> character(0) }\if{html}{\out{
}} The ptype2 method for \verb{} and \verb{>} returns \verb{} because the former is a richer type. The factor can only contain \code{"a"} strings, whereas the character can contain any strings. In this sense, factors are a \emph{subset} of character. Note that another valid behaviour would be to throw an incompatible type error. This is what a strict factor implementation would do. We have decided to be laxer in vctrs because it is easy to inadvertently create factors instead of character vectors, especially with older versions of R where \code{stringsAsFactors} is still true by default. } \subsection{Consistency and symmetry on permutation}{ Each ptype2 method should strive to have exactly the same behaviour when the inputs are permuted. This is not always possible, for example factor levels are aggregated in order:\if{html}{\out{
}}\preformatted{vec_ptype2(factor(c("a", "c")), factor("b")) #> factor(0) #> Levels: a c b vec_ptype2(factor("b"), factor(c("a", "c"))) #> factor(0) #> Levels: b a c }\if{html}{\out{
}} In any case, permuting the input should not return a fundamentally different type or introduce an incompatible type error. } \subsection{Coercion hierarchy}{ Coercible classes form a coercion (or subtyping) hierarchy. Here is a simplified diagram of the hierarchy for base types. In this diagram the directions of the arrows express which type is richer. They flow from the bottom (more constrained types) to the top (richer types). \figure{coerce.png} As a class implementor, you have two options. The simplest is to create an entirely separate hierarchy. The date and date-time classes are an example of an S3-based hierarchy that is completely separate. Alternatively, you can integrate your class in an existing hierarchy, typically by adding parent nodes on top of the hierarchy (your class is richer), by adding children node at the root of the hierarchy (your class is more constrained), or by inserting a node in the tree. These coercion hierarchies are \emph{implicit}, in the sense that they are implied by the \code{vec_ptype2()} implementations. There is no structured way to create or modify a hierarchy, instead you need to implement the appropriate coercion methods for all the types in your hierarchy, and diligently return the richer type in each case. The \code{vec_ptype2()} implementations are not transitive nor inherited, so all pairwise methods between classes lying on a given path must be implemented manually. This is something we might make easier in the future. } } \subsection{\code{vec_cast()}}{ The second generic, \code{vec_cast()}, is the one that looks at the data and actually performs the conversion. Because it has access to more information than \code{vec_ptype2()}, it may be stricter and cause an error in more cases. \code{vec_cast()} has three possible behaviours: \itemize{ \item Determine that the prototypes of the two inputs are not compatible. This must be decided in exactly the same way as for \code{vec_ptype2()}. Call \code{stop_incompatible_cast()} if you can determine from the attributes that the types are not compatible. \item Detect incompatible values. Usually this is because the target type is too restricted for the values supported by the input type. For example, a fractional number can’t be converted to an integer. The method should throw an error in that case. \item Return the input vector converted to the target type if all values are compatible. Whereas \code{vec_ptype2()} must return the same type when the inputs are permuted, \code{vec_cast()} is \emph{directional}. It always returns the type of the right-hand side, or dies trying. } } \subsection{Double dispatch}{ The dispatch mechanism for \code{vec_ptype2()} and \code{vec_cast()} looks like S3 but is actually a custom mechanism. Compared to S3, it has the following differences: \itemize{ \item It dispatches on the classes of the first two inputs. \item There is no inheritance of ptype2 and cast methods. This is because the S3 class hierarchy is not necessarily the same as the coercion hierarchy. \item \code{NextMethod()} does not work. Parent methods must be called explicitly if necessary. \item The default method is hard-coded. } } \subsection{Data frames}{ The determination of the common type of data frames with \code{vec_ptype2()} happens in three steps: \enumerate{ \item Match the columns of the two input data frames. If some columns don’t exist, they are created and filled with adequately typed \code{NA} values. \item Find the common type for each column by calling \code{vec_ptype2()} on each pair of matched columns. \item Find the common data frame type. For example the common type of a grouped tibble and a tibble is a grouped tibble because the latter is the richer type. The common type of a data table and a data frame is a data table. } \code{vec_cast()} operates similarly. If a data frame is cast to a target type that has fewer columns, this is an error. If you are implementing coercion methods for data frames, you will need to explicitly call the parent methods that perform the common type determination or the type conversion described above. These are exported as \code{\link[=df_ptype2]{df_ptype2()}} and \code{\link[=df_cast]{df_cast()}}. \subsection{Data frame fallbacks}{ Being too strict with data frame combinations would cause too much pain because there are many data frame subclasses in the wild that don’t implement vctrs methods. We have decided to implement a special fallback behaviour for foreign data frames. Incompatible data frames fall back to a base data frame:\if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1) df2 <- structure(df1, class = c("foreign_df", "data.frame")) vec_rbind(df1, df2) #> x #> 1 1 #> 2 1 }\if{html}{\out{
}} When a tibble is involved, we fall back to tibble:\if{html}{\out{
}}\preformatted{df3 <- tibble::as_tibble(df1) vec_rbind(df1, df3) #> # A tibble: 2 x 1 #> x #> #> 1 1 #> 2 1 }\if{html}{\out{
}} These fallbacks are not ideal but they make sense because all data frames share a common data structure. This is not generally the case for vectors. For example factors and characters have different representations, and it is not possible to find a fallback time mechanically. However this fallback has a big downside: implementing vctrs methods for your data frame subclass is a breaking behaviour change. The proper coercion behaviour for your data frame class should be specified as soon as possible to limit the consequences of changing the behaviour of your class in R scripts. } } } vctrs/man/vec_duplicate.Rd0000644000176200001440000000413113663716767015311 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_duplicate} \alias{vec_duplicate} \alias{vec_duplicate_any} \alias{vec_duplicate_detect} \alias{vec_duplicate_id} \title{Find duplicated values} \usage{ vec_duplicate_any(x) vec_duplicate_detect(x) vec_duplicate_id(x) } \arguments{ \item{x}{A vector (including a data frame).} } \value{ \itemize{ \item \code{vec_duplicate_any()}: a logical vector of length 1. \item \code{vec_duplicate_detect()}: a logical vector the same length as \code{x}. \item \code{vec_duplicate_id()}: an integer vector the same length as \code{x}. } } \description{ \itemize{ \item \code{vec_duplicate_any()}: detects the presence of duplicated values, similar to \code{\link[=anyDuplicated]{anyDuplicated()}}. \item \code{vec_duplicate_detect()}: returns a logical vector describing if each element of the vector is duplicated elsewhere. Unlike \code{\link[=duplicated]{duplicated()}}, it reports all duplicated values, not just the second and subsequent repetitions. \item \code{vec_duplicate_id()}: returns an integer vector giving the location of the first occurrence of the value. } } \section{Missing values}{ In most cases, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. This behaviour would be unappealing here, so these functions consider all \code{NAs} to be equal. (Similarly, all \code{NaN} are also considered to be equal.) } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ vec_duplicate_any(1:10) vec_duplicate_any(c(1, 1:10)) x <- c(10, 10, 20, 30, 30, 40) vec_duplicate_detect(x) # Note that `duplicated()` doesn't consider the first instance to # be a duplicate duplicated(x) # Identify elements of a vector by the location of the first element that # they're equal to: vec_duplicate_id(x) # Location of the unique values: vec_unique_loc(x) # Equivalent to `duplicated()`: vec_duplicate_id(x) == seq_along(x) } \seealso{ \code{\link[=vec_unique]{vec_unique()}} for functions that work with the dual of duplicated values: unique values. } vctrs/man/faq-error-incompatible-attributes.Rd0000644000176200001440000000233713655017473021223 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-error-incompatible-attributes} \alias{faq-error-incompatible-attributes} \title{FAQ - Error/Warning: Some attributes are incompatible} \description{ This error occurs when \code{\link[=vec_ptype2]{vec_ptype2()}} or \code{\link[=vec_cast]{vec_cast()}} are supplied vectors of the same classes with different attributes. In this case, vctrs doesn't know how to combine the inputs. To fix this error, the maintainer of the class should implement self-to-self coercion methods for \code{\link[=vec_ptype2]{vec_ptype2()}} and \code{\link[=vec_cast]{vec_cast()}}. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } vctrs/man/vec_seq_along.Rd0000644000176200001440000000135513622451540015270 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{vec_seq_along} \alias{vec_seq_along} \alias{vec_init_along} \title{Useful sequences} \usage{ vec_seq_along(x) vec_init_along(x, y = x) } \arguments{ \item{x, y}{Vectors} } \value{ \itemize{ \item \code{vec_seq_along()} an integer vector with the same size as \code{x}. \item \code{vec_init_along()} a vector with the same type as \code{x} and the same size as \code{y}. } } \description{ \code{vec_seq_along()} is equivalent to \code{\link[=seq_along]{seq_along()}} but uses size, not length. \code{vec_init_along()} creates a vector of missing values with size matching an existing object. } \examples{ vec_seq_along(mtcars) vec_init_along(head(mtcars)) } vctrs/man/vec_list_cast.Rd0000644000176200001440000000142113622451540015277 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cast-list.R \name{vec_list_cast} \alias{vec_list_cast} \title{Cast a list to vector of specific type} \usage{ vec_list_cast(x, to, ..., x_arg = "", to_arg = "") } \arguments{ \item{x}{A list} \item{to}{Type to coerce to} \item{...}{These dots are for future extensions and must be empty.} } \description{ This is a function for developers to use when extending vctrs. It casts a list to a more specific vectoring type, keeping the length constant. It does this by discarding (with a warning), any elements after the 1. It is called from \code{vec_cast.XYZ.list()} methods to preserve symmetry with \code{vec_cast.list.XYZ()}. } \details{ See \code{vignette("s3-vector")} for details. } \keyword{internal} vctrs/man/maybe_lossy_cast.Rd0000644000176200001440000000523614027045462016027 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conditions.R \name{maybe_lossy_cast} \alias{maybe_lossy_cast} \title{Lossy cast error} \usage{ maybe_lossy_cast( result, x, to, lossy = NULL, locations = NULL, ..., loss_type = c("precision", "generality"), x_arg, to_arg, details = NULL, message = NULL, class = NULL, .deprecation = FALSE ) } \arguments{ \item{result}{The result of a potentially lossy cast.} \item{x}{Vectors} \item{to}{Type to cast to.} \item{lossy}{A logical vector indicating which elements of \code{result} were lossy. Can also be a single \code{TRUE}, but note that \code{locations} picks up locations from this vector by default. In this case, supply your own location vector, possibly empty.} \item{locations}{An optional integer vector giving the locations where \code{x} lost information.} \item{...}{Only use these fields when creating a subclass.} \item{loss_type}{The kind of lossy cast to be mentioned in error messages. Can be loss of precision (for instance from double to integer) or loss of generality (from character to factor).} \item{x_arg}{Argument names for \code{x}, \code{y}, and \code{to}. Used in error messages to inform the user about the locations of incompatible types.} \item{to_arg}{Argument names for \code{x}, \code{y}, and \code{to}. Used in error messages to inform the user about the locations of incompatible types.} \item{details}{Any additional human readable details.} \item{message}{An overriding message for the error. \code{details} and \code{message} are mutually exclusive, supplying both is an error.} \item{class}{Only use these fields when creating a subclass.} \item{.deprecation}{If \code{TRUE}, the error is downgraded to a deprecation warning. This is useful for transitioning your class to a stricter conversion scheme. The warning advises your users to wrap their code with \code{allow_lossy_cast()}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} By default, lossy casts are an error. Use \code{allow_lossy_cast()} to silence these errors and continue with the partial results. In this case the lost values are typically set to \code{NA} or to a lower value resolution, depending on the type of cast. Lossy cast errors are thrown by \code{maybe_lossy_cast()}. Unlike functions prefixed with \code{stop_}, \code{maybe_lossy_cast()} usually returns a result. If a lossy cast is detected, it throws an error, unless it's been wrapped in \code{allow_lossy_cast()}. In that case, it returns the result silently. } \keyword{internal} vctrs/man/vec-rep.Rd0000644000176200001440000000536013753021253014023 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rep.R \name{vec-rep} \alias{vec-rep} \alias{vec_rep} \alias{vec_rep_each} \alias{vec_unrep} \title{Repeat a vector} \usage{ vec_rep(x, times) vec_rep_each(x, times) vec_unrep(x) } \arguments{ \item{x}{A vector.} \item{times}{For \code{vec_rep()}, a single integer for the number of times to repeat the entire vector. For \code{vec_rep_each()}, an integer vector of the number of times to repeat each element of \code{x}. \code{times} will be recycled to the size of \code{x}.} } \value{ For \code{vec_rep()}, a vector the same type as \code{x} with size \code{vec_size(x) * times}. For \code{vec_rep_each()}, a vector the same type as \code{x} with size \code{sum(vec_recycle(times, vec_size(x)))}. For \code{vec_unrep()}, a data frame with two columns, \code{key} and \code{times}. \code{key} is a vector with the same type as \code{x}, and \code{times} is an integer vector. } \description{ \itemize{ \item \code{vec_rep()} repeats an entire vector a set number of \code{times}. \item \code{vec_rep_each()} repeats each element of a vector a set number of \code{times}. \item \code{vec_unrep()} compresses a vector with repeated values. The repeated values are returned as a \code{key} alongside the number of \code{times} each key is repeated. } } \details{ Using \code{vec_unrep()} and \code{vec_rep_each()} together is similar to using \code{\link[base:rle]{base::rle()}} and \code{\link[base:rle]{base::inverse.rle()}}. The following invariant shows the relationship between the two functions:\preformatted{compressed <- vec_unrep(x) identical(x, vec_rep_each(compressed$key, compressed$times)) } There are two main differences between \code{vec_unrep()} and \code{\link[base:rle]{base::rle()}}: \itemize{ \item \code{vec_unrep()} treats adjacent missing values as equivalent, while \code{rle()} treats them as different values. \item \code{vec_unrep()} works along the size of \code{x}, while \code{rle()} works along its length. This means that \code{vec_unrep()} works on data frames by compressing repeated rows. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ # Repeat the entire vector vec_rep(1:2, 3) # Repeat within each vector vec_rep_each(1:2, 3) x <- vec_rep_each(1:2, c(3, 4)) x # After using `vec_rep_each()`, you can recover the original vector # with `vec_unrep()` vec_unrep(x) df <- data.frame(x = 1:2, y = 3:4) # `rep()` repeats columns of data frames, and returns lists rep(df, each = 2) # `vec_rep()` and `vec_rep_each()` repeat rows, and return data frames vec_rep(df, 2) vec_rep_each(df, 2) # `rle()` treats adjacent missing values as different y <- c(1, NA, NA, 2) rle(y) # `vec_unrep()` treats them as equivalent vec_unrep(y) } vctrs/man/vec_ptype2.Rd0000644000176200001440000000515213663716767014566 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-bare.R, R/type2.R \name{vec_ptype2.logical} \alias{vec_ptype2.logical} \alias{vec_ptype2.integer} \alias{vec_ptype2.double} \alias{vec_ptype2.complex} \alias{vec_ptype2.character} \alias{vec_ptype2.raw} \alias{vec_ptype2.list} \alias{vec_ptype2} \title{Find the common type for a pair of vectors} \usage{ \method{vec_ptype2}{logical}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{integer}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{double}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{complex}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{character}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{raw}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{list}(x, y, ..., x_arg = "", y_arg = "") vec_ptype2(x, y, ..., x_arg = "", y_arg = "") } \arguments{ \item{x, y}{Vector types.} \item{...}{These dots are for future extensions and must be empty.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \description{ \code{vec_ptype2()} defines the coercion hierarchy for a set of related vector types. Along with \code{\link[=vec_cast]{vec_cast()}}, this generic forms the foundation of type coercions in vctrs. \code{vec_ptype2()} is relevant when you are implementing vctrs methods for your class, but it should not usually be called directly. If you need to find the common type of a set of inputs, call \code{\link[=vec_ptype_common]{vec_ptype_common()}} instead. This function supports multiple inputs and \link[=vec_ptype_finalise]{finalises} the common type. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_ptype]{vec_ptype()}} is applied to \code{x} and \code{y} } } \seealso{ \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} when you determine from the attributes that an input can't be cast to the target type. } vctrs/man/new_rcrd.Rd0000644000176200001440000000170213723213047014262 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-rcrd.R \name{new_rcrd} \alias{new_rcrd} \alias{ses} \alias{rcrd} \title{rcrd (record) S3 class} \usage{ new_rcrd(fields, ..., class = character()) } \arguments{ \item{fields}{A list or a data frame. Lists must be rectangular (same sizes), and contain uniquely named vectors (at least one). \code{fields} is validated with \code{\link[=df_list]{df_list()}} to ensure uniquely named vectors.} \item{...}{Additional attributes} \item{class}{Name of subclass.} } \description{ The rcrd class extends \link{vctr}. A rcrd is composed of 1 or more \link{field}s, which must be vectors of the same length. Is designed specifically for classes that can naturally be decomposed into multiple vectors of the same length, like \link{POSIXlt}, but where the organisation should be considered an implementation detail invisible to the user (unlike a \link{data.frame}). } \keyword{internal} vctrs/man/new_partial.Rd0000644000176200001440000000220414027045462014764 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/partial.R \name{new_partial} \alias{new_partial} \alias{is_partial} \alias{vec_ptype_finalise} \title{Partial type} \usage{ new_partial(..., class = character()) is_partial(x) vec_ptype_finalise(x, ...) } \arguments{ \item{...}{Attributes of the partial type} \item{class}{Name of subclass.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use \code{new_partial()} when constructing a new partial type subclass; and use \code{is_partial()} to test if a type is partial. All subclasses need to provide a \code{vec_ptype_finalise()} method. } \details{ As the name suggests, a partial type \emph{partially} specifies a type, and it must be combined with data to yield a full type. A useful example of a partial type is \code{\link[=partial_frame]{partial_frame()}}, which makes it possible to specify the type of just a few columns in a data frame. Use this constructor if you're making your own partial type. } \keyword{internal} vctrs/man/faq/0000755000176200001440000000000014042554003012732 5ustar liggesusersvctrs/man/faq/internal/0000755000176200001440000000000014024442541014551 5ustar liggesusersvctrs/man/faq/internal/ptype2-identity.Rmd0000644000176200001440000000612313653027721020277 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ## Promotion monoid Promotions (i.e. automatic coercions) should always transform inputs to their richer type to avoid losing values of precision. `vec_ptype2()` returns the _richer_ type of two vectors, or throws an incompatible type error if none of the two vector types include the other. For example, the richer type of integer and double is the latter because double covers a larger range of values than integer. `vec_ptype2()` is a [monoid](https://en.wikipedia.org/wiki/Monoid) over vectors, which in practical terms means that it is a well behaved operation for [reduction](https://purrr.tidyverse.org/reference/reduce.html). Reduction is an important operation for promotions because that is how the richer type of multiple elements is computed. As a monoid, `vec_ptype2()` needs an identity element, i.e. a value that doesn't change the result of the reduction. vctrs has two identity values, `NULL` and __unspecified__ vectors. ## The `NULL` identity As an identity element that shouldn't influence the determination of the common type of a set of vectors, `NULL` is promoted to any type: ```{r} vec_ptype2(NULL, "") vec_ptype2(1L, NULL) ``` The common type of `NULL` and `NULL` is the identity `NULL`: ```{r} vec_ptype2(NULL, NULL) ``` This way the result of `vec_ptype2(NULL, NULL)` does not influence subsequent promotions: ```{r} vec_ptype2( vec_ptype2(NULL, NULL), "" ) ``` ## Unspecified vectors In the vctrs coercion system, logical vectors of missing values are also automatically promoted to the type of any other vector, just like `NULL`. We call these vectors unspecified. The special coercion semantics of unspecified vectors serve two purposes: 1. It makes it possible to assign vectors of `NA` inside any type of vectors, even when they are not coercible with logical: ```{r} x <- letters[1:5] vec_assign(x, 1:2, c(NA, NA)) ``` 2. We can't put `NULL` in a data frame, so we need an identity element that behaves more like a vector. Logical vectors of `NA` seem a natural fit for this. Unspecified vectors are thus promoted to any other type, just like `NULL`: ```{r} vec_ptype2(NA, "") vec_ptype2(1L, c(NA, NA)) ``` ## Finalising common types vctrs has an internal vector type of class `vctrs_unspecified`. Users normally don't see such vectors in the wild, but they do come up when taking the common type of an unspecified vector with another identity value: ```{r} vec_ptype2(NA, NA) vec_ptype2(NA, NULL) vec_ptype2(NULL, NA) ``` We can't return `NA` here because `vec_ptype2()` normally returns empty vectors. We also can't return `NULL` because unspecified vectors need to be recognised as logical vectors if they haven't been promoted at the end of the reduction. ```{r} vec_ptype_finalise(vec_ptype2(NULL, NA)) ``` See the output of `vec_ptype_common()` which performs the reduction and finalises the type, ready to be used by the caller: ```{r} vec_ptype_common(NULL, NULL) vec_ptype_common(NA, NULL) ``` Note that __partial__ types in vctrs make use of the same mechanism. They are finalised with `vec_ptype_finalise()`. vctrs/man/faq/developer/0000755000176200001440000000000014024441431014717 5ustar liggesusersvctrs/man/faq/developer/reference-compatibility.Rmd0000644000176200001440000000576213671672047022222 0ustar liggesusers vctrs provides a framework for working with vector classes in a generic way. However, it implements several compatibility fallbacks to base R methods. In this reference you will find how vctrs tries to be compatible with your vector class, and what base methods you need to implement for compatibility. If you're starting from scratch, we think you'll find it easier to start using [new_vctr()] as documented in `vignette("s3-vector")`. This guide is aimed for developers with existing vector classes. ## Aggregate operations with fallbacks All vctrs operations are based on four primitive generics described in the next section. However there are many higher level operations. The most important ones implement fallbacks to base generics for maximum compatibility with existing classes. - [vec_slice()] falls back to the base `[` generic if no [vec_proxy()] method is implemented. This way foreign classes that do not implement [vec_restore()] can restore attributes based on the new subsetted contents. - [vec_c()] and [vec_rbind()] now fall back to [base::c()] if the inputs have a common parent class with a `c()` method (only if they have no self-to-self `vec_ptype2()` method). vctrs works hard to make your `c()` method success in various situations (with `NULL` and `NA` inputs, even as first input which would normally prevent dispatch to your method). The main downside compared to using vctrs primitives is that you can't combine vectors of different classes since there is no extensible mechanism of coercion in `c()`, and it is less efficient in some cases. ## The vctrs primitives Most functions in vctrs are aggregate operations: they call other vctrs functions which themselves call other vctrs functions. The dependencies of a vctrs functions are listed in the Dependencies section of its documentation page. Take a look at [vec_count()] for an example. These dependencies form a tree whose leaves are the four vctrs primitives. Here is the diagram for `vec_count()`: \\figure{vec-count-deps.png} ### The coercion generics The coercion mechanism in vctrs is based on two generics: - [vec_ptype2()] - [vec_cast()] See the [theory overview][theory-faq-coercion]. Two objects with the same class and the same attributes are always considered compatible by ptype2 and cast. If the attributes or classes differ, they throw an incompatible type error. Coercion errors are the main source of incompatibility with vctrs. See the [howto guide][howto-faq-coercion] if you need to implement methods for these generics. ### The proxy and restoration generics - [vec_proxy()] - [vec_restore()] These generics are essential for vctrs but mostly optional. `vec_proxy()` defaults to an [identity][identity] function and you normally don't need to implement it. The proxy a vector must be one of the atomic vector types, a list, or a data frame. By default, S3 lists that do not inherit from `"list"` do not have an identity proxy. In that case, you need to explicitly implement `vec_proxy()` or make your class inherit from list. vctrs/man/faq/developer/snippet-roxy-workflow.Rmd0000644000176200001440000000122413653027721021724 0ustar liggesusers To implement methods for generics, first import the generics in your namespace and redocument: ```{r, eval = FALSE} #' @importFrom vctrs vec_ptype2 vec_cast NULL ``` Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won't be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with `s3_register()`. vctrs/man/faq/developer/howto-faq-fix-scalar-type-error.Rmd0000644000176200001440000000443513712307232023436 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} stopifnot(rlang::is_installed("dplyr")) ``` The tidyverse is a bit stricter than base R regarding what kind of objects are considered as vectors (see the [user FAQ][faq-error-scalar-type] about this topic). Sometimes vctrs won't treat your class as a vector when it should. ## Why isn't my list class considered a vector? By default, S3 lists are not considered to be vectors by vctrs: ```{r} my_list <- structure(list(), class = "my_class") vctrs::vec_is(my_list) ``` To be treated as a vector, the class must either inherit from `"list"` explicitly: ```{r} my_explicit_list <- structure(list(), class = c("my_class", "list")) vctrs::vec_is(my_explicit_list) ``` Or it should implement a `vec_proxy()` method that returns its input if explicit inheritance is not possible or troublesome: ```{r} #' @export vec_proxy.my_class <- function(x, ...) x vctrs::vec_is(my_list) ``` Note that explicit inheritance is the preferred way because this makes it possible for your class to dispatch on `list` methods of S3 generics: ```{r, error = TRUE} my_generic <- function(x) UseMethod("my_generic") my_generic.list <- function(x) "dispatched!" my_generic(my_list) my_generic(my_explicit_list) ``` ## Why isn't my data frame class considered a vector? The most likely explanation is that the data frame has not been properly constructed. However, if you get an "Input must be a vector" error with a data frame subclass, it probably means that the data frame has not been properly constructed. The main cause of these errors are data frames whose _base class_ is not `"data.frame"`: ```{r, error = TRUE} my_df <- data.frame(x = 1) class(my_df) <- c("data.frame", "my_class") vctrs::vec_assert(my_df) ``` This is problematic as many tidyverse functions won't work properly: ```{r, error = TRUE} dplyr::slice(my_df, 1) ``` It is generally not appropriate to declare your class to be a superclass of another class. We generally consider this undefined behaviour (UB). To fix these errors, you can simply change the construction of your data frame class so that `"data.frame"` is a base class, i.e. it should come last in the class vector: ```{r} class(my_df) <- c("my_class", "data.frame") vctrs::vec_assert(my_df) dplyr::slice(my_df, 1) ``` vctrs/man/faq/developer/howto-coercion.Rmd0000644000176200001440000002104713655017473020344 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} old_warn_on_fallback <- options(`vctrs:::warn_on_fallback` = FALSE) knitr_defer(options(old_warn_on_fallback)) ``` This guide illustrates how to implement `vec_ptype2()` and `vec_cast()` methods for existing classes. Related topics: - For an overview of how these generics work and their roles in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")` ## The natural number class We'll illustrate how to implement coercion methods with a simple class that represents natural numbers. In this scenario we have an existing class that already features a constructor and methods for `print()` and subset. ```{r} #' @export new_natural <- function(x) { if (is.numeric(x) || is.logical(x)) { stopifnot(is_whole(x)) x <- as.integer(x) } else { stop("Can't construct natural from unknown type.") } structure(x, class = "my_natural") } is_whole <- function(x) { all(x %% 1 == 0 | is.na(x)) } #' @export print.my_natural <- function(x, ...) { cat("\n") x <- unclass(x) NextMethod() } #' @export `[.my_natural` <- function(x, i, ...) { new_natural(NextMethod()) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("base::print", "my_natural") knitr_local_registration("base::[", "my_natural") ``` ```{r} new_natural(1:3) new_natural(c(1, NA)) ``` ## Roxygen workflow ```{r, child = "snippet-roxy-workflow.Rmd"} ``` ## Implementing `vec_ptype2()` ### The self-self method The first method to implement is the one that signals that your class is compatible with itself: ```{r} #' @export vec_ptype2.my_natural.my_natural <- function(x, y, ...) { x } vec_ptype2(new_natural(1), new_natural(2:3)) ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.my_natural") ``` `vec_ptype2()` implements a fallback to try and be compatible with simple classes, so it may seem that you don't need to implement the self-self coercion method. However, you must implement it explicitly because this is how vctrs knows that a class that is implementing vctrs methods (for instance this disable fallbacks to `base::c()`). Also, it makes your class a bit more efficient. ### The parent and children methods Our natural number class is conceptually a parent of `` and a child of ``, but the class is not compatible with logical, integer, or double vectors yet: ```{r, error = TRUE} vec_ptype2(TRUE, new_natural(2:3)) vec_ptype2(new_natural(1), 2:3) ``` We'll specify the twin methods for each of these classes, returning the richer class in each case. ```{r} #' @export vec_ptype2.my_natural.logical <- function(x, y, ...) { # The order of the classes in the method name follows the order of # the arguments in the function signature, so `x` is the natural # number and `y` is the logical x } #' @export vec_ptype2.logical.my_natural <- function(x, y, ...) { # In this case `y` is the richer natural number y } ``` Between a natural number and an integer, the latter is the richer class: ```{r} #' @export vec_ptype2.my_natural.integer <- function(x, y, ...) { y } #' @export vec_ptype2.integer.my_natural <- function(x, y, ...) { x } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.logical") knitr_local_registration("vctrs::vec_ptype2", "my_natural.integer") knitr_local_registration("vctrs::vec_ptype2", "integer.my_natural") knitr_local_registration("vctrs::vec_ptype2", "logical.my_natural") ``` We no longer get common type errors for logical and integer: ```{r} vec_ptype2(TRUE, new_natural(2:3)) vec_ptype2(new_natural(1), 2:3) ``` We are not done yet. Pairwise coercion methods must be implemented for all the connected nodes in the coercion hierarchy, which include double vectors further up. The coercion methods for grand-parent types must be implemented separately: ```{r} #' @export vec_ptype2.my_natural.double <- function(x, y, ...) { y } #' @export vec_ptype2.double.my_natural <- function(x, y, ...) { x } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.double") knitr_local_registration("vctrs::vec_ptype2", "double.my_natural") ``` ### Incompatible attributes Most of the time, inputs are incompatible because they have different classes for which no `vec_ptype2()` method is implemented. More rarely, inputs could be incompatible because of their attributes. In that case incompatibility is signalled by calling `stop_incompatible_type()`. In the following example, we implement a self-self ptype2 method for a hypothetical subclass of `` that has stricter combination semantics. The method throws when the levels of the two factors are not compatible. ```{r, eval = FALSE} #' @export vec_ptype2.my_strict_factor.my_strict_factor <- function(x, y, ..., x_arg = "", y_arg = "") { if (!setequal(levels(x), levels(y))) { stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) } x } ``` Note how the methods need to take `x_arg` and `y_arg` parameters and pass them on to `stop_incompatible_type()`. These argument tags help create more informative error messages when the common type determination is for a column of a data frame. They are part of the generic signature but can usually be left out if not used. ## Implementing `vec_cast()` Corresponding `vec_cast()` methods must be implemented for all `vec_ptype2()` methods. The general pattern is to convert the argument `x` to the type of `to`. The methods should validate the values in `x` and make sure they conform to the values of `to`. Please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents `to`, whereas the second class represents `x`. The self-self method is easy in this case, it just returns the target input: ```{r} #' @export vec_cast.my_natural.my_natural <- function(x, to, ...) { x } ``` The other types need to be validated. We perform input validation in the `new_natural()` constructor, so that's a good fit for our `vec_cast()` implementations. ```{r} #' @export vec_cast.my_natural.logical <- function(x, to, ...) { # The order of the classes in the method name is in reverse order # of the arguments in the function signature, so `to` is the natural # number and `x` is the logical new_natural(x) } vec_cast.my_natural.integer <- function(x, to, ...) { new_natural(x) } vec_cast.my_natural.double <- function(x, to, ...) { new_natural(x) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_cast", "my_natural.my_natural") knitr_local_registration("vctrs::vec_cast", "my_natural.logical") knitr_local_registration("vctrs::vec_cast", "my_natural.integer") knitr_local_registration("vctrs::vec_cast", "my_natural.double") ``` With these methods, vctrs is now able to combine logical and natural vectors. It properly returns the richer type of the two, a natural vector: ```{r} vec_c(TRUE, new_natural(1), FALSE) ``` Because we haven't implemented conversions _from_ natural, it still doesn't know how to combine natural with the richer integer and double types: ```{r, error = TRUE} vec_c(new_natural(1), 10L) vec_c(1.5, new_natural(1)) ``` This is quick work which completes the implementation of coercion methods for vctrs: ```{r} #' @export vec_cast.logical.my_natural <- function(x, to, ...) { # In this case `to` is the logical and `x` is the natural number attributes(x) <- NULL as.logical(x) } #' @export vec_cast.integer.my_natural <- function(x, to, ...) { attributes(x) <- NULL as.integer(x) } #' @export vec_cast.double.my_natural <- function(x, to, ...) { attributes(x) <- NULL as.double(x) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_cast", "logical.my_natural") knitr_local_registration("vctrs::vec_cast", "integer.my_natural") knitr_local_registration("vctrs::vec_cast", "double.my_natural") ``` And we now get the expected combinations. ```{r} vec_c(new_natural(1), 10L) vec_c(1.5, new_natural(1)) ``` vctrs/man/faq/developer/theory-coercion.Rmd0000644000176200001440000002305013653764444020517 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` This is an overview of the usage of `vec_ptype2()` and `vec_cast()` and their role in the vctrs coercion mechanism. Related topics: - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")`. ## Combination mechanism in vctrs The coercion system in vctrs is designed to make combination of multiple inputs consistent and extensible. Combinations occur in many places, such as row-binding, joins, subset-assignment, or grouped summary functions that use the split-apply-combine strategy. For example: ```{r, error = TRUE} vec_c(TRUE, 1) vec_c("a", 1) vec_rbind( data.frame(x = TRUE), data.frame(x = 1, y = 2) ) vec_rbind( data.frame(x = "a"), data.frame(x = 1, y = 2) ) ``` One major goal of vctrs is to provide a central place for implementing the coercion methods that make generic combinations possible. The two relevant generics are `vec_ptype2()` and `vec_cast()`. They both take two arguments and perform __double dispatch__, meaning that a method is selected based on the classes of both inputs. The general mechanism for combining multiple inputs is: 1. Find the common type of a set of inputs by reducing (as in `base::Reduce()` or `purrr::reduce()`) the `vec_ptype2()` binary function over the set. 2. Convert all inputs to the common type with `vec_cast()`. 3. Initialise the output vector as an instance of this common type with `vec_init()`. 4. Fill the output vector with the elements of the inputs using `vec_assign()`. The last two steps may require `vec_proxy()` and `vec_restore()` implementations, unless the attributes of your class are constant and do not depend on the contents of the vector. We focus here on the first two steps, which require `vec_ptype2()` and `vec_cast()` implementations. ## `vec_ptype2()` Methods for `vec_ptype2()` are passed two _prototypes_, i.e. two inputs emptied of their elements. They implement two behaviours: * If the types of their inputs are compatible, indicate which of them is the richer type by returning it. If the types are of equal resolution, return any of the two. * Throw an error with `stop_incompatible_type()` when it can be determined from the attributes that the types of the inputs are not compatible. ### Type compatibility A type is __compatible__ with another type if the values it represents are a subset or a superset of the values of the other type. The notion of "value" is to be interpreted at a high level, in particular it is not the same as the memory representation. For example, factors are represented in memory with integers but their values are more related to character vectors than to round numbers: ```{r, error = TRUE} # Two factors are compatible vec_ptype2(factor("a"), factor("b")) # Factors are compatible with a character vec_ptype2(factor("a"), "b") # But they are incompatible with integers vec_ptype2(factor("a"), 1L) ``` ### Richness of type Richness of type is not a very precise notion. It can be about richer data (for instance a `double` vector covers more values than an integer vector), richer behaviour (a `data.table` has richer behaviour than a `data.frame`), or both. If you have trouble determining which one of the two types is richer, it probably means they shouldn't be automatically coercible. Let's look again at what happens when we combine a factor and a character: ```{r} vec_ptype2(factor("a"), "b") ``` The ptype2 method for `` and `>` returns `` because the former is a richer type. The factor can only contain `"a"` strings, whereas the character can contain any strings. In this sense, factors are a _subset_ of character. Note that another valid behaviour would be to throw an incompatible type error. This is what a strict factor implementation would do. We have decided to be laxer in vctrs because it is easy to inadvertently create factors instead of character vectors, especially with older versions of R where `stringsAsFactors` is still true by default. ### Consistency and symmetry on permutation Each ptype2 method should strive to have exactly the same behaviour when the inputs are permuted. This is not always possible, for example factor levels are aggregated in order: ```{r} vec_ptype2(factor(c("a", "c")), factor("b")) vec_ptype2(factor("b"), factor(c("a", "c"))) ``` In any case, permuting the input should not return a fundamentally different type or introduce an incompatible type error. ### Coercion hierarchy Coercible classes form a coercion (or subtyping) hierarchy. Here is a simplified diagram of the hierarchy for base types. In this diagram the directions of the arrows express which type is richer. They flow from the bottom (more constrained types) to the top (richer types). \\figure{coerce.png} As a class implementor, you have two options. The simplest is to create an entirely separate hierarchy. The date and date-time classes are an example of an S3-based hierarchy that is completely separate. Alternatively, you can integrate your class in an existing hierarchy, typically by adding parent nodes on top of the hierarchy (your class is richer), by adding children node at the root of the hierarchy (your class is more constrained), or by inserting a node in the tree. These coercion hierarchies are _implicit_, in the sense that they are implied by the `vec_ptype2()` implementations. There is no structured way to create or modify a hierarchy, instead you need to implement the appropriate coercion methods for all the types in your hierarchy, and diligently return the richer type in each case. The `vec_ptype2()` implementations are not transitive nor inherited, so all pairwise methods between classes lying on a given path must be implemented manually. This is something we might make easier in the future. ## `vec_cast()` The second generic, `vec_cast()`, is the one that looks at the data and actually performs the conversion. Because it has access to more information than `vec_ptype2()`, it may be stricter and cause an error in more cases. `vec_cast()` has three possible behaviours: - Determine that the prototypes of the two inputs are not compatible. This must be decided in exactly the same way as for `vec_ptype2()`. Call `stop_incompatible_cast()` if you can determine from the attributes that the types are not compatible. - Detect incompatible values. Usually this is because the target type is too restricted for the values supported by the input type. For example, a fractional number can't be converted to an integer. The method should throw an error in that case. - Return the input vector converted to the target type if all values are compatible. Whereas `vec_ptype2()` must return the same type when the inputs are permuted, `vec_cast()` is _directional_. It always returns the type of the right-hand side, or dies trying. ## Double dispatch The dispatch mechanism for `vec_ptype2()` and `vec_cast()` looks like S3 but is actually a custom mechanism. Compared to S3, it has the following differences: * It dispatches on the classes of the first two inputs. * There is no inheritance of ptype2 and cast methods. This is because the S3 class hierarchy is not necessarily the same as the coercion hierarchy. * `NextMethod()` does not work. Parent methods must be called explicitly if necessary. * The default method is hard-coded. ## Data frames The determination of the common type of data frames with `vec_ptype2()` happens in three steps: 1. Match the columns of the two input data frames. If some columns don't exist, they are created and filled with adequately typed `NA` values. 2. Find the common type for each column by calling `vec_ptype2()` on each pair of matched columns. 3. Find the common data frame type. For example the common type of a grouped tibble and a tibble is a grouped tibble because the latter is the richer type. The common type of a data table and a data frame is a data table. `vec_cast()` operates similarly. If a data frame is cast to a target type that has fewer columns, this is an error. If you are implementing coercion methods for data frames, you will need to explicitly call the parent methods that perform the common type determination or the type conversion described above. These are exported as [df_ptype2()] and [df_cast()]. ### Data frame fallbacks Being too strict with data frame combinations would cause too much pain because there are many data frame subclasses in the wild that don't implement vctrs methods. We have decided to implement a special fallback behaviour for foreign data frames. Incompatible data frames fall back to a base data frame: ```{r} df1 <- data.frame(x = 1) df2 <- structure(df1, class = c("foreign_df", "data.frame")) vec_rbind(df1, df2) ``` When a tibble is involved, we fall back to tibble: ```{r} df3 <- tibble::as_tibble(df1) vec_rbind(df1, df3) ``` These fallbacks are not ideal but they make sense because all data frames share a common data structure. This is not generally the case for vectors. For example factors and characters have different representations, and it is not possible to find a fallback time mechanically. However this fallback has a big downside: implementing vctrs methods for your data frame subclass is a breaking behaviour change. The proper coercion behaviour for your data frame class should be specified as soon as possible to limit the consequences of changing the behaviour of your class in R scripts. vctrs/man/faq/developer/howto-coercion-data-frame.Rmd0000644000176200001440000003051713655017473022345 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} old_warn_on_fallback <- options(`vctrs:::warn_on_fallback` = FALSE) knitr_defer(options(old_warn_on_fallback)) ``` This guide provides a practical recipe for implementing `vec_ptype2()` and `vec_cast()` methods for coercions of data frame subclasses. Related topics: - For an overview of the coercion mechanism in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. Coercion of data frames occurs when different data frame classes are combined in some way. The two main methods of combination are currently row-binding with [vec_rbind()] and col-binding with [vec_cbind()] (which are in turn used by a number of dplyr and tidyr functions). These functions take multiple data frame inputs and automatically coerce them to their common type. vctrs is generally strict about the kind of automatic coercions that are performed when combining inputs. In the case of data frames we have decided to be a bit less strict for convenience. Instead of throwing an incompatible type error, we fall back to a base data frame or a tibble if we don't know how to combine two data frame subclasses. It is still a good idea to specify the proper coercion behaviour for your data frame subclasses as soon as possible. We will see two examples in this guide. The first example is about a data frame subclass that has no particular attributes to manage. In the second example, we implement coercion methods for a tibble subclass that includes potentially incompatible attributes. ## Roxygen workflow ```{r, child = "snippet-roxy-workflow.Rmd"} ``` ## Parent methods Most of the common type determination should be performed by the parent class. In vctrs, double dispatch is implemented in such a way that you need to call the methods for the parent class manually. For `vec_ptype2()` this means you need to call `df_ptype2()` (for data frame subclasses) or `tib_ptype2()` (for tibble subclasses). Similarly, `df_cast()` and `tib_cast()` are the workhorses for `vec_cast()` methods of subtypes of `data.frame` and `tbl_df`. These functions take the union of the columns in `x` and `y`, and ensure shared columns have the same type. These functions are much less strict than `vec_ptype2()` and `vec_cast()` as they accept any subclass of data frame as input. They always return a `data.frame` or a `tbl_df`. You will probably want to write similar functions for your subclass to avoid repetition in your code. You may want to export them as well if you are expecting other people to derive from your class. ## A `data.table` example ```{r, include = FALSE} delayedAssign("as.data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "as.data.table") } else { function(...) abort("`data.table` must be installed.") } }) delayedAssign("data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "data.table") } else { function(...) abort("`data.table` must be installed.") } }) ``` This example is the actual implementation of vctrs coercion methods for `data.table`. This is a simple example because we don't have to keep track of attributes for this class or manage incompatibilities. See the tibble section for a more complicated example. We first create the `dt_ptype2()` and `dt_cast()` helpers. They wrap around the parent methods `df_ptype2()` and `df_cast()`, and transform the common type or converted input to a data table. You may want to export these helpers if you expect other packages to derive from your data frame class. These helpers should always return data tables. To this end we use the conversion generic `as.data.table()`. Depending on the tools available for the particular class at hand, a constructor might be appropriate as well. ```{r} dt_ptype2 <- function(x, y, ...) { as.data.table(df_ptype2(x, y, ...)) } dt_cast <- function(x, to, ...) { as.data.table(df_cast(x, to, ...)) } ``` We start with the self-self method: ```{r} #' @export vec_ptype2.data.table.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } ``` Between a data frame and a data table, we consider the richer type to be data table. This decision is not based on the value coverage of each data structures, but on the idea that data tables have richer behaviour. Since data tables are the richer type, we call `dt_type2()` from the `vec_ptype2()` method. It always returns a data table, no matter the order of arguments: ```{r} #' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } ``` The `vec_cast()` methods follow the same pattern, but note how the method for coercing to data frame uses `df_cast()` rather than `dt_cast()`. Also, please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents `to`, whereas the second class represents `x`. ```{r} #' @export vec_cast.data.table.data.table <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.table.data.frame <- function(x, to, ...) { # `x` is a data.frame to be converted to a data.table dt_cast(x, to, ...) } #' @export vec_cast.data.frame.data.table <- function(x, to, ...) { # `x` is a data.table to be converted to a data.frame df_cast(x, to, ...) } ``` With these methods vctrs is now able to combine data tables with data frames: ```{r} vec_cbind(data.frame(x = 1:3), data.table(y = "foo")) ``` ## A tibble example In this example we implement coercion methods for a tibble subclass that carries a colour as a scalar metadata: ```{r} # User constructor my_tibble <- function(colour = NULL, ...) { new_my_tibble(tibble::tibble(...), colour = colour) } # Developer constructor new_my_tibble <- function(x, colour = NULL) { stopifnot(is.data.frame(x)) tibble::new_tibble( x, colour = colour, class = "my_tibble", nrow = nrow(x) ) } df_colour <- function(x) { if (inherits(x, "my_tibble")) { attr(x, "colour") } else { NULL } } #'@export print.my_tibble <- function(x, ...) { cat(sprintf("<%s: %s>\n", class(x)[[1]], df_colour(x))) cli::cat_line(format(x)[-1]) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("base::print", "my_tibble") ``` This subclass is very simple. All it does is modify the header. ```{r} red <- my_tibble("red", x = 1, y = 1:2) red red[2] green <- my_tibble("green", z = TRUE) green ``` Combinations do not work properly out of the box, instead vctrs falls back to a bare tibble: ```{r} vec_rbind(red, tibble::tibble(x = 10:12)) ``` Instead of falling back to a data frame, we would like to return a `` when combined with a data frame or a tibble. Because this subclass has more metadata than normal data frames (it has a colour), it is a _supertype_ of tibble and data frame, i.e. it is the richer type. This is similar to how a grouped tibble is a more general type than a tibble or a data frame. Conceptually, the latter are pinned to a single constant group. The coercion methods for data frames operate in two steps: - They check for compatible subclass attributes. In our case the tibble colour has to be the same, or be undefined. - They call their parent methods, in this case [tib_ptype2()] and [tib_cast()] because we have a subclass of tibble. This eventually calls the data frame methods [df_ptype2()] and [tib_ptype2()] which match the columns and their types. This process should usually be wrapped in two functions to avoid repetition. Consider exporting these if you expect your class to be derived by other subclasses. We first implement a helper to determine if two data frames have compatible colours. We use the `df_colour()` accessor which returns `NULL` when the data frame colour is undefined. ```{r} has_compatible_colours <- function(x, y) { x_colour <- df_colour(x) %||% df_colour(y) y_colour <- df_colour(y) %||% x_colour identical(x_colour, y_colour) } ``` Next we implement the coercion helpers. If the colours are not compatible, we call `stop_incompatible_cast()` or `stop_incompatible_type()`. These strict coercion semantics are justified because in this class colour is a _data_ attribute. If it were a non essential _detail_ attribute, like the timezone in a datetime, we would just standardise it to the value of the left-hand side. In simpler cases (like the data.table example), these methods do not need to take the arguments suffixed in `_arg`. Here we do need to take these arguments so we can pass them to the `stop_` functions when we detect an incompatibility. They also should be passed to the parent methods. ```{r} #' @export my_tib_cast <- function(x, to, ..., x_arg = "", to_arg = "") { out <- tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) if (!has_compatible_colours(x, to)) { stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, details = "Can't combine colours." ) } colour <- df_colour(x) %||% df_colour(to) new_my_tibble(out, colour = colour) } #' @export my_tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { out <- tib_ptype2(x, y, ..., x_arg = x_arg, y_arg = y_arg) if (!has_compatible_colours(x, y)) { stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = "Can't combine colours." ) } colour <- df_colour(x) %||% df_colour(y) new_my_tibble(out, colour = colour) } ``` Let's now implement the coercion methods, starting with the self-self methods. ```{r} #' @export vec_ptype2.my_tibble.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_cast.my_tibble.my_tibble <- function(x, to, ...) { my_tib_cast(x, to, ...) } ``` ```{r, include = FALSE} knitr_local_registration("vctrs::vec_ptype2", "my_tibble.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.my_tibble") ``` We can now combine compatible instances of our class! ```{r, error = TRUE} vec_rbind(red, red) vec_rbind(green, green) vec_rbind(green, red) ``` The methods for combining our class with tibbles follow the same pattern. For ptype2 we return our class in both cases because it is the richer type: ```{r} #' @export vec_ptype2.my_tibble.tbl_df <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_ptype2.tbl_df.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } ``` For cast are careful about returning a tibble when casting to a tibble. Note the call to `vctrs::tib_cast()`: ```{r} #' @export vec_cast.my_tibble.tbl_df <- function(x, to, ...) { my_tib_cast(x, to, ...) } #' @export vec_cast.tbl_df.my_tibble <- function(x, to, ...) { tib_cast(x, to, ...) } ``` ```{r, include = FALSE} knitr_local_registration("vctrs::vec_ptype2", "my_tibble.tbl_df") knitr_local_registration("vctrs::vec_ptype2", "tbl_df.my_tibble") knitr_local_registration("vctrs::vec_cast", "tbl_df.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.tbl_df") ``` From this point, we get correct combinations with tibbles: ```{r} vec_rbind(red, tibble::tibble(x = 10:12)) ``` However we are not done yet. Because the coercion hierarchy is different from the class hierarchy, there is no inheritance of coercion methods. We're not getting correct behaviour for data frames yet because we haven't explicitly specified the methods for this class: ```{r} vec_rbind(red, data.frame(x = 10:12)) ``` Let's finish up the boiler plate: ```{r} #' @export vec_ptype2.my_tibble.data.frame <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_cast.my_tibble.data.frame <- function(x, to, ...) { my_tib_cast(x, to, ...) } #' @export vec_cast.data.frame.my_tibble <- function(x, to, ...) { df_cast(x, to, ...) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_tibble.data.frame") knitr_local_registration("vctrs::vec_ptype2", "data.frame.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.data.frame") knitr_local_registration("vctrs::vec_cast", "data.frame.my_tibble") ``` This completes the implementation: ```{r} vec_rbind(red, data.frame(x = 10:12)) ``` vctrs/man/faq/developer/links-coercion.Rmd0000644000176200001440000000100713653764444020323 0ustar liggesusers # Implementing coercion methods - For an overview of how these generics work and their roles in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")`. vctrs/man/faq/user/0000755000176200001440000000000014042546502013715 5ustar liggesusersvctrs/man/faq/user/faq-error-scalar-type.Rmd0000644000176200001440000000304313712305520020475 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` This error occurs when a function expects a vector and gets a scalar object instead. This commonly happens when some code attempts to assign a scalar object as column in a data frame: ```{r, error = TRUE} fn <- function() NULL tibble::tibble(x = fn) fit <- lm(1:3 ~ 1) tibble::tibble(x = fit) ``` # Vectorness in base R and in the tidyverse In base R, almost everything is a vector or behaves like a vector. In the tidyverse we have chosen to be a bit stricter about what is considered a vector. The main question we ask ourselves to decide on the vectorness of a type is whether it makes sense to include that object as a column in a data frame. The main difference is that S3 lists are considered vectors by base R but in the tidyverse that's not the case by default: ```{r, error = TRUE} fit <- lm(1:3 ~ 1) typeof(fit) class(fit) # S3 lists can be subset like a vector using base R: fit[1:3] # But not in vctrs vctrs::vec_slice(fit, 1:3) ``` Defused function calls are another (more esoteric) example: ```{r, error = TRUE} call <- quote(foo(bar = TRUE, baz = FALSE)) call # They can be subset like a vector using base R: call[1:2] lapply(call, function(x) x) # But not with vctrs: vctrs::vec_slice(call, 1:2) ``` # I get a scalar type error but I think this is a bug It's possible the author of the class needs to do some work to declare their class a vector. Consider reaching out to the author. We have written a [developer FAQ page][howto-faq-fix-scalar-type-error] to help them fix the issue. vctrs/man/faq/user/faq-compatibility-types.Rmd0000644000176200001440000000517613712266564021164 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} stopifnot(rlang::is_installed("dplyr")) ``` Two vectors are __compatible__ when you can safely: - Combine them into one larger vector. - Assign values from one of the vectors into the other vector. Examples of compatible types are integer and double vectors. On the other hand, integer and character vectors are not compatible. # Common type of multiple vectors There are two possible outcomes when multiple vectors of different types are combined into a larger vector: - An incompatible type error is thrown because some of the types are not compatible: ```{r, error = TRUE} df1 <- data.frame(x = 1:3) df2 <- data.frame(x = "foo") dplyr::bind_rows(df1, df2) ``` - The vectors are combined into a vector that has the common type of all inputs. In this example, the common type of integer and logical is integer: ```{r} df1 <- data.frame(x = 1:3) df2 <- data.frame(x = FALSE) dplyr::bind_rows(df1, df2) ``` In general, the common type is the _richer_ type, in other words the type that can represent the most values. Logical vectors are at the bottom of the hierarchy of numeric types because they can only represent two values (not counting missing values). Then come integer vectors, and then doubles. Here is the vctrs type hierarchy for the fundamental vectors: \\figure{coerce.png} # Type conversion and lossy cast errors Type compatibility does not necessarily mean that you can __convert__ one type to the other type. That's because one of the types might support a larger set of possible values. For instance, integer and double vectors are compatible, but double vectors can't be converted to integer if they contain fractional values. When vctrs can't convert a vector because the target type is not as rich as the source type, it throws a lossy cast error. Assigning a fractional number to an integer vector is a typical example of a lossy cast error: ```{r, error = TRUE} int_vector <- 1:3 vec_assign(int_vector, 2, 0.001) ``` # How to make two vector classes compatible? If you encounter two vector types that you think should be compatible, they might need to implement coercion methods. Reach out to the author(s) of the classes and ask them if it makes sense for their classes to be compatible. These developer FAQ items provide guides for implementing coercion methods: - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. vctrs/man/faq/setup.Rmd0000644000176200001440000000021013723213047014534 0ustar liggesusers ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options(cli.unicode = FALSE) library(vctrs) ``` vctrs/man/vec_is_list.Rd0000644000176200001440000000120713717173502014766 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{vec_is_list} \alias{vec_is_list} \title{Is the object a list?} \usage{ vec_is_list(x) } \arguments{ \item{x}{An object.} } \description{ \code{vec_is_list()} tests if \code{x} is considered a list in the vctrs sense. It returns \code{TRUE} if: \itemize{ \item \code{x} is a bare list with no class. \item \code{x} is a list explicitly inheriting from \code{"list"}. } } \details{ Notably, data frames and S3 record style classes like POSIXlt are not considered lists. } \examples{ vec_is_list(list()) vec_is_list(list_of(1)) vec_is_list(data.frame()) } vctrs/man/vctrs-conditions.Rd0000644000176200001440000000520713654721330015775 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conditions.R \name{vctrs-conditions} \alias{vctrs-conditions} \alias{stop_incompatible_type} \alias{stop_incompatible_cast} \alias{stop_incompatible_op} \alias{stop_incompatible_size} \alias{allow_lossy_cast} \title{Custom conditions for vctrs package} \usage{ stop_incompatible_type( x, y, ..., x_arg, y_arg, action = c("combine", "convert"), details = NULL, message = NULL, class = NULL ) stop_incompatible_cast( x, to, ..., x_arg, to_arg, details = NULL, message = NULL, class = NULL ) stop_incompatible_op( op, x, y, details = NULL, ..., message = NULL, class = NULL ) stop_incompatible_size( x, y, x_size, y_size, ..., x_arg, y_arg, details = NULL, message = NULL, class = NULL ) allow_lossy_cast(expr, x_ptype = NULL, to_ptype = NULL) } \arguments{ \item{x, y, to}{Vectors} \item{..., class}{Only use these fields when creating a subclass.} \item{x_arg, y_arg, to_arg}{Argument names for \code{x}, \code{y}, and \code{to}. Used in error messages to inform the user about the locations of incompatible types.} \item{action}{An option to customize the incompatible type message depending on the context. Errors thrown from \code{\link[=vec_ptype2]{vec_ptype2()}} use \code{"combine"} and those thrown from \code{\link[=vec_cast]{vec_cast()}} use \code{"convert"}.} \item{details}{Any additional human readable details.} \item{message}{An overriding message for the error. \code{details} and \code{message} are mutually exclusive, supplying both is an error.} \item{x_ptype, to_ptype}{Suppress only the casting errors where \code{x} or \code{to} match these \link[=vec_ptype]{prototypes}.} \item{subclass}{Use if you want to further customize the class.} } \value{ \verb{stop_incompatible_*()} unconditionally raise an error of class \code{"vctrs_error_incompatible_*"} and \code{"vctrs_error_incompatible"}. } \description{ These functions are called for their side effect of raising errors and warnings. These conditions have custom classes and structures to make testing easier. } \examples{ # Most of the time, `maybe_lossy_cast()` returns its input normally: maybe_lossy_cast( c("foo", "bar"), NULL, "", lossy = c(FALSE, FALSE), x_arg = "", to_arg = "" ) # If `lossy` has any `TRUE`, an error is thrown: try(maybe_lossy_cast( c("foo", "bar"), NULL, "", lossy = c(FALSE, TRUE), x_arg = "", to_arg = "" )) # Unless lossy casts are allowed: allow_lossy_cast( maybe_lossy_cast( c("foo", "bar"), NULL, "", lossy = c(FALSE, TRUE), x_arg = "", to_arg = "" ) ) } \keyword{internal} vctrs/man/vec_c.Rd0000644000176200001440000000674013671672047013560 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/c.R \name{vec_c} \alias{vec_c} \title{Combine many vectors into one vector} \usage{ vec_c( ..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal") ) } \arguments{ \item{...}{Vectors to coerce.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{.name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} } \value{ A vector with class given by \code{.ptype}, and length equal to the sum of the \code{vec_size()} of the contents of \code{...}. The vector will have names if the individual components have names (inner names) or if the arguments are named (outer names). If both inner and outer names are present, an error is thrown unless a \code{.name_spec} is provided. } \description{ Combine all arguments into a new vector of common type. } \section{Invariants}{ \itemize{ \item \code{vec_size(vec_c(x, y)) == vec_size(x) + vec_size(y)} \item \code{vec_ptype(vec_c(x, y)) == vec_ptype_common(x, y)}. } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies}{ \itemize{ \item \code{\link[base:c]{base::c()}} } If inputs inherit from a common class hierarchy, \code{vec_c()} falls back to \code{base::c()} if there exists a \code{c()} method implemented for this class hierarchy. } } \examples{ vec_c(FALSE, 1L, 1.5) # Date/times -------------------------- c(Sys.Date(), Sys.time()) c(Sys.time(), Sys.Date()) vec_c(Sys.Date(), Sys.time()) vec_c(Sys.time(), Sys.Date()) # Factors ----------------------------- c(factor("a"), factor("b")) vec_c(factor("a"), factor("b")) # By default, named inputs must be length 1: vec_c(name = 1) try(vec_c(name = 1:3)) # Pass a name specification to work around this: vec_c(name = 1:3, .name_spec = "{outer}_{inner}") # See `?name_spec` for more examples of name specifications. } \seealso{ \code{\link[=vec_cbind]{vec_cbind()}}/\code{\link[=vec_rbind]{vec_rbind()}} for combining data frames by rows or columns. } vctrs/man/vec_empty.Rd0000644000176200001440000000075314027045462014461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_empty} \alias{vec_empty} \title{Is a vector empty} \usage{ vec_empty(x) } \arguments{ \item{x}{An object.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} This function is defunct, please use \code{\link[=vec_is_empty]{vec_is_empty()}}. } \keyword{internal} vctrs/man/internal-faq-ptype2-identity.Rd0000644000176200001440000001021314022644673020113 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-internal.R \name{internal-faq-ptype2-identity} \alias{internal-faq-ptype2-identity} \title{Internal FAQ - \code{vec_ptype2()}, \code{NULL}, and unspecified vectors} \description{ \subsection{Promotion monoid}{ Promotions (i.e. automatic coercions) should always transform inputs to their richer type to avoid losing values of precision. \code{vec_ptype2()} returns the \emph{richer} type of two vectors, or throws an incompatible type error if none of the two vector types include the other. For example, the richer type of integer and double is the latter because double covers a larger range of values than integer. \code{vec_ptype2()} is a \href{https://en.wikipedia.org/wiki/Monoid}{monoid} over vectors, which in practical terms means that it is a well behaved operation for \href{https://purrr.tidyverse.org/reference/reduce.html}{reduction}. Reduction is an important operation for promotions because that is how the richer type of multiple elements is computed. As a monoid, \code{vec_ptype2()} needs an identity element, i.e. a value that doesn’t change the result of the reduction. vctrs has two identity values, \code{NULL} and \strong{unspecified} vectors. } \subsection{The \code{NULL} identity}{ As an identity element that shouldn’t influence the determination of the common type of a set of vectors, \code{NULL} is promoted to any type:\if{html}{\out{
}}\preformatted{vec_ptype2(NULL, "") #> character(0) vec_ptype2(1L, NULL) #> integer(0) }\if{html}{\out{
}} The common type of \code{NULL} and \code{NULL} is the identity \code{NULL}:\if{html}{\out{
}}\preformatted{vec_ptype2(NULL, NULL) #> NULL }\if{html}{\out{
}} This way the result of \code{vec_ptype2(NULL, NULL)} does not influence subsequent promotions:\if{html}{\out{
}}\preformatted{vec_ptype2( vec_ptype2(NULL, NULL), "" ) #> character(0) }\if{html}{\out{
}} } \subsection{Unspecified vectors}{ In the vctrs coercion system, logical vectors of missing values are also automatically promoted to the type of any other vector, just like \code{NULL}. We call these vectors unspecified. The special coercion semantics of unspecified vectors serve two purposes: \enumerate{ \item It makes it possible to assign vectors of \code{NA} inside any type of vectors, even when they are not coercible with logical:\if{html}{\out{
}}\preformatted{x <- letters[1:5] vec_assign(x, 1:2, c(NA, NA)) #> [1] NA NA "c" "d" "e" }\if{html}{\out{
}} \item We can’t put \code{NULL} in a data frame, so we need an identity element that behaves more like a vector. Logical vectors of \code{NA} seem a natural fit for this. } Unspecified vectors are thus promoted to any other type, just like \code{NULL}:\if{html}{\out{
}}\preformatted{vec_ptype2(NA, "") #> character(0) vec_ptype2(1L, c(NA, NA)) #> integer(0) }\if{html}{\out{
}} } \subsection{Finalising common types}{ vctrs has an internal vector type of class \code{vctrs_unspecified}. Users normally don’t see such vectors in the wild, but they do come up when taking the common type of an unspecified vector with another identity value:\if{html}{\out{
}}\preformatted{vec_ptype2(NA, NA) #> [0] vec_ptype2(NA, NULL) #> [0] vec_ptype2(NULL, NA) #> [0] }\if{html}{\out{
}} We can’t return \code{NA} here because \code{vec_ptype2()} normally returns empty vectors. We also can’t return \code{NULL} because unspecified vectors need to be recognised as logical vectors if they haven’t been promoted at the end of the reduction.\if{html}{\out{
}}\preformatted{vec_ptype_finalise(vec_ptype2(NULL, NA)) #> logical(0) }\if{html}{\out{
}} See the output of \code{vec_ptype_common()} which performs the reduction and finalises the type, ready to be used by the caller:\if{html}{\out{
}}\preformatted{vec_ptype_common(NULL, NULL) #> NULL vec_ptype_common(NA, NULL) #> logical(0) }\if{html}{\out{
}} Note that \strong{partial} types in vctrs make use of the same mechanism. They are finalised with \code{vec_ptype_finalise()}. } } vctrs/man/unspecified.Rd0000644000176200001440000000120613650511520014750 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-unspecified.R \name{unspecified} \alias{unspecified} \title{A 1d vector of unspecified type} \usage{ unspecified(n = 0) } \arguments{ \item{n}{Length of vector} } \description{ This is a \link[=new_partial]{partial type} used to represent logical vectors that only contain \code{NA}. These require special handling because we want to allow \code{NA} to specify missingness without requiring a type. } \examples{ vec_ptype_show() vec_ptype_show(NA) vec_c(NA, factor("x")) vec_c(NA, Sys.Date()) vec_c(NA, Sys.time()) vec_c(NA, list(1:3, 4:5)) } \keyword{internal} vctrs/man/reference-faq-compatibility.Rd0000644000176200001440000000723614022644673020047 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{reference-faq-compatibility} \alias{reference-faq-compatibility} \title{FAQ - Is my class compatible with vctrs?} \description{ vctrs provides a framework for working with vector classes in a generic way. However, it implements several compatibility fallbacks to base R methods. In this reference you will find how vctrs tries to be compatible with your vector class, and what base methods you need to implement for compatibility. If you’re starting from scratch, we think you’ll find it easier to start using \code{\link[=new_vctr]{new_vctr()}} as documented in \code{vignette("s3-vector")}. This guide is aimed for developers with existing vector classes. \subsection{Aggregate operations with fallbacks}{ All vctrs operations are based on four primitive generics described in the next section. However there are many higher level operations. The most important ones implement fallbacks to base generics for maximum compatibility with existing classes. \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} falls back to the base \code{[} generic if no \code{\link[=vec_proxy]{vec_proxy()}} method is implemented. This way foreign classes that do not implement \code{\link[=vec_restore]{vec_restore()}} can restore attributes based on the new subsetted contents. \item \code{\link[=vec_c]{vec_c()}} and \code{\link[=vec_rbind]{vec_rbind()}} now fall back to \code{\link[base:c]{base::c()}} if the inputs have a common parent class with a \code{c()} method (only if they have no self-to-self \code{vec_ptype2()} method). vctrs works hard to make your \code{c()} method success in various situations (with \code{NULL} and \code{NA} inputs, even as first input which would normally prevent dispatch to your method). The main downside compared to using vctrs primitives is that you can’t combine vectors of different classes since there is no extensible mechanism of coercion in \code{c()}, and it is less efficient in some cases. } } \subsection{The vctrs primitives}{ Most functions in vctrs are aggregate operations: they call other vctrs functions which themselves call other vctrs functions. The dependencies of a vctrs functions are listed in the Dependencies section of its documentation page. Take a look at \code{\link[=vec_count]{vec_count()}} for an example. These dependencies form a tree whose leaves are the four vctrs primitives. Here is the diagram for \code{vec_count()}: \figure{vec-count-deps.png} \subsection{The coercion generics}{ The coercion mechanism in vctrs is based on two generics: \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } See the \link[=theory-faq-coercion]{theory overview}. Two objects with the same class and the same attributes are always considered compatible by ptype2 and cast. If the attributes or classes differ, they throw an incompatible type error. Coercion errors are the main source of incompatibility with vctrs. See the \link[=howto-faq-coercion]{howto guide} if you need to implement methods for these generics. } \subsection{The proxy and restoration generics}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } These generics are essential for vctrs but mostly optional. \code{vec_proxy()} defaults to an \link{identity} function and you normally don’t need to implement it. The proxy a vector must be one of the atomic vector types, a list, or a data frame. By default, S3 lists that do not inherit from \code{"list"} do not have an identity proxy. In that case, you need to explicitly implement \code{vec_proxy()} or make your class inherit from list. } } } vctrs/man/vec_init.Rd0000644000176200001440000000066713663716767014314 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{vec_init} \alias{vec_init} \title{Initialize a vector} \usage{ vec_init(x, n = 1L) } \arguments{ \item{x}{Template of vector to initialize.} \item{n}{Desired size of result.} } \description{ Initialize a vector } \section{Dependencies}{ \itemize{ \item vec_slice() } } \examples{ vec_init(1:10, 3) vec_init(Sys.Date(), 5) vec_init(mtcars, 2) } vctrs/man/vec_proxy_compare.Rd0000644000176200001440000000430513705026323016204 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare.R \name{vec_proxy_compare} \alias{vec_proxy_compare} \alias{vec_proxy_order} \title{Comparison and order proxy} \usage{ vec_proxy_compare(x, ...) vec_proxy_order(x, ...) } \arguments{ \item{x}{A vector x.} \item{...}{These dots are for future extensions and must be empty.} } \value{ A 1d atomic vector or a data frame. } \description{ \code{vec_proxy_compare()} and \code{vec_proxy_order()} return proxy objects, i.e. an atomic vector or data frame of atomic vectors. For \code{\link[=vctr]{vctrs_vctr}} objects: \itemize{ \item \code{vec_proxy_compare()} determines the behavior of \code{<}, \code{>}, \code{>=} and \code{<=} (via \code{\link[=vec_compare]{vec_compare()}}); and \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}}, and \code{\link[=quantile]{quantile()}}. \item \code{vec_proxy_order()} determines the behavior of \code{order()} and \code{sort()} (via \code{xtfrm()}). } } \details{ The default method of \code{vec_proxy_compare()} assumes that all classes built on top of atomic vectors or records are comparable. Internally the default calls \code{\link[=vec_proxy_equal]{vec_proxy_equal()}}. If your class is not comparable, you will need to provide a \code{vec_proxy_compare()} method that throws an error. The behavior of \code{vec_proxy_order()} is identical to \code{vec_proxy_compare()}, with the exception of lists. Lists are not comparable, as comparing elements of different types is undefined. However, to allow ordering of data frames containing list-columns, the ordering proxy of a list is generated as an integer vector that can be used to order list elements by first appearance. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} called by default in \code{vec_proxy_compare()} \item \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} called by default in \code{vec_proxy_order()} } } \examples{ # Lists are not comparable x <- list(1:2, 1, 1:2, 3) try(vec_compare(x, x)) # But lists are orderable by first appearance to allow for # ordering data frames with list-cols df <- new_data_frame(list(x = x)) vec_sort(df) } \keyword{internal} vctrs/man/new_date.Rd0000644000176200001440000000440213712211241014235 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-date-time.R \name{new_date} \alias{new_date} \alias{new_datetime} \alias{new_duration} \alias{vec_ptype2.Date} \alias{vec_ptype2.POSIXct} \alias{vec_ptype2.POSIXlt} \alias{vec_ptype2.difftime} \alias{vec_cast.Date} \alias{vec_cast.POSIXct} \alias{vec_cast.POSIXlt} \alias{vec_cast.difftime} \alias{vec_arith.Date} \alias{vec_arith.POSIXct} \alias{vec_arith.POSIXlt} \alias{vec_arith.difftime} \title{Date, date-time, and duration S3 classes} \usage{ new_date(x = double()) new_datetime(x = double(), tzone = "") new_duration(x = double(), units = c("secs", "mins", "hours", "days", "weeks")) \method{vec_ptype2}{Date}(x, y, ...) \method{vec_ptype2}{POSIXct}(x, y, ...) \method{vec_ptype2}{POSIXlt}(x, y, ...) \method{vec_ptype2}{difftime}(x, y, ...) \method{vec_cast}{Date}(x, to, ...) \method{vec_cast}{POSIXct}(x, to, ...) \method{vec_cast}{POSIXlt}(x, to, ...) \method{vec_cast}{difftime}(x, to, ...) \method{vec_arith}{Date}(op, x, y, ...) \method{vec_arith}{POSIXct}(op, x, y, ...) \method{vec_arith}{POSIXlt}(op, x, y, ...) \method{vec_arith}{difftime}(op, x, y, ...) } \arguments{ \item{x}{A double vector representing the number of days since UNIX epoch for \code{new_date()}, number of seconds since UNIX epoch for \code{new_datetime()}, and number of \code{units} for \code{new_duration()}.} \item{tzone}{Time zone. A character vector of length 1. Either \code{""} for the local time zone, or a value from \code{\link[=OlsonNames]{OlsonNames()}}} \item{units}{Units of duration.} } \description{ \itemize{ \item A \code{date} (\link{Date}) is a double vector. Its value represent the number of days since the Unix "epoch", 1970-01-01. It has no attributes. \item A \code{datetime} (\link{POSIXct} is a double vector. Its value represents the number of seconds since the Unix "Epoch", 1970-01-01. It has a single attribute: the timezone (\code{tzone})) \item A \code{duration} (\link{difftime}) } } \details{ These function help the base \code{Date}, \code{POSIXct}, and \code{difftime} classes fit into the vctrs type system by providing constructors, coercion functions, and casting functions. } \examples{ new_date(0) new_datetime(0, tzone = "UTC") new_duration(1, "hours") } \keyword{internal} vctrs/man/fields.Rd0000644000176200001440000000154213564465066013744 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fields.R \name{fields} \alias{fields} \alias{n_fields} \alias{field} \alias{field<-} \title{Tools for accessing the fields of a record.} \usage{ fields(x) n_fields(x) field(x, i) field(x, i) <- value } \arguments{ \item{x}{A \link{rcrd}, i.e. a list of equal length vectors with unique names.} } \description{ A \link{rcrd} behaves like a vector, so \code{length()}, \code{names()}, and \code{$} can not provide access to the fields of the underlying list. These helpers do: \code{fields()} is equivalent to \code{names()}; \code{n_fields()} is equivalent to \code{length()}; \code{field()} is equivalent to \code{$}. } \examples{ x <- new_rcrd(list(x = 1:3, y = 3:1, z = letters[1:3])) n_fields(x) fields(x) field(x, "y") field(x, "y") <- runif(3) field(x, "y") } \keyword{internal} vctrs/man/vctrs-package.Rd0000644000176200001440000000231014027045462015207 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-package.R \docType{package} \name{vctrs-package} \alias{vctrs} \alias{vctrs-package} \title{vctrs: Vector Helpers} \description{ \if{html}{\figure{logo.png}{options: align='right'}} \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} Defines new notions of prototype and size that are used to provide tools for consistent and well-founded type-coercion and size-recycling, and are in turn connected to ideas of type- and size-stability useful for analysing function interfaces. } \seealso{ Useful links: \itemize{ \item \url{https://vctrs.r-lib.org/} \item Report bugs at \url{https://github.com/r-lib/vctrs/issues} } } \author{ \strong{Maintainer}: Lionel Henry \email{lionel@rstudio.com} Authors: \itemize{ \item Hadley Wickham \email{hadley@rstudio.com} \item Davis Vaughan \email{davis@rstudio.com} } Other contributors: \itemize{ \item data.table team (Radix sort based on data.table's forder() and their contribution to R's order()) [copyright holder] \item RStudio [copyright holder] } } \keyword{internal} vctrs/man/vec_equal.Rd0000644000176200001440000000301513663716767014446 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equal.R \name{vec_equal} \alias{vec_equal} \alias{vec_equal_na} \title{Test if two vectors are equal} \usage{ vec_equal(x, y, na_equal = FALSE, .ptype = NULL) vec_equal_na(x) } \arguments{ \item{x}{Vectors with compatible types and lengths.} \item{y}{Vectors with compatible types and lengths.} \item{na_equal}{Should \code{NA} values be considered equal?} \item{.ptype}{Override to optionally specify common type} } \value{ A logical vector the same size as. Will only contain \code{NA}s if \code{na_equal} is \code{FALSE}. } \description{ \code{vec_equal_na()} tests a special case: equality with \code{NA}. It is similar to \link{is.na} but: \itemize{ \item Considers the missing element of a list to be \code{NULL}. \item Considered data frames and records to be missing if every component is missing. This preserves the invariant that \code{vec_equal_na(x)} is equal to \code{vec_equal(x, vec_init(x), na_equal = TRUE)}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_recycle_common]{vec_recycle_common()}} \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ vec_equal(c(TRUE, FALSE, NA), FALSE) vec_equal(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) vec_equal_na(c(TRUE, FALSE, NA)) vec_equal(5, 1:10) vec_equal("d", letters[1:10]) df <- data.frame(x = c(1, 1, 2, 1, NA), y = c(1, 2, 1, NA, NA)) vec_equal(df, data.frame(x = 1, y = 2)) vec_equal_na(df) } vctrs/man/vec_group.Rd0000644000176200001440000000470414027045462014457 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group.R \name{vec_group} \alias{vec_group} \alias{vec_group_id} \alias{vec_group_loc} \alias{vec_group_rle} \title{Identify groups} \usage{ vec_group_id(x) vec_group_loc(x) vec_group_rle(x) } \arguments{ \item{x}{A vector} } \value{ \itemize{ \item \code{vec_group_id()}: An integer vector with the same size as \code{x}. \item \code{vec_group_loc()}: A two column data frame with size equal to \code{vec_size(vec_unique(x))}. \itemize{ \item A \code{key} column of type \code{vec_ptype(x)} \item A \code{loc} column of type list, with elements of type integer. } \item \code{vec_group_rle()}: A \code{vctrs_group_rle} rcrd object with two integer vector fields: \code{group} and \code{length}. } Note that when using \code{vec_group_loc()} for complex types, the default \code{data.frame} print method will be suboptimal, and you will want to coerce into a tibble to better understand the output. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \itemize{ \item \code{vec_group_id()} returns an identifier for the group that each element of \code{x} falls in, constructed in the order that they appear. The number of groups is also returned as an attribute, \code{n}. \item \code{vec_group_loc()} returns a data frame containing a \code{key} column with the unique groups, and a \code{loc} column with the locations of each group in \code{x}. \item \code{vec_group_rle()} locates groups in \code{x} and returns them run length encoded in the order that they appear. The return value is a rcrd object with fields for the \code{group} identifiers and the run \code{length} of the corresponding group. The number of groups is also returned as an attribute, \code{n}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ purrr <- c("p", "u", "r", "r", "r") vec_group_id(purrr) vec_group_rle(purrr) groups <- mtcars[c("vs", "am")] vec_group_id(groups) group_rle <- vec_group_rle(groups) group_rle # Access fields with `field()` field(group_rle, "group") field(group_rle, "length") # `vec_group_id()` is equivalent to vec_match(groups, vec_unique(groups)) vec_group_loc(mtcars$vs) vec_group_loc(mtcars[c("vs", "am")]) if (require("tibble")) { as_tibble(vec_group_loc(mtcars[c("vs", "am")])) } } \keyword{internal} vctrs/man/howto-faq-coercion-data-frame.Rd0000644000176200001440000003442514022644660020174 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-coercion-data-frame} \alias{howto-faq-coercion-data-frame} \title{FAQ - How to implement ptype2 and cast methods? (Data frames)} \description{ This guide provides a practical recipe for implementing \code{vec_ptype2()} and \code{vec_cast()} methods for coercions of data frame subclasses. Related topics: \itemize{ \item For an overview of the coercion mechanism in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. } Coercion of data frames occurs when different data frame classes are combined in some way. The two main methods of combination are currently row-binding with \code{\link[=vec_rbind]{vec_rbind()}} and col-binding with \code{\link[=vec_cbind]{vec_cbind()}} (which are in turn used by a number of dplyr and tidyr functions). These functions take multiple data frame inputs and automatically coerce them to their common type. vctrs is generally strict about the kind of automatic coercions that are performed when combining inputs. In the case of data frames we have decided to be a bit less strict for convenience. Instead of throwing an incompatible type error, we fall back to a base data frame or a tibble if we don’t know how to combine two data frame subclasses. It is still a good idea to specify the proper coercion behaviour for your data frame subclasses as soon as possible. We will see two examples in this guide. The first example is about a data frame subclass that has no particular attributes to manage. In the second example, we implement coercion methods for a tibble subclass that includes potentially incompatible attributes. \subsection{Roxygen workflow}{ To implement methods for generics, first import the generics in your namespace and redocument:\if{html}{\out{
}}\preformatted{#' @importFrom vctrs vec_ptype2 vec_cast NULL }\if{html}{\out{
}} Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won’t be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with \code{s3_register()}. } \subsection{Parent methods}{ Most of the common type determination should be performed by the parent class. In vctrs, double dispatch is implemented in such a way that you need to call the methods for the parent class manually. For \code{vec_ptype2()} this means you need to call \code{df_ptype2()} (for data frame subclasses) or \code{tib_ptype2()} (for tibble subclasses). Similarly, \code{df_cast()} and \code{tib_cast()} are the workhorses for \code{vec_cast()} methods of subtypes of \code{data.frame} and \code{tbl_df}. These functions take the union of the columns in \code{x} and \code{y}, and ensure shared columns have the same type. These functions are much less strict than \code{vec_ptype2()} and \code{vec_cast()} as they accept any subclass of data frame as input. They always return a \code{data.frame} or a \code{tbl_df}. You will probably want to write similar functions for your subclass to avoid repetition in your code. You may want to export them as well if you are expecting other people to derive from your class. } \subsection{A \code{data.table} example}{ This example is the actual implementation of vctrs coercion methods for \code{data.table}. This is a simple example because we don’t have to keep track of attributes for this class or manage incompatibilities. See the tibble section for a more complicated example. We first create the \code{dt_ptype2()} and \code{dt_cast()} helpers. They wrap around the parent methods \code{df_ptype2()} and \code{df_cast()}, and transform the common type or converted input to a data table. You may want to export these helpers if you expect other packages to derive from your data frame class. These helpers should always return data tables. To this end we use the conversion generic \code{as.data.table()}. Depending on the tools available for the particular class at hand, a constructor might be appropriate as well.\if{html}{\out{
}}\preformatted{dt_ptype2 <- function(x, y, ...) \{ as.data.table(df_ptype2(x, y, ...)) \} dt_cast <- function(x, to, ...) \{ as.data.table(df_cast(x, to, ...)) \} }\if{html}{\out{
}} We start with the self-self method:\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.data.table.data.table <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} }\if{html}{\out{
}} Between a data frame and a data table, we consider the richer type to be data table. This decision is not based on the value coverage of each data structures, but on the idea that data tables have richer behaviour. Since data tables are the richer type, we call \code{dt_type2()} from the \code{vec_ptype2()} method. It always returns a data table, no matter the order of arguments:\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} }\if{html}{\out{
}} The \code{vec_cast()} methods follow the same pattern, but note how the method for coercing to data frame uses \code{df_cast()} rather than \code{dt_cast()}. Also, please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents \code{to}, whereas the second class represents \code{x}.\if{html}{\out{
}}\preformatted{#' @export vec_cast.data.table.data.table <- function(x, to, ...) \{ dt_cast(x, to, ...) \} #' @export vec_cast.data.table.data.frame <- function(x, to, ...) \{ # `x` is a data.frame to be converted to a data.table dt_cast(x, to, ...) \} #' @export vec_cast.data.frame.data.table <- function(x, to, ...) \{ # `x` is a data.table to be converted to a data.frame df_cast(x, to, ...) \} }\if{html}{\out{
}} With these methods vctrs is now able to combine data tables with data frames:\if{html}{\out{
}}\preformatted{vec_cbind(data.frame(x = 1:3), data.table(y = "foo")) #> x y #> 1: 1 foo #> 2: 2 foo #> 3: 3 foo }\if{html}{\out{
}} } \subsection{A tibble example}{ In this example we implement coercion methods for a tibble subclass that carries a colour as a scalar metadata:\if{html}{\out{
}}\preformatted{# User constructor my_tibble <- function(colour = NULL, ...) \{ new_my_tibble(tibble::tibble(...), colour = colour) \} # Developer constructor new_my_tibble <- function(x, colour = NULL) \{ stopifnot(is.data.frame(x)) tibble::new_tibble( x, colour = colour, class = "my_tibble", nrow = nrow(x) ) \} df_colour <- function(x) \{ if (inherits(x, "my_tibble")) \{ attr(x, "colour") \} else \{ NULL \} \} #'@export print.my_tibble <- function(x, ...) \{ cat(sprintf("<\%s: \%s>\n", class(x)[[1]], df_colour(x))) cli::cat_line(format(x)[-1]) \} }\if{html}{\out{
}} This subclass is very simple. All it does is modify the header.\if{html}{\out{
}}\preformatted{red <- my_tibble("red", x = 1, y = 1:2) red #> #> x y #> #> 1 1 1 #> 2 1 2 red[2] #> #> y #> #> 1 1 #> 2 2 green <- my_tibble("green", z = TRUE) green #> #> z #> #> 1 TRUE }\if{html}{\out{
}} Combinations do not work properly out of the box, instead vctrs falls back to a bare tibble:\if{html}{\out{
}}\preformatted{vec_rbind(red, tibble::tibble(x = 10:12)) #> # A tibble: 5 x 2 #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} Instead of falling back to a data frame, we would like to return a \verb{} when combined with a data frame or a tibble. Because this subclass has more metadata than normal data frames (it has a colour), it is a \emph{supertype} of tibble and data frame, i.e. it is the richer type. This is similar to how a grouped tibble is a more general type than a tibble or a data frame. Conceptually, the latter are pinned to a single constant group. The coercion methods for data frames operate in two steps: \itemize{ \item They check for compatible subclass attributes. In our case the tibble colour has to be the same, or be undefined. \item They call their parent methods, in this case \code{\link[=tib_ptype2]{tib_ptype2()}} and \code{\link[=tib_cast]{tib_cast()}} because we have a subclass of tibble. This eventually calls the data frame methods \code{\link[=df_ptype2]{df_ptype2()}} and \code{\link[=tib_ptype2]{tib_ptype2()}} which match the columns and their types. } This process should usually be wrapped in two functions to avoid repetition. Consider exporting these if you expect your class to be derived by other subclasses. We first implement a helper to determine if two data frames have compatible colours. We use the \code{df_colour()} accessor which returns \code{NULL} when the data frame colour is undefined.\if{html}{\out{
}}\preformatted{has_compatible_colours <- function(x, y) \{ x_colour <- df_colour(x) \%||\% df_colour(y) y_colour <- df_colour(y) \%||\% x_colour identical(x_colour, y_colour) \} }\if{html}{\out{
}} Next we implement the coercion helpers. If the colours are not compatible, we call \code{stop_incompatible_cast()} or \code{stop_incompatible_type()}. These strict coercion semantics are justified because in this class colour is a \emph{data} attribute. If it were a non essential \emph{detail} attribute, like the timezone in a datetime, we would just standardise it to the value of the left-hand side. In simpler cases (like the data.table example), these methods do not need to take the arguments suffixed in \verb{_arg}. Here we do need to take these arguments so we can pass them to the \code{stop_} functions when we detect an incompatibility. They also should be passed to the parent methods.\if{html}{\out{
}}\preformatted{#' @export my_tib_cast <- function(x, to, ..., x_arg = "", to_arg = "") \{ out <- tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) if (!has_compatible_colours(x, to)) \{ stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, details = "Can't combine colours." ) \} colour <- df_colour(x) \%||\% df_colour(to) new_my_tibble(out, colour = colour) \} #' @export my_tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") \{ out <- tib_ptype2(x, y, ..., x_arg = x_arg, y_arg = y_arg) if (!has_compatible_colours(x, y)) \{ stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = "Can't combine colours." ) \} colour <- df_colour(x) \%||\% df_colour(y) new_my_tibble(out, colour = colour) \} }\if{html}{\out{
}} Let’s now implement the coercion methods, starting with the self-self methods.\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_cast.my_tibble.my_tibble <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} }\if{html}{\out{
}} We can now combine compatible instances of our class!\if{html}{\out{
}}\preformatted{vec_rbind(red, red) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 1 1 #> 4 1 2 vec_rbind(green, green) #> #> z #> #> 1 TRUE #> 2 TRUE vec_rbind(green, red) #> Error: Can't combine `..1` and `..2` . #> Can't combine colours. }\if{html}{\out{
}} The methods for combining our class with tibbles follow the same pattern. For ptype2 we return our class in both cases because it is the richer type:\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.tbl_df <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_ptype2.tbl_df.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} }\if{html}{\out{
}} For cast are careful about returning a tibble when casting to a tibble. Note the call to \code{vctrs::tib_cast()}:\if{html}{\out{
}}\preformatted{#' @export vec_cast.my_tibble.tbl_df <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} #' @export vec_cast.tbl_df.my_tibble <- function(x, to, ...) \{ tib_cast(x, to, ...) \} }\if{html}{\out{
}} From this point, we get correct combinations with tibbles:\if{html}{\out{
}}\preformatted{vec_rbind(red, tibble::tibble(x = 10:12)) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} However we are not done yet. Because the coercion hierarchy is different from the class hierarchy, there is no inheritance of coercion methods. We’re not getting correct behaviour for data frames yet because we haven’t explicitly specified the methods for this class:\if{html}{\out{
}}\preformatted{vec_rbind(red, data.frame(x = 10:12)) #> # A tibble: 5 x 2 #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} Let’s finish up the boiler plate:\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.data.frame <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_ptype2.data.frame.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_cast.my_tibble.data.frame <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} #' @export vec_cast.data.frame.my_tibble <- function(x, to, ...) \{ df_cast(x, to, ...) \} }\if{html}{\out{
}} This completes the implementation:\if{html}{\out{
}}\preformatted{vec_rbind(red, data.frame(x = 10:12)) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} } } vctrs/man/vec_arith.Rd0000644000176200001440000000373513622451540014433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/arith.R \name{vec_arith} \alias{vec_arith} \alias{vec_arith.default} \alias{vec_arith.logical} \alias{vec_arith.numeric} \alias{vec_arith_base} \alias{MISSING} \title{Arithmetic operations} \usage{ vec_arith(op, x, y, ...) \method{vec_arith}{default}(op, x, y, ...) \method{vec_arith}{logical}(op, x, y, ...) \method{vec_arith}{numeric}(op, x, y, ...) vec_arith_base(op, x, y) MISSING() } \arguments{ \item{op}{An arithmetic operator as a string} \item{x, y}{A pair of vectors. For \code{!}, unary \code{+} and unary \code{-}, \code{y} will be a sentinel object of class \code{MISSING}, as created by \code{MISSING()}.} \item{...}{These dots are for future extensions and must be empty.} } \description{ This generic provides a common double dispatch mechanism for all infix operators (\code{+}, \code{-}, \code{/}, \code{*}, \code{^}, \code{\%\%}, \code{\%/\%}, \code{!}, \code{&}, \code{|}). It is used to power the default arithmetic and boolean operators for \link{vctr}s objects, overcoming the limitations of the base \link{Ops} generic. } \details{ \code{vec_arith_base()} is provided as a convenience for writing methods. It recycles \code{x} and \code{y} to common length then calls the base operator with the underlying \code{\link[=vec_data]{vec_data()}}. \code{vec_arith()} is also used in \code{diff.vctrs_vctr()} method via \code{-}. } \examples{ d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-02 12:00") t <- as.difftime(12, unit = "hours") vec_arith("-", dt, 1) vec_arith("-", dt, t) vec_arith("-", dt, d) vec_arith("+", dt, 86400) vec_arith("+", dt, t) vec_arith("+", t, t) vec_arith("/", t, t) vec_arith("/", t, 2) vec_arith("*", t, 2) } \seealso{ \code{\link[=stop_incompatible_op]{stop_incompatible_op()}} for signalling that an arithmetic operation is not permitted/supported. See \code{\link[=vec_math]{vec_math()}} for the equivalent for the unary mathematical functions. } \keyword{internal} vctrs/man/howto-faq-coercion.Rd0000644000176200001440000002221113653027721016165 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-coercion} \alias{howto-faq-coercion} \title{FAQ - How to implement ptype2 and cast methods?} \description{ This guide illustrates how to implement \code{vec_ptype2()} and \code{vec_cast()} methods for existing classes. Related topics: \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")} } \subsection{The natural number class}{ We’ll illustrate how to implement coercion methods with a simple class that represents natural numbers. In this scenario we have an existing class that already features a constructor and methods for \code{print()} and subset.\if{html}{\out{
}}\preformatted{#' @export new_natural <- function(x) \{ if (is.numeric(x) || is.logical(x)) \{ stopifnot(is_whole(x)) x <- as.integer(x) \} else \{ stop("Can't construct natural from unknown type.") \} structure(x, class = "my_natural") \} is_whole <- function(x) \{ all(x \%\% 1 == 0 | is.na(x)) \} #' @export print.my_natural <- function(x, ...) \{ cat("\n") x <- unclass(x) NextMethod() \} #' @export `[.my_natural` <- function(x, i, ...) \{ new_natural(NextMethod()) \} }\if{html}{\out{
}}\if{html}{\out{
}}\preformatted{new_natural(1:3) #> #> [1] 1 2 3 new_natural(c(1, NA)) #> #> [1] 1 NA }\if{html}{\out{
}} } \subsection{Roxygen workflow}{ To implement methods for generics, first import the generics in your namespace and redocument:\if{html}{\out{
}}\preformatted{#' @importFrom vctrs vec_ptype2 vec_cast NULL }\if{html}{\out{
}} Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won’t be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with \code{s3_register()}. } \subsection{Implementing \code{vec_ptype2()}}{ \subsection{The self-self method}{ The first method to implement is the one that signals that your class is compatible with itself:\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.my_natural <- function(x, y, ...) \{ x \} vec_ptype2(new_natural(1), new_natural(2:3)) #> #> integer(0) }\if{html}{\out{
}} \code{vec_ptype2()} implements a fallback to try and be compatible with simple classes, so it may seem that you don’t need to implement the self-self coercion method. However, you must implement it explicitly because this is how vctrs knows that a class that is implementing vctrs methods (for instance this disable fallbacks to \code{base::c()}). Also, it makes your class a bit more efficient. } \subsection{The parent and children methods}{ Our natural number class is conceptually a parent of \verb{} and a child of \verb{}, but the class is not compatible with logical, integer, or double vectors yet:\if{html}{\out{
}}\preformatted{vec_ptype2(TRUE, new_natural(2:3)) #> Error: Can't combine and . vec_ptype2(new_natural(1), 2:3) #> Error: Can't combine and . }\if{html}{\out{
}} We’ll specify the twin methods for each of these classes, returning the richer class in each case.\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.logical <- function(x, y, ...) \{ # The order of the classes in the method name follows the order of # the arguments in the function signature, so `x` is the natural # number and `y` is the logical x \} #' @export vec_ptype2.logical.my_natural <- function(x, y, ...) \{ # In this case `y` is the richer natural number y \} }\if{html}{\out{
}} Between a natural number and an integer, the latter is the richer class:\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.integer <- function(x, y, ...) \{ y \} #' @export vec_ptype2.integer.my_natural <- function(x, y, ...) \{ x \} }\if{html}{\out{
}} We no longer get common type errors for logical and integer:\if{html}{\out{
}}\preformatted{vec_ptype2(TRUE, new_natural(2:3)) #> #> integer(0) vec_ptype2(new_natural(1), 2:3) #> integer(0) }\if{html}{\out{
}} We are not done yet. Pairwise coercion methods must be implemented for all the connected nodes in the coercion hierarchy, which include double vectors further up. The coercion methods for grand-parent types must be implemented separately:\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.double <- function(x, y, ...) \{ y \} #' @export vec_ptype2.double.my_natural <- function(x, y, ...) \{ x \} }\if{html}{\out{
}} } \subsection{Incompatible attributes}{ Most of the time, inputs are incompatible because they have different classes for which no \code{vec_ptype2()} method is implemented. More rarely, inputs could be incompatible because of their attributes. In that case incompatibility is signalled by calling \code{stop_incompatible_type()}. In the following example, we implement a self-self ptype2 method for a hypothetical subclass of \verb{} that has stricter combination semantics. The method throws when the levels of the two factors are not compatible.\if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_strict_factor.my_strict_factor <- function(x, y, ..., x_arg = "", y_arg = "") \{ if (!setequal(levels(x), levels(y))) \{ stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) \} x \} }\if{html}{\out{
}} Note how the methods need to take \code{x_arg} and \code{y_arg} parameters and pass them on to \code{stop_incompatible_type()}. These argument tags help create more informative error messages when the common type determination is for a column of a data frame. They are part of the generic signature but can usually be left out if not used. } } \subsection{Implementing \code{vec_cast()}}{ Corresponding \code{vec_cast()} methods must be implemented for all \code{vec_ptype2()} methods. The general pattern is to convert the argument \code{x} to the type of \code{to}. The methods should validate the values in \code{x} and make sure they conform to the values of \code{to}. Please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents \code{to}, whereas the second class represents \code{x}. The self-self method is easy in this case, it just returns the target input:\if{html}{\out{
}}\preformatted{#' @export vec_cast.my_natural.my_natural <- function(x, to, ...) \{ x \} }\if{html}{\out{
}} The other types need to be validated. We perform input validation in the \code{new_natural()} constructor, so that’s a good fit for our \code{vec_cast()} implementations.\if{html}{\out{
}}\preformatted{#' @export vec_cast.my_natural.logical <- function(x, to, ...) \{ # The order of the classes in the method name is in reverse order # of the arguments in the function signature, so `to` is the natural # number and `x` is the logical new_natural(x) \} vec_cast.my_natural.integer <- function(x, to, ...) \{ new_natural(x) \} vec_cast.my_natural.double <- function(x, to, ...) \{ new_natural(x) \} }\if{html}{\out{
}} With these methods, vctrs is now able to combine logical and natural vectors. It properly returns the richer type of the two, a natural vector:\if{html}{\out{
}}\preformatted{vec_c(TRUE, new_natural(1), FALSE) #> #> [1] 1 1 0 }\if{html}{\out{
}} Because we haven’t implemented conversions \emph{from} natural, it still doesn’t know how to combine natural with the richer integer and double types:\if{html}{\out{
}}\preformatted{vec_c(new_natural(1), 10L) #> Error: Can't convert to . vec_c(1.5, new_natural(1)) #> Error: Can't convert to . }\if{html}{\out{
}} This is quick work which completes the implementation of coercion methods for vctrs:\if{html}{\out{
}}\preformatted{#' @export vec_cast.logical.my_natural <- function(x, to, ...) \{ # In this case `to` is the logical and `x` is the natural number attributes(x) <- NULL as.logical(x) \} #' @export vec_cast.integer.my_natural <- function(x, to, ...) \{ attributes(x) <- NULL as.integer(x) \} #' @export vec_cast.double.my_natural <- function(x, to, ...) \{ attributes(x) <- NULL as.double(x) \} }\if{html}{\out{
}} And we now get the expected combinations.\if{html}{\out{
}}\preformatted{vec_c(new_natural(1), 10L) #> [1] 1 10 vec_c(1.5, new_natural(1)) #> [1] 1.5 1.0 }\if{html}{\out{
}} } } vctrs/man/as-is.Rd0000644000176200001440000000060213650511520013465 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-asis.R \name{as-is} \alias{as-is} \alias{vec_ptype2.AsIs} \title{AsIs S3 class} \usage{ \method{vec_ptype2}{AsIs}(x, y, ..., x_arg = "", y_arg = "") } \description{ These functions help the base AsIs class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/vec_cbind_frame_ptype.Rd0000644000176200001440000000134414027045462016772 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind.R \name{vec_cbind_frame_ptype} \alias{vec_cbind_frame_ptype} \title{Frame prototype} \usage{ vec_cbind_frame_ptype(x, ...) } \arguments{ \item{x}{A data frame.} \item{...}{These dots are for future extensions and must be empty.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This is an experimental generic that returns zero-columns variants of a data frame. It is needed for \code{\link[=vec_cbind]{vec_cbind()}}, to work around the lack of colwise primitives in vctrs. Expect changes. } \keyword{internal} vctrs/man/vec_chop.Rd0000644000176200001440000001146213671672047014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice-chop.R \name{vec_chop} \alias{vec_chop} \alias{vec_unchop} \title{Chopping} \usage{ vec_chop(x, indices = NULL) vec_unchop( x, indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal") ) } \arguments{ \item{x}{A vector} \item{indices}{For \code{vec_chop()}, a list of positive integer vectors to slice \code{x} with, or \code{NULL}. If \code{NULL}, \code{x} is split into its individual elements, equivalent to using an \code{indices} of \code{as.list(vec_seq_along(x))}. For \code{vec_unchop()}, a list of positive integer vectors specifying the locations to place elements of \code{x} in. Each element of \code{x} is recycled to the size of the corresponding index vector. The size of \code{indices} must match the size of \code{x}. If \code{NULL}, \code{x} is combined in the order it is provided in, which is equivalent to using \code{\link[=vec_c]{vec_c()}}.} \item{ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{x}. Alternatively, you can supply \code{ptype} to give the output a known type.} \item{name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} } \value{ \itemize{ \item \code{vec_chop()}: A list of size \code{vec_size(indices)} or, if \code{indices == NULL}, \code{vec_size(x)}. \item \code{vec_unchop()}: A vector of type \code{vec_ptype_common(!!!x)}, or \code{ptype}, if specified. The size is computed as \code{vec_size_common(!!!indices)} unless the indices are \code{NULL}, in which case the size is \code{vec_size_common(!!!x)}. } } \description{ \itemize{ \item \code{vec_chop()} provides an efficient method to repeatedly slice a vector. It captures the pattern of \code{map(indices, vec_slice, x = x)}. When no indices are supplied, it is generally equivalent to \code{\link[=as.list]{as.list()}}. \item \code{vec_unchop()} combines a list of vectors into a single vector, placing elements in the output according to the locations specified by \code{indices}. It is similar to \code{\link[=vec_c]{vec_c()}}, but gives greater control over how the elements are combined. When no indices are supplied, it is identical to \code{vec_c()}. } If \code{indices} selects every value in \code{x} exactly once, in any order, then \code{vec_unchop()} is the inverse of \code{vec_chop()} and the following invariant holds:\preformatted{vec_unchop(vec_chop(x, indices), indices) == x } } \section{Dependencies of \code{vec_chop()}}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \section{Dependencies of \code{vec_unchop()}}{ \itemize{ \item \code{\link[=vec_c]{vec_c()}} } } \examples{ vec_chop(1:5) vec_chop(1:5, list(1, 1:2)) vec_chop(mtcars, list(1:3, 4:6)) # If `indices` selects every value in `x` exactly once, # in any order, then `vec_unchop()` inverts `vec_chop()` x <- c("a", "b", "c", "d") indices <- list(2, c(3, 1), 4) vec_chop(x, indices) vec_unchop(vec_chop(x, indices), indices) # When unchopping, size 1 elements of `x` are recycled # to the size of the corresponding index vec_unchop(list(1, 2:3), list(c(1, 3, 5), c(2, 4))) # Names are retained, and outer names can be combined with inner # names through the use of a `name_spec` lst <- list(x = c(a = 1, b = 2), y = 1) vec_unchop(lst, list(c(3, 2), c(1, 4)), name_spec = "{outer}_{inner}") # An alternative implementation of `ave()` can be constructed using # `vec_chop()` and `vec_unchop()` in combination with `vec_group_loc()` ave2 <- function(.x, .by, .f, ...) { indices <- vec_group_loc(.by)$loc chopped <- vec_chop(.x, indices) out <- lapply(chopped, .f, ...) vec_unchop(out, indices) } breaks <- warpbreaks$breaks wool <- warpbreaks$wool ave2(breaks, wool, mean) identical( ave2(breaks, wool, mean), ave(breaks, wool, FUN = mean) ) } vctrs/man/op-empty-default.Rd0000644000176200001440000000101113650511520015640 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{\%0\%} \alias{\%0\%} \title{Default value for empty vectors} \usage{ x \%0\% y } \arguments{ \item{x}{A vector} \item{y}{Value to use if \code{x} is empty. To preserve type-stability, should be the same type as \code{x}.} } \description{ Use this inline operator when you need to provide a default value for empty (as defined by \code{\link[=vec_is_empty]{vec_is_empty()}}) vectors. } \examples{ 1:10 \%0\% 5 integer() \%0\% 5 } vctrs/man/name_spec.Rd0000644000176200001440000000522513671672047014430 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{name_spec} \alias{name_spec} \title{Name specifications} \arguments{ \item{name_spec, .name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} } \description{ A name specification describes how to combine an inner and outer names. This sort of name combination arises when concatenating vectors or flattening lists. There are two possible cases: \itemize{ \item Named vector:\preformatted{vec_c(outer = c(inner1 = 1, inner2 = 2)) } \item Unnamed vector:\preformatted{vec_c(outer = 1:2) } } In r-lib and tidyverse packages, these cases are errors by default, because there's no behaviour that works well for every case. Instead, you can provide a name specification that describes how to combine the inner and outer names of inputs. Name specifications can refer to: \itemize{ \item \code{outer}: The external name recycled to the size of the input vector. \item \code{inner}: Either the names of the input vector, or a sequence of integer from 1 to the size of the vector if it is unnamed. } } \examples{ # By default, named inputs must be length 1: vec_c(name = 1) # ok try(vec_c(name = 1:3)) # bad # They also can't have internal names, even if scalar: try(vec_c(name = c(internal = 1))) # bad # Pass a name specification to work around this. A specification # can be a glue string referring to `outer` and `inner`: vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}") vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}_{inner}") # They can also be functions: my_spec <- function(outer, inner) paste(outer, inner, sep = "_") vec_c(name = 1:3, other = 4:5, .name_spec = my_spec) # Or purrr-style formulas for anonymous functions: vec_c(name = 1:3, other = 4:5, .name_spec = ~ paste0(.x, .y)) } vctrs/man/df_list.Rd0000644000176200001440000000435713712271424014115 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{df_list} \alias{df_list} \title{Collect columns for data frame construction} \usage{ df_list( ..., .size = NULL, .name_repair = c("check_unique", "unique", "universal", "minimal") ) } \arguments{ \item{...}{Vectors of equal-length. When inputs are named, those names are used for names of the resulting list.} \item{.size}{The common size of vectors supplied in \code{...}. If \code{NULL}, this will be computed as the common size of the inputs.} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"} or \code{"minimal"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} } \description{ \code{df_list()} constructs the data structure underlying a data frame, a named list of equal-length vectors. It is often used in combination with \code{\link[=new_data_frame]{new_data_frame()}} to safely and consistently create a helper function for data frame subclasses. } \section{Properties}{ \itemize{ \item Inputs are recycled to a common size with \code{\link[=vec_recycle_common]{vec_recycle_common()}}. \item With the exception of data frames, inputs are not modified in any way. Character vectors are never converted to factors, and lists are stored as-is for easy creation of list-columns. \item Unnamed data frame inputs are automatically spliced. Named data frame inputs are stored unmodified as data frame columns. \item \code{NULL} inputs are completely ignored. \item The dots are dynamic, allowing for splicing of lists with \verb{!!!} and unquoting. } } \examples{ # `new_data_frame()` can be used to create custom data frame constructors new_fancy_df <- function(x = list(), n = NULL, ..., class = NULL) { new_data_frame(x, n = n, ..., class = c(class, "fancy_df")) } # Combine this constructor with `df_list()` to create a safe, # consistent helper function for your data frame subclass fancy_df <- function(...) { data <- df_list(...) new_fancy_df(data) } df <- fancy_df(x = 1) class(df) } \seealso{ \code{\link[=new_data_frame]{new_data_frame()}} for constructing data frame subclasses from a validated input. \code{\link[=data_frame]{data_frame()}} for a fast data frame creation helper. } vctrs/man/new_data_frame.Rd0000644000176200001440000000272713712271424015424 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{new_data_frame} \alias{new_data_frame} \title{Assemble attributes for data frame construction} \usage{ new_data_frame(x = list(), n = NULL, ..., class = NULL) } \arguments{ \item{x}{A named list of equal-length vectors. The lengths are not checked; it is responsibility of the caller to make sure they are equal.} \item{n}{Number of rows. If \code{NULL}, will be computed from the length of the first element of \code{x}.} \item{..., class}{Additional arguments for creating subclasses. The \code{"names"} and \code{"row.names"} attributes override input in \code{x} and \code{n}, respectively: \itemize{ \item \code{"names"} is used if provided, overriding existing names in \code{x} \item \code{"row.names"} is used if provided, if \code{n} is provided it must be consistent. }} } \description{ \code{new_data_frame()} constructs a new data frame from an existing list. It is meant to be performant, and does not check the inputs for correctness in any way. It is only safe to use after a call to \code{\link[=df_list]{df_list()}}, which collects and validates the columns used to construct the data frame. } \examples{ new_data_frame(list(x = 1:10, y = 10:1)) } \seealso{ \code{\link[=df_list]{df_list()}} for a way to safely construct a data frame's underlying data structure from individual columns. This can be used to create a named list for further use by \code{new_data_frame()}. } vctrs/man/data_frame.Rd0000644000176200001440000000625113712271424014547 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{data_frame} \alias{data_frame} \title{Construct a data frame} \usage{ data_frame( ..., .size = NULL, .name_repair = c("check_unique", "unique", "universal", "minimal") ) } \arguments{ \item{...}{Vectors to become columns in the data frame. When inputs are named, those names are used for column names.} \item{.size}{The number of rows in the data frame. If \code{NULL}, this will be computed as the common size of the inputs.} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"} or \code{"minimal"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} } \description{ \code{data_frame()} constructs a data frame. It is similar to \code{\link[base:data.frame]{base::data.frame()}}, but there are a few notable differences that make it more in line with vctrs principles. The Properties section outlines these. } \details{ If no column names are supplied, \code{""} will be used as a default for all columns. This is applied before name repair occurs, so the default name repair of \code{"check_unique"} will error if any unnamed inputs are supplied and \code{"unique"} will repair the empty string column names appropriately. If the column names don't matter, use a \code{"minimal"} name repair for convenience and performance. } \section{Properties}{ \itemize{ \item Inputs are recycled to a common size with \code{\link[=vec_recycle_common]{vec_recycle_common()}}. \item With the exception of data frames, inputs are not modified in any way. Character vectors are never converted to factors, and lists are stored as-is for easy creation of list-columns. \item Unnamed data frame inputs are automatically spliced. Named data frame inputs are stored unmodified as data frame columns. \item \code{NULL} inputs are completely ignored. \item The dots are dynamic, allowing for splicing of lists with \verb{!!!} and unquoting. } } \examples{ data_frame(x = 1, y = 2) # Inputs are recycled using tidyverse recycling rules data_frame(x = 1, y = 1:3) # Strings are never converted to factors class(data_frame(x = "foo")$x) # List columns can be easily created df <- data_frame(x = list(1:2, 2, 3:4), y = 3:1) # However, the base print method is suboptimal for displaying them, # so it is recommended to convert them to tibble if (rlang::is_installed("tibble")) { tibble::as_tibble(df) } # Named data frame inputs create data frame columns df <- data_frame(x = data_frame(y = 1:2, z = "a")) # The `x` column itself is another data frame df$x # Again, it is recommended to convert these to tibbles for a better # print method if (rlang::is_installed("tibble")) { tibble::as_tibble(df) } # Unnamed data frame input is automatically spliced data_frame(x = 1, data_frame(y = 1:2, z = "a")) } \seealso{ \code{\link[=df_list]{df_list()}} for safely creating a data frame's underlying data structure from individual columns. \code{\link[=new_data_frame]{new_data_frame()}} for constructing the actual data frame from that underlying data structure. Together, these can be useful for developers when creating new data frame subclasses supporting standard evaluation. } vctrs/man/vec_order.Rd0000644000176200001440000000240014042540502014415 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order.R \name{vec_order} \alias{vec_order} \alias{vec_sort} \title{Order and sort vectors} \usage{ vec_order(x, direction = c("asc", "desc"), na_value = c("largest", "smallest")) vec_sort(x, direction = c("asc", "desc"), na_value = c("largest", "smallest")) } \arguments{ \item{x}{A vector} \item{direction}{Direction to sort in. Defaults to \code{asc}ending.} \item{na_value}{Should \code{NA}s be treated as the largest or smallest values?} } \value{ \itemize{ \item \code{vec_order()} an integer vector the same size as \code{x}. \item \code{vec_sort()} a vector with the same size and type as \code{x}. } } \description{ Order and sort vectors } \section{Dependencies of \code{vec_order()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} } } \section{Dependencies of \code{vec_sort()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} \item \code{\link[=vec_order]{vec_order()}} \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ x <- round(c(runif(9), NA), 3) vec_order(x) vec_sort(x) vec_sort(x, "desc") # Can also handle data frames df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) vec_order(df) vec_sort(df) vec_sort(df, "desc") } vctrs/man/vec_fill_missing.Rd0000644000176200001440000000275614027045462016007 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fill.R \name{vec_fill_missing} \alias{vec_fill_missing} \title{Fill in missing values with the previous or following value} \usage{ vec_fill_missing( x, direction = c("down", "up", "downup", "updown"), max_fill = NULL ) } \arguments{ \item{x}{A vector} \item{direction}{Direction in which to fill missing values. Must be either \code{"down"}, \code{"up"}, \code{"downup"}, or \code{"updown"}.} \item{max_fill}{A single positive integer specifying the maximum number of sequential missing values that will be filled. If \code{NULL}, there is no limit.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_fill_missing()} fills gaps of missing values with the previous or following non-missing value. } \examples{ x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) # Filling down replaces missing values with the previous non-missing value vec_fill_missing(x, direction = "down") # To also fill leading missing values, use `"downup"` vec_fill_missing(x, direction = "downup") # Limit the number of sequential missing values to fill with `max_fill` vec_fill_missing(x, max_fill = 1) # Data frames are filled rowwise. Rows are only considered missing # if all elements of that row are missing. y <- c(1, NA, 2, NA, NA, 3, 4, NA, 5) df <- data_frame(x = x, y = y) df vec_fill_missing(df) } vctrs/man/vec_repeat.Rd0000644000176200001440000000151714027045462014602 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_repeat} \alias{vec_repeat} \title{Expand the length of a vector} \usage{ vec_repeat(x, each = 1L, times = 1L) } \arguments{ \item{x}{A vector.} \item{each}{Number of times to repeat each element of \code{x}.} \item{times}{Number of times to repeat the whole vector of \code{x}.} } \value{ A vector the same type as \code{x} with size \code{vec_size(x) * times * each}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_repeat()} has been replaced with \code{\link[=vec_rep]{vec_rep()}} and \code{\link[=vec_rep_each]{vec_rep_each()}} and is deprecated as of vctrs 0.3.0. } \keyword{internal} vctrs/man/vec_proxy_equal.Rd0000644000176200001440000000247413663716767015717 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equal.R \name{vec_proxy_equal} \alias{vec_proxy_equal} \title{Equality proxy} \usage{ vec_proxy_equal(x, ...) } \arguments{ \item{x}{A vector x.} \item{...}{These dots are for future extensions and must be empty.} } \value{ A 1d atomic vector or a data frame. } \description{ Returns a proxy object (i.e. an atomic vector or data frame of atomic vectors). For \link{vctr}s, this determines the behaviour of \code{==} and \code{!=} (via \code{\link[=vec_equal]{vec_equal()}}); \code{\link[=unique]{unique()}}, \code{\link[=duplicated]{duplicated()}} (via \code{\link[=vec_unique]{vec_unique()}} and \code{\link[=vec_duplicate_detect]{vec_duplicate_detect()}}); \code{\link[=is.na]{is.na()}} and \code{\link[=anyNA]{anyNA()}} (via \code{\link[=vec_equal_na]{vec_equal_na()}}). } \details{ The default method calls \code{\link[=vec_proxy]{vec_proxy()}}, as the default underlying vector data should be equal-able in most cases. If your class is not equal-able, provide a \code{vec_proxy_equal()} method that throws an error. If the proxy for \code{x} is a data frame, \code{vec_proxy_equal()} is recursively applied on all columns as well. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} called by default } } \keyword{internal} vctrs/man/obj_print.Rd0000644000176200001440000000166513637142417014463 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print-str.R \name{obj_print} \alias{obj_print} \alias{obj_print_header} \alias{obj_print_data} \alias{obj_print_footer} \alias{obj_str} \alias{obj_str_header} \alias{obj_str_data} \alias{obj_str_footer} \title{\code{print()} and \code{str()} generics.} \usage{ obj_print(x, ...) obj_print_header(x, ...) obj_print_data(x, ...) obj_print_footer(x, ...) obj_str(x, ...) obj_str_header(x, ...) obj_str_data(x, ...) obj_str_footer(x, ...) } \arguments{ \item{x}{A vector} \item{...}{Additional arguments passed on to methods. See \code{\link[=print]{print()}} and \code{\link[=str]{str()}} for commonly used options} } \description{ These are constructed to be more easily extensible since you can override the \verb{_header()}, \verb{_data()} or \verb{_footer()} components individually. The default methods are built on top of \code{format()}. } \keyword{internal} vctrs/man/vec_ptype.Rd0000644000176200001440000001051014027045462014454 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type.R \name{vec_ptype} \alias{vec_ptype} \alias{vec_ptype_common} \alias{vec_ptype_show} \title{Find the prototype of a set of vectors} \usage{ vec_ptype(x, ..., x_arg = "") vec_ptype_common(..., .ptype = NULL) vec_ptype_show(...) } \arguments{ \item{x}{A vector} \item{...}{For \code{vec_ptype()}, these dots are for future extensions and must be empty. For \code{vec_ptype_common()} and \code{vec_ptype_show()}, vector inputs.} \item{x_arg}{Argument name for \code{x}. This is used in error messages to inform the user about the locations of incompatible types.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} } \value{ \code{vec_ptype()} and \code{vec_ptype_common()} return a prototype (a size-0 vector) } \description{ \code{vec_ptype()} returns the unfinalised prototype of a single vector. \code{vec_ptype_common()} finds the common type of multiple vectors. \code{vec_ptype_show()} nicely prints the common type of any number of inputs, and is designed for interactive exploration. } \section{\code{vec_ptype()}}{ \code{vec_ptype()} returns \link[=vec_size]{size} 0 vectors potentially containing attributes but no data. Generally, this is just \code{vec_slice(x, 0L)}, but some inputs require special handling. \itemize{ \item While you can't slice \code{NULL}, the prototype of \code{NULL} is itself. This is because we treat \code{NULL} as an identity value in the \code{vec_ptype2()} monoid. \item The prototype of logical vectors that only contain missing values is the special \link{unspecified} type, which can be coerced to any other 1d type. This allows bare \code{NA}s to represent missing values for any 1d vector type. } See \link{internal-faq-ptype2-identity} for more information about identity values. \code{vec_ptype()} is a \emph{performance} generic. It is not necessary to implement it because the default method will work for any vctrs type. However the default method builds around other vctrs primitives like \code{vec_slice()} which incurs performance costs. If your class has a static prototype, you might consider implementing a custom \code{vec_ptype()} method that returns a constant. This will improve the performance of your class in many cases (\link[=vec_ptype2]{common type} imputation in particular). Because it may contain unspecified vectors, the prototype returned by \code{vec_ptype()} is said to be \strong{unfinalised}. Call \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} to finalise it. Commonly you will need the finalised prototype as returned by \code{vec_slice(x, 0L)}. } \section{\code{vec_ptype_common()}}{ \code{vec_ptype_common()} first finds the prototype of each input, then successively calls \code{\link[=vec_ptype2]{vec_ptype2()}} to find a common type. It returns a \link[=vec_ptype_finalise]{finalised} prototype. } \section{Dependencies of \code{vec_ptype()}}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} for returning an empty slice } } \section{Dependencies of \code{vec_ptype_common()}}{ \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} } } \examples{ # Unknown types ------------------------------------------ vec_ptype_show() vec_ptype_show(NA) vec_ptype_show(NULL) # Vectors ------------------------------------------------ vec_ptype_show(1:10) vec_ptype_show(letters) vec_ptype_show(TRUE) vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(factor("a")) vec_ptype_show(ordered("a")) # Matrices ----------------------------------------------- # The prototype of a matrix includes the number of columns vec_ptype_show(array(1, dim = c(1, 2))) vec_ptype_show(array("x", dim = c(1, 2))) # Data frames -------------------------------------------- # The prototype of a data frame includes the prototype of # every column vec_ptype_show(iris) # The prototype of multiple data frames includes the prototype # of every column that in any data frame vec_ptype_show( data.frame(x = TRUE), data.frame(y = 2), data.frame(z = "a") ) } vctrs/man/faq-compatibility-types.Rd0000644000176200001440000000623114022644653017245 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-compatibility-types} \alias{faq-compatibility-types} \title{FAQ - How is the compatibility of vector types decided?} \description{ Two vectors are \strong{compatible} when you can safely: \itemize{ \item Combine them into one larger vector. \item Assign values from one of the vectors into the other vector. } Examples of compatible types are integer and double vectors. On the other hand, integer and character vectors are not compatible. } \section{Common type of multiple vectors}{ There are two possible outcomes when multiple vectors of different types are combined into a larger vector: \itemize{ \item An incompatible type error is thrown because some of the types are not compatible:\if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1:3) df2 <- data.frame(x = "foo") dplyr::bind_rows(df1, df2) #> Error: Can't combine `..1$x` and `..2$x` . }\if{html}{\out{
}} \item The vectors are combined into a vector that has the common type of all inputs. In this example, the common type of integer and logical is integer:\if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1:3) df2 <- data.frame(x = FALSE) dplyr::bind_rows(df1, df2) #> x #> 1 1 #> 2 2 #> 3 3 #> 4 0 }\if{html}{\out{
}} } In general, the common type is the \emph{richer} type, in other words the type that can represent the most values. Logical vectors are at the bottom of the hierarchy of numeric types because they can only represent two values (not counting missing values). Then come integer vectors, and then doubles. Here is the vctrs type hierarchy for the fundamental vectors: \figure{coerce.png} } \section{Type conversion and lossy cast errors}{ Type compatibility does not necessarily mean that you can \strong{convert} one type to the other type. That’s because one of the types might support a larger set of possible values. For instance, integer and double vectors are compatible, but double vectors can’t be converted to integer if they contain fractional values. When vctrs can’t convert a vector because the target type is not as rich as the source type, it throws a lossy cast error. Assigning a fractional number to an integer vector is a typical example of a lossy cast error:\if{html}{\out{
}}\preformatted{int_vector <- 1:3 vec_assign(int_vector, 2, 0.001) #> Error: Can't convert from to due to loss of precision. #> * Locations: 1 }\if{html}{\out{
}} } \section{How to make two vector classes compatible?}{ If you encounter two vector types that you think should be compatible, they might need to implement coercion methods. Reach out to the author(s) of the classes and ask them if it makes sense for their classes to be compatible. These developer FAQ items provide guides for implementing coercion methods: \itemize{ \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. } } vctrs/man/vec_as_names_legacy.Rd0000644000176200001440000000211413622451540016424 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_as_names_legacy} \alias{vec_as_names_legacy} \title{Repair names with legacy method} \usage{ vec_as_names_legacy(names, prefix = "V", sep = "") } \arguments{ \item{names}{A character vector.} \item{prefix, sep}{Prefix and separator for repaired names.} } \description{ This standardises names with the legacy approach that was used in tidyverse packages (such as tibble, tidyr, and readxl) before \code{\link[=vec_as_names]{vec_as_names()}} was implemented. This tool is meant to help transitioning to the new name repairing standard and will be deprecated and removed from the package some time in the future. } \examples{ if (rlang::is_installed("tibble")) { library(tibble) # Names repair is turned off by default in tibble: try(tibble(a = 1, a = 2)) # You can turn it on by supplying a repair method: tibble(a = 1, a = 2, .name_repair = "universal") # If you prefer the legacy method, use `vec_as_names_legacy()`: tibble(a = 1, a = 2, .name_repair = vec_as_names_legacy) } } \keyword{internal} vctrs/man/vec_data.Rd0000644000176200001440000000231214027045462014225 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/proxy.R \name{vec_data} \alias{vec_data} \title{Extract underlying data} \usage{ vec_data(x) } \arguments{ \item{x}{A vector or object implementing \code{vec_proxy()}.} } \value{ The data underlying \code{x}, free from any attributes except the names. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Extract the data underlying an S3 vector object, i.e. the underlying (named) atomic vector, data frame, or list. } \section{Difference with \code{vec_proxy()}}{ \itemize{ \item \code{vec_data()} returns unstructured data. The only attributes preserved are names, dims, and dimnames. Currently, due to the underlying memory architecture of R, this creates a full copy of the data for atomic vectors. \item \code{vec_proxy()} may return structured data. This generic is the main customisation point for accessing memory values in vctrs, along with \code{\link[=vec_restore]{vec_restore()}}. Methods must return a vector type. Records and data frames will be processed rowwise. } } \keyword{internal} vctrs/man/vec_math.Rd0000644000176200001440000000403313723213047014245 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/numeric.R \name{vec_math} \alias{vec_math} \alias{vec_math_base} \title{Mathematical operations} \usage{ vec_math(.fn, .x, ...) vec_math_base(.fn, .x, ...) } \arguments{ \item{.fn}{A mathematical function from the base package, as a string.} \item{.x}{A vector.} \item{...}{Additional arguments passed to \code{.fn}.} } \description{ This generic provides a common dispatch mechanism for all regular unary mathematical functions. It is used as a common wrapper around many of the Summary group generics, the Math group generics, and a handful of other mathematical functions like \code{mean()} (but not \code{var()} or \code{sd()}). } \details{ \code{vec_math_base()} is provided as a convenience for writing methods. It calls the base \code{.fn} on the underlying \code{\link[=vec_data]{vec_data()}}. } \section{Included functions}{ \itemize{ \item From the \link{Summary} group generic: \code{prod()}, \code{sum()}, \code{any()}, \code{all()}. \item From the \link{Math} group generic: \code{abs()}, \code{sign()}, \code{sqrt()}, \code{ceiling()}, \code{floor()}, \code{trunc()}, \code{cummax()}, \code{cummin()}, \code{cumprod()}, \code{cumsum()}, \code{log()}, \code{log10()}, \code{log2()}, \code{log1p()}, \code{acos()}, \code{acosh()}, \code{asin()}, \code{asinh()}, \code{atan()}, \code{atanh()}, \code{exp()}, \code{expm1()}, \code{cos()}, \code{cosh()}, \code{cospi()}, \code{sin()}, \code{sinh()}, \code{sinpi()}, \code{tan()}, \code{tanh()}, \code{tanpi()}, \code{gamma()}, \code{lgamma()}, \code{digamma()}, \code{trigamma()}. \item Additional generics: \code{mean()}, \code{is.nan()}, \code{is.finite()}, \code{is.infinite()}. } Note that \code{median()} is currently not implemented, and \code{sd()} and \code{var()} are currently not generic and so do not support custom classes. } \examples{ x <- new_vctr(c(1, 2.5, 10)) x abs(x) sum(x) cumsum(x) } \seealso{ \code{\link[=vec_arith]{vec_arith()}} for the equivalent for the arithmetic infix operators. } \keyword{internal} vctrs/man/list_of.Rd0000644000176200001440000000370113721736206014124 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-list-of.R \name{list_of} \alias{list_of} \alias{as_list_of} \alias{validate_list_of} \alias{is_list_of} \alias{vec_ptype2.vctrs_list_of} \alias{vec_cast.vctrs_list_of} \title{\code{list_of} S3 class for homogenous lists} \usage{ list_of(..., .ptype = NULL) as_list_of(x, ...) validate_list_of(x) is_list_of(x) \method{vec_ptype2}{vctrs_list_of}(x, y, ..., x_arg = "", y_arg = "") \method{vec_cast}{vctrs_list_of}(x, to, ...) } \arguments{ \item{...}{Vectors to coerce.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{x}{For \code{as_list_of()}, a vector to be coerced to list_of.} \item{y, to}{Arguments to \code{vec_ptype2()} and \code{vec_cast()}.} \item{x_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \description{ A \code{list_of} object is a list where each element has the same type. Modifying the list with \code{$}, \code{[}, and \code{[[} preserves the constraint by coercing all input items. } \details{ Unlike regular lists, setting a list element to \code{NULL} using \code{[[} does not remove it. } \examples{ x <- list_of(1:3, 5:6, 10:15) if (requireNamespace("tibble", quietly = TRUE)) { tibble::tibble(x = x) } vec_c(list_of(1, 2), list_of(FALSE, TRUE)) } vctrs/man/df_ptype2.Rd0000644000176200001440000000411613663716767014401 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R, R/type-tibble.R \name{df_ptype2} \alias{df_ptype2} \alias{df_cast} \alias{tib_ptype2} \alias{tib_cast} \title{Coercion between two data frames} \usage{ df_ptype2(x, y, ..., x_arg = "", y_arg = "") df_cast(x, to, ..., x_arg = "", to_arg = "") tib_ptype2(x, y, ..., x_arg = "", y_arg = "") tib_cast(x, to, ..., x_arg = "", to_arg = "") } \arguments{ \item{x, y, to}{Subclasses of data frame.} \item{...}{If you call \code{df_ptype2()} or \code{df_cast()} from a \code{vec_ptype2()} or \code{vec_cast()} method, you must forward the dots passed to your method on to \code{df_ptype2()} or \code{df_cast()}.} \item{x_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument names for \code{x} and \code{to}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \value{ \itemize{ \item When \code{x} and \code{y} are not compatible, an error of class \code{vctrs_error_incompatible_type} is thrown. \item When \code{x} and \code{y} are compatible, \code{df_ptype2()} returns the common type as a bare data frame. \code{tib_ptype2()} returns the common type as a bare tibble. } } \description{ \code{df_ptype2()} and \code{df_cast()} are the two functions you need to call from \code{vec_ptype2()} and \code{vec_cast()} methods for data frame subclasses. See \link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}. Their main job is to determine the common type of two data frames, adding and coercing columns as needed, or throwing an incompatible type error when the columns are not compatible. } vctrs/man/faq-error-scalar-type.Rd0000644000176200001440000000475614027045674016622 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-error-scalar-type} \alias{faq-error-scalar-type} \title{FAQ - Error: Input must be a vector} \description{ This error occurs when a function expects a vector and gets a scalar object instead. This commonly happens when some code attempts to assign a scalar object as column in a data frame:\if{html}{\out{
}}\preformatted{fn <- function() NULL tibble::tibble(x = fn) #> Error: All columns in a tibble must be vectors. #> x Column `x` is a function. fit <- lm(1:3 ~ 1) tibble::tibble(x = fit) #> Error: All columns in a tibble must be vectors. #> x Column `x` is a `lm` object. }\if{html}{\out{
}} } \section{Vectorness in base R and in the tidyverse}{ In base R, almost everything is a vector or behaves like a vector. In the tidyverse we have chosen to be a bit stricter about what is considered a vector. The main question we ask ourselves to decide on the vectorness of a type is whether it makes sense to include that object as a column in a data frame. The main difference is that S3 lists are considered vectors by base R but in the tidyverse that’s not the case by default:\if{html}{\out{
}}\preformatted{fit <- lm(1:3 ~ 1) typeof(fit) #> [1] "list" class(fit) #> [1] "lm" # S3 lists can be subset like a vector using base R: fit[1:3] #> $coefficients #> (Intercept) #> 2 #> #> $residuals #> 1 2 3 #> -1.000000e+00 -3.885781e-16 1.000000e+00 #> #> $effects #> (Intercept) #> -3.4641016 0.3660254 1.3660254 # But not in vctrs vctrs::vec_slice(fit, 1:3) #> Error: Input must be a vector, not a object. }\if{html}{\out{
}} Defused function calls are another (more esoteric) example:\if{html}{\out{
}}\preformatted{call <- quote(foo(bar = TRUE, baz = FALSE)) call #> foo(bar = TRUE, baz = FALSE) # They can be subset like a vector using base R: call[1:2] #> foo(bar = TRUE) lapply(call, function(x) x) #> [[1]] #> foo #> #> $bar #> [1] TRUE #> #> $baz #> [1] FALSE # But not with vctrs: vctrs::vec_slice(call, 1:2) #> Error: Input must be a vector, not a call. }\if{html}{\out{
}} } \section{I get a scalar type error but I think this is a bug}{ It’s possible the author of the class needs to do some work to declare their class a vector. Consider reaching out to the author. We have written a \link[=howto-faq-fix-scalar-type-error]{developer FAQ page} to help them fix the issue. } vctrs/man/vec_ptype_full.Rd0000644000176200001440000000257414027045462015511 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ptype-abbr-full.R \name{vec_ptype_full} \alias{vec_ptype_full} \alias{vec_ptype_abbr} \title{Vector type as a string} \usage{ vec_ptype_full(x, ...) vec_ptype_abbr(x, ..., prefix_named = FALSE, suffix_shape = TRUE) } \arguments{ \item{x}{A vector.} \item{...}{These dots are for future extensions and must be empty.} \item{prefix_named}{If \code{TRUE}, add a prefix for named vectors.} \item{suffix_shape}{If \code{TRUE} (the default), append the shape of the vector.} } \value{ A string. } \description{ \code{vec_ptype_full()} displays the full type of the vector. \code{vec_ptype_abbr()} provides an abbreviated summary suitable for use in a column heading. } \section{S3 dispatch}{ The default method for \code{vec_ptype_full()} uses the first element of the class vector. Override this method if your class has parameters that should be prominently displayed. The default method for \code{vec_ptype_abbr()} \code{\link[=abbreviate]{abbreviate()}}s \code{vec_ptype_full()} to 8 characters. You should almost always override, aiming for 4-6 characters where possible. These arguments are handled by the generic and not passed to methods: \itemize{ \item \code{prefix_named} \item \code{suffix_shape} } } \examples{ cat(vec_ptype_full(1:10)) cat(vec_ptype_full(iris)) cat(vec_ptype_abbr(1:10)) } \keyword{internal} vctrs/man/vec_type.Rd0000644000176200001440000000150214027045462014275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_type} \alias{vec_type} \alias{vec_type_common} \alias{vec_type2} \title{Deprecated type functions} \usage{ vec_type(x) vec_type_common(..., .ptype = NULL) vec_type2(x, y, ...) } \arguments{ \item{x, y, ..., .ptype}{Arguments for deprecated functions.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions have been renamed: \itemize{ \item \code{vec_type()} => \code{\link[=vec_ptype]{vec_ptype()}} \item \code{vec_type2()} => \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{vec_type_common()} => \code{\link[=vec_ptype_common]{vec_ptype_common()}} } } \keyword{internal} vctrs/man/new_list_of.Rd0000644000176200001440000000075713622451540015000 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-list-of.R \name{new_list_of} \alias{new_list_of} \title{Create list_of subclass} \usage{ new_list_of(x = list(), ptype = logical(), ..., class = character()) } \arguments{ \item{x}{A list} \item{ptype}{The prototype which every element of \code{x} belongs to} \item{...}{Additional attributes used by subclass} \item{class}{Optional subclass name} } \description{ Create list_of subclass } \keyword{internal} vctrs/man/vec_cast.Rd0000644000176200001440000000730013663775021014256 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cast.R, R/type-bare.R \name{vec_cast} \alias{vec_cast} \alias{vec_cast_common} \alias{vec_cast.logical} \alias{vec_cast.integer} \alias{vec_cast.double} \alias{vec_cast.complex} \alias{vec_cast.raw} \alias{vec_cast.character} \alias{vec_cast.list} \title{Cast a vector to a specified type} \usage{ vec_cast(x, to, ..., x_arg = "", to_arg = "") vec_cast_common(..., .to = NULL) \method{vec_cast}{logical}(x, to, ...) \method{vec_cast}{integer}(x, to, ...) \method{vec_cast}{double}(x, to, ...) \method{vec_cast}{complex}(x, to, ...) \method{vec_cast}{raw}(x, to, ...) \method{vec_cast}{character}(x, to, ...) \method{vec_cast}{list}(x, to, ...) } \arguments{ \item{x}{Vectors to cast.} \item{to, .to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} \item{...}{For \code{vec_cast_common()}, vectors to cast. For \code{vec_cast()}, \code{vec_cast_default()}, and \code{vec_restore()}, these dots are only for future extensions and should be empty.} \item{x_arg, to_arg}{Argument names for \code{x} and \code{to}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \value{ A vector the same length as \code{x} with the same type as \code{to}, or an error if the cast is not possible. An error is generated if information is lost when casting between compatible types (i.e. when there is no 1-to-1 mapping for a specific value). } \description{ \code{vec_cast()} provides directional conversions from one type of vector to another. Along with \code{\link[=vec_ptype2]{vec_ptype2()}}, this generic forms the foundation of type coercions in vctrs. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } \section{Dependencies of \code{vec_cast_common()}}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } } \subsection{base dependencies}{ Some functions enable a base-class fallback for \code{vec_cast_common()}. In that case the inputs are deemed compatible when they have the same \link[base:typeof]{base type} and inherit from the same base class. } } \examples{ # x is a double, but no information is lost vec_cast(1, integer()) # When information is lost the cast fails try(vec_cast(c(1, 1.5), integer())) try(vec_cast(c(1, 2), logical())) # You can suppress this error and get the partial results allow_lossy_cast(vec_cast(c(1, 1.5), integer())) allow_lossy_cast(vec_cast(c(1, 2), logical())) # By default this suppress all lossy cast errors without # distinction, but you can be specific about what cast is allowed # by supplying prototypes allow_lossy_cast(vec_cast(c(1, 1.5), integer()), to_ptype = integer()) try(allow_lossy_cast(vec_cast(c(1, 2), logical()), to_ptype = integer())) # No sensible coercion is possible so an error is generated try(vec_cast(1.5, factor("a"))) # Cast to common type vec_cast_common(factor("a"), factor(c("a", "b"))) } \seealso{ Call \code{\link[=stop_incompatible_cast]{stop_incompatible_cast()}} when you determine from the attributes that an input can't be cast to the target type. } vctrs/man/figures/0000755000176200001440000000000014042546502013634 5ustar liggesusersvctrs/man/figures/sizes.graffle0000644000176200001440000000653013622451540016325 0ustar liggesusers]ks ]kFoN8& )" c3B'.[B:=}Q0{c?%h?'_ޞީ ZPa]Neu-X.u{(TFܸ: pߕ岤`TIu藻B/X]N@I #Tͺ:foNTݿYN:xOޜ+9s=g:)rIA/2䴼):TmtB1;m qYpELPYiժ,'EMu.2|#05_8Բ\ur7VNqŷk)sawJ+s6 }" vwN;^s^WMQy{L[ܮ+'[㙺i "7A۞9[hʑRL"E,G AÁT,cXpxWV,i"| *W ?U#sjo<-UZgnٍ%?0_nGW5i>XZkpy|ȿ^YZOJ}JZ3&[;ip 4DWNDYdQ S;@uPǔ1OT#оzʅM>h!luw-eWhe^:S|(:/QHLy/ˆJQT8VDAlX ]gLвX΢S1^gкZ~ )ۮL+8%EpY `p7-µJ]3vJ.m .;7A'g;1LIE A) /W?Q gВ$ "ا4Ṅ {5HTOiS>l[Y3Ӳ {,HKj E\@]QWjfr(1& 5 R((R+ 3<<1"2/& ߣO#6k<> lA/Ix=> ;pˆ5c)F,DܗT_KPgqL`&3QBd~/=OoMf5ѓmj(JŸbOF?'6>Z !*-]M^_>H4>la+!BO$! \ӳ`)䋊/xW3E ~6lDєXه B9%X8SoJzkwK0B^=}xU \^4Pom|*ΐnvt=ӹ*o ^{`]us=ٸ؈ "H$Qڽesl82r$VF9/A5UYnX@ý׆,{9qq:7] K5!D7mTW4=XQEPEW}6kj#m5Tu5?m v!jq˞j@^m]uK}n2 jOF.˗|WTn+-C>D)nŝІ"zhUHaFؗ4oaO/zޒyRɌJ-f<XA=PU[Yc!L|!>B|vK"sL /-*EW«dlQ-*EE_e-Ȓk^2 v6]Jx,a|ͤ#F>GH"=Loߦakm#2xz8k&>%GoR_$Qh !+^1#Gq,b1@+~h+=GxlڋtJ(i7 Ǒ5mABi8_{F L-٤(KZd(EԪ$/lR}vEl?/Yb8KKLI'dJ؊e;pB0!tN6Dٸ8"d44a)ÃFVJ%@хR2Hzc.>C:`ɶ9Od]!\5=չ4z*dž>DCa:F.9+Ta]8"~S71T|eC2eIh<úW7ѺXH2y|w?":3^HӸr<F|vctrs/man/figures/cast.png0000644000176200001440000003567513473164157015325 0ustar liggesusersPNG  IHDRTTSsRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$9KIDATxyM.%".[I"RT(6$Z e -(囥URٲ!!m,Bз|{?9{q95y{f޳! ?WdX" W;E@ъW_m@D1D+^b~!Qx[@DGUl! WE@ъW_m@D1D+^b~!Qx[@DGUl! W~z…7gΜ &r?*=zۤ=q~%τޡykAط Wdm֮]q׮];o޼a  fΝ;wyK,9֭[Z5ΝrJL>c\ ,[Hw IÆ ر#㏦M6i0`@z!t2nܸ-[nz̘1i$)# O*>_:u*e|>}o~!%Jl޼w$eWm ,YǏڵg5GYr 4~ҥVzwرc}~< ЩSM6͚5M6oFڵQ% On! w I nΝׯ}͟?׮]ӦM$-I1)r!Fboo_fM2)T֭[gΜɂy*! WS&rСCY+^x…CGpBG}tƍMÛ޺73%c_nML2_НㄊsE"xB x|!)bLScꪫg1{X*馛q7bd S4(_5kH\rWv+Y/HE[o붮};vmNJ}O4|G`ӦM%K$~x ѭ%ߟ?8$EmB-=z:aOs0֪$R@@b )Z*Uq>J~{!$GUR!b=ЋbԵJ*@@̿zA@VIC/B :S*1p%-!Cܹ3X $cɒ%:ur-?0娃=ae…ո@E*2s3,[_K.}r!G}?A*U:,viXgҥ7onذ!qCtbݶmۂ lN>SO-R!SҴiS%\ҡCvj*Iȸ2(V=>d5kb PBק?&/C~ b~۷k׮X"Ae WKc=6֭[GW__hıݻW_}4`~|^a~wOIB&ʗNg2B+V0 3p}AL^6 9?FO233_wu29G7ָ[֭l28Hq' $ߘLuA1X}.D^yD? 3AwF O:?7WqhyAƍQ `bge?ߘI a8brqe?qM' f̘a=?CFxdxϢ{c g}QG[x& z'x" "CW5uT8r _m v:q{^K.]t$1aE!ugA(F7JIEݾtC۷ogbč7os;&yG!/q{s6Cʕ+e~l 5j`+fM>l@Oc.QL/qY>O;w.zn裏"f*Ȇ]^#'fҼm@(Z|C/w}Bˆ@DМ?b 1ڀ(bV-jB #Z* " h{QիW3Ƌ9hРD/b|!R[qO?tڽ ?3PJvl9;ѐ;%{Üf|8BÁiӦ@ 0"~ #zpK/弐11#RG>`Ǿ`<!1(<,q~f4waGNmP:={b>|8lرcU_/G b@իW?~5Chb3h^~RЕW^i)# B{ M rao f9τ@@1c:&|,| Ӓ3jϙ &3jt~@B~9d0ztAtFXg \BPqCGAFx0s5 ]{cZ)ゐw}jٲ%6,D"Xv#Uݙ4hyӧon=Ps e{L3sL. U)7k N1Ӝ?ĕBьMx9#ưJ QbUEyG4h5k9s d9)pyJ:^}Xf'8ƷQFr̉,t]WNTDI n5o/3cߣG.v^fiK܁[Gy$m[:|݇q ¹LJ삜 ׎ߴߟM;>Pr>ϝ/* q6JK1|&M*!gΝbA0^y@b`"{0?ٱr=B៵ &py ?v}N]6Rl[JO6,s. s`H VU s/XZ]%Qb:di\M8c=5#E$9WvmU^@7QkY& }6Ӝ?g5xmUq¡`7|!q)֭6XbqBD @l8%h0hz_~2a! LYg1Ig26iҤjժ1[V[b@+ENl;>q~[L1VnV}M-8ioF9T|9s7's2$/ܹ37_{ v4~Ls]C : _q8J>RY8BU^xVZl#|b3X#B>gW/?qԩlFoɃpt$gX9́r [.Bޓ|[ntڻw7xÓ h PbS:sG?|$RTo&wCRiӦ8xWȟ?F]k*YP_tޝN( 1G,qg&*}8_|p1YƍIwOSc9 Q4{5^{-]Oe05ȟMR 7j]_Z^:FLG/";[T?E JڶmyguV58GE'x"fP7xcab)dM ;Egg3fmh.ڪ96ZJ+ǏpTQTtj87ie/З_~AVVL?^zI8?hɿ+ G#/^b!v%KШRfX`gÅp¨ ?ZO>=*0s JK" ZǎID_~1Q#=իWF)Rvꫯ|a.;^_b;;8l߾I>c|ߩP ҝ\MEs` eX.ƃ:IY&eTaYQn5`1wd*" _*c{̵i.bCE0 kB}Du7 TlE~=L3 %oxNٲeZwq!H1 j'xwއ[l@P&"‹~ٯ[vq2%?;""h ?DL`'C}` R(J L/?<&bW1\X)GX֭e˖ 3תU`뮳7s<OI > }{xvȐ!#z{v>rGV1g7fAwsμ#?&^C ΐ<2(w¸ݵkW4tZSN[lXI!FgJUF>Y;WLaVBeGGoίN8]ilH&,~!" 0n?~4tץKE/ .>c7nv\P=&^fΜ8b!W2q 0C ,]@Iep=EZ&MB+>oذ᭷ڵk W|^IjC%KZSLRm0&iT? LZO9SO۰Ƿp{(i'F?b4 @xy=>%1N@DvLH]vYiX1s1%f‡_|nqOccO/"{o 80]ʰGq^t琁cSp(|80'xۣrEa9x8uL̐yؓntu1;An X:xJ^̂6visA<DǤ[戲 mgqC@CSNEVbۑk49kmn͌Ƈu], ڙ &pDs%)NbR'*d_=jNN%cQL]|'(0pa~R2H׷o_:ܪr#s[ċ"2s=}9FȨ*_۞DX! ǜ9s69T712[xfUB ]B>g s4zmڴ)coVn+096׳fN*?餓VK/GU -#cs7d訣P 0R9@8…+WMcr9W=fO?0Q0 $Qq%J>/IsE裏.ж<|G=DDs=G _VRG TK}m#ur=?;?\/ 0?'9K;zk7Q.]Xhٲ%?iҤP%2@HkÃ܀#y7o^A̱+;<0swx9s&JriX"W\2NJ#ؚUm槉Ϣ^*y(3 D{X(Q w1v$ԧ"EH2ebgʮ¦@2~Z|y8>7U ` z˫f@N2?&.ex̕XX%o߾=PD cƌa1,t"r K2 }yUȱ|5`Gb5sVL!(XDȥ o TC\UEG+ v[.{ɋӛ]L;\bꫯ+Iq*^'LJ.&j*BK/r?Spo-]Gcn]6Wٱ]]ImӦMk|ڸq+)`.`wsQjA9??eU1jp5hs'{l/4:Xbl6 HjdI9..+y٣@tڻ 2uqL2izz"xZ֜8ulcz?C5]C! /0bGB}#ל lM'ra% x0Y 䏅:\RAis AԴKx cuhY|' E2RɚU2kgd+vOWaA:AVjjKgT?cTS+gi?'xg۷TQHGs'K1QGy+ GsFsښHŊYi{>4`(G @bV@:D1X+'!(#C@I 1CrB ϙ3+T b9*RuЦuk|'so38?qTp1??hРoذaÆJ,ט)7)=7ons,W9\G XLvBo׮]L 7&~m֬YXˠP1qSD5=N;\H^d4@߹s' +K,iܸ1EjTaqOΙ0c.Ŧ7FRqBt> H36fL8>L/Ias֦d>#CI磏>ӯZJ r֝?9B0 ۠AAseP͋ŋ&;)Y$ͩggB;w\,)\ZT\l׮]oD*TeZ85M"b9.M֩SN.ꔦtR[c^y3Qkܺu+ի#1OΝ'蠸O>ĨօRUW]EFQ|T]joJMk,|]R7n ݻwǐ.7ܱcG,K+4d U \$$R!jzyR(Cs2MaiE@G?;ZKLf֪U$qDƏ?F?B Xm|!Ʊl '`?F3biMMvGi^_y̔>G.Xh`3s߆aD sC0iS$5x`fٸ}s1nu|B7?p,_y5З*U0j^bƠz嗓 @M41 ЙVvy (O)MP/Ttz.bsGn&>EA4P|~~̛ho 4$1SM$Q(;%w 5UR%3d~pԨQÄ10x'db;shH"痻@< H!>  m62&n ^ڤ@v/3 yj]^eg Α%R0Z 1DB 7ߴE^njv|؋M~5IkLyI<{=gRpO^3'ƃ}ex7s?c˗/X؀^~#Q׳>*2əv:ANݺu.[bǵtL'bŭj_:J(a qXy'Iv͚5oo`d1pFcPot7lK<50H֘LYl"IO;[;bW\T̟JN1pc"C1f:?yM|Et*4¦c_hт7B *tZp5tHtfNd 2s'.E͒ѣf4غC0VgIgxhm٩$19S8I Bm޸k8 3nwf9^ \,J!C k0IAZ$|fttdǃ,LYL13w2KXL>TM+&Iyp Y?Byl{NiEqYoI< L@lje_#sG”~@;]y9L~<ӻwo> f ̢_E5'esoFϞ=Am۲b"#+ RF~Ybdke$<32K|yb|M5RI00h̙Dz!Zn >,"Ebbzeۮ +`*7mv`pMSgD̓U˖-NDZ[uĨ|Ex 6f9xm'y03&8vX4&{Aج%e$,y\q5LV5QFFCK̆3G4(v猛Udm1|ɦ]"22_f Q2wK}PkDѳS̃P$Zɀ`Fa2yX3HRQ]veNF$b$jW_}J0y~.ʙX r+, SO\WMv&f2q6#SK"#>4Ob}.O{ C+J̉7&:`&t9nk4RgD̓ˀHm2--1C͞&"! \G mL[V2|׌!Bx>^*P3(8JA"JEBu2_s%(D[y ! Pe!'b~FZrJHxW϶9sc}YJ_2EߘŒIA ̑9#TH# s4MDFBT<{y9 JeDiڴ) Myq\p|[r,"]t9蠃L7x0;ލ)@ &e~Wsk5~1T%PbŊmڴ a*2<K/ 7SFX}FC2& kqlق ~3<3r^J*X|_!Xb~ \VŋsemDF;R3 le0!DM>&[ouҤI|| 70w+22ѢE Eڴ"J<}>PY̚5 z"&cCB j/QB #P*?*@@JV@<PbT(! 揇@Hs̙0aBA@x.\q qI"~7C I ҧ\A O8_~)I=8՗q qI"~g:utW0|l޼y0*{){ʔ)f%kΛ7/+Y+S!"i0?@ްaC͚5O;4۷z3_[.0kk.K ,1|[֨Q~;o_~A Vj?IsbI&1ĥ+W΂ӧs1+W&P_|q's1IU@^֧sεk6cǎ?+VVd ?ZyF >FXE{\`,ѷo_cdɒݺu;}ƽ={/&'OFM6(9fYu@ իW0(E-ZDg$.J{wM|lRrHiϸkCD YjBpdzj$gHhHx%KR1gCfсC6"'Eʗ-[F:uЯh *T`SCnnL1 iR0f>? ?k&;Dtk:u*3o#$B(|NucɟFm65d C@Jb''01sڴi/lց82>p>| srm@ZHC+&&휆ؒN919rsy(>]L_Š!TĸEuDaÆaߪZlɌ_\ti޽Y7Y\tE<̜i0l$~ 0+6ì+PjP 0%a}f?9@ ƌe*V4X#AP*|F %m?41Gemz!Z)@÷1c#|ur/+V@Dꩧ̔A-'LYq6ԗ5k 4ӗ]FwGcט]E9 '۱c5W:> 30K[fnT̢a׭[ǀo֢qml9 ve.(D1Ud?/0?r>܎ ?0eϔ)SAOI^{m2u`Y ٞc-w\p )Jȋ OHA_\99R>B@w?% B@@JiB@b&)G@>JQb&)G[3:QàIENDB`vctrs/man/figures/lifecycle-defunct.svg0000644000176200001440000000170413762413632017751 0ustar liggesuserslifecyclelifecycledefunctdefunct vctrs/man/figures/lifecycle-maturing.svg0000644000176200001440000000170613762413632020151 0ustar liggesuserslifecyclelifecyclematuringmaturing vctrs/man/figures/logo.png0000644000176200001440000024327413473164157015327 0ustar liggesusersPNG  IHDRޫhsRGB pHYs  iTXtXML:com.adobe.xmp Adobe ImageReady 1 ).=@IDATx^Gu/vz9X%↱ HM$@*7D 禓r)$@c H\{;M3|jH-o6kfϞgfM]/ؙ nlg og?2/G=r cǎܮ]̍\_iʇ*TD ?3?yȇle]/Ƚ^K1ÇclI>+lSF2Ll;7 =Аþk׋5j5͙Ztvms&qRXFWr0L\rL˃wfwؕu4;^ɮճ{2{5߳"WT* u咒h7[ gsy,*/Gݻ>ծ+?j|#hmjN?SꖕK ]n.dQClNJ)NKn/{ F0䏳/H]WnW)3}w˩S|\f\L 5ꜵ<7( oYmR)`ĝLfKWw-jpOfyVvHNl'\q9+.mڭ\`oѦpqVQzC !&PF`94lzrn 'Jىt@̀7cg{d^qW_|0Vʴ efЊ" dHe3?ozϨ{ɽ[ gf>psfD swؙ?1`VW%%sac{${ v`__6]*P@̀/;GݙlWP+ގ Rs2dΡvKMP 'Bf29z_ k8vj˔5LEtGߖ̮];uպo"/Zbfؠo:Si9'XA{lƏ"pܓ5q7L3_~^ >hXho_qET._U*~*`vX|Kjca,%aؐFᇥ84Ð*r vp9P3.}Ldfu?nLy86cr|SEexh"fd [!sOboxapeP%\_ِj/D eGʛ{5oQT~Y2q6[9c RR=ne ŢBy߈ ^eL3ox1~,{Pjjò<BٲWP-Jpaul3`x'xcT[sx('GY.eqQC !SŠtHXƱ;` vT1 jJ~uMP]Q`@J1Dx?Sck `ȥY`WڲE>^[̀L?Բ .%r׵aiX<ɨ2큋y74ha* s`^GQ;v- ;x.5~.stXBfq~l%K(o2|ˢ//2PsYa.<[g b+G.[ ;%\B5~r3t;VY,-x?V 53iƛMlLa{ vB^řL%J9{}ͲݔH]Y ;˷0T4qՎw7ԏ|C\a%hgb +7;-^сzR&! ca֣PO,ZH7;lN@rqO5bP6-3ɲ wo As9 2 )yǛ*ʇ}I0^4j4^3{כφ 9)qq@ H 4@zL՘K3712 B!>m<$-vEfT(vs6\) f+ Z$i. empy-WF6~2R5Y}{ Pd6Wׂ1QM8Z%m VTbI*DKs2N y HB{1D^!ЫȖ 2yl"@/8)~С/ڶ>fI ln:迠s9㟞e썗>.ٜ3߄$MCDŽ!wЫRD4UT]55C_st!8AV%j\%SBtcՍEuux*?ң|e~{Ip>h fD 7>*w IPx)4 =ql"F2VhZB.bG}W3ght 8#ߙ̢|۷_ ,Xi(,?!1^ΈLe-oϑP1/Rr" _:b1y 2rG?qm[Efre' w'ywte> 48,- P54&_#"6yӛaw,+RM6fdoU@O:n 4:bJŰS&a'WOņ?z3CF|(w~vpR\7BKIWߔ[:@}j9Zy%/~~oQq(T j4$ d$(2pra~5ՕSCk/U?YAfLK&l^u쿊aPefɰP\Mo8-PoeTJ(8M"U*_a/}ՆeBxyp;d*_}浥6Kw9lJ'V4x>OrdX20NNZ" -DžLh9`e GN"י(巎 Ú{[g0Tس^XwJC\Ȝr 6v˘.eޘ"C$\_\dg @SP@ Y͉ sW\g[l*`Ty~~ w5 0]-h.GsQ.mpߔ23`BI ͶgwXU.anu;.ޅ wwv>cYT/$^>5Nl',}ﺉb&),meAI54eՔ NIDr:qa/aޙ 'b[SElnNY`Xra+a 'op-O/BeG TcTw"@VQ|]+!tMcv>S.6KҀy,^o|{,*SL.x|yVs+Eq2fǥAS&GcƱ(6b,0dbsbzX]i簭ǏK#c70 G| _Zz{g+_{m5yj\j[HeB|2^]U5@+IXPĊH;D!ifS8棇=Mmvzp͖߽VpJD._Vv6zʁe"~(GM]X+ǛwL hP *m~EUO"H߃QzUV8O,F 1)pFF#- 4#x)mc~E]B<_؆;އ7qOLq 8[\ex B5 Y)\C߸ MۑٵvvxGNOY^>l sJۋ$Wwz0wСE s*wՉ} )b?TeP3⁐hÇ<0v}޳1S|N.i|0}5y>Ghuo(c|xFq%4GC NK@w Ģl̗y)YT]:,1x..7 +Rk^+iXoŁ`z2 Pe$\]IHČr|@OZc@K]0wl8a'{D80e];Pd|_/ N eoG~[h8x:JoNMӗKyUoU=ct6!6 7[^(ADIAXևĪJx袶Ip:> &2tE0^\wߎQ*2Y~Ql@:WTl%,V b4ɽPa FJᧈM 볘g{e&2qZ˚1P9"ݲE6m}Pw^[*gY^ Օt Nssix8+h&PRl$=j2~irM &pC2BPyD+& = DPz-Nx_;鵵1k+Y}0r4mwyn4OczΦL/=Xe_R05U5nP/4ϩy pq9 y+ŀ3,=2p9oy=K*^b,4ͰJYeZIk\Bb@>=$R*N{wKQg0yJ2MY!ա(jd3L"@N$GĮtuO0hVmځ^9I\vN{0,tC)e~9_ef[Mh3V !5^>lai\4M|/~qxtRH/-p*1}p&Pa; Ugz qL{wa(y귰/ܺP  T&ß(mJ0,R(ҪI(R琈!N|4 T&?ic|$3wu{9 8gs㔃xYm?ق¡?Dw+7C0lǴwC.ql 3m G-LA4a8ş HApֱ#c);sA=bGxܹ7Vm}Kw?1/hF_Ȉ;՛w-lSncrVnW O4[26^/ϷF/#${`|HyZooiG_;qGvw`ȀwfwlSl+l; xRv6XxM^6- :ћa0{E. p%Mg+}X:ʪ2RTx"ՉťƱ=3wir<' X,Ɯ#VDb/aPvViZ-kؿtT_FE3C{=˟r+~v:,d ;!2^Ksߐrx#r!\0+s]F}5 O<Ջ&|C p F=4%qUcb,P<\%T\m6i]1\߹~?|kxw~˾|dމ\ۙt۱񋘷x f` LS˂-Y9FА l2 !;8\2d\t.~\^4R{11)+HL,cҔy@/Q$Bu-.uT/ݔoh딷fžۗMcw| vЇ~3zwLf?=xWol.\)Gţ8/M7^Wբ")1PĊH5M-bؒ+/Yh=?f}}5cNQUҐyz7`RT~2\>0?Yּ678H(hqʰ”q/4-Sg'̧y| F=f_ЫRyb)ח.V.sy\}3l~¦26)ds7v7k_>];}]yk&]z. ؜rpウ^v|(W[X,Ɍfh> =Z"#p%3)h6^#u4)ѭLQwl4$QIz1DBWeRD^8Й|A MؼYMٺ|P*={rpOd-Rwf!ϮYЦ %owxwsuja8=;=y<+2^\s26?\bHTʭ-R0+ BR S<AP u Y)<? Jس=x0^L`؄-?ꤩGegfKyrqfbya1<=#? {,W]&3FssݢʖW+7| [Bf:ʤqV&>HEPو'L.!% h B^9v\Ɣsݳ=Y/kKUvaKA:;[toU4W`C-ٶo>g>gvac0'aDX5W*Ժ{_.9Z3l?&Bƫeoq VբI/⡼u>$'籣Ӑ)GFvi41FTIJ^b͙lIfQ{;-Qgjf~+f32SUmET{ m_ﭺo-Ѧl|DޙݱCv, !i9Als8d1]$B]ucAAex5^K3^+QkE"! )4u( :Fhф@.DtlD=,kY%=-RWWI4TĕȖ%͙K/=5,d?'7iS㽿_{}$Ø>8ſ^` 7j+?}Mٯ`I*)xu2Fk U k$-@D\joPV!X̡Ҟa+W`HfܹPlq+:*:eާgJj2fɭ{`yӹٹrKۋsťJz/|5081kI^-Up_wnzwAJ 14^sw*p\dA eT.F"$T[C4 >faZNI`pa0uYF5߬L"@^&.E_0jWlz@2h…m25=#ppHLQe^eU'W6UX,a,75^1\u Glqsm^ŀ+X-c{5m˅`_}sinZ'Qs;.N_~PP¾:"4"ݰR4EZ`@xn.V5$P&3HSqrZIl*N rIhؼ(^:X oXeCCg]Y/mM8szT 0R,0\Ǧm Iv?|LeU W5Nre3J[VKl!s4]+׾O 6>άNE/'lB)0[D2&mE%EUzܔ]'c߲EY)-F (Fva ?D)NRQ qg0nKnB)QX:MtD(DCeRULT6JQpu280aa38?&uC,6.B\={ M鲦|.YYͯafM\&}}y-+a" ɞQUcT"@.#$!6/g`e 2^]_uy޸Y,Y ǏSݔY.: lo^m8X_+AM?FN0dӄm[M;>pupTy;aHMs9zV]}k5D_ c Y|vL ±6U +6q);1*N*g5 'b6!Gwq%QzN%BXƒ`x思o2vlFӎqYALHPm۴):as(j ̅Z-,sB}տON['ޚىo uKff~o'+F;r*<$xbgy0,R'e6V7Έ8Q(`wgCR4H ؚL{&7\ Ҝql>jNƔGjH0.3QXN'p Lj^J1‰yA^+!ioı.(#Σz9cGx 庶RPk~{_k?|O>MW7r_F5EnUXbw Egeb贺+J essiN*A!H)10/é( NSb̅{ĊH9f.s$T,Ĭ\q!&wg4ՙokNx9N'W .ַ/( TJ7m,mzqSҹ*w@GB}:88sBC s= b 4J1؛ yh@SRS D#@C \Ц Ƶ9\'6Ʈ^4m7ώKǯ5wtCCYÄuq"io/0!^̆wzjD!DdxГϯ⟋ A!n4)էˉq{11AJ牥ֈ!T >GԌFQ&1iomАÓm  ioȩwfj`K:zQ RhzoC1VKgWcGiXfx`Mq@L/_,Ղi9? =Fuۧ!]x[.]t(4:Sd(^ԆwBz;UY)<?5A/V. Xh>7ó,`jiwvBZ[4^L{=OљYX&Z` /1tbY/\kYYڐDY/_lP*`:T/ v 2V0UoQQJVM.ߪg<;U]%@@"@`V姈RD<i~p9XNe u ҂YW;^ e(3XȐ/Ɔ81V3xwh6$b a|SiyԷuI?FqzrY}O9._?,]&ki7FZ/~4 NRa1$eL;,kV}pK$8@} | 4q :1$/ЀC&PVxy+f.|Lŧw1xD@Q8H-Pȅ큈]C.2l7C#kjB7/ 0!C0`7aPM:.|؀?3.Xlذg`%b~WI m R=Y5З})Fy砢0 U0ޯA/RI i]赚Gr:ݡ.Ei1+3l)|^xiP,P( !^\jlwX{zq.>bBkX#s[E;#.;1e3yuˆcFK<.],8F<;3t] ^@{6-P+SDs3[\q&jd@@ >' AENU'b x=bGH$VC.1PKr;6 M͂ eQY0::%gqĈH< `ߤ4e236MkL:Ua,sY4,;ʵr- rwLp|X2&3OΉH # /7 B^  ^8(3t\NQ$6/=>e6P1SD %ttՐKCv/]$MM 208.51!K֭iQg'? j;d;C[h,Z˄IH+l(u:-L(W]N6` : hP xq\qlEք% | F= OI󈐉b,"$!.[O֒Fiim 6Um.4+"~4?_a ݍ !jr$A!MfGPNctщE\0}%Fk6D*'a 9UQ$ 5]v#CLPg6^* s1!Lx/艆Bc E$E鋄jslr YL`,`83eb/j^,QN6.m !SP2C<ܠ+ TE^{YG<B{fsf(Ɍ  7*b5^*XEzJU, CbTgUbV&ZD4$g{ ՠ7ιz3c10w4ȸ,_ҁGy8yZQqd'q" 6c6l#-ecR/W6M7$9{QKN#+P&ur:?[SG"?BĀ|/DŲnzs4*ۻC=k:gⶄ("ňQw5ID˄ءh ~s,Y2 q,_ޭΫ' 4˜Y`8Yƛij>kEN"}h!yԤ6AOg`0o+ƀ'ZX h4,~:"uLwg \3$/|Lr*Wɝ@Em[=&q7rz(XB 4IՁf[ׁog7v-u>[X;i'}q#6w(cGCȗ xU-$46 HMJ5t\q("uq[,͗O( UԈE/&pT`8 X\Uynq)sľ~ [a8Ngaр Xc;L7(=9.閻~Pkc:9̂n m!8ͱ`N -*Ǘ ÎC-aV1O%TJW1^ H 4PU3#DGU k h )v GIdj}1cۼr e;p,ΐuU O9yr;Nb'Ak$LΨֲk,j_,iG;"p:֭eaCCN?1N@kXQ;޾Xb0^ϦsQL&ͥ*ɐmK3^0⢈(VҪ\^Ts FH(U__@r 4cf88r09؏i7ơ0I,d(Fefڕmyߏ-rZ &3fR !'ܕ'qn3?D6<1 ں*cABû) „<聄0MW$Mq+=pƱDSW:"ИcaXA0)9CM¦ă@^fsvV \^h (&ye[YZvlߐ,  KAݝW WlJp& >]Vܧ)H xLUY)"Иc0G. TC`/`ho'0,jzba* CH˝ckS*>YkW1vK \>YQZX Zp!~v/t iƫ.E4|]}SǒĥhDSRPCĊP9`X܁}XAښus3Ҋ~i4&4sә(9 F86=#mXM>qr\ڰ0 3E ZguohEolHr)@1D'zb*YDC;]^ ]}Qy n> tT{*+E'⧴&%!<,{=5˷@IDAT:x! 0Yݬ,ؤ!K]0wl89tlЫYb qwY1rN5 $놏{)[MeNY.jʚ+>l7Y4cp\6ŭ77KO(}ADW[lBӀ)# Rֶ5.ƭ_XzVˆ99 A0W2||uaݝ9S.";7^׫M=? b1Tس=0O$M`,_W-KۚDM65.Xa><-_L)-RCւ@9K ,Lj~x2N,Ф|:ݾJ|{ᤅ XOf8"L4ђơ$n@B:Z[:5eԽX 8 '`n j(Uƞ>\$`c)%Bl(o;@BSTY5r:ʈRƫHV#V&M (yb90Fk6QV\,kV,L3rXΩKS}_dr* Ra6mËhNa׌1%Ԙ9wCZ@ ~|>Jd9P3Fh1k4+XsB!@ՉE"z↞ҶhLZ{ڄ Bk[(%n9֏/ bD-QzKWbN 5r!~a)H"9M2h)H _p=jףe =wꅸLG[f ]*<5`H)'[Ve D"p!<\YXouMg*| <Ɂ|Рn$x=lζtlEyZe|J$Nf7b; \P Y"2lS:2X@P+`'Q qHa§r"$6f)Z7w6yUqg̢IZã܇+RqHps35xi@P AsV> 4Fmi;YȘh Ydz50%ӂmJ;:y |3 %wb*k\".-L3ߩx [bBjkҝQ|F' 4'u2:4"SQeW3=19zr[J>H?tVپit/i?}!\/Qsm kke֢Mv:`&&ǰa-q[U* B/z}u/w;1KZK$>gѨFYhog3rvٲy>/5&q<ʰ s`74dtFŃ-,oTwXZZak[-?~q,=wLwX+~bHYYޥn8 \LD`Z\?Z1 Cn>hQɌ2_:1Ǹa2?I9tr'k3w5ί2IBX'Jop#&SZ*پy;Su.3h~l߲Ny0O}A9tvD : μܽ :u(?9Gǧ(r>.oiy5^?<=lZ!xMrU=^ )c׃ f0˨]~Mív]q'.+cصh'&J5ڗ~Ԧ[LCxijQ)9;:!:*d$b5c"=UCfR1&\z^g F>u0ުcQ魛d^Йo<$hH6*aM9R\9"oFl)|MXdቜQ*EJL:,rh6;;,=uP 9Xl߸\A40I8KP e~+lʝWʥ=6H [FV06I w.YC;e^p6G %ҳ]1Y&?λt/ kJhCH/}pč:8^ŀG-pqjxZrRY)&С8Eh}/G|&k|  o]SAqVo׼>"_jq}Q&N#2G%{j"waN㬏q[b;MG/-К E A*DBg]K;>"-̫f8p jdz0P\LsxZ{J$g|w9ܣfbmB#GqfwoUۤ^>YԈ0%sQ? {^ErAy>Y%iSڮ]OObR?@m0S| F=CQ`4M(2Бҹmf<֬X,5}KPMRᩍMx2݊ٴn!__*ԾJAp`ͷ=s 3OxcDŜE ڕk7QB>7]]ڌ>pQ_&;nަQMԴCJSXv ?.Xxtfy14ȏa̩E:LWtm7lV9ٿfKٳF~!toq9rb@ZjGStPyhW\F?M8wbȗF9?@d||}-5/os`6\0,=WGe5pw';`Ky4ɮ/}, ceͯ4Qqjw|TfqMi 0LDCEg 0bc-mohF?f[7hB5vJjFxsh~j_K[dYDmy9AΞ7Z/_{wɾg0e7fCKexBW|ᛲ;1s) x»Ic_yxaik <ãhp^9rX#ܴ]z{;4mvS0p`HuAZ?o#~07, x :1܅!.{v}Nȯ?!Ԛ%/ WcD]t6kKpN~9瓇KΠ%1!37#ON j,] 땸];k_gIׂlj~NmW|.Ůl`+oY>/?xP_b>ܵo| jj~=eo? XQgPr?J|,Oa23فicS%MI[]\Ʊ’512v)!J$#NCf3iAP W lu5{OՀH`f}jԜZO jYݎZcSgNyUXJk˶MkU{g?܇>,+;yNt06}jhZpE&*s04vFZ.Y^y9l9#L Mh]3f??-j8Ub118 9s'İr^QQRA64 gצGbc0"wCe Jb pb _`#{MnlٲXAW0'35xХ( +y] sW7foHMf^&4'0a+ϫ/RYf~Vu |Lf%֭+鑊,YTV`l|LŒaE2+sy0g^19;fc7wڕDY*'b+2E7nEJN/}tbt Scُ~TK50֐AL5kt%`cr+A̲zRkCxPb'5^V+[>y!y(@fE2N1JczJSOta#UzļTrJDٴdgq[<;lАM33mڈs\>Ϝ_ϳ"㱃ҊSHZфIf:3 _{f-Ï@^[T1H4QN=}5r)?6a(pV?Fx6-;mwˏz`NY\Z&] 1cNLT' K Ӛ9҂y: y.˓>zOrd\':|a|u/hxy4(G90D|F9cw//߂yhlϡ4ޛ:9,o{dGJ9՜!YZvtuȧO~8nl։lߺD,k2FjbqQ2S1a>C›QxH)4ҫz0lj;̷` '/T^%m"݉MP买=LeJsMYzdBj4%{We9~O+/ϡv{ZS8s'e|6{E 'kXe^Ok(f?h>{[L~ԛ6T}.]酭ˇ+5 &0.\ĤiUp^OʿT4ao:(GPK6?ֈn'3'3l[3_7Z$cm6[.Klbmj܈\3G G|t 3*">P;&я>8:>q_ܥE&0=_B"5bG\34WyCTÓz hA CF4,=ڇjL`3!1%U€Y]ar9pMY4C1y_cz*i|<蠍b"~8-&quPo˯[GQsc N;~U[e*,=,H4_fJZdQ sq {O8jE MF`~ٰqZvf0y1z8dW?֦aQ~+6(ɦ%7^<*=3}i/, Zm+Εٵ=lPY4`"驂&O:1*47, 0n^N,8()L/4rۤBs:>+0t}RGJY ?]McA>)Oggw)]h:Л_#7\Z̷fSq+Ы6b1L]֞3vo?0m@I8qzӸ0Fh%՚a;J,bmTT/|o|d8UEdX*:ٍ3P|sbqJ޼XWΠ)]tB1j<hKcVмϼ!Q#Ljz°IN)"P_Q ytU1 z#&lZ,w>~g=<3[թ:9`H%J*XN뙳 ;>{wgƞq-4EQ"ŜAȱ@#4:]kN$OVM}m0iCd?ڢ:X0z2GEQF{-Rdz _xlS) ]=1{xz %ǚ}EGY QAmY1E -=vBK;q򺝺K!T3 6A.  dXqWv`t֧BQ s\sݼp —F[+pũ1guRࣝ;V8Ėjb rN榓Dፎ 1.wsgPGQ!(#YV՘QQ9w]ǢBD0('-*YV DD|dqabJ|}kfGnrk/ge#Pßj/_.uJOEKKsKR818Ije>Cl%HMIK(hNjifRvWX)Lʕ{?&~?"AD{UJw,< WR>kkD |k#F?@x!fKY>Uz'Ѐ $x;+my&cX^BG+Eޜμg W͹ȌƌNP2YQF@AA)MY0BG*Oƥk -LSX/T 9Lt _`|OKpD@'!b#b8Kml;{~Ukr.\8fCn>y[A]D +]HB{6$건x{!^J=`As?X'?J斳o{oVaћ\+ #uɮ\oq;FXER(zh2 _fb>6 ~ܰ{jK)WKE.Ӟ>rJ_ ƙGKf^ c4Fx23,7?|S-tuYZ4 8i;i}v+-"ͤeaG}_@ Bq@+wMʇW[YsL9-qV<ڢ(3Ld7p>avALEEsH`dtk(Kuu`߁BJ_#(h1?>jb_x}kq賭L"m!r6A@[fbJ{շXZ=Z'i"% M$潞}y쫛-z YR$!E_lv , 6~O]) "L5c텚X+9?aiYVbil'DUk,ﳮDrUKl^xqoh*?TVdB 7 xW,q|3ǭuI@[8FtР` '_Wp I[3?ؓ.g'RBrQ$i`K+sq&ܕ \h`s>;Nm=*ыvفk2Yis@IU 1 Db{A>6n,#-<% ncjKlצ $Y#)Ysvreflmex`ΕD4xV\"v{u%2=zR+B2!R7hi—3EoEU1pbxA+;zf(?Ӫ_!"1] )r1vu;x&v6 a-K Tg۸/Zm)]Za9VV*hLBU4Ag rm"҄Xz,;kln$[ȟaVT^bKJ[&b\\pBMDm%MEJ'|{q^! WKJ1NtF]%J #;̿4*qc'2=1: rC݂+$J忎R',wA|V Gģ[GضmK1Ӈ퇯c>aj 7ZXm -T7 p$s1y&)Ct`rwF껳_EoE{^ T;: +\*IBM&X.g yU7k$PJOr*idU/+giЙݪ2gsSLV , SG*5:mnIDX_oLMk}@+ũ)φТbr2mVQlK 0'CVVNW.$p:B!u&4adgdɢ3T͛?󙏥=2!#G 9%D% Q^S(Ĕvs&B͏\?͇P«顛.YE%`'O]4j둜 =fհH&! DȎ"ȯSMyǐ ],dݕ4P+sq@qM "`1kV >lJyQT='c/֕N.@Kfb wәF*<>TgUS0JvpE{Ռ^CqJ#d.V :%E'u+Hp.?z=oղZ@赦ܼdVHRr% *4IbeȁVF,ڽK)^؟#:dd^M߹?O}/JB:KjeID'(x0H :1af( R-ओ`w7[Ѹn_ɳ?\D:jMC - {S߁Y"ACPmL.BkG5 /Ԩ[21RjW/_x%M[e ܄]#lb}KG`<)MO%.x5Jj}yUWIא 5ZL "3|OaN iań>Y c$go9bF>3IAeƒЦu.1?fGo^-b+:&x5eNL""aNO 79c;^aM1֬sT [ hD"Ý q [3ߙܳ9|HE w%5&Pi34r~GUIU ~m_{TmEc+tV Ac˷UmE>To%]/`df}ݽDchfr{pFS}")'ڏ`G! M.k >Z@o%4uMCALio˶oXJd~3PL E糘ow.]k44o!|ClJ!I<i]+mP(EtڎQ+Nh%@ `UQmXLJAWF``H+G.3qxg"` [gk!뀈bT ZRg])&'Pݶi2[Vctq膵̤"C*簖79 ̽eˋ׎ -Pӆȋ={Ih59܃ڇ9"mz@F?p,zsܻshdzOc>k? nAhnaGS얧*Pe~_K0A,^߀ızOl-mPȺf(Ik"e|ҔbVvND>iϞ1sG/ǹ7H2riy1BFRfn$ڟ&tֵ̝GPfZ+}vOy]pr+TFיr}u-GO;M=H=~ўyZnHDt@$ڧ9Lj[m|HGگQe؝h#Tf\?F[AC+ {H5/,‰^[~ζXnW~x~[m׎JgNI+'-әΚaf@xD`7 NN~1󝹯=>cFXp?7~+UT`_ڟČblv&n`Tc} Ԁw8R.cT\%Ʉ)}>j[<&7 ~ףuѦ0~FȑϓNYF78s,Y%lI͸bIMQa"r)юٽVPG:%`ď04UڈR?âAbN[--f.;F ((`g{$1>pf = gG]i#6 b JdU@qB GpYgedkz{}F/]?tzg萕3vnWJ$K!9uߓhq[W#K,}*= PC=F&mBIN_?sĮЉjy}kΥ;XuKwB%xpy4Uqt髱 }Ӥ'dB]3֥.k'-_y\]AfE20s,t5=mU } 0vA`C_8YN)(=+lj:舖Dz Ar\GD~JKPJ!܈[,HhB@v+Ht"@WMQ(vy`my )D?~cc=Tm_QJ`+mMvWn={;G~Gۑ#7; #@91];MNfr; _-r >Zbn/HtyEeDrpڢ W8M^uK_4ImNX(.Q̳8 uVi:<7ELG '^NiX 9f7E 9VKQ\xX6L LEޜ?LfEKn+KM'rf?8l?x{UZݰT ;џh/jb^hT#IٗO,M<̢3{\˼텯f' >Zbn>"YœhseetS裋g,sj9Y:0 Z)'kF7-SW=Gn=a-D?SHNĨ!M0ҊRlJMw+7>x"dW,~LJLv;k"K/R %:>oﮰ_̢5- m )1}Oitƚ\gQ=eN 9z{BwJ{Hsםs3,3j1yNSk4D| rĜlo#-ÅL-xk_|:G@ZoL>Xqrj;A!&]>;з1=àrUOyu8D~fRP7;;h_opJ84T:ҜRj/44ou-q.WE&<&sGO5|_ ۱c z |ct\v&(@ ى^ۂ\^e9MvsāޅJ c_m]m>h[eUE֗q6vIk]}@% $ ơ-Hw'N|}>nncκ/3miN)R4ٔ>^|9(r 1.ہL9qVC8m+J0txV1"|eH~6+g5|?($` {3` ]$$ym}i=tO ]a/h3 v&Mu[Bͣv|#Hkr\n.ă>C; =d$ظr94OL S/+a|ȷ,~H߼ɜV&DVY}_ѧn27MQcU-b `{4`bB9^aq=*+3qnWN 5%WNhAL|jf~ꖃAǖiM3=Ba̝4p}} ){Lrx?}u- Fsd`p%UTerx- %9VBYֈ0s4B m?E?7ϼo *]v'۞~55fL/U qw)&`"pn2;CzODR*ڇAQR$dRNg5aA933 h)ƇIh U}%v{.x+ z 6G.F)BG}/H *+ceΫ mmik߸w= _=c|}c67\zxZi}r c®D"ʤXl{NeB@3 FM]~x܎P=LӦ|νqW#ơi?'i1`۶TH@Hޏq>##H-+0*WjPڥ$p"ns&|Y&X Mzˀ=6l(Wo=' i*O+zɪcHĒ8tO9\ADY +Xc 2JTo|^?` 4]z~$,pZt nj#a9z0&88S1iu4PVKB,\oq967DrcakϿ|:{{=>[?&q6@04!F &C'smfIu/gz#BȎZ_U 2+rLZ_ {h](W{q^x#oWZmm$)~pi!9Ms[Fe>G#xUC,2I8j/6W-Nz3D(?܊S%rLIUM?NJ '(!M L^q$ReGPꓘ:0X\X ;:5wY w_Š%eUmak<1Kʃ "H#&\qY j4Yuu땴qD?(!$<)!*ɜp1YQq6N:#1_E{̾{WK|Nl0..-PVg{$=QktPYp}4#HbN^9 Av2h{M0v,Ku(~Z-+Nfjy zLCd@`U<"A{+,r|,De; d!CednysBs͕(w8c`e{(&Cn %$OvZjliUmXiohƿ1ۂcNܨ_0CRϚCqv壏l}BYBo,QB({h#il$Q(=E2&!a}W7m<Fxs_@G_ƞƢ X\DiiIo/uK(U^~op S,WNhIc}ciwO^jnD\ص;=>X$~JU3%Kpެ%4by>Fjy%SXk.Z;'UPCpb/FzIJ =+XG@\"8{ݳVW@+hKe3xc4`/tXgf/K)?:S }޾#t6R=/>(-+y-J<P+# \%b&"jѾdmH.z#$Q@s4m]_XMMާWO +B3!":i; M R ) J#ͷdd2XGJ1O%QnuO'Z wqΎ^x- )"d?cɰ旾jV于cԢ>˚B ,V\Cnr[J,w4u41#B~H>Y)E>֑۝ۺ\;KsM_jMCƁ1S& {Kpff"zYBvC]Kfe+3_ԫmܓ세F8lC.ۿ# -qA m8L4o^k]s 2iP b}~g z|CY%Qub I E/Uyܢ)ELh$*wR1$j=<{GcS,k@(Ec̦  DVP}s`L;*;_YghmZpӡ.VOA(FV'3څg`\ L6K'pư2 z"퓖C[%x^}[RR*dۗ充IiLUq[O33%O (gmD7,WwE]A2`_}\_`F>,6 dWQT?=7Hx'4*sT4r|}/i2Q0QQR^&ۤ'־u}n6>SEX#ʿvS3N O:r9#T|MT\} NXO"ƅizڏ߸Hw"10Y]IUZuN|T ,ץ$XX"Vஉ8+!ؽm ~{ ve>c:A0LyeW?=j  .zr9>V+XAr)HB~>F@g%Y]=gn٪-`̂1C|aI60M=YЛ&vKhh#JBc%R$h33s9Lvv8)jj"߭iә ,%M=-dit*ԣJߐIQo)M*O󯝳kĦzA{%Q3?ʫ}8~iRUqDG>۲2K5Nofae ȍ >`bIFz1c[#)Hs>9pY6 H$h,M}NwN( _>a }zOL_7=N[^sqЎ#.N2u%t˱j-}ѱ&9vHOj|]z=d|9η}HXIUK$CTBIZmYI}_ D1mWG濭x"=Uj2^~/j|{hs^]QiJuqM]8_ ֞@(HT @JA.punuW#jJ&1>Lv f]˜Api?X7ZL ~B}+rk:{z7/ RI(xD?m tI*%MV#M96LIIoX8p/1n\#0 iy9htLw/͑It$1@ƨ%b4Y.]"@U1S~ˡC\̔~ZakUҾdkIj~ 0VU4_Aq Fcx)S>sT<8 k\hx@"\ EibL3ڝAZY *])S[L%ne:+V;qg%LïVTaie-33β7[6a$LOD<[JFK t@еwҗ9[:a#9>j)뗬wݵr)̍>R5ӊ=FC aEP0׸S`AvB :cw3G0H५npS%cNR@dEAzD.o4t)/%" I&fE`JHt#aWGKLxhLXk[MA5pgVŘrtA.`Mg`sJUA nq[j&EWл&1ˆ,m6_7lZyEuK HNosBW?|JN|%0p#aRsrPd#.ٲWm݆r\=9syDQ^,2Х'n؛'Z,ޗq03-,,r6!{\pq!FnxajѯOA"O22Zڲtq;q+'lDr]ugaqp,&%T_AI:8f9{fq'`Ͽ[9.LJAw,E !%Pd]H֠E'8260.! 931,N–#%!H,_<2bϭbUD{TۉwULjI g؆uevj5[ 1A9At R'vClkһu] Zr9{6.|sin,n?0>wpIeƈ$gaE[5kDfhb8FVL|]eP ]WyZ{f:0qUas,3^(,?69FR8m<0k;Km%μ~ꈱ r嗥LWpWr&ebB,*sT'c?ݹ>0` θu]JxI)I6&2t2(J5YщNwtYwg/ x_[BBks(]q0 &"rk7.~wޟI:,C/aJX ;"^^YT7G>2Yh6I-~ic9^n[ 4F`6pE0ڛ~kg Y-LV"S혶Hr HނBl~`}f04:b:y&t +Qꡒ)h_Nmu^%EtJeNT3@{ QC-f6 3xLqr;Ob+3IvӐZu. $Sߋ?i%BAgT1g/0 B\皀Wf.8+rV2(O(=**+vۗ,q,@UE)RwP#35 B^fvE;#sPXR8 2 ۊ[xgrg盇FLSOoLG,)gJ7,a+גEM?D*3Soˣ©,x]JGڋ}`Vնu퉷 `ZԌh~n#1js ,e4\`ח9ЬC׹1/o/Q s= zbThk28 CG @Cf]DP=$r:r`ֺ5%s7O01(oB7fTEDA c(0&ߏ&.a=Ma;A<=L*栝O3 ,zZoF<_yB}"Mq>^4yj b q[CRt|ǵH}*$O>_j]U%Fioa`.>< -eB$CQl4Z(P!͜IBeŅrI&iKI z۴iQ+߂7Ԋ:WqmM.tIv=DuҴ㓶&1tOJ֠=yF9>|Ԓx۸ĠE!g}OEkDvk/Hp]nŠ,/ǒ8?zBӸC7;ӼXwa/oeZ&0ʋ2>|_W 1v\ V.u}!;?sw L. ~ t+| yȅ GWф_:S RasiԎ{!ye 3jY6Ms(!œbY,! S @_?B,-V\]lzk2?<غXLĠv 7:CV Vzow ,:o^HrK,6B}_mߥnLxrW=~ܔkvVY9gh:0Vԯy"Ki)@Gʙw#UDu8_N+@shLJP[)Z48+~C O-ɍH!bq_zS<ވ8 p)IUPY +jw%džk/wxIJRD79[&6_xݛTgA%N7Z[)=,AzS4 Ubdb%ǁ`ۉ}v|Q 9]97 ~0AjN3 ~ӘX cZW-m!^Z1g$6A;Zؙ,hxꅳJZW /W ~;5ku#PHF>T~]&{`+Vyꉶ!=uVݴ1Z(PEDf"{s)ǎV!:KGB!_YA[-*OUO޿brSz!x62Dí/M X3>,p\0U pLRM17R(ShQ4}H5_۾";kIEF8429y!Eg2/8WTM:M7QEˠRC/U@`ctLQA EJdݘ'h/.ΡY $౗$[d65ԍ=0Pf("PF,밶z ϔW:\w:zl!3)%3 Gi p-/>HEE}#ӖWjk/~6rCq@^ \hm-ۮ]33\ߣWa{(6Ę1:M"%PK7j{`HpoS Q INg@k',?ɾJl_--`O#'p=FFj߽8a8/Τ>+ =Z-A_ꟁ6Gx `Ol%&e;IC&Tc+,wtOQf~rfIveEN@OMRPp`''+8+T PCMN 4`,/φ3Ӷ۵j+s'_nju? { 5v{mpfwT۶m, vS? $0iĸnO# j/\kqfJo_a՘ ä5%lfgnׯR]tIVQ?sh̆hp+=H:;k6-@̡a-rŞzrƁJ\VlQ0`,v"s"i$ukHw0~jSffj#K!1b'G-ڞplzwdUnߎE'ңJ0V"ٵt&D"E8_ͥ|R 56tY# ,˳Zؙ+X#rwM0QH|s@FJ oQuubK+03Hs8u~JB{@I7PG꺬9ߺ (F5YK7ɤ-jRN7;rb=y2l H0oCky~N ۱}.zsFWYhT?JSJ*#%sBn4 qp|j >m #~RX0l['p,EM+ȷ>Hw %.۲pOJ=5< v"&)5c&wr!#rRsa=U+_Y+HJxp4? ZG뛐&nW7)'_I ΉbԢMĪth+-JDA?'X7?lMS BC+y6,2KЉ:DM\eJL~|~Aƙ44?==o&(lwo{¾{3grY-r9ljyUeI b-=1JvMDpd67V"m`,KNKpkƔCpj֧~i;xTӟڰz"IUU(+@]mgyDbM"{K#gϮY(a)&X #<"ZX*_}hwۦ= ̗1ejVA?Q&6 ':.ME3V $Qo~fR9€CEND3y)VtG.$P\-zK,U!r Ji[{k[)g]4g pCwJE h""I XG߄=s25۪T]nYu_zxٕܜn:pӾf5Qã=s .." 0tym/@3 57^kae,v=XFkܽR2s]/-o [kv,cwϊ s gO6a:RF`Pq\ Awo6Ƚdc=˦QVbӇ:ӧ'\ra}wEJEv/qމd4xy9apDj{$3`촩dI|dmPgP#KFBӌyr'>+1%%zzrtw`4A]w3R"ʫ (-?9]E/Gi@\yӥpP; b豔DfXZ&Pa9q;XVgZ@L-O rMeiA:d˗䚟ʹ&j lT{@;<,ϐ+,2+kVOw;/^ci\%i{gufdO=VUivfAh!;L.2`P{tI#gF 9`u;xT|+K)9 +H|jڪx4büTs0[F;0w=iu> ߞiAtS,sU,*"-7}ͳ{b]M^Lƺk]e˄Z $4pzVSE1,ffs#iKc>`IpU}p~0e3@*x/`n"6  aNH g'g(6zQHX/P(78^& O\;ڷYj(#z0ԫ Wd.;Hy?[m}˩rJW] $A:|Y *E١A3wLw:\ݸ'ߏVFXT#KV|~pUR5r\VXT zq_"|VJx# ؉B*4WES*^XX"_k# 3 &@IDAT}֯޻]jCo8cܰ>`ZZ9:M'pWL R\I BPlWl&,"g1p])0ɈRL'896D}b |?|:bc_|2D] *h9 瘸FEf4(t23@,6kKQ0L4ٟ4 1RDq)^ցHQAc8^>cߦxeuZZUaR M:w/AUIBHGY?&ẵLJ{4%A4o\SWCo ?d7'y콃v@e%\ NaoLfϞhKj}5[[zlsvBl<~ZV)1O$R|J[<RDw$ؓ@"J%'o^+-sAoi1X8[JDyb6DH,jwuO-ĂkcX"pU Stڴ[%uCq'JUXdm1[0.P a_0i: bD]4[yqebZKw=jebaf0E@MԴQ4 }˒pINIvnFC 8(\5;PD vM9FKB*H-- enuReLK{JUK  Ci WHEh^.ݎy; l^1>Nص+`vƯݾȩ:Wwm5|^W]:U.Ҽ4-54KޯuYf*~cqA#S!aeQ(LDp;73^V:`z^1c6 7+$ 5Rp3sOXAݵu]1k\)J|y5C QY7uSy[ 8-Hk RLY=C,Z5.}no 3c} G;[mrinxGE\R5)T" TXain*"j4|:AF} h2B%S5mG" G|[HD=2a8Nx`70u2(#J> “% o2% koQՊk]ţAM Ovu"܃=`zZ nf/.0C@# U[,Lunx}*6U]m\K Ͽn9m;{񅷬kv0wmܐ¢{:; ÿ:Ld묓͛*b9ZtQ3 \R qAn( oV,J%A-[kf$>]];W;U&_bbLfQH߉K"Z +c)4iY,hcr (bwT`/2 北ó{tV]5HXWwuOL_>I5:~zA>2%/ar:$3Ҫz!GT*&£*%Y# Iexh$SS9  $p,܄jHQ׊<Lُ1{goGJ2Nlrƹ?~pT JYnlb寰*#f&u[v0s ,p_`IK'g$ٶ&b98Ϫ|9{Z[)P!dt\m۶VZ==fW[%)_o<@,=!~jTpB ӐLzut%EHóaBój,@ Ʊ{lLEy Q n\k̭x i*  a#O>z'K[ 5P8w0؝P"_YKs7ѣWoq$SMc""X =>h}˪] L'3Q֌3{$YBPC N}Tרzlj'Phq! *\_ut B2sCR >-n@ Ď!yIeu qxOt$?7?î4hpMrq3:/=K3^\',OIotGtdƈEO tg"?8eA ͜ˢŦ~RR{ ~+]4ݿ̬ Y̖B:q! P‰2 /t G?]sBԳϔmt˱ĭ)P9I&2}.֟Dv|Ůu!cى>HH$a;;Gܩa+&&1;Io9I<!(o5Ɋ|ã5.l CSURFmYJW`)Եzyb$5Xw֯[³KłmkZwZZL8燨Y{?:`ʢ;H$!t:tPl )#Itu4ݡw*52qXiq9?׍IC$J6ש Ggn[LR-Yز/Rt/`JUv;b;7چZ哧CQ ҸasuƸ _k1ݳ锽V >rx84k}P9Sq&6 7j<} X )sǰcRi):$\̮RްHcԩQb;\)na&'ANR)9p}q4m58;B54ꥻdF[qq&VCnu*pakAIpBЂ_ƲOkJ<ۧb\/\ \NX3TulYk{˕X Z$D/$uAS .q.譀K`[pǏ5۪B:ط &y~y`8y:Cx@ *_N![gز3LI<,2ɐ 'Q}8̂:ZKn'V[>V$[nYhˡ.4\QA)&!ɗ1N)&y.!4aSRlk͸  %sAÓuT!zcPz7/%&'(gaG;$λ!4{:S$m*/% ً'Zl7Vyaa<hn3`{mnJJHcP &$Y5)%QRB i|Kry\WgWp}Ğ{~ Ĥ"|:T8Õ*9@DRh,$<! )BJ>2Ys6EoN*{:}6n\ĺCD1pPz ;;_t$g FXDL0Qr3,QAȴ)r]c@a%V7Jprsq-glPzF >hv_2֊ S ];֨# ]mO dN@L5-%lYgsVQR' #99YpƱ4]'$It-"aq 0xvEZD631@Qp?k$cQ}V$,=Cb* ̤hKpQEn'-O%4~!m1wNݍS[6T?&70MK @*Pm, 7w9K@=yr96at>1h?}fab\ƃ@DؿPiS^5u]TQlV] =P-Id*6Y/M0`P5lyYd5+{lwEY]M.]D; ?ɳT-5E(rS8SӒxg90H15)%Hť3d VOV!H;lx)xw[;{Q;T`Q\5> w\޷q|AEknW,LY(,MIˍ]IQ6]^T^lb/+G8aC* 7Z`a ¦JBkAxx&$>"1+EjGkk[mu'=\Ep~\\OQz%ÏF“q?S`?p'2HbBWHv4#9&{n[$ers0Mk1v1q5l*CK  fN:j=O"In+;5su%3&19~43qcHLʈCXM7U]V?*fvЦ.5{[QOymK&;&2ʞ0OXJ+{Ms!Kosږ/4V._h'm–ܴFP \X$e4~zo).겯Va4D9@ϣz@WYc @H\Y;x4 7zNb=ŐEQ &dJSs5_qNaYخ_ 1,,9cW"u´̤ 4"(>װ; <GZIXQQPc/߸/FBKNJˇF$EڛJKgT @Pjˣ##6%K>W]1ŵK'Ϫ{kQ!~Ջv<d%Ӱq$Ry`\#PhN !J"N⃿}U+YK8nl_Lqe*Y4,eS'NX;dwиTNn9Ǿ %i\\Ї-M %:h(=3v U7ͣ#Ν'=NV*ٷ Z j*b(c"Q<[Fo DYFl*Jce>i:ph} T//]۱c,u0{B:pR%Fq a}yyu ᅄE\9#=ʦ0&K`M eyqv7@B3VxΪ;BK0 nkm&nռ?:!IbZ֬=G=X..qX\GR xTہrfy=a\p8(f? kʇF9 8 WM9(&w7mN?pK]|e9HYD4'-fXv)"HZƔK ZWX[nZ,&gGOf VJ3]@]'Sso`'462Dn.x FoePAhr8+[. Kɔ踊Ԣ>LR~R\1P~顑Y}s?. ٘1QTĭ,u*6i8`wn,%t{a{^D 93"eJPc.RTׯ*OfE ۠S رӮvGhNe"$oG3빘fR. U(\I;I*=!!U]&ET 4"O'`WBc3t1\IHl')&} 0ZՐr^|"Kx[5L>4L9nngZwO^ >A%6&E@G&u:JAfS #F&X=qЃmz 董v$[@5ksijNjp')VG\L6m疓Hs4kfG{\ ǻ+ȋ H&.ZL[5tHck!p*`̺}|.C,|k/ٴ:+`!چ3lygO 64WBdz1) @"UVă FMWkσsi 6a!419؁|F2 &n T@Ğ*GgrbfUVNOpɽ*BHFC$e$< @u4 FFJ` 9+6s(w58;7Wzt%'k^?UA'j SXZo3IB5PPU,¨gL s ֬,$j] +߳T9k+JXo eq$`pN*<9wByI*5‹ *ã@MM[_s G S $YmMmXi΍ e +!pWpy?[! ½H$-7Mi)z{h? w>SnńK.-y݇PG(A3HhPlEY#DgO324J<9E 'h'%'&U$S>J&?ܼ|^J^mw0-eeL&!L>Z6?G%L/EJs2Z(N쮱Cu6~&Y:6iu5 b+}$tŤPHH/GMl%bm>CO鳐y*HLO~z}bH] n[`)9f9!]-SH6QλU,R(1ݏPeb D %eLјEakĐ} TM^d }$FiO&0`5!=L>W/8!uֺa=;GbxX0ro4a2yچ0}m"s FY\ ($Q֤A5X{uØqB)JNnDvD2c3}T[}/]/T6)zU( p,.YI/_SW!CP_Y![ptl5dZS&ﻩg8_80> B&V]8++iגfْt4avuŐqӍcXXf=k)gyZZyF v_3RXWd&AHEx Qw4) Dَ1&K4`p^>v.iq,_E\\<4⍖02;&,C |n88)}BFrc[J@&*!u`v YL{rM`h8nqg? c ojLERߎœ&OZç%PB]5vʝv<#h'OVTgFQizɵl< Rl4(JpOKv# 4P v3GW-.2(hf)+A϶v&;LHb=Aaޕf^iJ>}{%E"x z8Z@"'F;wX5a(ʮg͒麊RAi"6-BiS4r V[#mFhȶ램CK5etJ [PbPxg鞛J עz##Ft:MEKK8&Ni),vxN@gLz8?RJ2]^.Owxma$+4Zq9μv~R;;<9ml%nVVؽcUUJO, uUT y}u}_p< O+N |1(Z)wFsXX5 dR/@2KII=AB,:|s'<{PM$}к"P*޹;m14X\MIYL(b!l_ w׉KHX㪥j(+)7-x,<GX,Bj_A0Nƿ Y_WFq.G pZуV;XzB5]>l/r~>fs֜ ]S!Xqߥk-`$}#<ʅ;&fdqV_#1m Io햽ĥ}?Gb捎Oִ#HG{|WHiH%!NqFc=cS O-:ɜpw|b;m*W+)(%*(pÐ&QZFբ@|ghnƄz`!u!E"G@1EiAZ[pT{ QP * fG%XD hT{YT8Oԩz۱;Gk'~B'I5$]^;{uEbdJ00|Db dE+Ȉ52EYhO7^QQټƒxٖuec~xio|CM ~Ȉa[FAsQ]Եr.]VXr~r/kM_/񍋷Njݿ}cr(M^1n!+gSѧ.Q=CM\{ájT'H`/!;WͶc?݃@ƒTR2w˺聺Ds,saeQ&gW/=GUC.b|b+k 48NM#)dTvSjpVX3lD$JL2YdJZkPօw],;C/Gea-iC9Q~$ZPCC(\DGNfoAי:ƒFXȽ`IFuͳJn@bGk(ܯj4hQp ep?zK1Un tV*rwr^Yb_)c+Pj`bێbUQ((?m ?F :Ĉ]^=\ͼ.bYS %`$z@8hu2 0.Ne5Rw_{!8u\F顟k_2bOۮO'e:ЊoK}r/]b>G٩H <+.>=+%2ىSuV<5DL.ç uS@\'[Åʍƒ:+pk/֌wY{pMEL3%vFmR/~%̈mʺ8qBP=J ֣Vg~jnТ1)lċb>bP5Sf+Zlժypϲh~0 thSHXE~@ہM! d*+g0Kf8n&ڗ;7'A.K[#x9xTnEPwN.KLAF%$TL ʩ0O)*q!st$H~]efVApY5gX| _&Y Rm%8OhKv-]sjͻ]߶͂M fP/y$:DSi žRk Ğk\h uNGZ$Ʋ!T,+RJaeQAx%9tpkHU7ep/1XeV:F)oMRhzk$ɢBuxx-TѤ TWM L!,dLz {|,w>&<A_rWu_׮Ș;K&7l/Ul:͏GTo5ܙ>`@l_k'd*x,gqHfP$FSr(}o=,b1$›[+WU˗C ƌEAHjp1HNa \bNN߼7|Ͼ*nk af6BPMB0@LbrJJ BڝcIJMrwNj ꀫ%q*^*AP0LasX/ܣ p1j^!/dPz:m!VvT_j>uԚ;(!u3 n/\$rDV,o!YX`,T\uUI3揤#AX8Bx,pKaUOTQ@l,/v{?=e)pzUҒy-3f/ 1w { tjȂJ ȳP3s~p"^ƲEdGm+<Ɵ=菱z0eg? PcEl,P\\|D~~.FǞ؞/ou/?g[7gd{c[=r/ߵ~n}@祺={?P4u}/h~iY/ZB`5UP0/[fιl5HF wA6-jxa=q#`d1$J7}L_i@ިy2Ir.Ky|k{kvZŐ[̂?}~ANw5 ic׭ktSPf[tr'PfwnƸ+9=B,>.`q"8SPdPoVP?[:OG^EQL| rJ5U2I!ݺ;^gQ C0l} 06}#Zqp%ɰ!+FµFZ%hqylJNLϐw2`Q W`3ɼSoi:7-Υ v+ O{ J_9YPNu)S-A@IDATIq{,ʯzJ0yLOjF^|^(?G ǡ#hM}~OzO<׷uhQAMc$ָޱO F/(kjiAx fZ| kWG5Ѳs/xCyu]sq΢cUԊmb8+Wj,[z8y/_tuI 6:UVQ" O)t5d}=%7R 1կ%\sݑqr#+r@cq#&Hdu7 < 1$]O(2Q $R1d-֋lHңqE5s~ݾe{[KCnkg9KF{Yje 4бIL$G;;(7Zfݺa&8:*R%;  ;7)Sď[?*oaܧJ)?vh FǶyGg(VE'lg!\#JƆȊ$1Fm@Cmp͚1DŢYd0%\u\+UJ%>NuC0pS)kGX"(agH+Fuf=])B/f: -l&JL8H80揍};?/?{[%+ =CYagٯjU yڵkeeuu[ӶIwŝPA3&gYVGG%%~b)L?WoWδ%t< 2zQpQ+'O=[k~ۿR[+] q]bxѣ:WD`׆'{B} 1(Aǻʳl|J 6Ls_EeqrZ9j*q6GoU`2iⲜRZѴPqzw鲚+lӆ9X _LKwChTl<(|,*MĶM-ݴipl W:`SUsͪYh-tPe){ 騙agB$KXGѩ>-S\Q7dv(U E'U}ٱNUѺ*)iyn3#Td"xjz)^b!AJ8t^=*z! +?E~0'&Y\ՙ/lu,{7ۯK9]pN̵^¯ \(Gء?x(YTUc?o>W`!Ra~3ٹ`2j.Ќ(K3m &NZ&=2#*tsM ZpUK $14~^ `N|}M̓^5FvkL;iE}kд>$NhK AyC~4I  jol=aVB3cX!d??#_bILdYUI7|H^%_v<ҾuɾYd^F3y:F0Ys(6^Q+#cSPbw_.TF[-ԍH:$LE7ʽojقbu~_aBͥ&L wc\V[(beA-4*)ɇsYaBC0ZxplXp7$#'H{MIM 8S/lx|xR̷WW[ڶ;P#*s ͚ǷJc]ləg^۟+G,5L 4\$ks *f*W//sd[RTX G>x?=D!퍵$f@r,^VŠeû˒L 3WS t%&? P7).No%dK8zM~/Aqsc>%B,7aQ@f rҧeD 8x~,iqbJc@h\XedΐRT\(a *Mjc;( NnR= (j#%ԟĕ bvB[dSaG_g۩ S..Zp+憔 4H%КǤ%"K-]XDxBU\jSV+N Njƿ^~s6BV#-~$~:uL?g}gI41.#3͗:e(hm_} 8~zIfЕ{ J9,! ܹzJYG RUR5L{hu[/uG}Q[{18,`!aFJ);X qf&˝SiCBKYnMʈz V*+D g@?lGU `btprhB(p f_nr-XL1 & YylB\~9DܳԄ,,xAilji5+wV 䵰v(-> 1?ۺ҆<N{L_ǒj:J!iI #1RnKunp:Dwd%G##ԍGcc"K| :͸%6ԢMmnvqרmBgn >L ZQ7.Z ":k붓vp]L#xR0%$z.YB ,~>OPhveޕxZ.~挕n6,,,Ⲏ^qQ}%;Xp˒ t %d<|40YnHݜp9 y %k V%WT1%Ԏ8Ddz3qe=/\,…SX7մ+Vqmmx +WvK2rT; ^y=1#M;d[a\oZͦ2Tv*V@;Dj()۫oL(Tb #GW,)bcuXeǞ:n.8B!,c OGTV4K"-p#c}Xޕ%%iQ6r%N`ĤvS}y3WNu&xC^{,1 L~4?YhD\r(!=3Z9I̤]Fg}ӖJ!J)1'^nE{w_X ް“7F5Ka8JćWo[l_%`9!B'%[\IBVswY !tc`;Q*1! UIeP%PrQ%XPt,f8yqPY=0 9;bB`U^J7pX@֛ BEք 2\M7vaܮr;XمRCyʢ2A,RXg;@2iZ1Ne-N4s[@uR#dI 2f s|B8yHG!a 82tـBɺ;IrDu̡;/3bV v@|W>`\8WkWo;_Mdsl оsGG' Ep'D<_/.ʰyId7ꃾ8e\|; 67B :n {IJIq2<ԏ=d̨gB,#ŃI_8'Ȳ~B \M  "_4dtKPe*Ag"wq!#6dKVD0?/1uX {'>,!Ea;g]g*d/lQp"PS[쩝qX";iDJrQt3s"'xg!E$dÅ4ХAF\)5d|P@7iPta? YWV%v60Ẍ́sFNǒjH5f"Wxϸ'I<\(g ',ꃞ+Ji}=A,WbIy’ @e)7b!kRV+* џ+ ȷN'0Zf@,thuaz)pm5'KR‹88qYB\ˈSp-N|Ђ?ŖԃfK}5ޗf5C?3 @iSM9IGdS(F|G^P+ov5@-x<^YMR\+)C6QRr+<,tg7G55%.L<_x: KbKnu6XжƮց(Cl7d?|ZOr,=S{c~mrʶ87E'w߼rb4!.L[&[PŚXB KŁT΋U,N %)FTftnf+ɱnb2 ( + G8LyaY@,"?u ;K.$ ~A[Ɔ[b>㧻(n@I9qܮhN&w< "I9EPV{Cvwiv\7h붣.|P,xbD*LD\K %rYȱWW}l4K,a hsE,Kj+EOX)ɝUK8H@򙩶 FLnn%(̆tQ3~jjO 'U.HүlZy$ y9Iy$5Y'y5T\Ɗ7BCDhd|Diy3řϱ[o[2b-Nx},B/Uup w aQB? ID8*^'N UpP֒Gzy=-Qr! '/[!t1Gӄ0{%a*H $\5+u&SHĦE٣Of#^( pK!kJhgj;c`DKx7b}-Xxk& <חz Y7Et-Qquit; k4F5'`{ڮ*DQKhfT,Psx(ηQ(t9VǖLeqFc!k9s UI3JB(-weBP;?_HFZ #J{*H c]kki@`$uQª2gt8+(xmt0kg⛜+FٖcV=)))Y@Ɩ5k0TmDž~E3>s>I=g:F 0ab @H.餦沊3=93_8LEL,%?E(Q h($8" PE)5hXbQ"{+w}X^*m1Dm')¶74?@lZF=+ׄaGaQ!6)5(dw:,)ܘ"q}X*:P"knK=G'S 3a!f߭oWՌ9}ʀDMoEI9W1wb4I%Gkb60M+/e'EI r"T\cpaj!!g kxGFő(T VP?102 G|-te{Sp'4Kcc̿nwCwԏ, ˯NKP\/,Iq%Tϧl:YG'Hs N C9v%YvKi8B&t3&`Z@ݽ)>Uőе&I~4?s>XgّXa 4Z\k~l!‹lfFov>8h%}Kܰvm' mKmYTPFx箓[ӴC}0mҀbC[lbŀn;fɷmϝ;FO H`ZKrb8|_`8JYh3x&eCI"DcqK-V][OGUY;ⵄUB$;3H,0Cd:ݩ_='ֆO]w-. ދf\Vs|X_W.HGV 1x7|&4lfQ`udQC .,˞9|25IPV;C뉯LĹr O`Yvs~uJA"c$ExU1A2s+:nZ಻}nȊdte绯?[׭f}Hl\ CHe=cAf8E6IVBvw1b7-F]'8!箉~FNcȦ)=-BPVCדR5Y;#lS5#rrl=]kv?|2Bh8N V @sJ{T67lev[c㾣?N7sy|G[4(|&՞[sFQƦ8*% UM'ׇL/ EcRfX7LiAjQ7lCL\eկJwPmƊZ. 3nlʱնvoV5=7OI'R%X qi KIM?~Ğ=<(K9 $r!M:xcca2@R|M#_ ھU?zCߧPSeB#'_vm7X>t^OwF]4G´E( Qi᥄KZ{w.سrNROd;O45Y)os3d &Z xww3!OIT?rmL)y+,JQ#W<9R-viijw\?1w|Mܿ7$^OɧPjs˖C,c!8k\~:jAw种ΜXjڿ64v5GQfY_?n{Kػw[yd;ܵ -';N>lΌ{阇EӅǹ<$1yVG咒Ʉ$6- m>X%^$<\bQ*b!gv|5!KT=U2"H@«q!!4>,;v}㙳gFf L ;ӝIС{Id)Ipa{L1~x0AK.b n\3$~k T aEX@Rնkles֑JQ^2%,,ji/jx8K-C"A2L1O [FZ %~wm!1J:jV0?0v_~߿}>Ÿ L>ҭP\jh~BY%+Kv?Mcg]tqz0-KQQRL%-jȧ_dƏ1f{<~U7,>c1DerLk6SLQv> $ B>BoMTrȢKԅf -xcDۣ{k֍ kJZ0@Lᨸ-g3fd:bj5Rhөazm=ɰ!%E%Kjf@BP&!b &PPJhrP06.?2B1JfTm=NG3EN*9~0-~hT[y|z{~ROMjUٻf9Z{8.2T7`dD1w$T;>lAY; +7 jkEQq*&U qj6}E/ F$#!ƹB N4a=o2hCń~M`i.""G҂e%9>M.PٿmdoVKw_{aԏl >;y?R8}V| ?A#1IJmYVHe)QqX̄ ?uj͟ßmt,(xd]MWB ;ekA ԏQq>'m94(N794 RAǰ\恰}|A62`{\ כMz[M5U '>mmD.zqXE72D9g @$ ~ cOySP~jlDw Jx#cS׮]5^ %L=@&l\SXhSBւKUO`>=!"q kE ćz>,vN>Ji^5t\e 0%[El8"vmq]=i\ts!D (U}PNfYjW3 7Ͳ.֬(m +שP Czcd!]4e u|˸ $A2)*usB p֩DX+=`Ģ2>eïp-[$8eep#7aXMM Z699Y]haя:;ŭWYi2:]ryH >cY\iG+7]^wR$< WECjBGl0{O^g%CdW┍ ӁϜ:6,ʴ%L*8ZͨqbN[oEԾH5KcWFB٨oN䘥NgX=""Ip"L職#˫ BAztE" $2p)j PW`}؞JO{잰x˯>O>Ycն"jF.HOX؏9Fu~V-XVRV˴&#]Tem|]OfXn$\C7jFeE18ѤB^I'5ZVtFfҎH-TX]l/N iwnDpew,ɷu70Cp@E5a=>eCPQ2Ow] QE7.w^qPʥS4SnA Qm^% ͕\֞{rt )L 1F J:rw4f70h/K;_!ؾm56~M7?FY,Kpw$O}n7&K,i` Z_{ۤ'JDSΧk'|Ն߅1>XCH` X^$4(ǸҌ5jSm;sV1,T2Ȼ:o^z3-\S7bn\Z$9$b W|xW*1,: .U5J{Ӗ&:f_ki@2P=Y^2Ӧf$.Pg0YB<1G ы'=Y (|I_9eKj w5`[C9܏;:*xϳ9v3O&9DL~KOWNG ˲>)+xM jjVJ'u0gچ<96(5ތ%u f6 ,# i!] ..R:3VS,NFVUWx@IOϻ縘k\bF1k+YݺkڦC $;%Sl3) ]%H$FdAp79^sia2#'R7b0`1Y=5@ th㚄;|` nD=,lIMDD ߞ{OWvO%{ɯ!: wu՜ON.vwzN?M #HJLr+Xks͝)TQ5fhD\Ds)/+\YmjG,R1al:%8hcN0sxjn4XLDh$'Ł*"DCN:"3/nIL0bm{)f(J]4b"AM49零dꮻ9["ls@@.2;N97wwn>)"4Ԛdc.K4ÌL,#B˄:=PS(_QV''v9%'R0ChDt\[pK02pZg޵򲖪z jPTUECjup-FxzwJah;E| ձ씚J'&%J/( 0fw/"nŃxĈ&,=ƈ½٦mh< "F8&A4!5⒖rl~\緖IXL*Ȇ}M2ίw5 'Tد;* LQjEE=Re1rdl=Ԡl0m(yʅ3CvHF!cGCĮ$H5sDY'.eE'feg=zOjOҰ?e}BS?d&5I**Uo"{D!j $BrajuhʧFi06Pc;fLycO i h h0݈a<ELa_LqJ7(MAi"z 6ZgtyUՇ$n5pCu#iLxo9?{`9lez{Y,*HfRGw'e''(Aj^x/ TGҰ6t ij`*::A^ 5rMh|OϠO/nomSC8q8_k)?ZP֙q9xN{qfH+pZ?.gx?5}w( CiwL&r2 qQr] ˯" O6:%ݨhX*&xMRj;@Щtǜ XUHOrR=ѝM5m~ݶㄔ M vl7̎+kO`ހZ!d8Yĸhgݹn`- "GVbȾ[f;Wܝ ;c- N̨Z :Q;œ4ڔIinÖX4Th|͏?}Pk~;94v kN##_p{]tog#cٔm.1q&X*iԮ{5NP)uƥH#^hnH)0jzIcx v4U逩RL#l\Ԥ痎HYacy\\:}0QrD61ok1۽V$͓Qoҝ!OFڎz:}]Zmg0nhf7FuLHhFIVt~yQ\F5G|۩c"3n΋c˲n !V۽ w/+MKŃ4Q9(؂ɏi5Zj{!''Ԅe</*'J9N}I))!N^vlVwO`*Gjε`:*N1Bͻ ׾ĤhًUi ,Q-4`ti@AԬzhD ek֦*caVwZ[|}| kt#*`zCG'+_y㮂/-;y2RayjiB\X!…GRA'H}T{l.d4,zzQ؅LE k;MbjT&p$RЙ!?I^;inHJW1^dЛSqC"fϾrٸ6ڐFڐB!Դ{ sfd+^>z_|K巯9 jgI$LE/kDi*IEIN]JIE!fdv_mBǀ]+EِlWrf\8#g3 ^{tKwP״V.)CE[sǖ3g䥎:8 6 (aMQr'gĉixbJ'Ɓv'j2XU mV-.^~LD5H&>T݅26x2' ;NsK] ;Uވԑ:RxMLt]}zձkwj›CG M{"O V* o] ;-O"2hK:#Z2}Co^3w+WMPxt. g4+ʊmMf;7e}\@@Ncb]C||vgv V.Y0"gH6m9^,\U` }rh~\L|іx?]a7 yfwyӿ[?{W Nz4G}«q~rTeQ*t>؇k. ;E^?S֮\QT}`'Xۻo{弬o}ZTC퀼FI5̌q;gOs'M¹ B_o8\lAY0o$'>TUXqQN pck-{%jP9z'A06>x:=Oy:@ZyZtku6`C?|)ۊ2h}bL7VZõ燛\Qr'Yge&B^Xs|3f]P_gon7PTNIw9n<@zLp&tOvH=O-ۖO^pֻ K*{I tn >R?2{L}\ҭ9gRՌ]Q;4 n7Yt_(oc^OJ zvw,@-|ڹг %I bf+VYB clEoieONIDATV'-Ɉx{e~Wx(7N{Ͽ<0Z)LnR -%]_%鄖hUT8ԭ&㷷W;JpGf/_ PH7t  ?1SphxCeG:jʥ\Vtm4eEjm,Hs1Eːj@K nZљn(.~z"( _!*u {o;%^O簊 u~\\b7@yTXJ=u,<-Ǫ}NF"N.mV\*sHTo@]", T+4D ^_Ѫesٟ1-*2ށ/â)ļ@5>RͿ7[irߝ󝃁C}(0.JӼ}|g2i%iTFKNm}jh~- ܑ˶~b=3w\Q9{d0!)y P\KgmjZ?v`x./!'p'gYL> 9hGqEWҴ+Kh.)'~-6ޕ@`W=kDq6%gTݣ ;#]Tuywq宀Wg3;sP:׭QAw7^[P}~;?K#rŒy8 I͆QNzм禓&|mN{D+;٧{%9>e㉿IENDB`vctrs/man/figures/lifecycle-archived.svg0000644000176200001440000000170713762413632020111 0ustar liggesusers lifecyclelifecyclearchivedarchived vctrs/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172613762413632021376 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated vctrs/man/figures/lifecycle-questioning.svg0000644000176200001440000000171413762413632020667 0ustar liggesuserslifecyclelifecyclequestioningquestioning vctrs/man/figures/types.graffle0000644000176200001440000003254013473164157016345 0ustar liggesusers}isG_}ૃGG{>tG^|YvG>|aT4G?̦g,Nv@Dl]fqЧt}?|:\.ŰǏx6xEIz2EWqb?R-{# C?tkˋN9Mf^^oI]en*[ѣ {k}ge }/N\7͒gI:z*6b,}OC/>>fxFwp9SqVtO(y壕;/r:K5ctu" pO^}t뫗'/?y翏'?_i~{'?O^i>?9z71dM/ikF39s:xjJPN>Sw6;p Ázؖt]m27;wӳ8G M?mȐ.Ra?%WQqyvy'ϳxn‘Vy:JwǬ2[^1.oZNMo@dw>r_ovO+s=e0ى(4?B#Ϗ{߇1ORw>&ʑ`KΓ|x:9%#xx2KIh|п?&blGm|d%<8LҭIS˩ֆ<+~[-oao^&.͚ lԻ8I?Oi;X=ͽT{?uS[OZ{ۙSoɁ0r[p utfv^G>trPgN{;s긪⾪*}lfYG#kA>9)(悫B7y)4c [F^˷ {!e(#%V+p[8 (A РE\O`l`p aڰ- `Q681>`l`l-lXOnFv/sӶW%Bf$]gEwޯx0PpKF+pCCCGnlJja@ 8c{R6Ko$(\\#.28ܴ~5s08p2{L908p]bA^OZ28pBEL[nn^p{AЖ;ڤW7NrIܮ/ e1(ܷ}+5Щ&vq]Gmu]no ])v (Uن 5{,or݅65GrƅlrD}meƚ7fD`DMymre^Smc}vLҏO|g19`rХiyօkk=##BoGmzm=V.}إA@]xzޤ7Myzޤ{J[٢^tczqz kpP[mQNFz5P ߇|$7@_P>  BhʂC mӶTA5Ua`"yUJÎC^Rұ
:7>k;Ce9.|vrTxTP?R&Jl%{7X,Pլ1LLA@ic9pZsc#'He"SD5@H#K9=uZ"MTW*큘2iE6Jsb"ҒǂB sY...2eLsvU忙V5BNaz l!JGd͂qo~LEKEpNqpRƬC=a|*Be-u[<-sl؜t6XpxB/ƠAg-r(В~L}kz4W AIBein(Ra1$ p W^vR4P,)/3g 'ijϦ_|Mu8G,1u'99'{p;ovL6_g)7 L™}G`f]D9,y!jJ}spח{V)A(AVڒ'/; ge~P:%5D2(RF{Jh2,_+P Jh@u7Z-HusmepBt7ZY^# LQ ޥ퀣G=v¢18SE't߽ Zˈl)b<x5(2zGkºŽ-6C芞Q̍-ɸ@Փj o>j;5HvݕF W:Q(+(K K,i( p p.&P;Eҧj ҃}F"\lv\lv=xQ_aD RZ1e040VWi4S/УxZ!̥0E="exYMs,0m6z!<k(EqA)",z^Q( 0% b6¨^R*'r*g*,=DFKYV9#xJ9d"'mh'(w^edi Ę3g P*.)_}#;1gt>n}M1_}x=PK_=Ĵ젾eJո{߲V(h:SS~;Ut* y7s*OGs>`Yf\`#% 81(P.5b_.92G"Cӕ^''S֛d>~Y oi21 ZS4x[nFySW [ &4yk62"R^a[KJYxS VZkuqnU`DBX DFOI aKuJMҔi6>h}XiZDKB5kZdV zTCxO `jZv{I @ +@pnuPQK"!ԀX Arb "EQDHi@)e<@H` `^6$:b (w^[J-@h)X` XdK 4Ew( R>w W+QЗ@lP g\=+[qIr.i")iJͽW '@ ^VK[Iv#Ei ,'r PQ (sECa[ևNW{X^RV ,d}$`O?5\lT~~Kl#gMi^ڈPQK R".L\l:ɎBH,xm/#EQ`` \$HrcQVAɯ! 冏,@% z!@X,bB[n ,@ɮ NE ڄ%>>AF9A冏nXϨ _V$D*H,v +@"AI-ڔˀ*cD0V Y`V Xdi<OR]O irД oAabJ}HBK:r@x]P,X~ @0.Ҕ+۠@kqXN`9:EDȴ|ε"'p=٫~ uVKފ3o.~F (=W{+2g\>ss}#%†2}{GsF HCXd_2gd^^n|N%z&PgOd>}&L3ZF *(V"-o{6MTFDBZ @BqE ?Mɮ}'n\=|ΆZSJJ:T0.%)5ürッ][㚶_a?8ᄶDN\w`G"hpD_@*a:-zF-E)O!tdh҇V#\wEkW>- Z:Z"q 1@y㱻bUl Bs +ٺ-hbQas]-(>ha|- DZ"Dqrh1RQkDe ӳ`)DIᳰSqE(/DI+-pP= ̂qBYT@3T @ ދ Я`BxAg9XYg"cv"X`Ϫg{,v)Xp,1] =+~ϒE@iCIQCtx1 QAoFSzh!-'؝h-G =;-t{j˥ˍrF/Uw/hѣajkl]V[in E?VZH[gj%[;+X#b4XBw7XV[-N ^'̎VBJMo"KP{)'I{~O%^(:h ZP ^J{Q@=xO2ޓ$c$s$<r_zp1x]7%=[)+{@Dd z}fa]O֏F`waai_Y@O_H<0Ac<,tceqg=ԝlT5ىB|tW8g|035[X";IwǬ_^m͓ /I/i0MY2"r{x2KIh|п?&(-燣8glu}ϒq<̦3x 1 H3p?kY{?6N\O$D=PMYS(o$~ZxSiBu{FT-8 ogt[hl4$Mejc٘|nz6&[Iڔhf+nМ`?۳ApYÎg ٧Dv5m0pҜ4?ڋA 44O1֗ϫTd2"JH3K/"εqOI\,Ʊu8 HDBk= D̖ؒ-[^DV{Prz^Zc=Mю|m󉴞;9 ;9;y'\-qoݓqz2)[Wmcγ) ;yֈr.LSu{P]OY`J2oXLuL ))Z|܅lCG$لQFcccT FMi0] RF|ASH R8K4Lb*T\Jߓaaaj LuaSD*tؔH齛ړ:C u= (;2PPPm9h SoKB((ƨx)KL(Dg JE; R R RmUq@Jc9sBqʰRso랣>۝ATX6t*$@"Z<:dG*G@ v~}؄]£%lZuhV_o=("N;aɱq`8RF"e#rHJ͢THc#!#ql$:, '5*G/(HH,IHbWFbHJ]IHh$ 62FR0еRb"-* ]&Y2q?[J7+ݽR;ՆKRL.wi4>]r)u`k,RUL6HUU2F/og#adDHUU2HjF"UU,sl$d$VCau}FBF"HiP`Emi2pPd*.k}y[rݝig);C_;KƝ^NwZTД<-^!{٣{^ON rH)Kȣ^􎑇wӉ:5ξAAWӕ搦 w׼aaWә~!ב20 vQzL-'e Gww3]C҄ (;J}$ ^/žzwG۸\6b9b^vqXmWn\+{EFximXu /9}e#굍po"֚{3#lFXGؤ#IG`aa6:&muM5+2, (؈}6Ik꽍pF}6H#{ca{aa{aa{aaFXGب#FXG8wюu{ca{aa{aa{aa Bb͊雍IG`aaS=BmDzMZؤX#YbZkM&֚6iMl#5mԚPFXkڨ5FXkڨ5F$kMkMlGzo#H#G}s7)bMFsL7ľÍ&phM2;bH+uiMm&~{a~Dn⇽7懛"{o"!-x6g˯,-p{6tF}K>-?Zһ6G\)R/F!>I ϮHwӷø dU\7 FnxO2k Óqz2hx (0IKd]~'L-Xv(z0KHr~5% }rMy^4ڳ};lqyW-\p!aD JR c;U?&2]lε[srVZ: [=hǾvt˷,Q6xu]UΙl:N⛋#vOB3BjNqq" 8:X~] 㙿.ڼDSH%3:K+Qy Ep hCyFC`_t$,E#[EODP8/W5nG֐fR#l-7Ѷ,2ebtE~g@h<hz,8B0Ļ2HO}HGܠ۩ڃZqڰV[cWp.[>kp -A{X-ghnEԿo-#m׫Z X2"WMAY'T/'Z:4:BRзVx+ -`kW̧ɿ ')~V]K9PP!JBYAS#WgA@+8EWg 'ijϦ_|Mu8G,'99'{p;ovL6_g).787Nnrfd&W_ղջ #L$|J}Rg Rin0/1_b12ﵹ֤5>~7ԑ5 xdAWaAXP DHt XPhL˂ ,(&.(h(E+ A߱IQW,#0Rzg3F^Q((VXQ`E"0<:%\ŦV㺛e^ 4>}-pg BށdaS{,U}r' ,TІP0ty t<44;$<`2d0.9OC9%#⋇'$/ a,r<}~8JJxpxF_> g,l:+98`giR/_ׯDP%OYAAH| r75"ޫ_ۛM^1 tG0pX]^Y!I膝*Z㚾WN{{rMJ:34 cG<^gT$-R;ƫu*)mck)ÁjT MlTn[i+wϻ (-QE"2GAZ[\^ս/& s^QFFJ }}ksI6dsI6dsrS+MƁ7bQ$@ʀ#>e-/ ]X}K:MTm\,4O4Y2K;'/)`yP]ƲSSvMy.NƭE$S(+m>+׆~y7s7 ݘt-缷usI;L1Jb& yM -ۖUwyțdneT7vvDL@Yڏg؎zR/PC4<ǟݯa"CJ!:c@u \u \@I ZΩzR\ǀu \u \u zHj Luch6({{S*2v:0N>3R;AO9NyXd[&){{Fa)R NaB' eSeDȀ"duNz4,Zhɢ%]--[n/!\S C;)*A|N {Vb;Ktl\_4A9ߙ3g{ǚ舢a±_`: bIeDEXm(%y3ge[8+%nk X 3;-CHe[;"KG.#RswꝲJmoNg-Xn[YE40h^1[S|*aⓤ`+H)]>IrnvQq<'V^ǟY&$9W>4i_r >Ț/8&o4#<[oO&i% zc!%ot,X&zՈqBq~/&'FӬ&IM_IiB2څs\fӳtc~,M&`lexO`kCמ eqo%IK^1~:*I1X1;i;Y='d$0Lp't1>B̩ܫBx8K+0&]acgՔ1[&|qڜӽy[ iN=g;joi lifecyclelifecyclesupersededsuperseded vctrs/man/figures/sizes-recycling.png0000644000176200001440000002012313622451540017452 0ustar liggesusersPNG  IHDRyyajsRGB pHYs.#.#x?v iTXtXML:com.adobe.xmp 2 5 1 2 Ү$IDATx pT M0X"B[S*L”2m(С 86mymZ[*@I0E*K20aBi)`,"CbxIxK]͒=g#gcfwIjjjRn(***//wݸ{ [=/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/.aT[lihh;vlvvC zdM,}>dʕ.]~׬Y"Cd!h_etXgqkG6F nȚjիWW^d}_g Y_ްa#<5))ɻ+w3fHMMxu:ujܹu ]yMbN#G 2G2!@P[sСaK\m6zӦMׯߺunzɜe˖eee566]vGL4I敞.k;wtfZPP!JΝ7nܜ9sNbD@aa=n]Jp)7sL#SL6l=߿ᄏ`z^HuOs*۫wխNHiV:_'563I 5[ Bf&uz 8D5"$.@Xo'FD$-!@ֈh3DYc"mfX k@@Y#Ldp kDI"`]Nd63I 5[ Bf&uz 8D5"$.@Xo'FD$-!@ֈh3DYc"mfX k@@Y#Ldp kDI"`]NI"&go=l   D$5!x& L )#$EEEkeee 8IY~m֛jH kvy#`V1M5 5R;ϼ0+@֘Rg kzS d3o 5fTFj7fTC@Y#dYo! Uy捀YƬ7*@H+))YrK|~5kD>82 D .+VxgwB 2< ON\zuK,gɌD}YNlذG\H~bf̘꟮SN͝;7;9;d: fN2G2dHv.nLܶPN:th#Ӿ۶m9ӦMׯߺunzɜe˖eee566]vGL4IPd;wtUPP;JΝ7nܜ9sNbqֲ)6nꫯ~뭷˗/gϞ=zT&… =.\8qC\ַ#F۷or.a S."7x @/^?~4 T?q8p@;4bp &7cǎ͚5K?5)Zϟ~x] d$tYe -G'I%$vΎk(Nݻs۷,Ns\C9XR_ޙeYȏD_"EEE΂²1ƳL'&GRQ:0麀k^-xL何5Ñ+>P)PFh6C@Y#Ldap! Txa08*@m4zO~z/%kb TngRum Ro.pYm}&  ߩw~|R-m{TS$'5:YӶ/G =Rb:{:0"]3՝̎TC ds|Z+wبԴuVbwlYm۶-ɓK,3fm6}C!ظf͚}kzwQXXX[[kCtMdˆ]Pe_{֍w_qbE̙3L2eذa߿ᄏ`zz5Ȅq' w7^1567+[X˚O ܪ[i{eۅ5)@'J8vνq]#Ɔ:5򚣭/ܣTpҟWz#kSh \V, lAJ~e^Q\1NNAb!UT{oc־݅kԺgݱ?D jZ|;" _էނOD /OyHx#~7~Nt^=mk5䩋@>H{ީ!*Kؑ(wƟ[#w3hYSLTC =dzFoPݳLgM޽ `K߾}Woܼ/233L.!ה~~\i_RR`lj5o~Gh3 STTT^^WVVfVd%nKœUȄ>8apl2} ekEd]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#얧:[jvw˼jW;fSߝf0.@'`zR/ޮzzJ~k[ޝGtA:vjXqd&0G-B'ݠѫ۰ԒYc ~_ztwz)U_ԛ >-uV=oB1L(~?='Ҷ\sZ&GyRӖ}z 8N =R[XΞ_ٻ˽;ysT` kEgnT=nxZ+wبԴuVbw$`P߹sPPPJΝ7nܜ9sNOV1?dnNPm^>y^TsUgՑӿF|I-æ}񚚚,o71bٳg]sB49Ŀ1p^*or+VӕM s~9p&| U]]=a„CZ|| ;6k,R!ag/}Pp>}uw"6&77ѣ10f̘kرcѢEΣݯO|eժU}{?;xq<\CŧNUy`lJʹc16(aÆoʤI篥};ciG&\VrھU7,z'n6Fٳ?*͛,v~ x͟n_2yac9k<"uuލK27m_/ M&XY3|o~j IwSc*Yx]燍IOOٳSv!W XL&WnU^v5 L7 (xkťYm/9L=jNuwn k-TGW.+jaWOB$`Ym:*u]m=yU5jݳ?Inp..Ԫ?~:AwD'~?˒|L#ּYcޜ]xD~M»YaJB/WeO{kĉY'4n&GjݍzB/+ϫcGnvD krSwm5 jO݅ v; kS՟?qwIc?|BenVY_}ܯ_WUrtة)TŹIo7Wԣ8qGaZ"qř;;)++ Z,$@j6SEYc k5"`QOi 5T(@Xħ4AfX k,SAdf3U, 5) FP*F@Y#LdE|J# HlE">$@j6SEYc k5"`QOi 5T(@Xħ4AfX k,SAdf3U, 5) FP*F@Y#LdE|J# HlE">$`3V5FIENDB`vctrs/man/figures/coerce.png0000644000176200001440000002226213473164157015617 0ustar liggesusersPNG  IHDRm3sRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$"@IDATxyUǕMҠ9BI" 2D3=J%JHIA(*Q%Ji4@za$s[ny-Z0b~&Md̘RJŋg(DoFd٫Wp 8w\IM ^x$e˖_|1Nz'% h ϟFTAd/C̙suwȑCf}Ν{ΐ T!C (SOs:0JH*\0U,}'H@%\B6ܹC=$bjժUuAu"ɓtM._<=n 9(0ł\s5v̙Č[z &$ |.]-eCLPjXb^gy:1Ǡ~@y1n:ն1d9Q vIٳ0wBjcGQ@I#Hl!p0Ukn߾aWBd<&`+ke7o22ۆ =z[]Ug%/ch"z/q+iX՛oYf͚I_~ɖ-۰aøQLt9YPr١^+#F]tEC_E (/ŪW_-o~Ie"̨mۚR p#l&LB0E`ѢEH6|F FAH5jILTv']vfB"҅8Xƨ~AO?[F$ӧO6Tu3/RDás{m都|gba~tY߾} Dr١^k8n'N7: yf~b_|IbpI !0ӚsӦM4(*X'Nʔ)c6T"F1&&Z.kuv0'78Åd̢SKr[re/M*[7x+5Ʉ XK1IX3 \.f-aX$[]"sz(-~m6B WX1A2X@Goɴ/ͨ2B"t.r!ra0oƖEAZ l`UV?&L{N]H5kpk𹕝 R#.n#YkF\l!Ot#0B<vڕ&L41p=pvc";DoH. ;AH@*QVQǜS>S[2ͭOMwj&=/RKf ÛA^<ް Wqgȑ# UL:l F^yf>i8'Iu9[Y}*$^Qps@|bN I ?nɁXxa;n}6IY3 ȇ1eﺳ69 Ɂ=;A\^xygGqPo%JA͛7K0&BGyL`GEt:|G*&s~8d9xW}լY ~ʝ;7͆Wei*~gML&#a_~x6Abɇnkw?>h4|H$ Qipl('1ŕQ9L3 I9T9kW2=^A8/%ܔ2ʫNW9"'GoT!?fW̒+@_|ӧts=tP!?P2ʟZql>UF#҂|Cqm a8ߺ)eT\{?} ǦyPqSSF*ک}ڵ|6NTt[oRpC/ ›x7gFwEn+" ijbȧǃ'ϓr->tSʨtx::jԨ͛:?,v馔Qlz{#Fy e˖mԨIIQF%4R5ITPC%q"EU2*2) 4Ta4h+7 &q>|le<͊MDۑ[N2^ۆZ+yæXnQB>-Իo߾/Onc;h )eTrCj߽bmFr$W ]^HIN:Ż<'qSTr;5uT u|#5 :GlٲÞإK^H"OqW!]({cƌ ;ٳdɒgիWwIl{֭v-_qp(HEs )J~/[s5ok9sT\ܹs0'^c/^nj6{%Jp'܉hB"+V Q L2@hr:u9(׃ EL` xhǎ  H?3s+`|sɼhѢ;Ңj֭͛ǁ^3fFš5k6oތͥd0>ӦM9r$ >+hDܹwE+߬YLH*v7}t Ġ':r̝;R4loׯX̙3S:˗<X  U@9Qq7K) d IEyozj:uDT0I3f(= ;,I}I[UhQ\Ӻui! 6B 5kLXl8Dnk֬CIءۿP2W\XeT:vX/_" ^^eիc&pMM81G ƬgΜ1p*9c>s:c#z''p(sfT޽M>}Zj*32L0{ŋKC,FիbŊF8Nnժ XMYSƸ0S/($ls2ʡF荒/ ڹKİ2t♏;FNom# Fr >q6v)(B]&bAqY>bQƇ ^(YȲΌ.\00İ:$PTO 8S 0OLFyNV("!<[!Z V(fZ%{2ʁ >[(3+k~V~'baAGA]`j$ؑ3V퀊(,$9xgF2;m[۠a3ƍC1k6%3>T-eI 'm'EY5J+%g2z:7301جڵk? ,L 56aO&.'XMT°!GF-J  0[)o4dfq2rpGlQbM2q~a ȔbOt/]r$6DL6 ,"{U_rUp42b,_|a#Gf!lv1XV̠Ef۰KjɹcċcZ6'Ico RJIq@ w8ni<]ulْ "vqA6ڃס\ӦM?[ZLcXֳJ1Md|Ν̞C 1( u`4Yu$|6'I~a>L{ؒA'6]yloЏƔrJLyOCۇL3:`?@1aPN19X V} ό].v'OL%Qx26uoj3H9c䷮إ{dWo^ [ٖr~k"_f l%p봐mwc{pr!]~r*XwX$8gN#eW#܆jff]Gcќ0# cЙ^`eǜ~d$1FъdwnzL1\ YqhYG1MW8+^ b 7=6GdHÒ O;d he}1rD[iA)RLXs 3„K#!̤U|eצΔ+}dcnL+tR7ul\҉% 3[ oELX0acc9C;&;Gd&>QY2Tc_5,.A @ N5v!>l2TPNʲ-ɒ/@O`8' 'oЛ |̗V D3J̘8ͱ85O* F 9Jp#:E ax{0"E(;[!J>S2Ͻʨ3$Qg T7E{Ώ )xآ'zE$G0q?QPHxȈBxQ@g}9YC@/ͭ8#rGSw(᥹gQh"e;4"2MU!rVPF9㣩;Q܊3(g|4Up2^[pF@匏*PFKs+(TE(wxinEe3>C@/ͭ8#rGSw(᥹gQh"e;4"2MU!rVPF9㣩;Q܊3(g|4Up2^[pF@匏*.{ׯmQ- a*A*1 Ӣ@ʨH4BeT iQE eT$Ā2*"2*Pb@@xZT@ RF?~]D"aÇ?~4" "eO?=gΜX4EŽ;vzXв@HSHdڴiEM@ E Dɨ,Y_~TR͚53fݻww߾}jբN:si*sUVeΜnݺJ2LfϞ}С 6n8o޼&I@ wdW"E  y'N#G… WZJԫWIҘ1c2et饗VP!_|mڴ! IFɓ'[l!CC /pРAgʔ)Ydɟ?:us%$AQd'"[.P3Çm۶"V58n޼PB[&i&xb7k,|W ^~eٳg{A]vqf͚f˖mذaQD"x!ǫoHrz=yG7Xb0DD5jԨAbGy}TwرΝ;}ѣ*EO`(Ac"zu֊+f͚4z꘎۷㾮*뚊y*<URDʕ]vr[vm6HZ]&?#Qٳgb *kk;f% ߸YN#G4`ݺueʔL95׳zݺuș^b]TlY/_1$3gl`̛7įXqƹsٳ͉͛3L6 ()kFu}ׯ<,X%IM6eڶl2: D':u! ޽{&M؅ϙ3'!![liժI/@5>^똼Ѱܹs׏YaK]tgV$ ^c{CpQê 4Q$ϤItO0ԿG s&$lĕ(Q T'N(P\-χ~hXw~XM 򤅮iưEaMv`曏?;4ElŋDbJ,i(v ۊ"CQ(z7٣c ~zǎyf\)ƤUd`ă|k׮eO:}46mW(gT4^HDJh("'QNh"e[4"2 MS"rWPF9i[Qn('t4Mp2-b_pB࿶iR IENDB`vctrs/man/figures/lifecycle-stable.svg0000644000176200001440000000167413762413632017601 0ustar liggesuserslifecyclelifecyclestablestable vctrs/man/figures/lifecycle-experimental.svg0000644000176200001440000000171613762413632021021 0ustar liggesuserslifecyclelifecycleexperimentalexperimental vctrs/man/figures/combined.png0000644000176200001440000013327413473164157016145 0ustar liggesusersPNG  IHDRN^ sRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$@IDATx ~՜8_a #!)d _ Z6[ڣˤm$UeS٩f̘ cf,3>>w>z:Ϲgs>糟.w}7@`Eݿ/Oxi\ Xtou]a=G_M|`~lX\?|Bvo@ÿ,~v@ 4 ;qr=A}WZii2 袋2>pF>Ͻ59gaR2+J ,,3- 67r-?Oq=9yWzچ:%;d<9]߷?^xa!{A +$~Ї.&я'?mOCzAK_Q<p@T n@ӰBg|y!A'gv^BcpF/rmiaM6.+_fmo?g=YD@/zыN:$/}K)gk =p!\r%CЇ={I~w11Ur(^Շb.OxŔLj%Gy+ Wyٯ_%N̍7xQG/xwBÏ3J$/3^39Oh` !4Jq@|tCb @Ayy=)O!~#Ŋ~vX(zPn׿5O(=oڭK _(CvyjG\Ұ%$J;?o9JZv{页o~Be:B Ud.e;12Mo|~-^}տv:l& Ho2=\yH7D|e]bwşCbLC-)pX{J{Ygu`CS׽6Iovg0씜DiS<0i[}Fob-fka6!ƌNa.ͤv7A{ ZH 1gAm?ZCvp /.pDv'\oNźERwd [s=ӟMb%~;8AqP> /ӴV^QE{-k%W/&*KvmQ_qkEE\>|o2>SOŧ1lLm5iv7ߜPIF8Zk:sĬ)(jqcJF@5v^WOx4~ 07t~=IObZh1tʳR~hdѭݺV[3շm.=VOݝ_2Y^וc4[m_#skL*aChq@6\?S%O,dNqKh Ә')iqvװ Ao^2En?tʭZ;ޛL]srnQG ɤOq/n:ec|4vO~buKc̲G}- wxџY-6lc+Nx XzR0Ӟ4Nb/X~T7S>|k_ФzG /d8yBַ̻ %[x}bZR83ԧ~#=q84I[z/I4ib]T7)|'?I]! w7`3=ѰY3o!!훈FL^WN}U>oKlan֝CHlMGs[K `}g|饗TROu42.?A c7#F{y{`ҬL~ń]ON%:k}Xv%3ECK0c=hm0PBQ_o}&b7Y//Eߟ9@?+68<  m%r7{YOI|KA3b=1˾|TADv lܽaPkNC-.9UHM>"j\mj[4Yn4 W?C0V)3h!@i 0eMr$kY]P@I##j2>7EUxU?J<5Oz)\vŷv$ =~$qp++ܿ'M;BRq&`#Jb/Mj}sPHsT+' b"%=@GKxÀ=yD?xyh}"`7onO#8¹n=>__Q `Y;Ӱ1>}% 1fV{7 YDX~u]׮10(V% ъ"ZGbozj!`Mg䐓BSoo\\[o5k*h%chF`Xq}1My˱~dD1F83\X,-y+Ex: Ue:}%" "hXބfYV@,^z>#9CL~f1b"dЇ>+ Oxc쉰B82Vy|~!aN(hyRъ}c^uodDψg+L[ &)9rsk]**%Պӯ5 MM$SLs]ggg]҃G67hQHYV`TFD?묠?d+%&:O~rNЋ]?S`ҟ4Ԝ?OY59m_EXFKA3-9KL?:]> ǪsbTnKldkcWs}'X,<h$'`H9hL^Fo}x0BF(q5X^xE?XXVµ(|rAo:ޫdN 3)GaG\ƮȾad[(ʺNCy`~beІmx =ӏκɘcO9v}b LӖ8榛nb_͔;O!5ꚅgf:렧Nmlu ؔp[]T? \? u) _FGf |3"oAvVQd%03KQkZ C)a2#$Zi</B]tIx3ԙ*ҭtkeIzEÖwfvoVD@x%f[nȑ b˄}C11y( ʮy [FgJӃ#Dr\ph_#W;}oxh6%Bc~=Z8v^Vow"T?=fl̼3t8A[ijtQ3X°wgZvT)}1=ϵY#/E┻N2=tN[,|$i/%-^xLůd= dY@zm[kzqyNׁV4ǣjmFiR+F,ov%b\p1b*Zv9do2Ϙ~4"[EaqNlN3;{^1BcA!ybN!AVKm2ܺQX-V/Ń[ 8&|d~5icz[ J.쏠pT`Pz*I4nsd0JxduE+)E&L#NzTz vV"ğM=B/ ptW^f{yM0Z_>w H|a!rXS(E:$׸`kRoTi}3N;C9-ж(b b/K)xxߢP5X?/6N&RIڢ~U ҫ׿ Jsj% ?^cϳrwOx&Ní!b6XodҡmVxjx5LW@'"谹q&mظ-H!G% b Oʡ0R:`Nԧ>5zM%Q}Զ&Տgvǥ-𒣿~ "HXȽ]thA;ǎ^DdG.[)h(i~t?qxa-miy#m`~, KBiEZ VP+91)yq鷮,`E/"xC bD?<$atiդu}^pZlM%08;=CG=–h ڦ@iS+VH6It( vdnGi@ؤL~za .z(`UW]Ey,(BkM [N{s MEfF#3QDMӤ\C]L$-VhR`_ve@&&l#Ϧ"܋#hQH ~맗>}f<[]wyD@;k ,l rh}/!g'Bկ~u~i\B@)jNl8ۖthֵ.Gؑ :cHdd=`ooQBʏ 7u1~m?{HނȈ'(CJQt8+9?>ܵn`{{' rf|?%]n bmMLh▛I/$ 2 򝎮t#Z:ؐw+lf.] o~4Rz&j_L?Kn{{%Djlb?;k}Aj1 ͩTv$ cb-HDw$}AOz:uf(wyˇ?☮qoD~USydUeUʐ ڌoFB,LqOCN׭RCBn.J\8~YA-zhYtڋ\x߶PӖlZ8D+ )##j%ZHc>32[ nT\a^v[[S@>Uօx!qug.;9I^dfM5~2H PWWD%/ނ1J6K@}D|@sM: fۤJ+c@sG+T׏n V& 6z~n5 5aP]D_^K?@ҫX {mx%-0ݏ H/YG<ҁ4)F+@_&fƧmLFjNBGj;D(i,S/tnrQC/ٜwicy'~/m̯ Y0} BL,g0^.$ax6NcIB<Ǚ]!@Vg.ҏ\%SW\An?wGt}H3'J.NUSFvD-x!{`̥^:0]avm -rĜWM[jIS 7Kr KnI\ ZAiu"9x,TTy#Dbf3%]^W4cIɏl2-fL1,"orƲO\TS S0fCbq9[MozΆ㤉|>m 5kN[襷bx Jac`Ϥ2'JgKc{`]%NS^ͺćmLd=ct)L5s$Hǁn#c49o<-_\9K;{`aQK;9?Ci{a.,^`Cgb 3[e8X'heUz  0sg 7q!0@ 0Xgx)z3eWwEXgx+C>+Jβ|@ i(۸ΜƛnǏ~ܒv=~Ox/뺣B <Xt_(컝Bo勽tv_ҧ{{X9-;"?R.wq%ns}w)|M0QIHš(Ȯ{3}&D9l^֤^b ȿ ]8*?#6ZocDqu WhO|Fo} 7%,4\ފufi/ c HXhg1G%.kAnH|!iwaT7^yLي:4[g~]CxH,%"{&eq@蝿o{F;&~lO3>mo{- n֋?Frq^Gz2^Լ}i{L,ʃ< 11^zyjPQ u^*pPZ*gxh~E[oԧ>[{*P[oPpwk&DL$^ x~뮻|G No2R$4?!yHpz]in _ƙ^pN6s`[p?W U;iQy뭷[4mf5(_ѯb~v8{F^~*^]x!g@.Ds[CmBq3x^`>%2?k -S-f{ H {}[2dޭۢ} ꫻ DP55n-kx3/'Cin r!Zn#}?߳l9v @PҎ;XiMƒ{ Mʫn-੡<OSbfyc{5d66ɡcEK;$%\ !1_waJV[ bg=Wf&=9^Dzp_2]?/B GL3xU3`h:il<Bro|#fHΘ?$d ϥ{ţ|*ICytwI{PBn/K,#ScfP ?կ~5%9Z+S01ĄRK8r6X`H My%9͌()>9/.cñ%G 0p 䴌n&ܞ*fz)}ةbXaNGdkKb7$h*c>91V.s!pN13&p܄L^b"@E \IUi#믿]ZD>\e97ewWtI#AHp A$~x &NQ7mCu&Tz *=!KZ&fCy_ m2p,0$!)дLK&s,J^Z} +0tAOg`+@yDr^(!%gW|U&Z Mz7ZY]k @kONv/ZȥMuRhR,:Dњ3% ,㳘"aHhy8e283PPO&lC; 20B Vz%g<A^ysBXW@ B/dV8[(BXjz[ڼL#8-+OcAx{m#h]'l׽Y tK^(1<xo۩87l|0хUq2kg[ԟ@Z$-9 +fZDN[hם`Af!-A9jF[k.L]Py$t e AF|)bJDH%򶷽M_+NbEj(n.N?LNW2CXc\6w9Sq^LѽoR{ ܲ%;ƈ]֊~)gJiQia); O`G`rc_,0;  .,X*mD@ kQzQt!!`'S*O' >Z@Mk2Z$pDdaXdO:뮻 rb1z@|{,^;N@<Rχ #leǤ&,k:0Ze4'c(3Z`Yd(4 IG>'p*d ~, B YR~S ɍC_N:Ci-8]|_yH~p=B1g9s4~[*49e]A"!Qг۵Gf3P kby_ew2gwhojupkFoYqꩧ> ,86uG!Ӕ$qh(9pwu i}CzD-8U1(П Q1b/Ozғp:TJ,1(PEbN+*SD @{{kT#_΋_%bWt_'GB>k_ZX;85Sn.BD|p: >9N9ig1ĞEB1~D_h&l*hRMi>t  vX͘Ik$]DaVQB Jʃ*wƿvbO1aL0eq&^J0ƒ3`jqPLuY FVC +M=gk69@ ;;>&/IŐ^WIUD[յ+t$Jf I2[2*Ճ'ќV*][4%&ڽɒQ~ ѕ"`c;Cb @@?P/;r!@.XldR{5;X hiK#ݢ@TڌvɒԞ0^ DHPk\^Idotn!GEg\b ?siXƄԘ~c+?@qdGiI"*W^0I6!Ԁ|p1:{p_EW*Vi/:ݓR~PF *}чxk 3 ¶7sEL"eE.AnɢLW1(j^\r<^}h"4];#1GȳBRb侇>߭j br=t[D,iGzP],?:!g@ k֑y ^R i4BiE^*-PcDb>6Ӿ2vEm.AҢLC琼:뤈- Gd_;>r}i=A,"iHdH^c_}(cӈ2$@@̐ P &fH*/4Bd{)j6/fE,/V$"l*ff %W22._'w Z >?}}֦5blLo'Cb((g2u\{̧џ;;i/TNh Jm+fUNAP46Jۛ]?; *ة]̉ S1AC6Cb(}r-S1a2:]v*};H{Ï$#' e* }(dr1;و?GNJQ3L:;2,2}dA,R1p^L:\*LO[$|?u_hi4s*/fNI2 ]݃rvLo@%⧀Bgvoc\wode,J/,8Zç=?hbGf *hV4+j' Lv< [n99;2QHHD^)B|] u/rDHڱ#'$ T|+ t՞2حaI*-;Z[]8s9#Wg&a0a( vygĒ7/d Bz}z;ށGZy2(Ga<5:Ւv ;ny"Eh@@=]ʌA)d2 LC8%#.{071,{gy):10 ˄YAmȠ*0O)"Yiv}w/vaa;Suܢ°!"~04?>43$L%.qhIOzy+dVØUʗ-/9 },02b#AYa -_gcrQµ vQ ҄u诞RahលlAhLjX[;Y;&䠪x[Nz_}'cƇ b%_B>;γÌ5LmM~?UG!ϔ oqe HtL؟pyYeUx}ӋSCzKhI;F˴0WRvջcQ"t QmeTLli _y;,o솉 _Q 4:v"7[\! 'k8ng$ªR^X?w0,E{rLJf&`@F;#)^5R9"fkdM i:8``@赨M2w7Ͻ^jUϯA1'ʰ wͪPW}vCb!..]r/LI|&rT.=#gGv/B ""uu09a̰Q:5 Oh8 L!))5I-)jyȟK =M<ϔ.j.#"Jhl_S$y36B&hXcl6Wq%C] yr 6Z=Ղ*3d/J>]2L@ EAJnM 4*8舛fٙ.ZK/%Ցz/U9k8/e;&P[^6A w'/DŽS 7pԘ^®XbA龍6膛~oV}`_QHf5ePc |j72J2a>{Yus  VXW[u?vlJĤ'*?O8Gþ}N;>ݳ*p-^RO%In:J@:|ZZ,4[Zзۻ1iG XZ oif4Ӄ2&X8!ٹzS^#BR{`SXѺm@ _- ?zx';\A2ྲ j LQP'!D'98`4/ދy.9Q$f`1C C& iUW]5N9W2uϩ f_q\~Gs}Ap.|=]BV< ?\ں+p]^l0` ~?:4W!> ;PVzh[׭ev~I.|'Ӌ6ԉ/`Z=R?>6a|PO&d40 秉z P->"~ RM`)6њւbWp7ch=t]n|ukN/Tַ 6mi^/ųT@2etZPi1bv?tmeog.1DM u@v-f/]ώv?V bql\/84ƀڟ3*Ŏsq3#jyX+Mx>*oh._-,Tں*_pX4S3 ve@V׋0pqc-21~_pD|w3ŷ_tQ|W^yZa g4^ MKRKq*s嬷N ]E[bŠTD:M!6 ~%-PVd2GɊZ-29n nfG <{Pt659嬳=/ iu=MVJ_2zByTE-Cwy:Gq@IDATJ`~y`,}G # #%:vS(IbC@|L]lvs]}v[k~ПN w@u֏U1A >( e%S, Q (U57 *s±7Fnw{z{u3T8h 䈘{".3KN;`,ɝYwn֫ ?INm}OnLh";fʇ)ui9s f"^ ?ȋ8uT;)@Wм:s=c wז*HO~q#-(يsmaF3  <'b]<:WUwacKf [uW?{;!DJ,-'6kA яV"#찴p+ ) XSGzMUOPzI(X$RM5r-EzjhJKhRo]0v#S42V:ŗ6*:Y]>9DigqF1AL¶>C[y <&dt!)U|U ׎Rװ2UiOJEnhB.՛͔1knĵgV ˲z.,p?T4%- -FLJʔ2Tźصh.nd^L1@UZ[ 1&% {,O?Xl4A;f%Q[ou$"^D@bx4M"" 1sh\y#QJ(?SQy>ZuWKQDDxtluj>h{@1{5KWt_) YBk]M\7v"V` HKBG0şUS.m8J%et))k3q-s뢎 i;g1u)Y}lusEg8>d?CPd, ,xĐ餡!jXizȾ1;: ;C(8IN &IU(bjH1;'ƅ+ql>v*1DnnSſ7 D;tsX[YWQK9aoNy0y_c‼8hAOn)$5$N-[) l蒰$v$}{n~'u:c Pf Y >=(} sWK{ի^JTp[ll]1c[N hĭisyqit\&jj fk.=~;邏Nb@i͖ za fNr:R_KVcᢀF Mᅺav)hOVE4Đb;B9TwLWNPKZ,7 e[qD]6fÃag&\*&;gui"SֈL{;ށ^lc+^C9eArf;.%<+bGe/ f "RLɞ UsH0ʿ2@{6)ӯx S3fnnF O6\N[~RrvK ϒdeCL{$aA]؈$e~ -g \d(sG|gZ @xNl|I/E??\cQ}%/\$ G`gsZ\Ҡ,B1s)e0]+K l K|]V]@ M3~inZ'wZq3e*8'8 b=b-F1jw(|1!^bŘ9 O\`}ԍe{ꩧUc@6JG$!%Σ]{-QŹMTQ#cŲ-6@bǺບLG %<͍VB'w!D旌+ˆ^'O^*rчp* ADkULC ];se[,ai!wi~*0SLM?c(1P(7~B\AOl?$ LÉ]Qǚg~8|}Iꐇ;a`|g+cvioxum .B ^{A}6ۤ{OozӛHɰKTIAC0f?G?\3mꖶcU&ZbiuBE.dm9:\%4lm49(>~JԱpn&84 \p@$BgF3C= _`9U_I3ĦFI_x "/ڏ'Xq4@8# ҜL$ccHYЩ8ɑ\6䞙bW/+$QVr`%|bɱbJ)D,fG;"9v#Zmpsm3։h1(jkyzV -؊ty+MoV,9O ]~?N<%`DVx@0[@5D(=r V8,.zZS*!2g1A '.DffҌ]vK {KAGlJĈ*B aR 4Y]eHH?Kc.ڻ +ΥHfLXILtIW:S`xb6H%Lfq_̥wŠY2=H3(zF\BNH1!/ \6h9=&Lf Gꇧ EeZ+Fk(Exdr`vկ )YOY}`2vuq^"L@Iĉօ( ɨ Շ,h&RW +䃿 1>rADq~.kV_ed܁4VЋI5ПtjۈL(-b`캗 /Gs!M5̿S/I}^Wx]ɮjΆ>^11]>p(z';xC>0@ *PAqA E,(7K|EVZe(WB;,*Rd7xq݇ԧ6X4;mlI ^ҝ }ĸLdð`6UXf@Ɩ,/8k|Wb Bmz[zS=a5ax8)GԚ/P9sR rzY2ϣD{d0UNfH}%]bV/˒ᾱ -T̖bw8)>{^,=CkXMjk)X4Ґ~ gܙӉK#:>GWo$=BIZ뛻B/"VT1+5h@mQl͒0Ԋ?0}#qQ"Mo4ۧuBX$qwZ[c{1K3 EbP@/%ЋFO/h5ޙp0;ϴ|f]RTq?%M ^ Hc7ia">2i#GI8 RC2!!WRq2i1 nnz\r GCMco,JE IX!i3{+kr6x@7e yVĕ=* 嗴@ᭇB^ kq< ?#5ǟOKt c -r4:Q<.KwB[K"F5Ejԝ*. xMZ b9ibq_:&i2T/ܤ:Y-= >1+B?V5QDH?6b\;M(IX- +2"A窸*/"O* [/Eྊ@_b"ŐعFA,[vDb7WwM~k %(6~;?>>]*fϰۭdIq ȼOKڋI_4cY:EdQӉ(=ǣR|9b*OUsSGerϫri w}?z\ReZRLXRm4+va2)$N%àgl` GBmfqݡ_Z"8vFWQ0t&bS[G1nVJp̓F(8^^ge=-miRWB *X ,FT+ l Py[UP 11| 8 #DqqqU\ ;RNS4Uwk"] TFXڸ@ hQ Ze!74Q~sԷ*t!Ue80ؠj2:}z`sXz ߆nh vqG}R[VkZ5?_u|{=pKh]OAeu@C!_F6Pb@]Tn[oŊs !Rii^nJl54n}^a. ڄݝCleN ϺxQG,4Ǟu6Ey%/i\X3lښ7i9Ϻt>K{4tMa3,.>HfVB /{?YF{!Ok} ~blV MS}d,NYvr?{̖msb5 &pnL=[=]Xی >G/#ė{@]w]^Bq8mzFN)A8uUԙeq/}.a*\; &#Y |'rp͓zׄFQtCcbP],(lb+Y.Bvtzg$u=$L(>b М 6eLv=lg(mݶ#G/΄.i~yZ/@_Ҷ<RO;,d0m+;kiD? DiC\E%!! 7MjOnhJ%UWP`HZqv2񴻹 O[0fvsQ9i~p_ 9I#U\z;Ӥj\[ni, QrwarG8`Ebۯ)ZoI_(-]00ZW8m\6.w\8 /,IE UMPepԟ믿~~#$P'2~"15c"$\zQe \(ͬte 6+EL nvyKJXB[ s!CkOpGL|S?Glυ1@|$үzMWDfY̰baQT>w] 6W@`ߟvia1q"&"$`:as[8Ƒ D^.h<`$ cJ ֚Lܮmd:(4֥Od>V3Eo4B;եۜb gi>뭷bԟ1_.*HsKWa뼦Qo45qXYIue)ql>Ca7.\pA76;F&'|2܋.dA|D"ȡj#T ?c# pa@Ymի[Пc3 /K!X}pe \5@>Z-`ՋNgp^*8 GC'tTmKѾAve;G(j`G-"9uL:T#;]1?'maʻbOSEX&blQ/KPyx?< erd!Tv'r{.T*z%~{lg_7) /LWrÔ.j̽SN&^Nk45U@dJjXg1h$~\2^:.=]ܪL_yx(eԐ 4˝bsOg~tgDjM7t؀r79BA"u'_uUiv3@'pBiA_:vQlӚDiu45AirjAZ=iQ}14D3!NDLvܗtnL%yEg&di0X!{Q˱x :)Y: {K֘'pdm1"˟Ͻޛ/})Sqf JoCr'D]51h55)ĔwDЏ5b>lH(*R2}+pK|Σ!UY2'<Ƞ-F*a(j?  =.J/)F*x4 ٽ8'.܌~kV'< jvz]kʼtk>0na*,71&?!S!F2ffPZ2RfIqd 5`h>ф/t4W\qx (}UQRЈ!U4b82Yy^_T@Rc^nc׌m]T>׾Vc5gCW~ N1 \Ux w*ы:t>-% A>R)c{͟#-*?v~R&y{^C8^$[.4agfϸɭ;ә6 T$ZdWS/" 㾢 M @H,??svjoؓBTa0-+J얭HaDQPņң"뮻N}'@x}OI\|]\4K|-Ӗժp!D13JI\q۷w/*OmFGF-\%LT: O?֥tWcobkwk1 bLFŀr} s!ˀr..1b"{`6 ne_ʰY* `ݪ5\uc]yKUʔC_H>}bj=6ྉ`>HJa{a|nS+N,qlza2='F7D'4Mt *"^̋'W+ BVbaEȬqdkǬ~oJB/b>nH> X/ {'.Gզˠ$]1Zgu8]ZQ=ep%BJqu`BcI- S1 %;o3аhE!l"2w3A)ԕ!C,&%ЈpU^4G;ܐWqүc" }}8ڑ@ _#Pg9sU؀¿,U(G"r 1O^QQR1JO?6;:Fa2'9$BP Z2/3C-7}|Ӧ?s`RJ_lm<7fM0H2nN(ėimV[6\3_s7>rSiYJBIV?TT6 K3J~=ִӋЊ{l}%0OT>LAESPpD*"Д'9 ^B>_r&dW!Qe&crd2؟, &VBh c!(ĄG~( =#s=2WT r'}vTD瞓BVf^'`*Aغwt&Xݴ/r4s4 [~ dg[`ȌIS1Ჴh"U\$$[2S @l~.צnZc2Qx~pl]m;?m?^4s4 / -̢JcQL>ϑ_2φ%֚z#')LA (r^{̻H3p._6aҮռrõUyjh$\%W3jyMy駏fK='#%\|yyJR\{ C5 h ͷ^xaS'2=>vҺL1tW mj ``E$La\OFXH%kg5-!m.+iާ+oNNIGVcNHїK+S IYEJtZr4LX&d*j.w6B}5vf4f =A(] -Y}7})% 4:,`c#XZmP|UDp"h. dk CQMDlh/槷oUeY)pJbUR*bP҈OvQ8W@ +zd=8e`~sNr'hSkh~!wQB~Q/w}o}[_p$^9v(˯]vldCLDlHf,8- sMw;b;1f2_pq '@H4˜O<5{GjbwN9lJӴ`F|mދ5J+'#av3Թ }(0K+oqKV=y򕯼UPqX5q"""pqrڨAbs ; tv;S ߱k8∴h)$qxTyR8rXTzw5&Kad[7L.*_S/,酲*zX+AeoXvʖ~[;q f3!} CF 6 6:s=&T>mY 92F UT*:ɘ8~_9paa|]?b58%8DGHVBAd}bډ&ӀR[*д|Q:E_W+;y^AXzRBaA(/ȑ(:%Q ʘ^p$ φZHr[_bڨD`r..]IױXbVGS ?asf7r^d qg/K]Q i"xޫ,h=v'eIҚ'H&~r AҺ;O!؍$4'BGȐ ڨtmw9}`9Gg.~Do|c h&9%5iv6}AT4LK-(H/)C)G(;Zt9ty4֊r L>_}c|m喣 zO D;@Gmo ufaXPJ=-Ƀq% Yc8-1HnlΤݍmp ؔ`*To:$}ZNyu S]6 5[tR([z[p_ha*)* ߪ5cK ܉V_}u:\l!N>0,73|yV[Sx 4%_VunZa΀}#.b9!8zj1SEI814L44#q& P#+~sb p2AHU>w2YQU^94|^y۱ +J<^ %SW iV#qBG_W^yDۤbj%gtAR]p_B!-<+îvђ]+c*@ޛ1jmp+K̬j`JaeC1';tIJ#\pF8b h{@y]§(mYQ4ؤPu%OPྴ!=D4ڪv435Z1rPEK-V11N ȗ󡎙^35 i&) Rb@٪We]6>:V 5ƝZ^ ZWWۋ UZ@ؐ hl)lD**Һ)3fmb5# 8|o|_a^~Ҿ&J/8r ,(`R+s@Jdw001W\qjD=暴؝6.% f0D.ϴr8\hH7SH|Жi'U.tyH2kh MTETpʦyY*`Tޟ#1fg#.KW'-U$Lj8\l<-=zF0) OK1i ,u Zh]خ+d|, xaOzgbң K~2ի4 NxI"3IpvXrџwՇܦVߝ^eTh%؀[2 b;i-Dp;o׫a?Qt]p&D*4W.+fW3_ Hog2Hst> &m>VbN^k3Ҋ+9d:1;ʖW2WÙ jȘazuUW-|%'^JzZָ,^k%zcz9ꨣ`y֖lOa_vͷG/࿒4*ްacC/7RXnn 㥷J_{?i&{_Jw3 `?xu/+Oh'.Bunz9kOß@odY@~zYv-VZ{yba-J}u6uq]_sȝww1>p=`:lq}CluGV!;|_?uu 7)9&7yݓ~#Glޚsr'p7}p_{Z #g~ccn<9:fɺS~ZLٖ诡|:Z.[0Vly.U΀R@`i|$Eμ_`=n8&(tSu #n6BC|i SgQys@x!Q'Zlj>[o2+/&UܗC!TF <"҉qC'Z1-wha=s bLD>] `!wohUXLg񄩅uD?DB /t?6G B ?/n;[ Ҏ& b >PI0  -~\1b!>-@R"@xBy6-#9'zLD"!1i h^"plPMC ^{5W\DK/pacL"nTZ㉰t磰YQ煍fBa`ԥtTE: xҿU|QAf AWHzE";16bQɐibƟ!&%iHep%ʻ`ؖ<\3to-r~ՑE' NwR9ؙ9`vX8.21IS&M5?'v/pXуK]Ġ?»cXo3{(W+}JIHwtiPxPO[!t)2htjq CS@#s>\2BtQH[&A48D$L$a >8mƶAٺfMA b 1jM.(I+ՈјFAv[*j!Iֶ#Ü9|_o~!Ι33s< #S X..$}Ɖ:eK[{B .`Z sq42IDATT tR`'ƝVrCqEӟT,]j(X@ؠ]`HGi7acz햫ƾ jMvba{/C(b~fŌ%0kf7cΡgLm\hDǗ s b[{`mdo<'GlRJ_Xw93K=q!,;[TcgB(ܭJޜӧρƠDr ǫM8Հ^'* EbK8xM>fc >ȱ$ـ;'nJk0x?|I8?'*!_~_j`b%"l Y̤gMjs&bP)Y((7,M':@t Θ!X7 hnY:G˼.)gЅ@0 dƦA{}ݢ m3KW]u-,^ dq]2ZQ꿕ˣ'?9G}?C Hւkǩ;+%OUtFyt9\:g:?,lX6G&Ʊ[ՕW^C!bZtGKe E>OjSiğ8Xj\ '{X?쓝Gc_HCJO)Y~ 9w@-n$wډ~v%/mT;t:kR`P`q4s5#:;9٠5&1Psڔq+ S78*kTl 1u&}lII5z5z:;qza&HQ3ؿxW-T r|"=dL%cS~_ge@C5ȕL^xER'2I\o(޽3S7hr)(t/a hN>O\wu=nd}7 7'|nJFs~jM&da&k.(ek0XgW})|5[ n-2Lq)pGwǢ5 6Yj gPv\ fb&GsW\}M7N`t]^}{_5WV 3ɧKh@S@['vh5LKf٠^J0#jJ^Ζ}z1!~'85$+G咜6a$hF:/7!$7;y_-\WVZL<㮐]jOؙ6XEWNmZv#֫2 %þECh%'Y}a@##Ox#X.xb|'B& p )|d|E-)S^]d,~i4NQUr\2}7_p)"q˯k)n}}=JJR`Ig UxdĆq nqUwqGRbk r R6 ^o1pT+!fmS)8S`1Gf d&XBʁ/t(IʰI4qf^7:6IETgFa;cʹ]V/U T FR%I"AN|8 _ѿOSQabʨ24WPu eSQQ4gsO3n+ԨJcx%0G~ ">b, Q HbqF<`'xi;$/Mg=^<ςdS]YMZS)"Ttce<]p1f3R93W t!xXK}5IV l@?q.AYl5y/C*>0;Qʉ(vߓv<'޿g/yS5ql|nY(W =);v1o9×\G.5"=)0o"~%ap谞hz6[cj}Ϟ=twg9RNŭDko97thr Lh)m0T /~'^rk^p6gë&Y}TR`-( ZLhLb&yQu8#+yQk+VNvau;κrH Ag,\% ܹԈ֗dbQD6 3Ĩ5: [++N6 VZ =)Dh\ʉWLP3Jf,=\JAD?+SV ,8>L%ѹ4/ o_$L!} S+A}I#>.3ڔ1+7wb_8R)ȓ:%kX2H\KGԍ ȟVfʼe<3HKQP3xq)IQ K]5v$RweßsHRg!`ڞ3iڱ%g%P.Ufgyf;"cH!mW:*ERBr)@c|s;mpШؖXpK!ɋ쭝O'%~5pZ9ǗvUocѺ,/jR`R@vF9Ǩ2V>}K?YIe2%g8=F0?X.@K5yhrϳy¹61uzhVV ?|'PܡRV},<˳8"ggm2jat8Tʧ/~% #/qm\.9SU~l]R4%-kJur98}$߬qu$||%ܨҠcTӟ4ERyY0~GHV/}<ofsY2H'g/gidrꒈ*^ИoBGVjͤRw3(t\) 0RشQ=+] q-OhK0?f8DE+_qnD&|GE#vzA]}~p QSNC3xE1icw޴;jқf*HPT{lè~ZS)b ŜT'c㠋?`Fo:8h%Dw BQ4[Ql>v6fzSĦ"s<1.Ww #e]Fz}q׃bhymsυꅼW <. x2M'Qe6ظ`!E7X}5822y.k¾;ubУqu^Eo OApٿ?\ hU2_0(wg<ZsiK\ɵoPD?ß 9\tP_YK6ή.&F /n)JYaD;GawBPyc ͱ Z6^Jʧ(ohoZ+ 8ho߾FMygr(Hǒy.N~nq㨟ڹ[s:e~;<0|_PsBpEڸR`e傠3gPG^ +C=+ς%kS)-}(:h믿wF,&|)3l!=_ v HlVcd lJǑ0kY1[LO)bso`n8>杣H  ״~_ .m{.B#KUjN-6SO=Ԭ1יN;Mh/־U -B|UtO$߆1eZ']d{E{!ϛB:FY> |GOLFþ̿gu6ГbȚCwUW1Z%Q@mE5yüo:D՚JuG3 d,>> z}.ǩv,B6:Cyo˘7HD\Ԑ}!:ڼ9'uŹ8*h^Sxo1@\s'#5aKڕBT)S̙p13Jp|xJڽ@䬃b}6 6V_r`0ξrhų76j艧Y T ,l lQߗ% ATm5SjT >mgPt%4p NiR+tmV));˂*jIy2PpK_z1 6`TWqTn[&;gB%r>Hj߈w^䜰v Pcb%wtž6jMr)?Վtzrdue'u_ fȭz㗥yd; Aw \osfRv8V;plf $oBF;;@dneBK͑iƣŁ 5W}*3AXp$\TCmנpY)" 9ZN/uhuQ>&fIv%YY0_OO@d2ȔQ5 8!Q ꅱ+MyJEQO?¬IF d]o!sGkPX_}*"9_iK>yž-ZCU ,gRpS.`UujUC?'x饗ҧ,u{(DRI;|Qc[Jrtl O̒M\7(. `[ϕlB%QI(v_{p{w̴֒ (h'q";v#1Z}ٳ'&^WG)Vص:x/,ؖ6,{ bQއ9裏޳Ly.ھR`;RsmGž3W L@4}l ۾2oɻTwH(E|5ycJ{Qf~GMks > UQ33Ku<}WWCt\VG!(ԅP6Ԍ%Ÿt~np>ی9m쫩M֢gv0HpO$3Mo'L> OCMC@2d0W?>:AuHj('u[|P˂$1B;d~[HhE+5+ YxX8[MRnVLމ|ܛMńEK2]l)WCt".ٌ'LNx[!.;_kiaA@)Jm uBBfX],M:*ey ]JۅpW:>NW2ZذU5盃KK^')JS I:I;LIж^mIlh*\6;?m kh:qXQIhANc39(Vw Qګqnuc%?-frГ7.+ CAY?p6%1jt`A=- Cp-0˯|+O8h>b_BZʠB7\ؿDbrO Ֆ_rnh716wz_uXxax1oP|RqiF@-,)]2w\6b>ϥ'm&W؋~絯}*FK"CL}'$VCXmA餓NFKlkgYC@I$- `+QPf )α3dL7Rn8s0\- (Dh)|>~_ZKbQw>5?FhL3e}x(3%q'8~}j*,I*ttҫҋq/6z[:EGiD5D4\Yy}n)/.&F:2QzjZIEvaA]%_=q6 K,6f'9b(Hr-SQQAgbNF]•}Ӟָuu>uC1s`!o3-s:Lٽ/0ӆ!űNeJ/9 W  '/W3U[E፱XP•M޼2u] ϠCO% ':J2<3+=NEh6ܷo_g}tg 7l'!lNqjj,fɄ-ư(Xce(}V(z=t`!"g>±8jy[}5]-:JkB10e V/}ט؟qZ}Eq4}3ϴ-NE|GQg7R3zPAAx`?PoQ:At qJϢQg>@H9V`XvR$j׫1H٠25)3+51PnLh?/SoRTqgi%Ӛrܟ=sмMcfѪ*CE_oQp &^jK.uvN:eꪫ\|IS>v%a#_GYO’PQ} :GiLka1wB|:`ov :d#TgqFw Ѥ^|7tSpܻw/5*?ѱ8s=8A\_˾q/8º>믿 : Gt|d|Xq%_Sɽi33H"3el9=}ZGis(fVdkC=iOrl:oF!-d9tZo Ѵ e^4݀}m[z`;bUE,;+ y26ڎ48PDiáJ}-Ʈhg_;j:Vz`\?5uSAz[D+<}a%y([LIOL1e&k!ѷi̇?Z=Xd6`n 63U:]bw'ܹ[+{k7N¥R+h1i&+UR3ߢBܸA|2mCҠޚP qoLrO+0wyW_L |^a}GA~1Q쓳y晇z(PtNkk7M?n4p>CtxWa3ߛ2b`w!^GipM(F0ycdS?3>FsI@(wn~nW-oyKv NmgJCk Y=yF[T 2ڽ{9眃#,3}ī Q:˴,v9 XT'X ˨3ȳ>CPN,m{Y%Ba}(fqbg'm.*%n~jOט5Lnt8: 3d#QviLߏ` ="0l]"qDB(YfZf 11̀݌ VyC<"s8L'TՃx5HBB v(13kQ"ևb&ףzz*u}NF}Hl>fZ;N᳥"c(smΧ)C# 8SXk SdO?\{ |ːӜXk"< l\G0t.Fs6˷W.鐪Xv a詘MiH!=RKt*vŃH1:=u@umfMRY˔I3ohN ]1O)sz}攴ey9(J0!%dɽ5~K&nGjgc-E yƻ]8v'FDZ}$EEi"s]TXqCjԦ37m+c?uc9F4{!Eʟ"^eeMP:h١;rHlXO<1Ze60L|@yIt><y/b[->K8΃$[J@(OM3 ݙlFŇQ֓b2|5Rb, Nm@D7(X{ㄬX`tE'0J qIGl'ʒ[Or O?;F[G_mU5LpsX9:c:bc!}`'a棼3t26WJd$iY 4+,IKFw"O2W&coؘg+,<&2!K=/.,_)X|V cH!B7L[35jr q9*;=oZΨM T Ѱuze9 ~bV^FVt,*C,1keF4 .j>;zy ,upy( ^i ǃI,=$dئx*NW}T륏U d;%&l]t&9=av2+(yb֒WEhA( ѭ=m숓7\*Pc_E4@VQHe3a> q1߅|/HAƲx = F'B:SN`_o=#Ӎv.7(4bs>fadl ~, D/ *4 Zr$OLCv1H(21|/*(MYo]@vbeX?ޞ~6Yd^z)R-Eur"o)O>Y3ЉXe\ L~?~ov_uo>g]J=`[hF34^ֲQ^Q׊(ߣO_ƶnkFo5?῍wB}Ϟ=H6yz=w]eb߾eO|O/~i&t=,-e5.o}[}/Ң*Aߋ.=_ß5Fyow ۣx1QePY)P)0o)}TXVv(AB(8؜h|/O{B z[Ww}ɞ&6%ƃ+]r/t"p=^E,۞եy K[bmeA:{yՄE!W%0 $Ʌ b_Y${l;$PլE/ LTJc!Z"⮻$2(i~Q:ɒJte5muV-Z \l[-\8A ]$EK~֣-|/_: LN h~^|/y/%7L-N*f٤# a,:;PdإZc/1w:G@IPF : Αsft(LmdQR⬌bV^Gw*cJCx/zig~/pV~9Zc_|x.H|_cM@w@Y[®Ŭ1uX5 MkҲ/P 2/WQ49uhepˏU~r';ﵿj0'&0-os`Z3q 9r˾}΂u3$& XevK- AO_ZXobBzk:J  jFJ3[8# R䅹D&vkowT3IuY(%%C8 Qu>M}=zuFYGiSoP hS`T wh\pȨ>;翗[b|XA38?S8qa,K~WyRھL^H'6߸ol;P3.d k-y_h YT= ̹>{كFtNYm 9cM46o!/ҼENMԦLnF$vB.|@W⹞+4|:(AVuI[bpd![8Md5^"? &=o>z<.UW|_ʴ @iǻ0.nYG# 2#D ȕ ū,Udʴsc?6]v1}-:Ӆ8)!9}JCQ%~2mv6ue3--:|GdGnCʓ{+DTRWsdnwI80w;fXo@Q"Ba[QڐYA?A ^|}s  <|yu~#zqN>d0>d %h6K¶=oFN|FC/_E*?^H^[:@]3t\fل\2v;/;E':J$vGh_1amfx_6}5\_k}T~8!8Pw;[B>AtpK+_D;2TV1:XxC?Uen2u(E|״UfžǞY-a{R\2:W$+1nۯzի #umK1 D?LU2mX<ʥ%AR/I[*XIǂg4 rގrZK@Hr݃o>L>LeR2Nzk!G7!-<4"d1X}/BmU"VeԐ[1ԧ1TI o`ɩiE+M+^AhÔ9 }g\TE.((P)ѕWULo,?I-CLBh{ J@%mwo|/,0T kB|h]OD'WQXx@D ]ˏh"XnV)?0TC<>̝8\%e(lV<Ψb$In)=}/4{a[Ë^ؗ#7/AV'uV=E::Jw]26_I|!,0v\7geعs[vDo2 Produced by OmniGraffle 6.6.2 2020-05-22 13:14:22 +0000Canvas 1Layer 1vec_count()vec_proxy_equal()vec_slice()vec_order()vec_proxy()vec_restore()vec_proxy_compare() vctrs/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171213762413632020420 0ustar liggesuserslifecyclelifecycledeprecateddeprecated vctrs/man/figures/vec-count-deps.graffle0000644000176200001440000000545513663716767020055 0ustar liggesusers\[WH~~u:0Ȓ`Ι# K$H}囮`X2LXRu}_WK6~^Mj {^0eu{͕mt?#ߋ^X]oڣZmp{l@Uc"IFZ+R`:‘}l^[a&́=M6W~ڸT7m7}FE{AO]oYhD T6ZÙ N]Lz+9Q胟6$K px(8z9Fk*\DpIFd5zb8 ۽?q2jR`х?mnǷoh@k7*I$S^3@&L q{[wrT3'bW LrњioG¬a总n-&է0..'72stJ=x}o,eSq|Bܦ3Q$%c "huB L_!ƿ|JN¨|ucz3,*m{wnFp/kW7*ʦ! "$%ONC=ּTACfnϾ L/" G]Q X)2ϥn:Y|L4vӫ7! q.}fJ*(iNe=7p(&!6, knb eS$)-cM(Rf̖XH) ̀9# Verqn3%Tm٦m[r!% Z\E_8*@5Ց_v&Y7PEyyTdKA3Q6jrbQQe3DTh~*=uQ 9lv&d0f8MNq/앇V$38Iǎ^ "u7fZ?ʟT>?1- IbCaQfbjF' 7DmYǀ6as@:Df_l N!l'O{<>ɣO,9` %}dZ1 8ۄܟXW3W6/?iiwwW<͟17ŨAc=tzS.L"0XܙS\dvZfs,g?0EGf6w!=7mrk?&3BA6Z6GHF /?e~"L[A!p17u)CHQ#TKlҴ- YJ.m"5ӽpFW='v ^^aSSG?a*cp<}rhC0*!lJn"Ŷ9/XbI{@^z?FQ#ӗ{nyIEI9AF h +_ jhrf;I?Yr?룳~{DJa | 4aAHcgH@cߩ? z7g#'%׌#1I`2/42TTǩ IƑ}v\$*;he% #rEn8ir^~Y7!u(,HğIC 7PKHĤ M,jIդNm3} X `|I3Ka YuܐzC 7't")=*fLX8NL>ɃK2tHm| S tnI&˔.7Fņ~DF 6IFf%֟h!B{9nbS,M 9:B$Xl-+X6 5칛=zF>T KHTud5DYX7S9μ.^oxIT_LV(&zٷ2tk, Yln;0T7EAxkR ;Rnp |7cнTb:3'^읗K&I_0_}Y}V<wWvv{A!aOy@[ko#-nض~msѾ8>?}뻃|.wO6wv1y{;iw?knU?_ܣ)+B7*Qna0zUiuƯ1mH>-A6vƼ mv[g#\G:1|G(@iɜ;y(Z(.U 4]LᓊTk1Aj.9IP'yOSe8!>W!V;AEwdT `w\uQH=YiTl4v'.daOiQ{,. wt5Cm#ƪk?$3t|IR꺾s4 o]A~XLjGP+[09q`WA?U ^ERJ̱NkaoZ4[vctrs/man/figures/vec-count-deps.png0000644000176200001440000004331613671672047017220 0ustar liggesusersPNG  IHDRbKGD pHYs B(xtIME lj IDATxwXT (E4 "("6ƄDKMQ؍MĨ` **X@ĀB& 3?+Ig̙s^5}81B!4 B@!P%BB!T@ !*BH&I! ]ͫWp P0kE޽1m4(((P@i.c!]IJJ )) sss H%$$@QQo߆*:u*q9HKKS@ #FСCg !cKOų0c DEEQ0J R ށ,***(P%BB!T@ !*ݻ(//=t*SN!&&ߏ?^\5w\SJHuVzذa.^333"##qyl۶GPB= ŋE---,[7xO> "00BpM̞=[۷c݈;-[uuu3HLL9#GG:_^`mm7obҤIu.Z(%%eee@KK ֆ]]]hkkJJJt (!]!q!44wFHHqǥK[nbŊr3gNm[~pttѣ>>>|2PlXd `ذaHOOÁ4ˡ^c֥gϞRRRb?b hѯ_?DEEEEE(..FQQ QXX|˗}sssE={W^OEEP%Ś5kwww(++1SSSxzz<==addJ2]AB<JJJD)l@ z\^^ 991p8%%k.򐗗^ן˗tٳgz*PRRӃ%,,,DׯMbOyzzb…@XX 0331"###b޽Xp!á5j6mڄDѣppp )) )))ƕ+W0sEFFqqqӧЧOXZZ }c-Aƨx1c\|)))@~`eec%0JH%$|||pM m6@UU\..^iӦaʕO?$8[_5 %%Z `ff3f PSSCuu58NQۏWZooo0ưk.ZZZ0aك~!!!HKKɓۭաAx IIIdffBQQ666ӧOR;v`ɒ%Յ Ə:P%wD{ fff&deeaee[[[\+WD߾})`@bUg]1{lфiiiGhh(,X `ĉ?~<)A;]éƒ%999ڊؠ_~56(_g؅w' XZZbԩ6m%4%Wff&"""p-DEE᯿Buu5!C>= Фl 8::v½{?ׯ|||C[[FǸu"""TƍƍaggUU&\I HUTT@^^KÁ7 )) AAAؽ{7V\c?G9.\ʄB!puܼyn˗/ahh''' 6 ÇG~Z3f //ϟC#F#k׮noƑ#Gp1`ʔ)Xp!)1P6>}WիG^^,,,GGG899W^vzz:jXZZv |>}O<2dɒTRRĉ!''G ѣG駟/@^^ .Ă X)JG ݻp.^8puumbA߿?rssiN ك |/`jjJJHqe?x1c֖Q B9s7oFll,OUV_~*O||<Ο? . ** ***pwwMMMM nʕ+q L<6l!!66W9ǏcGnn.~7T}0h 9sf“'OoNNNt?FB+WHJJB>}l2Pp8څMݽ{N: ٳ'.ܮ̙3/K[ ,I8r>OQT4 (@T@I ޽{a`޽9r$]]\EEN8///য়~¤IƍXt)OBZ-[pssLuN(ڵk ???"""iiiؼy3LLL(P2 @TT,--qa Pc;233w^ddd~СC)H_X|9ϟOOOS`#,\X~=O >w 3_|X:t h' qxyy 4BWWDHdii7ooŗ_~ 777deeQ`V\\-[Ǐ6߿|tθt hرc8z(^~Q`!bرcXt)&L` +WFGaԩBHp8|Wظq#O_~,6p5̝;\.W\3,+V2|}}Q\\  @>իW^^^Iyg ,Çcv-ϟ?ǬYӧOcܸqBH5k0sLHIIav~!!!0`deeOœ*&M?k׮Ν;) T@;/֭[S>Chh((0V3~x:t8x  .\14=˗q L4IigFQQLgSm?k׮E^7jLxxxQQQ BԢEPTT9s@UUnnn*m/^zR,]哒0f <<DBHXj [nʊҊ远:u %%%˗{xtt4`eeP*vyf7^^^ɡPm;;wP(Blܸ/=ݻwq!88< p8HKKS0Hp88x tuu1n8SPZ+֌*z^RRSNÇ!))X5 &MO?.~#GDYYe455q!PHΆq&IF@߲w^HKKxɓڵk ooo߿'QRR߿~r\ <'i8{,cF[W\\Ç* !!@O>?ORy扽:c sΥ 6cmmǏ *_uSBB\.6668}41e['ĉ!'''vD0|p i3111Err2 hDOIIIp\Ӹw(HAҘ={vdddGǡH ~ $"ȑ#!!!P|W1beih 2OOjjͥKh 6lnܸH*`nn?2.T&v҄ pxzzN()c[^^3g`ƌؿ?ἣ>?~||ӧ}6x<B!`lleZUUsa i H^`_a~*vcP^]Ԃ:zԩ`C\\Ҿ2v؁Dx.....PQQ=ĶcܸqPVV/D{zzb˖-B=D<}4,,,@BbȐ!(**P(ĸqL|^Zl'Odrrr,&&edd0e떖LGG%$$v٥KXrr2bc1fϞͶlƎ+CII07o۱cG.n2{ gϏ]p 6L_?~|x<̎?.zmlƌ5OHH` Ă5ƫbΙ=3|ήQˊ]qkr2Ƙvl֭,66~e1@~6qDsڮ6#Mٳ';tM'}Owo17n@FFx<?~\Ϟ= I&&nHHH[n59wǏYfa̙֚[\ۿPPP3gn/ھy?z̜9b^^^b?iŚ5kwwwdٌ1S4|722Bzzzs8ɉ+**2{X Ըϯ1Ղ A k.]T,2>guBEEakk ___XZZgϞd0;dL+w<{ip1 :'OGӏiPPP6k IUUU\}Kgu>٨FPP]~ԨQr QQQGf?fϞ-xaѢEipŭߨQp5S㟵5 f...L(9oԶߖcllٲe5^j#??x;yC9yB (itpjkw^dgg7xPs,X.0n6ݻp8ܿtԙ#IyB(O[X,5k-Q<8`Q$  {g}uuu|I  '':::mHVV;fEEEO?YfV6~~~PPPMڑ@~~>VX!vжGGV;[;Bdd$^zաgqq1n:u甆ѣۮU@*l޼̓fKII%? &4۷/~F֐]]]$&&‚" @FFͥ`wb 2Yx}+I'Ptttwr`۶m\RP 9 h~~>4 c 9fΜ{>Mܶmo .!:4>O!T@;& Ф D4)vލ[n Pxadd M555Œ%Kg:*G}^zQ H-5iBc;w (!GkMXJJJذakdffRB'Mh?Xz5u !T@;j\~BA r{n9rQQQ1PXB!n߾$ iIkС>}:/^L?ڱȠ>٤ Me֯_OIH+pرc 3g sqygc\rcƌ`eeEJjرc3r ЖQPPSLA߾}) ]c 066yyy%8::Ν;u9?~Lj$ix*]̛I:zB3nnnԩNsEtt4j&##___ @[wp(]Dg4>Xp!JEԩ u^Ur?@WW::'ݘ7o)څnΝ;@'R0:e˖ŋHHHT}8qK.EAAx<^mmmNB=z?#zNYYt-@ƠA uIח#@ 'Oy扊4>C*4m[wŵkKw2SL˗/qƍN/;uB322```@Mꔖѡ֖C#ж3dĉunHt`۶mv[[:O㠤>z=ܜ'vs88q"B!( c 9fΜigzCKK ׯ_nj3 !!!z^JJ (iS8Z,͛WcF:֜4'Ad &ZqN0p!2 ƣmHJMt4LP^]{xUQܽ]uq::m"P9xciI2r1=hҡ(-.Wܣ1`<tDR8_5Fl9BIv,ף)~eus V]]bhhhP0:m۶oӧO[FiU9UYv[Ǥ$xxx 77%%%]~s1HFы^lh?BV]/?@x󖎁xiILX8]'qqq9rd*emMF&EQd`GT! h+p84iLMM믿ҎVX!C`„  B;XZ I&!**یz:7&ܽ{AhgCA4iBwp].]D!-S-WT@ہP(ЛI+ F3:u 111"33;w.H+* //?#"""~a͚5GϞ=) ʹuV?lذ/^wIwNhѼ1uTDDD 88ΛڒwߡV= ŋE---,[LL\\\\\{{Fͷ-[ =z޽{mprrƎ4@bb" O?}4,,,@f xdÆ 8rLLLDիXr%;e5CCC1~x7EYYY2d  1n8!$$'O… 1cF/n=j{ڵ2dTUU駟˖-w@GG6mݠy%.Z4i7YYYlΝdEEEVPPٞ={ڤJ>sf/. cÇ>|󙝝;{,+..fzzzoa^bfWnXZZ2RRRؙ3gĶcFFFڵkL [XcBUUU1CCCϪc%&&2yyyv%̬ؾ}6c%$$0yyy\Az0f}JaSΙ(np٦c}xI&''bbbXFFspp`[lۆϏ͞=mٲ;1XQQSRRb)))1͛vQﺋ[qmϟ?~ϟ3--z׏1Ǝ?^|x<svvfǏoV^5Sͫsa7ScER/ÇS0Z?377 (cyzzݻwJϊءC.g֖-Z]x0@ФbiiY+ĵc| e˗/gqqq>O]]edd7]xlҤIj1Μ9ŮϪU۽6cMÓ'O2;;;㠠 *6 GYYݻ7SSSчgWf<ikkz١CةSؔ)SXii)STTwc,;;CC{3i;CXf eee0`jj OOO*###78y޽6`Ϟ=6m;Cɓ222G544׬@EE`9od;K/ekJr8ɉ+**Xll Ȁ磲R̂ ASc/;;%z @Ьj(Z"hpjt"cFOaoozȓ&xzz"??Xt) III033䠣aÆ{GXXXφFnn.6m۷nnnHHH^!##III5j\DTTTѣppphroXZZVoc4㑝jnz-N}f?fϞ-:FFFXh|||\oq\o`aa )))TxW_}%:Eo߾0æLRSSk-smrYttt~}hذavG={2cccc7n`:::L^^'O6VVVΝ;tԘ);v,+,,͛73~gc;v` LUUӔ|}}k/88.ܦ_SԩSԔ2MMMrrrꍭdZZZ,;; ֮]+zFmh=jϏ>|:uM:U +//gVVVlݝ 66;TS򊎁v2Bݽ{mܸqe&--$%%ْ%KثWD888ٳgkJuVVV:ߓɄB'|tttjAFii)+..x=o6c>Lܱcǘyf1mڜw`„Babۘc1zjlٲFT}ضA˗/ozוSM+*Tvv6ۿ?ۼy3Fc?iiivN>deevJꖓ˗/9KJJj{۲75-Zۗ ::˩7Ź)yU_:0̛7QQQ믿`ccӭ㑔SDj^AAApzE ikk޽{PPPحb WWVmc޽nġκ~r 455[,v<;HNNLK.u]VeTy*]̵k>}`]f$z-XXX@]]:t[YYYt?޽{cС066*]Hyy9v >. kkkxyyQ`"=zTCSSBPDGGnp: /8< `Hr%OFdLaС].t 5'dcȐ!޳~xx`MAlhLZk? j<|111B޽aooO (iMu>˗KK}p;}\(jⷸK侈Bɝ;wp )S1*!&&4woSNAVV&&GJEU(,kl ygrrr;v,,--]MܼyBbw0#;6-xÇq…Y[[cTEM8ԍO%RW8,, /l22qqqpqq Uo?~<ƍeee -[ =z޽{OTUU(:866C AQQB!ƍ0`ɢ6/\3fx=ˆ pQ^uU\͎aUU=z`l޼ nݺjJիW8uoߎ}A~(8F~G?l555aȑHMMɓqΝ3<<-Nχc֫W/X[[͛4iR6)))x1RSSEf}r"999JB  33eee۷y>iiiTtB!T@I #Gؽ{7BBB64.]\.VX.9s4+++ z2O>D@@`֭_ؼy31zh._ ???;`mm?>(++c̙8p֭[gbƍgϞRRRu~7wwwDDD_@ WAhh(?~BHJJoRSSϟ@ @=jP---Jk֬ݡ LMM V###7hLQQŢq@ qvw Ȁ磲R A^_^,%㐔L+p@ZZ pjϺRrBb0CIDATXԿ<!!!PUUEϞ=1l0уP%- .D@@nnn {… 555 <6㑝 ^첣F¦Mccc=z׬Y333={6"##!)) aѢEc`ii$$$ʪV{)Sf?m>pm$$$劝X^^<{ )))BVV*++!++ ???駟RH_WA߾}acc000m`cc^z>@ZZpE|WPQQӑ`xjYF4!C믿!j*[믿b߾}6m$zk„ gϞZm -- H Ӄ7.]ֹt`󔖖86ի߻w999ǸqdXBT< iitCnߎGĂc*v9@Pk) BzOVVtuuE͖,Y'N*6سg^%njCm[~~>444عsg 777l۶ O?'NSdijLh|Q}RP(DYYj\Kyǃ@UUpss B@#!!Q(JFFFlӫxϞ=bGycn \_k׮AMM u0b7qwVSAA#G}-gggCVVRRR4TUU;D())Aee%x<x<a``P2Xǣ\1www :T\\\233eee(++CII "Fۓn O5 fcgr8055˗/QQQQ Ǐݻ@ =#F#::Ң*)) X[[(d==lmmkL8TWWڭs[n޽{o R$Ɛ< // c8p !*III=(ԁ1*Zg؈vaWVV.244Fo344P($zQc9www8::BZZ>LMMajjJMBk D D~Ѭ +.A -l@+ KVp+mfTt թ$&L(TUU?YRҷuww^,5wgcۋ0~qu 9S'1Lbe~P24&.nfp{-dn8>xHΉE.,~m(m%yf;~*(&"PPPPP@@@@@@@@@@,eDyL7"IENDB`vctrs/man/howto-faq-fix-scalar-type-error.Rd0000644000176200001440000000622414027045674020534 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-fix-scalar-type-error} \alias{howto-faq-fix-scalar-type-error} \title{FAQ - Why isn't my class treated as a vector?} \description{ The tidyverse is a bit stricter than base R regarding what kind of objects are considered as vectors (see the \link[=faq-error-scalar-type]{user FAQ} about this topic). Sometimes vctrs won’t treat your class as a vector when it should. \subsection{Why isn’t my list class considered a vector?}{ By default, S3 lists are not considered to be vectors by vctrs:\if{html}{\out{
}}\preformatted{my_list <- structure(list(), class = "my_class") vctrs::vec_is(my_list) #> [1] FALSE }\if{html}{\out{
}} To be treated as a vector, the class must either inherit from \code{"list"} explicitly:\if{html}{\out{
}}\preformatted{my_explicit_list <- structure(list(), class = c("my_class", "list")) vctrs::vec_is(my_explicit_list) #> [1] TRUE }\if{html}{\out{
}} Or it should implement a \code{vec_proxy()} method that returns its input if explicit inheritance is not possible or troublesome:\if{html}{\out{
}}\preformatted{#' @export vec_proxy.my_class <- function(x, ...) x vctrs::vec_is(my_list) #> [1] FALSE }\if{html}{\out{
}} Note that explicit inheritance is the preferred way because this makes it possible for your class to dispatch on \code{list} methods of S3 generics:\if{html}{\out{
}}\preformatted{my_generic <- function(x) UseMethod("my_generic") my_generic.list <- function(x) "dispatched!" my_generic(my_list) #> Error in UseMethod("my_generic"): no applicable method for 'my_generic' applied to an object of class "my_class" my_generic(my_explicit_list) #> [1] "dispatched!" }\if{html}{\out{
}} } \subsection{Why isn’t my data frame class considered a vector?}{ The most likely explanation is that the data frame has not been properly constructed. However, if you get an “Input must be a vector” error with a data frame subclass, it probably means that the data frame has not been properly constructed. The main cause of these errors are data frames whose \emph{base class} is not \code{"data.frame"}:\if{html}{\out{
}}\preformatted{my_df <- data.frame(x = 1) class(my_df) <- c("data.frame", "my_class") vctrs::vec_assert(my_df) #> Error: `my_df` must be a vector, not a object. }\if{html}{\out{
}} This is problematic as many tidyverse functions won’t work properly:\if{html}{\out{
}}\preformatted{dplyr::slice(my_df, 1) #> Error: Input must be a vector, not a object. }\if{html}{\out{
}} It is generally not appropriate to declare your class to be a superclass of another class. We generally consider this undefined behaviour (UB). To fix these errors, you can simply change the construction of your data frame class so that \code{"data.frame"} is a base class, i.e. it should come last in the class vector:\if{html}{\out{
}}\preformatted{class(my_df) <- c("my_class", "data.frame") vctrs::vec_assert(my_df) dplyr::slice(my_df, 1) #> x #> 1 1 }\if{html}{\out{
}} } } vctrs/man/partial_frame.Rd0000644000176200001440000000142314027045462015267 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/partial-frame.R \name{partial_frame} \alias{partial_frame} \title{Partially specify columns of a data frame} \usage{ partial_frame(...) } \arguments{ \item{...}{Attributes of subclass} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This special class can be passed to \code{.ptype} in order to specify the types of only some of the columns in a data frame. } \examples{ pf <- partial_frame(x = double()) pf vec_rbind( data.frame(x = 1L, y = "a"), data.frame(x = FALSE, z = 10), .ptype = partial_frame(x = double(), a = character()) ) } \keyword{internal} vctrs/man/vec_as_names.Rd0000644000176200001440000001362313653027721015113 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_as_names} \alias{vec_as_names} \title{Retrieve and repair names} \usage{ vec_as_names( names, ..., repair = c("minimal", "unique", "universal", "check_unique"), repair_arg = "", quiet = FALSE ) } \arguments{ \item{names}{A character vector.} \item{...}{These dots are for future extensions and must be empty.} \item{repair}{Either a string or a function. If a string, it must be one of \code{"check_unique"}, \code{"minimal"}, \code{"unique"}, or \code{"universal"}. If a function, it is invoked with a vector of minimal names and must return minimal names, otherwise an error is thrown. \itemize{ \item Minimal names are never \code{NULL} or \code{NA}. When an element doesn't have a name, its minimal name is an empty string. \item Unique names are unique. A suffix is appended to duplicate names to make them unique. \item Universal names are unique and syntactic, meaning that you can safely use the names as variables without causing a syntax error. } The \code{"check_unique"} option doesn't perform any name repair. Instead, an error is raised if the names don't suit the \code{"unique"} criteria.} \item{repair_arg}{If specified and \code{repair = "check_unique"}, any errors will include a hint to set the \code{repair_arg}.} \item{quiet}{By default, the user is informed of any renaming caused by repairing the names. This only concerns unique and universal repairing. Set \code{quiet} to \code{TRUE} to silence the messages.} } \description{ \code{vec_as_names()} takes a character vector of names and repairs it according to the \code{repair} argument. It is the r-lib and tidyverse equivalent of \code{\link[base:make.names]{base::make.names()}}. vctrs deals with a few levels of name repair: \itemize{ \item \code{minimal} names exist. The \code{names} attribute is not \code{NULL}. The name of an unnamed element is \code{""} and never \code{NA}. For instance, \code{vec_as_names()} always returns minimal names and data frames created by the tibble package have names that are, at least, \code{minimal}. \item \code{unique} names are \code{minimal}, have no duplicates, and can be used where a variable name is expected. Empty names, \code{...}, and \code{..} followed by a sequence of digits are banned. \itemize{ \item All columns can be accessed by name via \code{df[["name"]]} and \code{df$`name` } and \code{with(df, `name`)}. } \item \code{universal} names are \code{unique} and syntactic (see Details for more). \itemize{ \item Names work everywhere, without quoting: \code{df$name} and \code{with(df, name)} and \code{lm(name1 ~ name2, data = df)} and \code{dplyr::select(df, name)} all work. } } \code{universal} implies \code{unique}, \code{unique} implies \code{minimal}. These levels are nested. } \section{\code{minimal} names}{ \code{minimal} names exist. The \code{names} attribute is not \code{NULL}. The name of an unnamed element is \code{""} and never \code{NA}. Examples:\preformatted{Original names of a vector with length 3: NULL minimal names: "" "" "" Original names: "x" NA minimal names: "x" "" } } \section{\code{unique} names}{ \code{unique} names are \code{minimal}, have no duplicates, and can be used (possibly with backticks) in contexts where a variable is expected. Empty names, \code{...}, and \code{..} followed by a sequence of digits are banned. If a data frame has \code{unique} names, you can index it by name, and also access the columns by name. In particular, \code{df[["name"]]} and \code{df$`name`} and also \code{with(df, `name`)} always work. There are many ways to make names \code{unique}. We append a suffix of the form \code{...j} to any name that is \code{""} or a duplicate, where \code{j} is the position. We also change \code{..#} and \code{...} to \code{...#}. Example:\preformatted{Original names: "" "x" "" "y" "x" "..2" "..." unique names: "...1" "x...2" "...3" "y" "x...5" "...6" "...7" } Pre-existing suffixes of the form \code{...j} are always stripped, prior to making names \code{unique}, i.e. reconstructing the suffixes. If this interacts poorly with your names, you should take control of name repair. } \section{\code{universal} names}{ \code{universal} names are \code{unique} and syntactic, meaning they: \itemize{ \item Are never empty (inherited from \code{unique}). \item Have no duplicates (inherited from \code{unique}). \item Are not \code{...}. Do not have the form \code{..i}, where \code{i} is a number (inherited from \code{unique}). \item Consist of letters, numbers, and the dot \code{.} or underscore \verb{_} characters. \item Start with a letter or start with the dot \code{.} not followed by a number. \item Are not a \link{reserved} word, e.g., \code{if} or \code{function} or \code{TRUE}. } If a vector has \code{universal} names, variable names can be used "as is" in code. They work well with nonstandard evaluation, e.g., \code{df$name} works. vctrs has a different method of making names syntactic than \code{\link[base:make.names]{base::make.names()}}. In general, vctrs prepends one or more dots \code{.} until the name is syntactic. Examples:\preformatted{ Original names: "" "x" NA "x" universal names: "...1" "x...2" "...3" "x...4" Original names: "(y)" "_z" ".2fa" "FALSE" universal names: ".y." "._z" "..2fa" ".FALSE" } } \examples{ # By default, `vec_as_names()` returns minimal names: vec_as_names(c(NA, NA, "foo")) # You can make them unique: vec_as_names(c(NA, NA, "foo"), repair = "unique") # Universal repairing fixes any non-syntactic name: vec_as_names(c("_foo", "+"), repair = "universal") } \seealso{ \code{\link[rlang:names2]{rlang::names2()}} returns the names of an object, after making them \code{minimal}. The \href{https://principles.tidyverse.org/names-attribute.html}{Names attribute} section in the "tidyverse package development principles". } vctrs/man/vec_count.Rd0000644000176200001440000000324413663716767014473 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_count} \alias{vec_count} \title{Count unique values in a vector} \usage{ vec_count(x, sort = c("count", "key", "location", "none")) } \arguments{ \item{x}{A vector (including a data frame).} \item{sort}{One of "count", "key", "location", or "none". \itemize{ \item "count", the default, puts most frequent values at top \item "key", orders by the output key column (i.e. unique values of \code{x}) \item "location", orders by location where key first seen. This is useful if you want to match the counts up to other unique/duplicated functions. \item "none", leaves unordered. }} } \value{ A data frame with columns \code{key} (same type as \code{x}) and \code{count} (an integer vector). } \description{ Count the number of unique values in a vector. \code{vec_count()} has two important differences to \code{table()}: it returns a data frame, and when given multiple inputs (as a data frame), it only counts combinations that appear in the input. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_order]{vec_order()}} } } \examples{ vec_count(mtcars$vs) vec_count(iris$Species) # If you count a data frame you'll get a data frame # column in the output str(vec_count(mtcars[c("vs", "am")])) # Sorting --------------------------------------- x <- letters[rpois(100, 6)] # default is to sort by frequency vec_count(x) # by can sort by key vec_count(x, sort = "key") # or location of first value vec_count(x, sort = "location") head(x) # or not at all vec_count(x, sort = "none") } vctrs/man/vec_bind.Rd0000644000176200001440000001470513671672047014252 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind.R \name{vec_bind} \alias{vec_bind} \alias{vec_rbind} \alias{vec_cbind} \title{Combine many data frames into one data frame} \usage{ vec_rbind( ..., .ptype = NULL, .names_to = rlang::zap(), .name_repair = c("unique", "universal", "check_unique"), .name_spec = NULL ) vec_cbind( ..., .ptype = NULL, .size = NULL, .name_repair = c("unique", "universal", "check_unique", "minimal") ) } \arguments{ \item{...}{Data frames or vectors. When the inputs are named: \itemize{ \item \code{vec_rbind()} assigns names to row names unless \code{.names_to} is supplied. In that case the names are assigned in the column defined by \code{.names_to}. \item \code{vec_cbind()} creates packed data frame columns with named inputs. } \code{NULL} inputs are silently ignored. Empty (e.g. zero row) inputs will not appear in the output, but will affect the derived \code{.ptype}.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.names_to}{This controls what to do with input names supplied in \code{...}. \itemize{ \item By default, input names are \link[rlang:zap]{zapped}. \item If a string, specifies a column where the input names will be copied. These names are often useful to identify rows with their original input. If a column name is supplied and \code{...} is not named, an integer column is used instead. \item If \code{NULL}, the input names are used as row names. }} \item{.name_repair}{One of \code{"unique"}, \code{"universal"}, or \code{"check_unique"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options. With \code{vec_rbind()}, the repair function is applied to all inputs separately. This is because \code{vec_rbind()} needs to align their columns before binding the rows, and thus needs all inputs to have unique names. On the other hand, \code{vec_cbind()} applies the repair function after all inputs have been concatenated together in a final data frame. Hence \code{vec_cbind()} allows the more permissive minimal names repair.} \item{.name_spec}{A name specification (as documented in \code{\link[=vec_c]{vec_c()}}) for combining the outer inputs names in \code{...} and the inner row names of the inputs. This only has an effect when \code{.names_to} is set to \code{NULL}, which causes the input names to be assigned as row names.} \item{.size}{If, \code{NULL}, the default, will determine the number of rows in \code{vec_cbind()} output by using the standard recycling rules. Alternatively, specify the desired number of rows, and any inputs of length 1 will be recycled appropriately.} } \value{ A data frame, or subclass of data frame. If \code{...} is a mix of different data frame subclasses, \code{vec_ptype2()} will be used to determine the output type. For \code{vec_rbind()}, this will determine the type of the container and the type of each column; for \code{vec_cbind()} it only determines the type of the output container. If there are no non-\code{NULL} inputs, the result will be \code{data.frame()}. } \description{ This pair of functions binds together data frames (and vectors), either row-wise or column-wise. Row-binding creates a data frame with common type across all arguments. Column-binding creates a data frame with common length across all arguments. } \section{Invariants}{ All inputs are first converted to a data frame. The conversion for 1d vectors depends on the direction of binding: \itemize{ \item For \code{vec_rbind()}, each element of the vector becomes a column in a single row. \item For \code{vec_cbind()}, each element of the vector becomes a row in a single column. } Once the inputs have all become data frames, the following invariants are observed for row-binding: \itemize{ \item \code{vec_size(vec_rbind(x, y)) == vec_size(x) + vec_size(y)} \item \code{vec_ptype(vec_rbind(x, y)) = vec_ptype_common(x, y)} } Note that if an input is an empty vector, it is first converted to a 1-row data frame with 0 columns. Despite being empty, its effective size for the total number of rows is 1. For column-binding, the following invariants apply: \itemize{ \item \code{vec_size(vec_cbind(x, y)) == vec_size_common(x, y)} \item \code{vec_ptype(vec_cbind(x, y)) == vec_cbind(vec_ptype(x), vec_ptype(x))} } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_init]{vec_init()}} \item \code{\link[=vec_assign]{vec_assign()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies of \code{vec_rbind()}}{ \itemize{ \item \code{\link[base:c]{base::c()}} } If columns to combine inherit from a common class, \code{vec_rbind()} falls back to \code{base::c()} if there exists a \code{c()} method implemented for this class hierarchy. } } \examples{ # row binding ----------------------------------------- # common columns are coerced to common class vec_rbind( data.frame(x = 1), data.frame(x = FALSE) ) # unique columns are filled with NAs vec_rbind( data.frame(x = 1), data.frame(y = "x") ) # null inputs are ignored vec_rbind( data.frame(x = 1), NULL, data.frame(x = 2) ) # bare vectors are treated as rows vec_rbind( c(x = 1, y = 2), c(x = 3) ) # default names will be supplied if arguments are not named vec_rbind( 1:2, 1:3, 1:4 ) # column binding -------------------------------------- # each input is recycled to have common length vec_cbind( data.frame(x = 1), data.frame(y = 1:3) ) # bare vectors are treated as columns vec_cbind( data.frame(x = 1), y = letters[1:3] ) # if you supply a named data frame, it is packed in a single column data <- vec_cbind( x = data.frame(a = 1, b = 2), y = 1 ) data # Packed data frames are nested in a single column. This makes it # possible to access it through a single name: data$x # since the base print method is suboptimal with packed data # frames, it is recommended to use tibble to work with these: if (rlang::is_installed("tibble")) { vec_cbind(x = tibble::tibble(a = 1, b = 2), y = 1) } # duplicate names are flagged vec_cbind(x = 1, x = 2) } \seealso{ \code{\link[=vec_c]{vec_c()}} for combining 1d vectors. } vctrs/man/vec_size.Rd0000644000176200001440000000720013663716767014311 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{vec_size} \alias{vec_size} \alias{vec_size_common} \alias{list_sizes} \alias{vec_is_empty} \title{Number of observations} \usage{ vec_size(x) vec_size_common(..., .size = NULL, .absent = 0L) list_sizes(x) vec_is_empty(x) } \arguments{ \item{x, ...}{Vector inputs or \code{NULL}.} \item{.size}{If \code{NULL}, the default, the output size is determined by recycling the lengths of all elements of \code{...}. Alternatively, you can supply \code{.size} to force a known size; in this case, \code{x} and \code{...} are ignored.} \item{.absent}{The size used when no input is provided, or when all input is \code{NULL}. If left as \code{NULL} when no input is supplied, an error is thrown.} } \value{ An integer (or double for long vectors). \code{vec_size_common()} returns \code{.absent} if all inputs are \code{NULL} or absent, \code{0L} by default. } \description{ \code{vec_size(x)} returns the size of a vector. \code{vec_is_empty()} returns \code{TRUE} if the size is zero, \code{FALSE} otherwise. The size is distinct from the \code{\link[=length]{length()}} of a vector because it generalises to the "number of observations" for 2d structures, i.e. it's the number of rows in matrix or a data frame. This definition has the important property that every column of a data frame (even data frame and matrix columns) have the same size. \code{vec_size_common(...)} returns the common size of multiple vectors. \code{list_sizes()} returns an integer vector containing the size of each element of a list. It is nearly equivalent to, but faster than, \code{map_int(x, vec_size)}, with the exception that \code{list_sizes()} will error on non-list inputs, as defined by \code{\link[=vec_is_list]{vec_is_list()}}. \code{list_sizes()} is to \code{vec_size()} as \code{\link[=lengths]{lengths()}} is to \code{\link[=length]{length()}}. } \details{ There is no vctrs helper that retrieves the number of columns: as this is a property of the \link[=vec_ptype_show]{type}. \code{vec_size()} is equivalent to \code{NROW()} but has a name that is easier to pronounce, and throws an error when passed non-vector inputs. } \section{Invariants}{ \itemize{ \item \code{vec_size(dataframe)} == \code{vec_size(dataframe[[i]])} \item \code{vec_size(matrix)} == \code{vec_size(matrix[, i, drop = FALSE])} \item \code{vec_size(vec_c(x, y))} == \code{vec_size(x)} + \code{vec_size(y)} } } \section{The size of NULL}{ The size of \code{NULL} is hard-coded to \code{0L} in \code{vec_size()}. \code{vec_size_common()} returns \code{.absent} when all inputs are \code{NULL} (if only some inputs are \code{NULL}, they are simply ignored). A default size of 0 makes sense because sizes are most often queried in order to compute a total size while assembling a collection of vectors. Since we treat \code{NULL} as an absent input by principle, we return the identity of sizes under addition to reflect that an absent input doesn't take up any size. Note that other defaults might make sense under different circumstances. For instance, a default size of 1 makes sense for finding the common size because 1 is the identity of the recycling rules. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} } } \examples{ vec_size(1:100) vec_size(mtcars) vec_size(array(dim = c(3, 5, 10))) vec_size_common(1:10, 1:10) vec_size_common(1:10, 1) vec_size_common(integer(), 1) list_sizes(list("a", 1:5, letters)) } \seealso{ \code{\link[=vec_slice]{vec_slice()}} for a variation of \code{[} compatible with \code{vec_size()}, and \code{\link[=vec_recycle]{vec_recycle()}} to recycle vectors to common length. } vctrs/man/vec_assert.Rd0000644000176200001440000000606113650511520014614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{vec_assert} \alias{vec_assert} \alias{vec_is} \title{Assert an argument has known prototype and/or size} \usage{ vec_assert(x, ptype = NULL, size = NULL, arg = as_label(substitute(x))) vec_is(x, ptype = NULL, size = NULL) } \arguments{ \item{x}{A vector argument to check.} \item{ptype}{Prototype to compare against. If the prototype has a class, its \code{\link[=vec_ptype]{vec_ptype()}} is compared to that of \code{x} with \code{identical()}. Otherwise, its \code{\link[=typeof]{typeof()}} is compared to that of \code{x} with \code{==}.} \item{size}{Size to compare against} \item{arg}{Name of argument being checked. This is used in error messages. The label of the expression passed as \code{x} is taken as default.} } \value{ \code{vec_is()} returns \code{TRUE} or \code{FALSE}. \code{vec_assert()} either throws a typed error (see section on error types) or returns \code{x}, invisibly. } \description{ \itemize{ \item \code{vec_is()} is a predicate that checks if its input is a vector that conforms to a prototype and/or a size. \item \code{vec_assert()} throws an error when the input is not a vector or doesn't conform. } } \section{Scalars and vectors}{ Informally, a vector is a collection that makes sense to use as column in a data frame. An object is a vector if one of the following conditions hold: \itemize{ \item A \code{\link[=vec_proxy]{vec_proxy()}} method is implemented for the class of the object. \item The \link[=typeof]{base type} of the object is atomic: \code{"logical"}, \code{"integer"}, \code{"double"}, \code{"complex"}, \code{"character"}, \code{"raw"} \item The object is a \link{data.frame}. \item The base type is \code{"list"}, and one of: \itemize{ \item The object is a bare \code{"list"} without a \code{"class"} attribute. \item The object explicitly inherits from \code{"list"}. That is, the \code{"class"} attribute contains \code{"list"} and \code{inherits(x, "list")} is \code{TRUE}. } } Otherwise an object is treated as scalar and cannot be used as a vector. In particular: \itemize{ \item \code{NULL} is not a vector. \item S3 lists like \code{lm} objects are treated as scalars by default. \item Objects of type \link{expression} are not treated as vectors. \item Support for S4 vectors is currently limited to objects that inherit from an atomic type. \item Subclasses of \link{data.frame} that \emph{append} their class to the \code{"class"} attribute are not treated as vectors. If you inherit from an S3 class, always prepend your class to the \code{"class"} attribute for correct dispatch. } } \section{Error types}{ \code{vec_is()} never throws. \code{vec_assert()} throws the following errors: \itemize{ \item If the input is not a vector, an error of class \code{"vctrs_error_scalar_type"} is raised. \item If the prototype doesn't match, an error of class \code{"vctrs_error_assert_ptype"} is raised. \item If the size doesn't match, an error of class \code{"vctrs_error_assert_size"} is raised. } Both errors inherit from \code{"vctrs_error_assert"}. } vctrs/man/vec_as_index.Rd0000644000176200001440000000226414027045462015114 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_as_index} \alias{vec_as_index} \title{Convert to an index vector} \usage{ vec_as_index(i, n, names = NULL) } \arguments{ \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{n}{A single integer representing the total size of the object that \code{i} is meant to index into.} \item{names}{If \code{i} is a character vector, \code{names} should be a character vector that \code{i} will be matched against to construct the index. Otherwise, not used. The default value of \code{NULL} will result in an error if \code{i} is a character vector.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_as_index()} has been renamed to \code{\link[=vec_as_location]{vec_as_location()}} and is deprecated as of vctrs 0.2.2. } \keyword{internal} vctrs/man/vec_default_ptype2.Rd0000644000176200001440000000371613653764444016270 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cast.R, R/type2.R \name{vec_default_cast} \alias{vec_default_cast} \alias{vec_default_ptype2} \title{Default cast and ptype2 methods} \usage{ vec_default_cast(x, to, ..., x_arg = "", to_arg = "") vec_default_ptype2(x, y, ..., x_arg = "", y_arg = "") } \arguments{ \item{x}{Vectors to cast.} \item{to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} \item{...}{For \code{vec_cast_common()}, vectors to cast. For \code{vec_cast()}, \code{vec_cast_default()}, and \code{vec_restore()}, these dots are only for future extensions and should be empty.} \item{x_arg}{Argument names for \code{x} and \code{to}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument names for \code{x} and \code{to}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \description{ These functions are automatically called when no \code{\link[=vec_ptype2]{vec_ptype2()}} or \code{\link[=vec_cast]{vec_cast()}} method is implemented for a pair of types. \itemize{ \item They apply special handling if one of the inputs is of type \code{AsIs} or \code{sfc}. \item They attempt a number of fallbacks in cases where it would be too inconvenient to be strict: \itemize{ \item If the class and attributes are the same they are considered compatible. \code{vec_default_cast()} returns \code{x} in this case. \item In case of incompatible data frame classes, they fall back to \code{data.frame}. If an incompatible subclass of tibble is involved, they fall back to \code{tbl_df}. } \item Otherwise, an error is thrown with \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} or \code{\link[=stop_incompatible_cast]{stop_incompatible_cast()}}. } } \keyword{internal} vctrs/man/vec_match.Rd0000644000176200001440000000466213663716767014444 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_match} \alias{vec_match} \alias{vec_in} \title{Find matching observations across vectors} \usage{ vec_match( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) vec_in( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) } \arguments{ \item{needles, haystack}{Vector of \code{needles} to search for in vector haystack. \code{haystack} should usually be unique; if not \code{vec_match()} will only return the location of the first match. \code{needles} and \code{haystack} are coerced to the same type prior to comparison.} \item{...}{These dots are for future extensions and must be empty.} \item{na_equal}{If \code{TRUE}, missing values in \code{needles} can be matched to missing values in \code{haystack}. If \code{FALSE}, they propagate, missing values in \code{needles} are represented as \code{NA} in the return value.} \item{needles_arg, haystack_arg}{Argument tags for \code{needles} and \code{haystack} used in error messages.} } \value{ A vector the same length as \code{needles}. \code{vec_in()} returns a logical vector; \code{vec_match()} returns an integer vector. } \description{ \code{vec_in()} returns a logical vector based on whether \code{needle} is found in haystack. \code{vec_match()} returns an integer vector giving location of \code{needle} in \code{haystack}, or \code{NA} if it's not found. } \details{ \code{vec_in()} is equivalent to \link{\%in\%}; \code{vec_match()} is equivalent to \code{match()}. } \section{Missing values}{ In most cases places in R, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. The exception is in matching functions like \code{\link[=match]{match()}} and \code{\link[=merge]{merge()}}, where an \code{NA} will match another \code{NA}. By \code{vec_match()} and \code{vec_in()} will match \code{NA}s; but you can control this behaviour with the \code{na_equal} argument. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ hadley <- strsplit("hadley", "")[[1]] vec_match(hadley, letters) vowels <- c("a", "e", "i", "o", "u") vec_match(hadley, vowels) vec_in(hadley, vowels) # Only the first index of duplicates is returned vec_match(c("a", "b"), c("a", "b", "a", "b")) } vctrs/man/vec_slice.Rd0000644000176200001440000000705613663716767014447 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{vec_slice} \alias{vec_slice} \alias{vec_slice<-} \alias{vec_assign} \title{Get or set observations in a vector} \usage{ vec_slice(x, i) vec_slice(x, i) <- value vec_assign(x, i, value, ..., x_arg = "", value_arg = "") } \arguments{ \item{x}{A vector} \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{value}{Replacement values. \code{value} is cast to the type of \code{x}, but only if they have a common type. See below for examples of this rule.} \item{...}{These dots are for future extensions and must be empty.} \item{x_arg, value_arg}{Argument names for \code{x} and \code{value}. These are used in error messages to inform the user about the locations of incompatible types and sizes (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} and \code{\link[=stop_incompatible_size]{stop_incompatible_size()}}).} } \value{ A vector of the same type as \code{x}. } \description{ This provides a common interface to extracting and modifying observations for all vector types, regardless of dimensionality. It is an analog to \code{[} that matches \code{\link[=vec_size]{vec_size()}} instead of \code{length()}. } \section{Genericity}{ Support for S3 objects depends on whether the object implements a \code{\link[=vec_proxy]{vec_proxy()}} method. \itemize{ \item When a \code{vec_proxy()} method exists, the proxy is sliced and \code{vec_restore()} is called on the result. \item Otherwise \code{vec_slice()} falls back to the base generic \code{[}. } Note that S3 lists are treated as scalars by default, and will cause an error if they don't implement a \code{\link[=vec_proxy]{vec_proxy()}} method. } \section{Differences with base R subsetting}{ \itemize{ \item \code{vec_slice()} only slices along one dimension. For two-dimensional types, the first dimension is subsetted. \item \code{vec_slice()} preserves attributes by default. \item \verb{vec_slice<-()} is type-stable and always returns the same type as the LHS. } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies}{ \itemize{ \item \code{base::`[`} } If a non-data-frame vector class doesn't have a \code{\link[=vec_proxy]{vec_proxy()}} method, the vector is sliced with \code{[} instead. } } \examples{ x <- sample(10) x vec_slice(x, 1:3) # You can assign with the infix variant: vec_slice(x, 2) <- 100 x # Or with the regular variant that doesn't modify the original input: y <- vec_assign(x, 3, 500) y x # Slicing objects of higher dimension: vec_slice(mtcars, 1:3) # Type stability -------------------------------------------------- # The assign variant is type stable. It always returns the same # type as the input. x <- 1:5 vec_slice(x, 2) <- 20.0 # `x` is still an integer vector because the RHS was cast to the # type of the LHS: vec_ptype(x) # Compare to `[<-`: x[2] <- 20.0 vec_ptype(x) # Note that the types must be coercible for the cast to happen. # For instance, you can cast a double vector of whole numbers to an # integer vector: vec_cast(1, integer()) # But not fractional doubles: try(vec_cast(1.5, integer())) # For this reason you can't assign fractional values in an integer # vector: x <- 1:3 try(vec_slice(x, 2) <- 1.5) } \keyword{internal} vctrs/man/s3_register.Rd0000644000176200001440000000477414042542500014716 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/register-s3.R \name{s3_register} \alias{s3_register} \title{Register a method for a suggested dependency} \arguments{ \item{generic}{Name of the generic in the form \code{pkg::generic}.} \item{class}{Name of the class} \item{method}{Optionally, the implementation of the method. By default, this will be found by looking for a function called \code{generic.class} in the package environment. Note that providing \code{method} can be dangerous if you use devtools. When the namespace of the method is reloaded by \code{devtools::load_all()}, the function will keep inheriting from the old namespace. This might cause crashes because of dangling \code{.Call()} pointers.} } \description{ Generally, the recommend way to register an S3 method is to use the \code{S3Method()} namespace directive (often generated automatically by the \verb{@export} roxygen2 tag). However, this technique requires that the generic be in an imported package, and sometimes you want to suggest a package, and only provide a method when that package is loaded. \code{s3_register()} can be called from your package's \code{.onLoad()} to dynamically register a method only if the generic's package is loaded. } \details{ For R 3.5.0 and later, \code{s3_register()} is also useful when demonstrating class creation in a vignette, since method lookup no longer always involves the lexical scope. For R 3.6.0 and later, you can achieve a similar effect by using "delayed method registration", i.e. placing the following in your \code{NAMESPACE} file:\preformatted{if (getRversion() >= "3.6.0") \{ S3method(package::generic, class) \} } } \section{Usage in other packages}{ To avoid taking a dependency on vctrs, you copy the source of \href{https://github.com/r-lib/vctrs/blob/master/R/register-s3.R}{\code{s3_register()}} into your own package. It is licensed under the permissive \href{https://choosealicense.com/licenses/unlicense/}{unlicense} to make it crystal clear that we're happy for you to do this. There's no need to include the license or even credit us when using this function. } \examples{ # A typical use case is to dynamically register tibble/pillar methods # for your class. That way you avoid creating a hard dependency on packages # that are not essential, while still providing finer control over # printing when they are used. .onLoad <- function(...) { s3_register("pillar::pillar_shaft", "vctrs_vctr") s3_register("tibble::type_sum", "vctrs_vctr") } } \keyword{internal} vctrs/man/partial_factor.Rd0000644000176200001440000000220414027045462015451 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/partial-factor.R \name{partial_factor} \alias{partial_factor} \title{Partially specify a factor} \usage{ partial_factor(levels = character()) } \arguments{ \item{levels}{Character vector of labels.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This special class can be passed as a \code{ptype} in order to specify that the result should be a factor that contains at least the specified levels. } \examples{ # Assert that `x` is a factor vec_assert(factor("x"), partial_factor()) # Testing with `factor()` is too strict, # because it tries to match the levels exactly # rather than learning them from the data. try(vec_assert(factor("x"), factor())) # You can also enforce a minimum set of levels try(vec_assert(factor("x"), partial_factor("y"))) vec_assert(factor(c("x", "y")), partial_factor("y")) pf <- partial_factor(levels = c("x", "y")) pf vec_ptype_common(factor("v"), factor("w"), .ptype = pf) } \keyword{internal} vctrs/man/new_vctr.Rd0000644000176200001440000000704214027045462014313 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-vctr.R \name{new_vctr} \alias{new_vctr} \alias{vctr} \title{vctr (vector) S3 class} \usage{ new_vctr(.data, ..., class = character(), inherit_base_type = NULL) } \arguments{ \item{.data}{Foundation of class. Must be a vector} \item{...}{Name-value pairs defining attributes} \item{class}{Name of subclass.} \item{inherit_base_type}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A single logical, or \code{NULL}. Does this class extend the base type of \code{.data}? i.e. does the resulting object extend the behaviour of the underlying type? Defaults to \code{FALSE} for all types except lists, which are required to inherit from the base type.} } \description{ This abstract class provides a set of useful default methods that makes it considerably easier to get started with a new S3 vector class. See \code{vignette("s3-vector")} to learn how to use it to create your own S3 vector classes. } \details{ List vctrs are special cases. When created through \code{new_vctr()}, the resulting list vctr should always be recognized as a list by \code{vec_is_list()}. Because of this, if \code{inherit_base_type} is \code{FALSE} an error is thrown. } \section{Base methods}{ The vctr class provides methods for many base generics using a smaller set of generics defined by this package. Generally, you should think carefully before overriding any of the methods that vctrs implements for you as they've been carefully planned to be internally consistent. \itemize{ \item \code{[[} and \code{[} use \code{NextMethod()} dispatch to the underlying base function, then restore attributes with \code{vec_restore()}. \code{rep()} and \verb{length<-} work similarly. \item \verb{[[<-} and \verb{[<-} cast \code{value} to same type as \code{x}, then call \code{NextMethod()}. \item \code{as.logical()}, \code{as.integer()}, \code{as.numeric()}, \code{as.character()}, \code{as.Date()} and \code{as.POSIXct()} methods call \code{vec_cast()}. The \code{as.list()} method calls \code{[[} repeatedly, and the \code{as.data.frame()} method uses a standard technique to wrap a vector in a data frame. \item \code{as.factor()}, \code{as.ordered()} and \code{as.difftime()} are not generic functions in base R, but have been reimplemented as generics in the \code{generics} package. \code{vctrs} extends these and calls \code{vec_cast()}. To inherit this behaviour in a package, import and re-export the generic of interest from \code{generics}. \item \code{==}, \code{!=}, \code{unique()}, \code{anyDuplicated()}, and \code{is.na()} use \code{\link[=vec_proxy]{vec_proxy()}}. \item \code{<}, \code{<=}, \code{>=}, \code{>}, \code{min()}, \code{max()}, \code{range()}, \code{median()}, \code{quantile()}, and \code{xtfrm()} methods use \code{\link[=vec_proxy_compare]{vec_proxy_compare()}}. \item \code{+}, \code{-}, \code{/}, \code{*}, \code{^}, \code{\%\%}, \code{\%/\%}, \code{!}, \code{&}, and \code{|} operators use \code{\link[=vec_arith]{vec_arith()}}. \item Mathematical operations including the Summary group generics (\code{prod()}, \code{sum()}, \code{any()}, \code{all()}), the Math group generics (\code{abs()}, \code{sign()}, etc), \code{mean()}, \code{is.nan()}, \code{is.finite()}, and \code{is.infinite()} use \code{\link[=vec_math]{vec_math()}}. \item \code{dims()}, \verb{dims<-}, \code{dimnames()}, \verb{dimnames<-}, \code{levels()}, and \verb{levels<-} methods throw errors. } } \keyword{internal} vctrs/man/vec_names.Rd0000644000176200001440000000561213712211241014413 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_names} \alias{vec_names} \alias{vec_names2} \alias{vec_set_names} \title{Get or set the names of a vector} \usage{ vec_names2( x, ..., repair = c("minimal", "unique", "universal", "check_unique"), quiet = FALSE ) vec_names(x) vec_set_names(x, names) } \arguments{ \item{x}{A vector with names} \item{...}{These dots are for future extensions and must be empty.} \item{repair}{Either a string or a function. If a string, it must be one of \code{"check_unique"}, \code{"minimal"}, \code{"unique"}, or \code{"universal"}. If a function, it is invoked with a vector of minimal names and must return minimal names, otherwise an error is thrown. \itemize{ \item Minimal names are never \code{NULL} or \code{NA}. When an element doesn't have a name, its minimal name is an empty string. \item Unique names are unique. A suffix is appended to duplicate names to make them unique. \item Universal names are unique and syntactic, meaning that you can safely use the names as variables without causing a syntax error. } The \code{"check_unique"} option doesn't perform any name repair. Instead, an error is raised if the names don't suit the \code{"unique"} criteria.} \item{quiet}{By default, the user is informed of any renaming caused by repairing the names. This only concerns unique and universal repairing. Set \code{quiet} to \code{TRUE} to silence the messages.} \item{names}{A character vector, or \code{NULL}.} } \value{ \code{vec_names2()} returns the names of \code{x}, repaired. \code{vec_names()} returns the names of \code{x} or \code{NULL} if unnamed. \code{vec_set_names()} returns \code{x} with names updated. } \description{ These functions work like \code{\link[rlang:names2]{rlang::names2()}}, \code{\link[=names]{names()}} and \code{\link[=names<-]{names<-()}}, except that they return or modify the the rowwise names of the vector. These are: \itemize{ \item The usual \code{names()} for atomic vectors and lists \item The row names for data frames and matrices \item The names of the first dimension for arrays Rowwise names are size consistent: the length of the names always equals \code{\link[=vec_size]{vec_size()}}. } \code{vec_names2()} returns the repaired names from a vector, even if it is unnamed. See \code{\link[=vec_as_names]{vec_as_names()}} for details on name repair. \code{vec_names()} is a bare-bones version that returns \code{NULL} if the vector is unnamed. \code{vec_set_names()} sets the names or removes them. } \examples{ vec_names2(1:3) vec_names2(1:3, repair = "unique") vec_names2(c(a = 1, b = 2)) # `vec_names()` consistently returns the rowwise names of data frames and arrays: vec_names(data.frame(a = 1, b = 2)) names(data.frame(a = 1, b = 2)) vec_names(mtcars) names(mtcars) vec_names(Titanic) names(Titanic) vec_set_names(1:3, letters[1:3]) vec_set_names(data.frame(a = 1:3), letters[1:3]) } vctrs/man/vec_as_location.Rd0000644000176200001440000000747113650511520015614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/subscript-loc.R \name{vec_as_location} \alias{vec_as_location} \alias{num_as_location} \alias{vec_as_location2} \alias{num_as_location2} \title{Create a vector of locations} \usage{ vec_as_location( i, n, names = NULL, ..., missing = c("propagate", "error"), arg = NULL ) num_as_location( i, n, ..., missing = c("propagate", "error"), negative = c("invert", "error", "ignore"), oob = c("error", "extend"), zero = c("remove", "error", "ignore"), arg = NULL ) vec_as_location2( i, n, names = NULL, ..., missing = c("error", "propagate"), arg = NULL ) num_as_location2( i, n, ..., negative = c("error", "ignore"), missing = c("error", "propagate"), arg = NULL ) } \arguments{ \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{n}{A single integer representing the total size of the object that \code{i} is meant to index into.} \item{names}{If \code{i} is a character vector, \code{names} should be a character vector that \code{i} will be matched against to construct the index. Otherwise, not used. The default value of \code{NULL} will result in an error if \code{i} is a character vector.} \item{...}{These dots are for future extensions and must be empty.} \item{missing}{Whether to throw an \code{"error"} when \code{i} is a missing value, or \code{"propagate"} it (return it as is). By default, vector subscripts can contain missing values and scalar subscripts can't.} \item{arg}{The argument name to be displayed in error messages when \code{vec_as_location()} and \code{vec_as_location2()} are used to check the type of a function input.} \item{negative}{Whether to throw an \code{"error"} when \code{i} is a negative location value, or \code{"ignore"} it.} \item{oob}{If \code{"error"}, throws an informative \code{"error"} if some elements are out-of-bounds. If \code{"extend"}, out-of-bounds locations are allowed if they are consecutive after the end. This can be used to implement extendable vectors like \code{letters[1:30]}.} \item{zero}{Whether to \code{"remove"} zero values, throw an informative \code{"error"}, or \code{"ignore"} them.} } \value{ \code{vec_as_location()} returns an integer vector that can be used as an index in a subsetting operation. \code{vec_as_location2()} returns an integer of size 1 that can be used a scalar index for extracting an element. } \description{ These helpers provide a means of standardizing common indexing methods such as integer, character or logical indexing. \itemize{ \item \code{vec_as_location()} accepts integer, character, or logical vectors of any size. The output is always an integer vector that is suitable for subsetting with \code{[} or \code{\link[=vec_slice]{vec_slice()}}. It might be a different size than the input because negative selections are transformed to positive ones and logical vectors are transformed to a vector of indices for the \code{TRUE} locations. \item \code{vec_as_location2()} accepts a single number or string. It returns a single location as a integer vector of size 1. This is suitable for extracting with \code{[[}. } } \examples{ x <- array(1:6, c(2, 3)) dimnames(x) <- list(c("r1", "r2"), c("c1", "c2", "c3")) # The most common use case validates row indices vec_as_location(1, vec_size(x)) # Negative indices can be used to index from the back vec_as_location(-1, vec_size(x)) # Character vectors can be used if `names` are provided vec_as_location("r2", vec_size(x), rownames(x)) # You can also construct an index for dimensions other than the first vec_as_location(c("c2", "c1"), ncol(x), colnames(x)) } \keyword{internal} vctrs/man/table.Rd0000644000176200001440000000044613723213047013552 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-table.R \name{table} \alias{table} \title{Table S3 class} \description{ These functions help the base table class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/vec_proxy.Rd0000644000176200001440000001436314027045462014506 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/proxy.R \name{vec_proxy} \alias{vec_proxy} \alias{vec_restore} \title{Proxy and restore} \usage{ vec_proxy(x, ...) vec_restore(x, to, ..., n = NULL) } \arguments{ \item{x}{A vector.} \item{...}{These dots are for future extensions and must be empty.} \item{to}{The original vector to restore to.} \item{n}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} The total size to restore to. This is currently passed by \code{vec_slice()} to solve edge cases arising in data frame restoration. In most cases you don't need this information and can safely ignore that argument. This parameter should be considered internal and experimental, it might change in the future.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_proxy()} returns the data structure containing the values of a vector. This data structure is usually the vector itself. In this case the proxy is the \link[base:identity]{identity function}, which is the default \code{vec_proxy()} method. Only experts should implement special \code{vec_proxy()} methods, for these cases: \itemize{ \item A vector has vectorised attributes, i.e. metadata for each element of the vector. These \emph{record types} are implemented in vctrs by returning a data frame in the proxy method. If you're starting your class from scratch, consider deriving from the \code{\link[=new_rcrd]{rcrd}} class. It implements the appropriate data frame proxy and is generally the preferred way to create a record class. \item When you're implementing a vector on top of a non-vector type, like an environment or an S4 object. This is currently only partially supported. \item S3 lists are considered scalars by default. This is the safe choice for list objects such as returned by \code{stats::lm()}. To declare that your S3 list class is a vector, you normally add \code{"list"} to the right of your class vector. Explicit inheritance from list is generally the preferred way to declare an S3 list in R, for instance it makes it possible to dispatch on \code{generic.list} S3 methods. If you can't modify your class vector, you can implement an identity proxy (i.e. a proxy method that just returns its input) to let vctrs know this is a vector list and not a scalar. } \code{vec_restore()} is the inverse operation of \code{vec_proxy()}. It should only be called on vector proxies. \itemize{ \item It undoes the transformations of \code{vec_proxy()}. \item It restores attributes and classes. These may be lost when the memory values are manipulated. For example slicing a subset of a vector's proxy causes a new proxy to be allocated. } By default vctrs restores all attributes and classes automatically. You only need to implement a \code{vec_restore()} method if your class has attributes that depend on the data. } \section{Proxying}{ You should only implement \code{vec_proxy()} when your type is designed around a non-vector class. I.e. anything that is not either: \itemize{ \item An atomic vector \item A bare list \item A data frame } In this case, implement \code{vec_proxy()} to return such a vector class. The vctrs operations such as \code{\link[=vec_slice]{vec_slice()}} are applied on the proxy and \code{vec_restore()} is called to restore the original representation of your type. The most common case where you need to implement \code{vec_proxy()} is for S3 lists. In vctrs, S3 lists are treated as scalars by default. This way we don't treat objects like model fits as vectors. To prevent vctrs from treating your S3 list as a scalar, unclass it in the \code{vec_proxy()} method. For instance, here is the definition for \code{list_of}:\preformatted{vec_proxy.vctrs_list_of <- function(x) \{ unclass(x) \} } Another case where you need to implement a proxy is \link[=new_rcrd]{record types}. Record types should return a data frame, as in the \code{POSIXlt} method:\preformatted{vec_proxy.POSIXlt <- function(x) \{ new_data_frame(unclass(x)) \} } Note that you don't need to implement \code{vec_proxy()} when your class inherits from \code{vctrs_vctr} or \code{vctrs_rcrd}. } \section{Restoring}{ A restore is a specialised type of cast, primarily used in conjunction with \code{NextMethod()} or a C-level function that works on the underlying data structure. A \code{vec_restore()} method can make the following assumptions about \code{x}: \itemize{ \item It has the correct type. \item It has the correct names. \item It has the correct \code{dim} and \code{dimnames} attributes. \item It is unclassed. This way you can call vctrs generics with \code{x} without triggering an infinite loop of restoration. } The length may be different (for example after \code{\link[=vec_slice]{vec_slice()}} has been called), and all other attributes may have been lost. The method should restore all attributes so that after restoration, \code{vec_restore(vec_data(x), x)} yields \code{x}. To understand the difference between \code{vec_cast()} and \code{vec_restore()} think about factors: it doesn't make sense to cast an integer to a factor, but if \code{NextMethod()} or another low-level function has stripped attributes, you still need to be able to restore them. The default method copies across all attributes so you only need to provide your own method if your attributes require special care (i.e. they are dependent on the data in some way). When implementing your own method, bear in mind that many R users add attributes to track additional metadata that is important to them, so you should preserve any attributes that don't require special handling for your class. } \section{Dependencies}{ \itemize{ \item \code{x} must be a vector in the vctrs sense (see \code{\link[=vec_is]{vec_is()}}) \item By default the underlying data is returned as is (identity proxy) } All vector classes have a proxy, even those who don't implement any vctrs methods. The exception is S3 lists that don't inherit from \code{"list"} explicitly. These might have to implement an identity proxy for compatibility with vctrs (see discussion above). } \keyword{internal} vctrs/man/vec_recycle.Rd0000644000176200001440000000415313663716767014771 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recycle.R \name{vec_recycle} \alias{vec_recycle} \alias{vec_recycle_common} \title{Vector recycling} \usage{ vec_recycle(x, size, ..., x_arg = "") vec_recycle_common(..., .size = NULL) } \arguments{ \item{x}{A vector to recycle.} \item{size}{Desired output size.} \item{...}{\itemize{ \item For \code{vec_recycle_common()}, vectors to recycle. \itemize{ \item For \code{vec_recycle()}, these dots should be empty. } }} \item{x_arg}{Argument name for \code{x}. These are used in error messages to inform the user about which argument has an incompatible size.} \item{.size}{Desired output size. If omitted, will use the common size from \code{\link[=vec_size_common]{vec_size_common()}}.} } \description{ \code{vec_recycle(x, size)} recycles a single vector to given size. \code{vec_recycle_common(...)} recycles multiple vectors to their common size. All functions obey the vctrs recycling rules, described below, and will throw an error if recycling is not possible. See \code{\link[=vec_size]{vec_size()}} for the precise definition of size. } \section{Recycling rules}{ The common size of two vectors defines the recycling rules, and can be summarise with the following table: \figure{sizes-recycling.png} (Note \code{NULL}s are handled specially; they are treated like empty arguments and hence don't affect the size) This is a stricter set of rules than base R, which will usually return output of length \code{max(nx, ny)}, warning if the length of the longer vector is not an integer multiple of the length of the shorter. We say that two vectors have \strong{compatible size} if they can be recycled to be the same length. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ # Inputs with 1 observation are recycled vec_recycle_common(1:5, 5) vec_recycle_common(integer(), 5) \dontrun{ vec_recycle_common(1:5, 1:2) } # Data frames and matrices are recycled along their rows vec_recycle_common(data.frame(x = 1), 1:5) vec_recycle_common(array(1:2, c(1, 2)), 1:5) vec_recycle_common(array(1:3, c(1, 3, 1)), 1:5) } vctrs/man/vec_identify_runs.Rd0000644000176200001440000000174013753021253016177 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/runs.R \name{vec_identify_runs} \alias{vec_identify_runs} \title{Runs} \usage{ vec_identify_runs(x) } \arguments{ \item{x}{A vector.} } \value{ An integer vector with the same size as \code{x}. A scalar integer attribute, \code{n}, is attached. } \description{ \code{vec_identify_runs()} returns a vector of identifiers for the elements of \code{x} that indicate which run of repeated values they fall in. The number of runs is also returned as an attribute, \code{n}. } \details{ Unlike \code{\link[base:rle]{base::rle()}}, adjacent missing values are considered identical when constructing runs. For example, \code{vec_identify_runs(c(NA, NA))} will return \code{c(1, 1)}, not \code{c(1, 2)}. } \examples{ x <- c("a", "z", "z", "c", "a", "a") vec_identify_runs(x) y <- c(1, 1, 1, 2, 2, 3) # With multiple columns, the runs are constructed rowwise df <- data_frame( x = x, y = y ) vec_identify_runs(df) } vctrs/man/vec_detect_complete.Rd0000644000176200001440000000255714027045462016467 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/complete.R \name{vec_detect_complete} \alias{vec_detect_complete} \title{Complete} \usage{ vec_detect_complete(x) } \arguments{ \item{x}{A vector} } \value{ A logical vector with the same size as \code{x}. } \description{ \code{vec_detect_complete()} detects "complete" observations. An observation is considered complete if it is non-missing. For most vectors, this implies that \code{vec_detect_complete(x) == !vec_equal_na(x)}. For data frames and matrices, a row is only considered complete if all elements of that row are non-missing. To compare, \code{!vec_equal_na(x)} detects rows that are partially complete (they have at least one non-missing value). } \details{ A \link[=new_rcrd]{record} type vector is considered complete if any field is non-missing. } \examples{ x <- c(1, 2, NA, 4, NA) # For most vectors, this is identical to `!vec_equal_na(x)` vec_detect_complete(x) !vec_equal_na(x) df <- data_frame( x = x, y = c("a", "b", NA, "d", "e") ) # This returns `TRUE` where all elements of the row are non-missing. # Compare that with `!vec_equal_na()`, which detects rows that have at # least one non-missing value. df2 <- df df2$all_non_missing <- vec_detect_complete(df) df2$any_non_missing <- !vec_equal_na(df) df2 } \seealso{ \code{\link[stats:complete.cases]{stats::complete.cases()}} } vctrs/man/new_factor.Rd0000644000176200001440000000257313650511520014611 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-factor.R \name{new_factor} \alias{new_factor} \alias{new_ordered} \alias{vec_ptype2.factor} \alias{vec_ptype2.ordered} \alias{vec_cast.factor} \alias{vec_cast.ordered} \title{Factor/ordered factor S3 class} \usage{ new_factor(x = integer(), levels = character(), ..., class = character()) new_ordered(x = integer(), levels = character()) \method{vec_ptype2}{factor}(x, y, ...) \method{vec_ptype2}{ordered}(x, y, ...) \method{vec_cast}{factor}(x, to, ...) \method{vec_cast}{ordered}(x, to, ...) } \arguments{ \item{x}{Integer values which index in to \code{levels}.} \item{levels}{Character vector of labels.} \item{..., class}{Used to for subclasses.} } \description{ A \link{factor} is an integer with attribute \code{levels}, a character vector. There should be one level for each integer between 1 and \code{max(x)}. An \link{ordered} factor has the same properties as a factor, but possesses an extra class that marks levels as having a total ordering. } \details{ These functions help the base factor and ordered factor classes fit in to the vctrs type system by providing constructors, coercion functions, and casting functions. \code{new_factor()} and \code{new_ordered()} are low-level constructors - they only check that types, but not values, are valid, so are for expert use only. } \keyword{internal} vctrs/man/vec_split.Rd0000644000176200001440000000240613663716767014475 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/split.R \name{vec_split} \alias{vec_split} \title{Split a vector into groups} \usage{ vec_split(x, by) } \arguments{ \item{x}{Vector to divide into groups.} \item{by}{Vector whose unique values defines the groups.} } \value{ A data frame with two columns and size equal to \code{vec_size(vec_unique(by))}. The \code{key} column has the same type as \code{by}, and the \code{val} column is a list containing elements of type \code{vec_ptype(x)}. Note for complex types, the default \code{data.frame} print method will be suboptimal, and you will want to coerce into a tibble to better understand the output. } \description{ This is a generalisation of \code{\link[=split]{split()}} that can split by any type of vector, not just factors. Instead of returning the keys in the character names, the are returned in a separate parallel vector. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_group_loc]{vec_group_loc()}} \item \code{\link[=vec_chop]{vec_chop()}} } } \examples{ vec_split(mtcars$cyl, mtcars$vs) vec_split(mtcars$cyl, mtcars[c("vs", "am")]) if (require("tibble")) { as_tibble(vec_split(mtcars$cyl, mtcars[c("vs", "am")])) as_tibble(vec_split(mtcars, mtcars[c("vs", "am")])) } } vctrs/man/int64.Rd0000644000176200001440000000134413622451540013425 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-integer64.R \name{vec_ptype_full.integer64} \alias{vec_ptype_full.integer64} \alias{vec_ptype_abbr.integer64} \alias{vec_ptype2.integer64} \alias{vec_cast.integer64} \title{64 bit integers} \usage{ \method{vec_ptype_full}{integer64}(x, ...) \method{vec_ptype_abbr}{integer64}(x, ...) \method{vec_ptype2}{integer64}(x, y, ...) \method{vec_cast}{integer64}(x, to, ...) } \description{ A \code{integer64} is a 64 bits integer vector, implemented in the \code{bit64} package. } \details{ These functions help the \code{integer64} class from \code{bit64} in to the vctrs type system by providing coercion functions and casting functions. } \keyword{internal} vctrs/man/vec_as_subscript.Rd0000644000176200001440000000407514027045462016025 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/subscript.R \name{vec_as_subscript} \alias{vec_as_subscript} \alias{vec_as_subscript2} \title{Convert to a base subscript type} \usage{ vec_as_subscript( i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL ) vec_as_subscript2( i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL ) } \arguments{ \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{...}{These dots are for future extensions and must be empty.} \item{logical, location, character}{How to handle logical, numeric, and character subscripts. If \code{"cast"} and the subscript is not one of the three base types (logical, integer or character), the subscript is \link[=vec_cast]{cast} to the relevant base type, e.g. factors are coerced to character. \code{NULL} is treated as an empty integer vector, and is thus coercible depending on the setting of \code{numeric}. Symbols are treated as character vectors and thus coercible depending on the setting of \code{character}. If \code{"error"}, the subscript type is disallowed and triggers an informative error.} \item{arg}{The argument name to be displayed in error messages when \code{vec_as_location()} and \code{vec_as_location2()} are used to check the type of a function input.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Convert \code{i} to the base type expected by \code{\link[=vec_as_location]{vec_as_location()}} or \code{\link[=vec_as_location2]{vec_as_location2()}}. The values of the subscript type are not checked in any way (length, missingness, negative elements). } \keyword{internal} vctrs/man/vec_unique.Rd0000644000176200001440000000343413663716767014652 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_unique} \alias{vec_unique} \alias{vec_unique_loc} \alias{vec_unique_count} \title{Find and count unique values} \usage{ vec_unique(x) vec_unique_loc(x) vec_unique_count(x) } \arguments{ \item{x}{A vector (including a data frame).} } \value{ \itemize{ \item \code{vec_unique()}: a vector the same type as \code{x} containing only unique values. \item \code{vec_unique_loc()}: an integer vector, giving locations of unique values. \item \code{vec_unique_count()}: an integer vector of length 1, giving the number of unique values. } } \description{ \itemize{ \item \code{vec_unique()}: the unique values. Equivalent to \code{\link[=unique]{unique()}}. \item \code{vec_unique_loc()}: the locations of the unique values. \item \code{vec_unique_count()}: the number of unique values. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \section{Missing values}{ In most cases, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. This behaviour would be unappealing here, so these functions consider all \code{NAs} to be equal. (Similarly, all \code{NaN} are also considered to be equal.) } \examples{ x <- rpois(100, 8) vec_unique(x) vec_unique_loc(x) vec_unique_count(x) # `vec_unique()` returns values in the order that encounters them # use sort = "location" to match to the result of `vec_count()` head(vec_unique(x)) head(vec_count(x, sort = "location")) # Normally missing values are not considered to be equal NA == NA # But they are for the purposes of considering uniqueness vec_unique(c(NA, NA, NA, NA, 1, 2, 1)) } \seealso{ \link{vec_duplicate} for functions that work with the dual of unique values: duplicated values. } vctrs/man/vec_compare.Rd0000644000176200001440000000246313663716767014773 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare.R \name{vec_compare} \alias{vec_compare} \title{Compare two vectors} \usage{ vec_compare(x, y, na_equal = FALSE, .ptype = NULL) } \arguments{ \item{x, y}{Vectors with compatible types and lengths.} \item{na_equal}{Should \code{NA} values be considered equal?} \item{.ptype}{Override to optionally specify common type} } \value{ An integer vector with values -1 for \code{x < y}, 0 if \code{x == y}, and 1 if \code{x > y}. If \code{na_equal} is \code{FALSE}, the result will be \code{NA} if either \code{x} or \code{y} is \code{NA}. } \description{ Compare two vectors } \section{S3 dispatch}{ \code{vec_compare()} is not generic for performance; instead it uses \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} to } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_recycle_common]{vec_recycle_common()}} \item \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} } } \examples{ vec_compare(c(TRUE, FALSE, NA), FALSE) vec_compare(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) vec_compare(1:10, 5) vec_compare(runif(10), 0.5) vec_compare(letters[1:10], "d") df <- data.frame(x = c(1, 1, 1, 2), y = c(0, 1, 2, 1)) vec_compare(df, data.frame(x = 1, y = 1)) } vctrs/DESCRIPTION0000644000176200001440000000340714042554003013122 0ustar liggesusersPackage: vctrs Title: Vector Helpers Version: 0.3.8 Authors@R: c(person(given = "Hadley", family = "Wickham", role = "aut", email = "hadley@rstudio.com"), person(given = "Lionel", family = "Henry", role = c("aut", "cre"), email = "lionel@rstudio.com"), person(given = "Davis", family = "Vaughan", role = "aut", email = "davis@rstudio.com"), person(given = "data.table team", role = "cph", comment = "Radix sort based on data.table's forder() and their contribution to R's order()"), person(given = "RStudio", role = "cph")) Description: Defines new notions of prototype and size that are used to provide tools for consistent and well-founded type-coercion and size-recycling, and are in turn connected to ideas of type- and size-stability useful for analysing function interfaces. License: MIT + file LICENSE URL: https://vctrs.r-lib.org/ BugReports: https://github.com/r-lib/vctrs/issues Depends: R (>= 3.3) Imports: ellipsis (>= 0.2.0), glue, rlang (>= 0.4.10) Suggests: bit64, covr, crayon, dplyr (>= 0.8.5), generics, knitr, pillar (>= 1.4.4), pkgdown, rmarkdown, testthat (>= 2.3.0), tibble, withr, xml2, waldo (>= 0.2.0), zeallot VignetteBuilder: knitr Encoding: UTF-8 Language: en-GB RoxygenNote: 7.1.1 NeedsCompilation: yes Packaged: 2021-04-29 15:14:11 UTC; lionel Author: Hadley Wickham [aut], Lionel Henry [aut, cre], Davis Vaughan [aut], data.table team [cph] (Radix sort based on data.table's forder() and their contribution to R's order()), RStudio [cph] Maintainer: Lionel Henry Repository: CRAN Date/Publication: 2021-04-29 16:00:03 UTC vctrs/build/0000755000176200001440000000000014042546502012514 5ustar liggesusersvctrs/build/vignette.rds0000644000176200001440000000045614042546502015060 0ustar liggesusersAO003&3x}e/DwZjҒq܈W`ھIaFpְᾃKؓ Au\v Ҝ n1M6N3ͥ `!\\0%l1Ǒ*K҉gZYՙdo\_MlLz:_l42! u/x.̋CXfJʏd`a[Dn3,zj&K"'xj꘢٪߾ai缢BP!LJji}~lvctrs/tests/0000755000176200001440000000000014042546502012557 5ustar liggesusersvctrs/tests/testthat/0000755000176200001440000000000014042554003014412 5ustar liggesusersvctrs/tests/testthat/test-type-unspecified.txt0000644000176200001440000000002214042546251021405 0ustar liggesusers [0] vctrs/tests/testthat/test-s4.R0000644000176200001440000000377013723213047016054 0ustar liggesusers test_that("basics", { x <- rando(10) expect_true(vec_is(x)) expect_equal(vec_size(x), 10) expect_identical(vec_ptype_common(x, x), vec_ptype(x)) }) test_that("casting of rando works", { x <- as_rando(1:10) expect_equal(vec_cast(x, rando()), x) expect_equal(vec_cast(NA, rando()), as_rando(NA)) expect_equal(vec_cast(unspecified(2), rando()), as_rando(c(NA, NA))) expect_error(vec_cast(x, factor()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(factor(), x), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2 for rando works", { x <- as_rando(1:10) expect_equal(vec_ptype(vec_ptype2(x, x)), rando()) expect_equal(vec_ptype2(x, NA), rando()) expect_equal(vec_ptype2(NA, x), rando()) expect_equal(vec_ptype2(unspecified(), x), rando()) expect_equal(vec_ptype2(x, unspecified()), rando()) expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, ""), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2("", x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(data.frame(), x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, data.frame()), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype_abbr.rando", { expect_equal(vec_ptype_abbr(as_rando(1:10)), "vctrs_rn") expect_equal(vec_ptype_full(as_rando(1:10)), "vctrs_rando") }) test_that("proxy and data", { x <- rando(10) expect_identical(vec_ptype(vec_proxy(x)), x[0]) expect_identical(vec_data(x), x@.Data) expect_false(isS4(vec_data(x))) expect_s4_class(vec_restore(vec_data(x), x), "vctrs_rando") expect_true(isS4(vec_restore(vec_data(x), x))) }) test_that("unset_s4() copies and works", { # Initial condition x <- rando() expect_true(isS4(x)) # Unsetting has no side effect on x unset_s4(x) expect_true(isS4(x)) # Unsetting actually works y <- unset_s4(x) expect_false(isS4(y)) }) vctrs/tests/testthat/test-vctrs.R0000644000176200001440000000155513723213047016666 0ustar liggesusers test_that("generics are extensible", { expect_error(vec_cast(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_restore(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_equal(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_compare(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype2(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_abbr(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_full(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_arith(NA, NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_finalise(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_assign(NA, NA, NA, NA), class = "rlib_error_dots_nonempty") }) vctrs/tests/testthat/test-complete.R0000644000176200001440000001247114027045462017336 0ustar liggesusers# vec_slice_complete ----------------------------------------------------------- test_that("can slice complete", { df <- data_frame(x = c(1, NA, 3), y = c(1, 2, NA)) expect_identical(vec_slice_complete(df), vec_slice(df, 1)) }) test_that("vec_slice_complete() works with size 0 input", { expect_identical(vec_slice_complete(integer()), integer()) expect_identical(vec_slice_complete(data.frame()), data.frame()) }) # vec_locate_complete ---------------------------------------------------------- test_that("vec_locate_complete() can locate complete", { df <- data_frame(x = c(1, NA, 3), y = c(1, 2, NA)) expect_identical(vec_locate_complete(df), 1L) }) test_that("vec_locate_complete() works with size 0 input", { expect_identical(vec_locate_complete(logical()), integer()) expect_identical(vec_locate_complete(data.frame()), integer()) }) # vec_detect_complete ---------------------------------------------------------- test_that("works with size zero input", { expect_identical(vec_detect_complete(integer()), logical()) expect_identical(vec_detect_complete(data.frame()), logical()) }) test_that("NA_real_ and NaN are both missing", { expect_identical(vec_detect_complete(c(NA_real_, NaN)), c(FALSE, FALSE)) }) test_that("works rowwise", { df <- data_frame(x = c(NA, 1, NA, 2), y = c(NA, NA, 2, 3)) expect <- c(FALSE, FALSE, FALSE, TRUE) expect_identical(vec_detect_complete(df), expect) df <- data_frame(x = c(1, 1), y = c(2, 2), z = c(1, NA)) expect <- c(TRUE, FALSE) expect_identical(vec_detect_complete(df), expect) }) test_that("works with data frames with rows but no columns", { expect_identical(vec_detect_complete(new_data_frame(n = 5L)), rep(TRUE, 5)) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, NA, 2, 2), b = c(1, 2, NA, 3)) df <- data_frame(x = rep(1, 4), y = col) expect <- c(TRUE, FALSE, FALSE, TRUE) expect_identical(vec_detect_complete(df), expect) }) test_that("works with various types", { expect <- c(TRUE, TRUE, FALSE, TRUE, FALSE) expect_identical(vec_detect_complete(c(TRUE, TRUE, NA, FALSE, NA)), expect) expect_identical(vec_detect_complete(c(1L, 1L, NA, 2L, NA)), expect) expect_identical(vec_detect_complete(c(1, 1, NA, 2, NA)), expect) expect_identical(vec_detect_complete(complex(real = c(1, 1, NA, 2, 2), imaginary = c(1, 1, 2, 2, NA))), expect) expect_identical(vec_detect_complete(c("a", "a", NA, "b", NA)), expect) expect_identical(vec_detect_complete(list(1, 1, NULL, 2, NULL)), expect) # No missing raw value expect_identical(vec_detect_complete(as.raw(c(1, 1, 2, 2, 3))), rep(TRUE, 5)) }) test_that("takes the equality proxy", { x <- as.POSIXlt(c(NA, 0), origin = "1970-01-01") df <- data_frame(a = 1:2, x = x) expect <- c(FALSE, TRUE) expect_identical(vec_detect_complete(x), expect) expect_identical(vec_detect_complete(df), expect) }) test_that("columns with a data frame proxy are only incomplete if all rows are incomplete", { df <- data_frame( x = c(NA, 1, 2, 3), y = new_rcrd(list(a = c(1, 1, NA, NA), b = c(2, 2, 2, NA))), z = new_rcrd(list(a = c(1, NA, 1, 1), b = c(2, NA, NA, 1))) ) expect_identical(vec_detect_complete(df), c(FALSE, FALSE, TRUE, FALSE)) }) test_that("can have rcrd fields of all types", { make_rcrd <- function(x) { new_rcrd(list(x = x)) } expect <- c(TRUE, TRUE, FALSE, TRUE, FALSE) expect_identical(vec_detect_complete(make_rcrd(c(TRUE, TRUE, NA, FALSE, NA))), expect) expect_identical(vec_detect_complete(make_rcrd(c(1L, 1L, NA, 2L, NA))), expect) expect_identical(vec_detect_complete(make_rcrd(c(1, 1, NA, 2, NA))), expect) expect_identical(vec_detect_complete(make_rcrd(complex(real = c(1, 1, NA, 2, 2), imaginary = c(1, 1, 2, 2, NA)))), expect) expect_identical(vec_detect_complete(make_rcrd(c("a", "a", NA, "b", NA))), expect) expect_identical(vec_detect_complete(make_rcrd(list(1, 1, NULL, 2, NULL))), expect) # No missing raw value expect_identical(vec_detect_complete(make_rcrd(as.raw(c(1, 1, 2, 2, 3)))), rep(TRUE, 5)) }) test_that("works with arrays", { x <- array(c(1, 2, 3, NA), c(2, 2)) y <- array(c(1:3, NA, 5:8), c(2, 2, 2)) expect_identical(vec_detect_complete(x), c(TRUE, FALSE)) expect_identical(vec_detect_complete(y), c(TRUE, FALSE)) }) # vec_proxy_complete ----------------------------------------------------------- test_that("generally returns equality proxy", { x <- 1:5 y <- as.POSIXlt("2019-01-01") + 1:5 expect_identical(vec_proxy_complete(x), vec_proxy_equal(x)) expect_identical(vec_proxy_complete(y), vec_proxy_equal(y)) expect_identical(vec_proxy_complete(mtcars), vec_proxy_equal(mtcars)) }) test_that("returns equality proxy with arrays", { x <- array(1) y <- array(1, c(2, 2)) z <- array(1, c(2, 2, 2)) expect_identical(vec_proxy_complete(x), vec_proxy_equal(x)) expect_identical(vec_proxy_complete(y), vec_proxy_equal(y)) expect_identical(vec_proxy_complete(z), vec_proxy_equal(z)) }) test_that("non data frame input that has a data frame equality proxy has the correct completeness proxy", { x <- 1:2 y <- new_rcrd(list(a = c(NA, 1), b = c(NA, NA))) z <- data_frame(c = 1:2, d = 3:4) df <- data_frame(x = x, y = y, z = z) y_expect <- c(NA, FALSE) df_expect <- data_frame(x = x, y = y_expect, z) expect_identical(vec_proxy_complete(y), y_expect) expect_identical(vec_proxy_complete(df), df_expect) }) vctrs/tests/testthat/test-type-rational.R0000644000176200001440000000126713723213047020315 0ustar liggesusers # These tests check the rational type from the S3 vignette test_that("equality proxy is taken (#375)", { local_rational_class() x <- rational(c(1, 2, 1, 2, 6), c(1, 1, 2, 2, 2)) expect_identical(x == rational(3, 1), c(FALSE, FALSE, FALSE, FALSE, TRUE)) expect_identical(unique(x), rational(c(1, 2, 1, 6), c(1, 1, 2, 2))) }) test_that("compare proxy is taken", { local_rational_class() x <- rational(c(1, 2, 1, 2, 6), c(1, 1, 2, 2, 2)) expect_identical(sort(x), rational(c(1, 1, 2, 2, 6), c(2, 1, 2, 1, 2))) }) test_that("can find common type and cast to rational", { local_rational_class() x <- rational(1:2, 2:1) expect_identical(vec_cast_common(x, x), list(x, x)) }) vctrs/tests/testthat/test-cast-list.R0000644000176200001440000000106313723213047017422 0ustar liggesusers test_that("silently extracts elements of length 1", { expect_equal(vec_list_cast(list(1, 2), double()), c(1, 2)) }) test_that("elements of length 0 become NA without error", { x <- list(1, double()) out <- vec_list_cast(x, double()) expect_equal(out, c(1, NA)) }) test_that("elements of length >1 are truncated with error", { x <- list(1, c(2, 1), c(3, 2, 1)) expect_lossy(vec_list_cast(x, dbl()), dbl(1, 2, 3), x = list(), to = dbl()) x <- list(c(2, 1), c(3, 2, 1)) expect_lossy(vec_list_cast(x, dbl()), dbl(2, 3), x = list(), to = dbl()) }) vctrs/tests/testthat/test-partial-frame-print.txt0000644000176200001440000000006414042546241022013 0ustar liggesuserspartial_frame< x: integer {partial} y: double > vctrs/tests/testthat/test-fill.R0000644000176200001440000000763513753021253016457 0ustar liggesuserstest_that("works with empty input", { x <- integer() expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("works with data frames with rows but no columns", { x <- new_data_frame(n = 2L) expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("vectors with all missing values are left unchanged", { x <- c(NA, NA, NA) expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("`NA_real_` and `NaN` are both considered missing", { expect_identical( vec_fill_missing(c(1, NA_real_, NaN)), c(1, 1, 1) ) }) test_that("missings are filled correctly", { x <- c(NA, 1, NA, 2, NA, NA) expect_identical(vec_fill_missing(x, "down"), c(NA, 1, 1, 2, 2, 2)) expect_identical(vec_fill_missing(x, "up"), c(1, 1, 2, 2, NA, NA)) expect_identical(vec_fill_missing(x, "downup"), c(1, 1, 1, 2, 2, 2)) expect_identical(vec_fill_missing(x, "updown"), c(1, 1, 2, 2, 2, 2)) }) test_that("`max_fill` limits the sequential fill amount", { x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) expect_identical(vec_fill_missing(x, "down", max_fill = 1), c(NA, NA, 1, 1, NA, NA, 3, 3, NA)) expect_identical(vec_fill_missing(x, "downup", max_fill = 1), c(NA, 1, 1, 1, NA, NA, 3, 3, NA)) expect_identical(vec_fill_missing(x, "down", max_fill = 2), c(NA, NA, 1, 1, 1, NA, 3, 3, 3)) expect_identical(vec_fill_missing(x, "downup", max_fill = 2), c(1, 1, 1, 1, 1, NA, 3, 3, 3)) expect_identical(vec_fill_missing(x, "up", max_fill = 1), c(NA, 1, 1, NA, NA, 3, 3, NA, NA)) expect_identical(vec_fill_missing(x, "updown", max_fill = 1), c(NA, 1, 1, NA, NA, 3, 3, 3, NA)) expect_identical(vec_fill_missing(x, "up", max_fill = 2), c(1, 1, 1, NA, 3, 3, 3, NA, NA)) expect_identical(vec_fill_missing(x, "updown", max_fill = 2), c(1, 1, 1, NA, 3, 3, 3, 3, 3)) }) test_that("fills data frames", { df <- data_frame(x = c(NA, NA, NA, 2), y = c(NA, 1, NA, 3)) expect_identical(vec_fill_missing(df, "down"), vec_slice(df, c(1, 2, 2, 4))) expect_identical(vec_fill_missing(df, "up"), vec_slice(df, c(2, 2, 4, 4))) }) test_that("can fill rcrd types", { x <- new_rcrd(list(x = c(1, NA, NA), y = c(1, 2, NA))) expect_identical(vec_fill_missing(x, "down"), vec_slice(x, c(1, 2, 2))) expect_identical(vec_fill_missing(x, "up"), vec_slice(x, c(1, 2, 3))) expect_identical(vec_fill_missing(x, "updown"), vec_slice(x, c(1, 2, 2))) }) test_that("validates `direction`", { expect_error(vec_fill_missing(1, 1), "`direction` must be one of") expect_error(vec_fill_missing(1, "foo"), "`direction` must be one of") }) test_that("validates `max_fill`", { expect_error(vec_fill_missing(1, max_fill = -1), "`max_fill` must be") expect_error(vec_fill_missing(1, max_fill = c(1L, 2L)), "`max_fill` must be") expect_error(vec_fill_missing(1, max_fill = NA_integer_), "`max_fill` must be") expect_error(vec_fill_missing(1, max_fill = "x"), class = "vctrs_error_incompatible_type") }) vctrs/tests/testthat/test-ptype-abbr-full.R0000644000176200001440000000423414027045462020531 0ustar liggesusers test_that("input must be a vector", { expect_error(vec_ptype_abbr(sum), "Not a vector") expect_error(vec_ptype_full(sum), "Not a vector") }) test_that("NULL has method", { expect_equal(vec_ptype_abbr(NULL), "NULL") expect_equal(vec_ptype_full(NULL), "NULL") }) test_that("non objects default to type + shape", { expect_equal(vec_ptype_abbr(ones(10)), "dbl") expect_equal(vec_ptype_abbr(ones(0, 10)), "dbl[,10]") expect_equal(vec_ptype_abbr(ones(10, 0)), "dbl[,0]") expect_equal(vec_ptype_full(ones(10)), "double") expect_equal(vec_ptype_full(ones(0, 10)), "double[,10]") expect_equal(vec_ptype_full(ones(10, 0)), "double[,0]") }) test_that("non objects can omit shape", { expect_equal(vec_ptype_abbr(ones(10), suffix_shape = FALSE), "dbl") expect_equal(vec_ptype_abbr(ones(0, 10), suffix_shape = FALSE), "dbl") expect_equal(vec_ptype_abbr(ones(10, 0), suffix_shape = FALSE), "dbl") }) test_that("objects default to first class", { x <- structure(1, class = c("foofy", "goofy")) expect_equal(vec_ptype_full(x), "foofy") expect_equal(vec_ptype_abbr(x), "foofy") }) test_that("atomic vectors and arrays as expected", { expect_equal(vec_ptype_full(1:5), "integer") dbl_mat <- array(double(), c(0, 3)) expect_equal(vec_ptype_full(dbl_mat), "double[,3]") }) test_that("complex and factor as expected (#323)", { expect_equal(vec_ptype_abbr(0i), "cpl") expect_equal(vec_ptype_abbr(factor()), "fct") }) test_that("named lists are always tagged (#322)", { expect_identical(vec_ptype_abbr(list(x = 1, y = 2)), "named list") expect_identical(vec_ptype_abbr(list(x = 1, y = 2), prefix_named = TRUE), "named list") }) test_that("named atomics are tagged optionally (#781)", { expect_identical(vec_ptype_abbr(c(x = 1, y = 2), prefix_named = TRUE), "named dbl") expect_identical(vec_ptype_abbr(c(x = 1L, y = 2L), prefix_named = TRUE), "named int") }) test_that("vec_ptype_abbr() adds named tag in case of row names", { expect_equal( vec_ptype_abbr(mtcars, prefix_named = TRUE), "named df[,11]" ) mat <- matrix(1:4, 2) rownames(mat) <- c("foo", "bar") expect_equal( vec_ptype_abbr(mat, prefix_named = TRUE), "named int[,2]" ) }) vctrs/tests/testthat/test-type-data-frame.R0000644000176200001440000004640414027045462020511 0ustar liggesusers # printing ---------------------------------------------------------------- test_that("data frames print nicely", { expect_equal(vec_ptype_abbr(mtcars), "df[,11]") verify_output(test_path("test-type-data-frame.txt"), { vec_ptype_show(mtcars) vec_ptype_show(iris) }) }) test_that("embedded data frames print nicely", { df <- data.frame(x = 1:3) df$a <- data.frame(a = 1:3, b = letters[1:3]) df$b <- list_of(1, 2, 3) df$c <- as_list_of(split(data.frame(x = 1:3, y = letters[1:3]), 1:3)) verify_output(test_path("test-type-data-frame-embedded.txt"), { vec_ptype_show(df) }) }) # coercing ---------------------------------------------------------------- test_that("data frame only combines with other data frames or NULL", { dt <- data.frame(x = 1) expect_equal(vec_ptype_common(dt, NULL), vec_ptype(dt)) expect_error(vec_ptype_common(dt, 1:10), class = "vctrs_error_incompatible_type") }) test_that("data frame takes max of individual variables", { dt1 <- data.frame(x = FALSE, y = 1L) dt2 <- data.frame(x = 1.5, y = 1.5) expect_equal(vec_ptype_common(dt1, dt2), vec_ptype_common(dt2)) }) test_that("data frame combines variables", { dt1 <- data.frame(x = 1) dt2 <- data.frame(y = 1) dt3 <- max(dt1, dt2) expect_equal( vec_ptype_common(dt1, dt2), vec_ptype_common(data.frame(x = double(), y = double())) ) }) test_that("empty data frame still has names", { df <- data.frame() out <- vec_ptype_common(df, df) expect_equal(names(out), character()) }) test_that("combining data frames with foreign classes uses fallback", { foo <- foobar(data.frame()) df <- data.frame() # Same type fallback expect_identical(vec_ptype_common(foo, foo, foo), foo) expect_incompatible_df(vec_ptype_common(foo, foo, df, foo), df) expect_identical( expect_df_fallback_warning(vec_ptype2_fallback(foo, df)), new_fallback_df(df, c("vctrs_foobar", "data.frame")) ) expect_identical( expect_df_fallback_warning(vec_ptype2_fallback(df, foo)), new_fallback_df(df, c("data.frame", "vctrs_foobar")) ) expect_identical( expect_df_fallback_warning(vec_ptype_common_df_fallback(foo, df)), df ) expect_identical( expect_df_fallback_warning(vec_ptype_common_df_fallback(df, foo)), df ) cnds <- list() withCallingHandlers( warning = function(cnd) { cnds <<- append(cnds, list(cnd)) invokeRestart("muffleWarning") }, expect_identical( vec_ptype_common_df_fallback(foo, df, foo, foo), df ) ) # There should be only one warning even if many fallbacks expect_length(cnds, 1) expect_s3_class(cnds[[1]], "warning") expect_match(cnds[[1]]$message, "falling back to ") expect_incompatible_df( vec_cbind(foobar(data.frame(x = 1)), data.frame(y = 2)), data.frame(x = 1, y = 2) ) expect_incompatible_df( vec_rbind(foo, data.frame(), foo), df ) verify_errors({ foo <- structure(mtcars[1:3], class = c("foo", "data.frame")) bar <- structure(mtcars[4:6], class = c("bar", "data.frame")) baz <- structure(mtcars[7:9], class = c("baz", "data.frame")) # Nested expect_warning() require testthat > 2.3.2 suppressWarnings(expect_warning(vec_ptype_common_df_fallback(foo, bar, baz))) suppressWarnings(expect_warning(vec_ptype_common_df_fallback(foo, baz, bar, baz, foo, bar))) with_fallback_warning(expect_df_fallback_warning(invisible(vec_rbind(foo, data.frame(), foo)))) with_fallback_warning(expect_df_fallback_warning(invisible(vec_cbind(foo, data.frame(x = 1))))) with_fallback_warning(expect_df_fallback_warning(invisible(vec_cbind(foo, data.frame(x = 1), bar)))) with_fallback_quiet(invisible(vec_rbind(foo, data.frame(), foo))) with_fallback_quiet(invisible(vec_cbind(foo, data.frame(x = 1)))) with_fallback_quiet(invisible(vec_cbind(foo, data.frame(x = 1), bar))) }) }) # casting ----------------------------------------------------------------- test_that("safe casts work as expected", { df <- data.frame(x = 1, y = 0) expect_equal(vec_cast(NULL, df), NULL) expect_equal(vec_cast(df, df), df) expect_equal(vec_cast(data.frame(x = TRUE, y = FALSE), df), df) }) test_that("warn about lossy coercions", { df1 <- data.frame(x = factor("foo"), y = 1) df2 <- data.frame(x = factor("bar")) expect_lossy(vec_cast(df1, df1[1]), df1[1], x = df1, to = df1[1]) expect_lossy( vec_cast(df1[1], df2), data.frame(x = factor(NA, levels = "bar")), x = factor("foo"), to = factor("bar") ) out <- allow_lossy_cast( allow_lossy_cast( vec_cast(df1, df2), factor("foo"), factor("bar") ), df1, df2 ) expect_identical(out, data.frame(x = factor(NA, levels = "bar"))) }) test_that("invalid cast generates error", { expect_error(vec_cast(1L, data.frame()), class = "vctrs_error_incompatible_type") }) test_that("column order matches type", { df1 <- data.frame(x = 1, y = "a") df2 <- data.frame(x = TRUE, z = 3) df3 <- vec_cast(df2, vec_ptype_common(df1, df2)) expect_named(df3, c("x", "y", "z")) }) test_that("restore generates correct row/col names", { df1 <- data.frame(x = NA, y = 1:4, z = 1:4) df1$x <- data.frame(a = 1:4, b = 1:4) df2 <- vec_restore(lapply(df1[1:3], vec_slice, 1:2), df1) expect_named(df2, c("x", "y", "z")) expect_equal(.row_names_info(df2), -2) }) test_that("restore keeps automatic row/col names", { df1 <- data.frame(x = NA, y = 1:4, z = 1:4) df1$x <- data.frame(a = 1:4, b = 1:4) df2 <- vec_restore(df1, df1) expect_named(df2, c("x", "y", "z")) expect_equal(.row_names_info(df2), -4) }) test_that("cast to empty data frame preserves number of rows", { out <- vec_cast(new_data_frame(n = 10L), new_data_frame()) expect_equal(nrow(out), 10L) }) test_that("can cast unspecified to data frame", { df <- data.frame(x = 1, y = 2L) expect_identical(vec_cast(unspecified(3), df), vec_init(df, 3)) }) test_that("cannot cast list to data frame", { df <- data.frame(x = 1, y = 2L) expect_error(vec_cast(list(df, df), df), class = "vctrs_error_incompatible_type") }) test_that("can restore lists with empty names", { expect_identical(vec_restore(list(), data.frame()), data.frame()) }) test_that("can restore subclasses of data frames", { expect_identical(vec_restore(list(), subclass(data.frame())), subclass(data.frame())) local_methods( vec_restore.vctrs_foobar = function(x, to, ..., i) "dispatched" ) expect_identical(vec_restore(list(), subclass(data.frame())), "dispatched") }) test_that("df_cast() checks for names", { x <- new_data_frame(list(1)) y <- new_data_frame(list(2)) expect_error(vec_cast_common(x, y), "must have names") }) test_that("casting to and from data frame preserves row names", { out <- vec_cast(mtcars, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) out <- vec_cast(out, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) }) # new_data_frame ---------------------------------------------------------- test_that("can construct an empty data frame", { expect_identical(new_data_frame(), data.frame()) }) test_that("can validly set the number of rows when there are no columns", { expect <- structure( list(), class = "data.frame", row.names = .set_row_names(2L), names = character() ) expect_identical(new_data_frame(n = 2L), expect) }) test_that("can add additional classes", { expect_s3_class(new_data_frame(class = "foobar"), "foobar") expect_s3_class(new_data_frame(class = c("foo", "bar")), c("foo", "bar")) }) test_that("can add additional attributes", { expect <- data.frame() attr(expect, "foo") <- "bar" attr(expect, "a") <- "b" expect_identical(new_data_frame(foo = "bar", a = "b"), expect) }) test_that("size is pulled from first column if not supplied", { x <- new_data_frame(list(x = 1:5, y = 1:6)) expect_identical(.row_names_info(x, type = 1), -5L) }) test_that("can construct a data frame without column names", { expect_named(new_data_frame(list(1, 2)), NULL) }) test_that("the names on an empty data frame are an empty character vector", { expect_identical(names(new_data_frame()), character()) }) test_that("class attribute", { expect_identical( class(new_data_frame(list(a = 1))), "data.frame" ) expect_identical( class(new_data_frame(list(a = 1), class = "tbl_df")), c("tbl_df", "data.frame") ) expect_identical( class(new_data_frame(list(a = 1), class = c("tbl_df", "tbl", "data.frame"))), c("tbl_df", "tbl", "data.frame", "data.frame") ) expect_identical( class(new_data_frame(list(a = 1), class = "foo_frame")), c("foo_frame", "data.frame") ) expect_identical( class(exec(new_data_frame, list(a = 1), !!!attributes(new_data_frame(list(), class = "tbl_df")))), c("tbl_df", "data.frame", "data.frame") ) expect_identical( class(exec(new_data_frame, list(a = 1), !!!attributes(new_data_frame(list(b = 1), class = "tbl_df")))), c("tbl_df", "data.frame", "data.frame") ) }) test_that("attributes with special names are merged", { expect_identical( names(new_data_frame(list(a = 1))), "a" ) expect_identical( names(new_data_frame(list(a = 1), names = "name")), "name" ) expect_identical( names(new_data_frame(list(1), names = "name")), "name" ) expect_identical( attr(new_data_frame(list()), "row.names"), integer() ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L)), -3L ) expect_error(new_data_frame(list(), n = 1L, row.names = 1:3), ".") expect_identical( .row_names_info(new_data_frame(list(), n = 3L, row.names = 1:3)), 3L ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L, row.names = c(NA, -3L))), -3L ) expect_identical( attr(new_data_frame(list(), n = 1L, row.names = "rowname"), "row.names"), "rowname" ) }) test_that("n and row.names (#894)", { # Can omit n if row.names attribute is given expect_identical( row.names(new_data_frame(list(), row.names = "rowname")), "rowname" ) expect_identical( attr(new_data_frame(list(), row.names = 2L), "row.names"), 2L ) expect_identical( row.names(new_data_frame(list(), row.names = chr())), chr() ) }) test_that("`x` must be a list", { expect_error(new_data_frame(1), "`x` must be a list") }) test_that("if supplied, `n` must be an integer of size 1", { expect_error(new_data_frame(n = c(1L, 2L)), "must be an integer of size 1") expect_error(new_data_frame(n = "x"), "must be an integer of size 1") }) test_that("`class` must be a character vector", { expect_error(new_data_frame(class = 1), "must be NULL or a character vector") }) test_that("flatten info is computed", { df_flatten_info <- function(x) { .Call(vctrs_df_flatten_info, x) } expect_identical(df_flatten_info(mtcars), list(FALSE, ncol(mtcars))) df <- tibble(x = 1, y = tibble(x = 2, y = tibble(x = 3), z = 4), z = 5) expect_identical(df_flatten_info(df), list(TRUE, 5L)) }) test_that("can flatten data frames", { df_flatten <- function(x) { .Call(vctrs_df_flatten, x) } expect_identical(df_flatten(mtcars), mtcars) df <- tibble(x = 1, y = tibble(x = 2, y = tibble(x = 3), z = 4), z = 5) expect_identical(df_flatten(df), new_data_frame(list(x = 1, x = 2, x = 3, z = 4, z = 5))) }) test_that("can flatten data frames with rcrd columns containing 1 field (#1318)", { col <- new_rcrd(list(x = 1)) df <- data_frame(col = col, y = 1) expect_identical(vec_proxy_equal(df), data_frame(x = 1, y = 1)) }) test_that("new_data_frame() zaps existing attributes", { struct <- structure(list(), foo = 1) expect_identical( attributes(new_data_frame(struct)), attributes(new_data_frame(list())), ) expect_identical( attributes(new_data_frame(struct, bar = 2)), attributes(new_data_frame(list(), bar = 2)), ) }) # data_frame -------------------------------------------------------------- test_that("can construct data frames with empty input", { expect_identical(data_frame(), new_data_frame()) expect_named(data_frame(), character()) }) test_that("input is tidy recycled", { expect_identical( data_frame(x = 1, y = 1:3), data_frame(x = c(1, 1, 1), y = 1:3) ) expect_identical( data_frame(x = 1, y = integer()), data_frame(x = double(), y = integer()) ) expect_error(data_frame(1:2, 1:3), class = "vctrs_error_incompatible_size") }) test_that("dots are dynamic", { list_2_data_frame <- function(x) data_frame(!!!x) expect_identical( list_2_data_frame(list(x = 1, y = 2)), data_frame(x = 1, y = 2) ) }) test_that("unnamed input is auto named with empty strings", { expect_named(data_frame(1, 2, .name_repair = "minimal"), c("", "")) }) test_that("unnamed data frames are auto spliced", { expect_identical( data_frame(w = 1, data_frame(x = 2, y = 3), z = 4), data_frame(w = 1, x = 2, y = 3, z = 4) ) }) test_that("named data frames are not spliced", { df_col <- data_frame(x = 2, y = 3) df <- data_frame(w = 1, col = data_frame(x = 2, y = 3), z = 4) expect_identical(df$col, df_col) }) test_that("spliced data frames without names are caught", { df_col <- new_data_frame(list(1)) expect_error(data_frame(df_col), "corrupt data frame") }) test_that("`NULL` inputs are dropped", { expect_identical(data_frame(NULL, x = 1, NULL), data_frame(x = 1)) }) test_that("`NULL` inputs are dropped before name repair", { expect_identical( data_frame(x = NULL, x = 1, .name_repair = "check_unique"), data_frame(x = 1) ) }) test_that("`.size` can force a desired size", { df <- data_frame(x = 1, .size = 5) expect_identical(df$x, rep(1, 5)) expect_size(data_frame(.size = 5), 5L) }) test_that("`.name_repair` repairs names", { expect_named( expect_message(data_frame(x = 1, x = 1, .name_repair = "unique")), c("x...1", "x...2") ) }) test_that("`.name_repair` happens after auto-naming with empty strings", { expect_named( expect_message(data_frame(1, 2, .name_repair = "unique")), c("...1", "...2") ) }) test_that("`.name_repair` happens after splicing", { expect_named( expect_message(data_frame(x = 1, data_frame(x = 2), .name_repair = "unique")), c("x...1", "x...2") ) }) # fallback ---------------------------------------------------------------- test_that("data frame fallback handles column types (#999)", { df1 <- foobar(data.frame(x = 1)) df2 <- foobar(data.frame(x = 1, y = 2)) df3 <- foobar(data.frame(x = "", y = 2)) common <- foobar(data.frame(x = dbl(), y = dbl())) expect_identical(vec_ptype2(df1, df2), common) expect_identical(vec_ptype2(df2, df1), common) expect_error( vec_ptype2(df1, df3), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(df3, df1), class = "vctrs_error_incompatible_type" ) expect_identical( vec_cast(df1, df2), foobar(data.frame(x = 1, y = na_dbl)) ) expect_error( vec_cast(df2, df1), class = "vctrs_error_cast_lossy" ) expect_identical( vec_rbind(df1, df2), foobar(data.frame(x = c(1, 1), y = c(NA, 2))) ) # Attributes are not restored df1_attrib <- foobar(df1, foo = "foo") df2_attrib <- foobar(df2, bar = "bar") exp <- data.frame(x = c(1, 1), y = c(NA, 2)) expect_incompatible_df( vec_rbind(df1_attrib, df2_attrib), exp ) out <- with_methods( `[.vctrs_foobar` = function(x, i, ...) { new_data_frame( NextMethod(), dispatched = TRUE, class = "vctrs_foobar" ) }, vec_rbind(df1_attrib, df2_attrib) ) expect_identical(out, foobar(exp, dispatched = TRUE)) }) test_that("falls back to tibble for tibble subclasses (#1025)", { foo <- foobar(tibble::as_tibble(mtcars)) expect_s3_class(expect_df_fallback_warning_maybe(vec_rbind(foo, mtcars)), "tbl_df") expect_s3_class(expect_df_fallback_warning_maybe(vec_rbind(foo, mtcars, mtcars)), "tbl_df") expect_s3_class(expect_df_fallback_warning_maybe(vec_rbind(foo, mtcars, foobar(mtcars))), "tbl_df") verify_errors({ with_fallback_warning(expect_df_fallback_warning( vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobaz(mtcars) ) )) with_fallback_warning(expect_df_fallback_warning( vec_rbind( tibble::as_tibble(mtcars), foobar(tibble::as_tibble(mtcars)) ) )) with_fallback_warning(expect_df_fallback_warning( vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobar(tibble::as_tibble(mtcars)) ) )) with_fallback_quiet( vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobaz(mtcars) ) ) with_fallback_quiet( vec_rbind( tibble::as_tibble(mtcars), foobar(tibble::as_tibble(mtcars)) ) ) with_fallback_quiet( vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobar(tibble::as_tibble(mtcars)) ) ) }) }) test_that("fallback is recursive", { df <- mtcars[1:3, 1, drop = FALSE] foo <- new_data_frame(list(x = foobar(df, foo = TRUE))) bar <- new_data_frame(list(x = foobar(df, bar = TRUE))) baz <- new_data_frame(list(y = 1:3, x = foobar(df, bar = TRUE))) exp <- new_data_frame(list(x = vec_rbind(df, df))) expect_incompatible_df(vec_rbind(foo, bar), exp) exp <- new_data_frame(list(x = vec_rbind(df, df), y = c(NA, NA, NA, 1:3))) expect_incompatible_df(vec_rbind(foo, baz), exp) }) test_that("data frame output is informative", { verify_output(test_path("error", "test-type-data-frame.txt"), { "# combining data frames with foreign classes uses fallback" foo <- structure(mtcars[1:3], class = c("foo", "data.frame")) bar <- structure(mtcars[4:6], class = c("bar", "data.frame")) baz <- structure(mtcars[7:9], class = c("baz", "data.frame")) vec_ptype_common_df_fallback(foo, bar, baz) vec_ptype_common_df_fallback(foo, baz, bar, baz, foo, bar) with_fallback_warning(invisible(vec_rbind(foo, data.frame(), foo))) with_fallback_warning(invisible(vec_rbind(foo, baz, bar, baz, foo, bar))) with_fallback_warning(invisible(vec_cbind(foo, data.frame(x = 1)))) with_fallback_warning(invisible(vec_cbind(foo, data.frame(x = 1), bar))) with_fallback_quiet(invisible(vec_rbind(foo, data.frame(), foo))) with_fallback_quiet(invisible(vec_rbind(foo, baz, bar, baz, foo, bar))) with_fallback_quiet(invisible(vec_cbind(foo, data.frame(x = 1)))) with_fallback_quiet(invisible(vec_cbind(foo, data.frame(x = 1), bar))) "# falls back to tibble for tibble subclasses (#1025)" with_fallback_warning( invisible(vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobaz(mtcars) )) ) with_fallback_warning( invisible(vec_rbind( tibble::as_tibble(mtcars), foobar(tibble::as_tibble(mtcars)) )) ) with_fallback_warning( invisible(vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobar(tibble::as_tibble(mtcars)) )) ) with_fallback_quiet( invisible(vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobaz(mtcars) )) ) with_fallback_quiet( invisible(vec_rbind( tibble::as_tibble(mtcars), foobar(tibble::as_tibble(mtcars)) )) ) with_fallback_quiet( invisible(vec_rbind( foobar(tibble::as_tibble(mtcars)), mtcars, foobar(tibble::as_tibble(mtcars)) )) ) }) }) vctrs/tests/testthat/test-type-table.R0000644000176200001440000001742714027045462017602 0ustar liggesusers # Print ------------------------------------------------------------------- test_that("ptype print methods are descriptive", { tab1 <- new_table() tab2 <- new_table(dim = c(0L, 1L, 2L, 1L)) expect_equal(vec_ptype_abbr(tab1), "table") expect_equal(vec_ptype_abbr(tab2), "table[,1,2,1]") expect_equal(vec_ptype_full(tab1), "table") expect_equal(vec_ptype_full(tab2), "table[,1,2,1]") }) # Coercion ---------------------------------------------------------------- test_that("can find a common type among tables with identical dimensions", { tab1 <- new_table() tab2 <- new_table(1:2, dim = c(1L, 2L, 1L)) expect_identical(vec_ptype2(tab1, tab1), zap_dimnames(new_table())) expect_identical(vec_ptype2(tab2, tab2), zap_dimnames(new_table(dim = c(0L, 2L, 1L)))) }) test_that("size is not considered in the ptype", { x <- new_table(1:2, dim = 2L) y <- new_table(1:3, dim = 3L) expect_identical(vec_ptype2(x, y), zap_dimnames(new_table())) }) test_that("vec_ptype2() can broadcast table shapes", { x <- new_table(dim = c(0L, 1L)) y <- new_table(dim = c(0L, 2L)) expect_identical(vec_ptype2(x, y), zap_dimnames(new_table(dim = c(0L, 2L)))) x <- new_table(dim = c(0L, 1L, 3L)) y <- new_table(dim = c(0L, 2L, 1L)) expect_identical(vec_ptype2(x, y), zap_dimnames(new_table(dim = c(0L, 2L, 3L)))) }) test_that("implicit axes are broadcast", { x <- new_table(dim = c(0L, 2L)) y <- new_table(dim = c(0L, 1L, 3L)) expect_identical(vec_ptype2(x, y), zap_dimnames(new_table(dim = c(0L, 2L, 3L)))) }) test_that("errors on non-broadcastable dimensions", { x <- new_table(dim = c(0L, 2L)) y <- new_table(dim = c(0L, 3L)) expect_error(vec_ptype2(x, y), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2() errors on non-tables", { expect_error(vec_ptype2(new_table(), 1), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(new_table(), 1L), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(new_table(), "1"), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1L, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2("1", new_table()), class = "vctrs_error_incompatible_type") }) test_that("common types have symmetry when mixed with unspecified input", { x <- new_table() expect_identical(vec_ptype2(x, NA), new_table()) expect_identical(vec_ptype2(NA, x), new_table()) x <- new_table(dim = c(0L, 2L)) expect_identical(vec_ptype2(x, NA), new_table(dim = c(0L, 2L))) expect_identical(vec_ptype2(NA, x), new_table(dim = c(0L, 2L))) }) test_that("`table` delegates coercion", { expect_identical( vec_ptype2(new_table(1), new_table(FALSE)), zap_dimnames(new_table(double())) ) expect_error( vec_ptype2(new_table(1), new_table("")), class = "vctrs_error_incompatible_type" ) }) # Casting ----------------------------------------------------------------- test_that("can cast to an identically shaped table", { x <- new_table(1:5, dim = 5L) y <- new_table(1:8, dim = c(2L, 2L, 2L)) expect_identical(vec_cast(x, x), x) expect_identical(vec_cast(y, y), y) }) test_that("vec_cast() can broadcast table shapes", { # We test only the dim here and not the class because on R 3.2 # the `[.table` method did not exist and `shape_broadcast()` # gives back a matrix, not a table. x <- new_table(dim = c(0L, 1L)) y <- new_table(dim = c(0L, 2L)) expect_identical(dim(vec_cast(x, y)), c(0L, 2L)) x <- new_table(dim = c(0L, 1L, 1L)) y <- new_table(dim = c(0L, 2L, 3L)) expect_identical(dim(vec_cast(x, y)), c(0L, 2L, 3L)) }) test_that("cannot decrease axis length", { x <- new_table(dim = c(0L, 3L)) y <- new_table(dim = c(0L, 1L)) expect_error(vec_cast(x, y), "Non-recyclable", class = "vctrs_error_incompatible_type") }) test_that("cannot decrease dimensionality", { x <- new_table(dim = c(0L, 1L, 1L)) y <- new_table(dim = c(0L, 1L)) expect_error(vec_cast(x, y), "decrease dimensions", class = "vctrs_error_incompatible_type") }) test_that("vec_cast() errors on non-tables", { expect_error(vec_cast(new_table(), 1), class = "vctrs_error_incompatible_type") expect_error(vec_cast(new_table(), 1L), class = "vctrs_error_incompatible_type") expect_error(vec_cast(new_table(), "1"), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1L, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_cast("1", new_table()), class = "vctrs_error_incompatible_type") }) test_that("can cast from, but not to, unspecified", { x <- new_table() expect_error(vec_cast(x, NA), class = "vctrs_error_incompatible_type") expect_identical(vec_cast(NA, x), new_table(NA_integer_, dim = 1L)) x <- new_table(dim = c(0L, 2L)) expect_error(vec_cast(x, NA), class = "vctrs_error_incompatible_type") expect_identical(vec_cast(NA, x), new_table(c(NA_integer_, NA_integer_), dim = c(1L, 2L))) }) test_that("`table` delegates casting", { expect_identical( vec_cast(new_table(1), new_table(FALSE)), new_table(TRUE) ) expect_error( vec_cast(new_table(1), new_table("")), class = "vctrs_error_incompatible_type" ) }) # Misc -------------------------------------------------------------------- test_that("`new_table()` validates input", { expect_error(new_table(1L, 1), "`dim` must be an integer vector") expect_error(new_table(1:2, 1L), "must match the length of `x`") }) test_that("ptype is correct", { tab1 <- new_table(1L, dim = 1L) tab2 <- new_table(1:2, dim = c(1L, 2L, 1L)) expect_identical(vec_ptype(tab1), new_table()) expect_identical(vec_ptype(tab2), new_table(dim = c(0L, 2L, 1L))) }) test_that("can use a table in `vec_c()`", { expect_identical(vec_c(new_table()), new_table()) expect_identical(vec_c(new_table(), new_table()), new_table()) x <- new_table(1:5, 5L) y <- new_table(1:4, dim = c(2L, 2L)) expect_identical(vec_c(x, x), new_table(c(1:5, 1:5), dim = 10L)) expect_identical(vec_c(y, y), new_table(c(1:2, 1:2, 3:4, 3:4), dim = c(4L, 2L))) expect_identical(vec_c(x, y), new_table(c(1:5, 1:2, 1:5, 3:4), dim = c(7L, 2L))) }) test_that("names of the first dimension are kept in `vec_c()`", { x <- new_table(1:4, c(2L, 2L)) dimnames(x) <- list(c("r1", "r2"), c("c1", "c2")) xx <- vec_c(x, x) expect_identical(dimnames(xx), list(c("r1", "r2", "r1", "r2"), NULL)) }) test_that("can use a table in `vec_unchop()`", { x <- new_table(1:4, dim = c(2L, 2L)) expect_identical(vec_unchop(list(x)), x) expect_identical(vec_unchop(list(x, x), list(1:2, 4:3)), vec_slice(x, c(1:2, 2:1))) }) test_that("can concatenate tables", { x <- table(1:2) out <- vec_c(x, x) exp <- new_table(rep(1L, 4), dimnames = list(c("1", "2", "1", "2"))) expect_identical(out, exp) out <- vec_rbind(x, x) exp <- data_frame(`1` = new_table(c(1L, 1L)), `2` = new_table(c(1L, 1L))) expect_identical(out, exp) y <- table(list(1:2, 3:4)) # FIXME out <- vec_c(y, y) exp <- new_table( matrix(int(1, 0, 1, 0, 0, 1, 0, 1), nrow = 4), dim = c(4L, 2L), dimnames = list(c("1", "2", "1", "2"), NULL) ) expect_identical(out, exp) out <- vec_rbind(y, y) exp <- new_data_frame(list( `3` = int(1, 0, 1, 0), `4` = int(0, 1, 0, 1) ), row.names = c("1...1", "2...2", "1...3", "2...4") ) expect_identical(out, exp) skip("FIXME: dimnames of matrices are not properly concatenated") }) test_that("can concatenate tables of type double (#1190)", { x <- table(c(1, 2)) / 2 out <- vec_c(x, x) exp <- new_table(c(0.5, 0.5, 0.5, 0.5), dimnames = list(c("1", "2", "1", "2"))) expect_identical(out, exp) out <- vec_rbind(x, x) exp <- data_frame(`1` = new_table(c(0.5, 0.5)), `2` = new_table(c(0.5, 0.5))) expect_identical(out, exp) }) vctrs/tests/testthat/test-recycle.R0000644000176200001440000001143314027045462017151 0ustar liggesusers # vec_recycle ------------------------------------------------------------- test_that("vec_recycle recycles size 1 to any other size", { x <- 1 x0 <- numeric() x2 <- c(x, x) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) }) test_that("vec_recycle(): incompatible lengths get error messages", { x2 <- c(1, 2) expect_error(vec_recycle(x2, 1), class = "vctrs_error_recycle_incompatible_size") expect_error(vec_recycle(x2, 0), class = "vctrs_error_recycle_incompatible_size") expect_error(vec_recycle(x2, 3), class = "vctrs_error_recycle_incompatible_size") }) test_that("can recycle arrays", { x <- matrix(1:2, 1) x2 <- matrix(1:2, 2, 2, byrow = TRUE) x0 <- matrix(integer(), 0, 2) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) # List arrays data <- c(list(1), list(2)) x <- matrix(data, 1) x2 <- matrix(data, 2, 2, byrow = TRUE) x0 <- matrix(list(), 0, 2) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) }) # Empty ------------------------------------------------------------------- test_that("empty input returns empty list", { expect_equal(vec_recycle_common(), list()) }) # Vectors ----------------------------------------------------------------- test_that("NULL is idempotent", { expect_equal(vec_recycle_common(NULL, NULL), list(NULL, NULL)) expect_equal(vec_recycle_common(1:5, NULL), list(1:5, NULL)) expect_equal(vec_recycle_common(NULL, 1:5), list(NULL, 1:5)) }) test_that("equal lengths returned as is", { x <- 1:3 expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x[1], x[1]), list(x[1], x[1])) expect_equal(vec_recycle_common(x[0], x[0]), list(x[0], x[0])) }) test_that("vec_recycle_common recycles size 1 to any other size", { x1 <- 1 x3 <- rep(1, 3) x0 <- numeric() expect_equal(vec_recycle_common(x1, x3), list(x3, x3)) expect_equal(vec_recycle_common(x3, x1), list(x3, x3)) expect_equal(vec_recycle_common(x1, x0), list(x0, x0)) }) test_that("vec_recycle_common(): incompatible lengths get error messages", { expect_error(vec_recycle_common(1:2, 1:3), class = "vctrs_error_incompatible_size") expect_error(vec_recycle_common(1:3, 1:2), class = "vctrs_error_incompatible_size") expect_error(vec_recycle_common(numeric(), 1:2), class = "vctrs_error_incompatible_size") }) # Matrices ---------------------------------------------------------------- test_that("can vec_recycle_common matrices", { x <- matrix(nrow = 4, ncol = 4) x1 <- x[1, , drop = FALSE] expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x1, x), list(x, x)) }) test_that("recycling matrices respects incompatible sizes", { x <- matrix(nrow = 4, ncol = 4) x2 <- x[1:2, , drop = FALSE] x0 <- x[0, , drop = FALSE] expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size") expect_error(vec_recycle_common(x0, x), class = "vctrs_error_incompatible_size") }) test_that("can vec_recycle_common data frames", { x <- data.frame(a = rep(1, 3), b = rep(2, 3)) x1 <- vec_slice(x, 1L) expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x1, x), list(x, x)) }) test_that("recycling data frames respects incompatible sizes", { x <- data.frame(a = rep(1, 3), b = rep(2, 3)) x2 <- vec_slice(x, 1:2) x0 <- vec_slice(x, integer()) expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size") expect_error(vec_recycle_common(x0, x), class = "vctrs_error_incompatible_size") }) test_that("can vec_recycle_common matrix and data frame", { mt <- matrix(nrow = 2, ncol = 2) df <- data.frame(x = c(1, 1), y = c(2, 2)) expect_equal( vec_recycle_common(vec_slice(mt, 1L), df), list(mt, df) ) expect_equal( vec_recycle_common(mt, vec_slice(df, 1L)), list(mt, df) ) }) test_that("recycling data frames with matrices respects incompatible sizes", { mt <- matrix(nrow = 2, ncol = 2) df <- data.frame(x = c(1, 1), y = c(2, 2)) expect_error( vec_recycle_common(vec_slice(mt, integer()), df), class = "vctrs_error_incompatible_size" ) expect_error( vec_recycle_common(mt, vec_slice(df, 0L)), class = "vctrs_error_incompatible_size" ) }) test_that("recycling to size 1 has informative error", { verify_errors({ expect_error(vec_recycle(1:2, 1), class = "vctrs_error_recycle_incompatible_size") }) }) test_that("recycling has informative errors", { verify_output(test_path("error", "test-recycle.txt"), { "# incompatible recycling size has informative error" vec_recycle(1:2, 4) vec_recycle(1:2, 4, x_arg = "foo") "# recycling to size 1 has informative error" vec_recycle(1:2, 1) }) }) vctrs/tests/testthat/test-type-data-frame-embedded.txt0000644000176200001440000000035414042546246022653 0ustar liggesusers> vec_ptype_show(df) Prototype: data.frame< x: integer a: data.frame< a: integer b: character > b: list_of c: list_of< data.frame< x: integer y: character > > > vctrs/tests/testthat/test-type-list-of.R0000644000176200001440000001255514027065532020065 0ustar liggesuserstest_that("list_of inherits from list", { x1 <- list_of(1, 1) expect_s3_class(x1, "list") }) test_that("list_of works like list", { x1 <- list_of(1, 1) expect_type(x1, "list") expect_s3_class(x1, "vctrs_list_of") expect_equal(attr(x1, "ptype"), double()) x2 <- list_of(1, 1, .ptype = integer()) expect_equal(attr(x1, "ptype"), integer()) x3 <- as_list_of(list(1, 1)) expect_equal(x3, x1) x4 <- list_of(a = 1, b = 2) expect_equal(x4$b, 2) expect_error(x4$c, "Invalid index", fixed = TRUE) }) test_that("list_of errors if it can't find common type", { expect_error(list_of(1, "a"), class = "vctrs_error_incompatible_type") expect_error(list_of(), "find common type") }) test_that("can use as_list_of to change type", { x1 <- list_of(1) expect_equal(as_list_of(x1), x1) x2 <- as_list_of(x1, .ptype = integer()) expect_identical(x2[[1]], 1L) }) test_that("is_list_of as expected", { expect_false(is_list_of(list(1))) expect_true(is_list_of(list_of(1))) }) test_that("print method gives human friendly output", { skip_on_cran() # Depends on tibble # FIXME: Disable crayon until we switch to testthat 3e local_options(crayon.enabled = FALSE) x <- list_of(1, 2:3) expect_known_output({ print(x) cat("\n") print(tibble::tibble(x)) }, file = test_path("test-list_of-print.txt") ) }) test_that("str method is reasonably correct", { x <- list_of(1, 2:3) expect_known_output({ str(x) cat("\n") str(list(list(x, y = 2:1))) }, file = test_path("test-list_of-str.txt") ) expect_known_output({ str(x[0]) cat("\n") str(list(list(x, y = 2:1))) }, file = test_path("test-list_of-str-empty.txt") ) }) test_that("constructor requires list input", { expect_error(new_list_of(1), "must be a list") expect_error(new_list_of(mtcars), "must be a list") }) test_that("constructor requires size 0 ptype", { expect_error(new_list_of(ptype = 1), "must have size 0") }) # Subsetting -------------------------------------------------------------- test_that("[ preserves type", { x <- list_of(1) expect_equal(x[1], x) }) test_that("[<-, [[<- and $<- coerce their input", { x <- list_of(x = 1, y = 1, z = 1, w = 1) x[1] <- list(FALSE) expect_identical(x, list_of(x = 0, y = 1, z = 1, w = 1)) x[[2]] <- FALSE expect_identical(x, list_of(x = 0, y = 0, z = 1, w = 1)) x$z <- FALSE expect_identical(x, list_of(x = 0, y = 0, z = 0, w = 1)) x[3:4] <- c(TRUE, FALSE) expect_identical(x, list_of(x = 0, y = 0, z = 1, w = 0)) x[[2]] <- NULL expect_equal(x, list_of(x = 0, y = NULL, z = 1, w = 0)) expect_error(x[[2]] <- list(20), class = "vctrs_error_incompatible_type") expect_error(x$y <- list(20), class = "vctrs_error_incompatible_type") x[3:4] <- list(NULL) expect_equal(x, list_of(x = 0, y = NULL, z = NULL, w = NULL)) expect_equal(is.na(x), c(FALSE, TRUE, TRUE, TRUE)) }) test_that("assingment can increase size of vector", { x <- list_of(x = 1) x[[2]] <- 2 x$z <- 3 x[4:5] <- c(4,5) expect_length(x, 5) }) # Type system ------------------------------------------------------------- test_that("list_of() are vectors", { expect_true(vec_is_vector(list_of(1))) expect_true(vec_is(list_of(1))) }) test_that("list coercions are symmetric and unchanging", { types <- list( list(), list_of(.ptype = integer()), list_of(.ptype = double()), list_of(.ptype = character()) ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) expect_known_output( mat, test_path("test-list_of-type.txt"), print = TRUE, width = 200 ) }) test_that("max, list_of> is list_of>", { r_int <- list_of(.ptype = integer()) r_dbl <- list_of(.ptype = double()) expect_equal(vec_ptype_common(r_int, r_int), r_int) expect_equal(vec_ptype_common(r_int, r_dbl), r_int) }) test_that("safe casts work as expected", { x <- list_of(1) expect_equal(vec_cast(NULL, x), NULL) expect_equal(vec_cast(NA, x), list_of(NULL, .ptype = double())) # These used to be allowed expect_error(vec_cast(1L, x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(1), x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(TRUE), x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) test_that("lossy casts generate warning (no longer the case)", { # This used to be a lossy cast warning expect_error( vec_cast(list(c(1.5, 1), 1L), to = list_of(1L)), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), list_of(1)), class = "vctrs_error_incompatible_type") }) test_that("validation", { expect_error(validate_list_of(list_of(1, 2, 3)), NA) expect_error( validate_list_of(new_list_of(list(factor("foo")), vec_ptype(factor("bar")))), class = "vctrs_error_cast_lossy" ) }) test_that("list_of() has as.character() method (tidyverse/tidyr#654)", { exp <- rep(paste0("<", vec_ptype_abbr(mtcars), ">"), 2) expect_identical(as.character(list_of(mtcars, mtcars)), exp) }) test_that("vec_ptype2(>, NA) is symmetric (#687)", { lof <- list_of(1, 2, 3) expect_identical(vec_ptype2(lof, NA), vec_ptype(lof)) expect_identical(vec_ptype2(NA, lof), vec_ptype(lof)) }) vctrs/tests/testthat/test-print-str-attr.txt0000644000176200001440000000032114042546241021043 0ustar liggesusers int [1:100] 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16... @ x: chr "a string" @ y: int [1:20] 1 2 3 4 5 6 7 8 9 10 ... @ z:'data.frame': 3 obs. of 1 variable: ..$ x: int [1:3] 1 2 3 vctrs/tests/testthat/test-type-integer64.R0000644000176200001440000001066413723213047020314 0ustar liggesusers test_that("casting of integer64 works", { x <- bit64::as.integer64(1:10) expect_equal(vec_cast(x, bit64::integer64()), x) expect_equal(vec_cast(x, integer()), 1:10) expect_equal(vec_cast(1:10, bit64::integer64()), x) expect_equal(vec_cast(x, double()), as.double(x)) expect_equal(vec_cast(as.numeric(1:10), bit64::integer64()), x) expect_equal(vec_cast(x, logical()), rep(TRUE, 10L)) expect_equal(vec_cast(c(TRUE, FALSE), bit64::integer64()), bit64::as.integer64(c(1, 0))) expect_equal(vec_cast(NA, bit64::integer64()), bit64::as.integer64(NA)) expect_equal(vec_cast(unspecified(2), bit64::integer64()), bit64::as.integer64(c(NA, NA))) expect_error(vec_cast(x, factor()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(factor(), x), class = "vctrs_error_incompatible_type") # These used to be allowed expect_error(vec_cast(x, character()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(as.character(1:10), bit64::integer64()), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2 for integer64 works", { x <- bit64::as.integer64(1:10) expect_equal(vec_ptype2(x, x), bit64::integer64()) expect_equal(vec_ptype2(x, 1L), bit64::integer64()) expect_equal(vec_ptype2(1L, x), bit64::integer64()) expect_equal(vec_ptype2(x, TRUE), bit64::integer64()) expect_equal(vec_ptype2(TRUE, x), bit64::integer64()) expect_equal(vec_ptype2(x, NA), bit64::integer64()) expect_equal(vec_ptype2(NA, x), bit64::integer64()) expect_equal(vec_ptype2(unspecified(), x), bit64::integer64()) expect_equal(vec_ptype2(x, unspecified()), bit64::integer64()) expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, ""), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2("", x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(data.frame(), x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, data.frame()), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype_abbr.integer64", { expect_equal(vec_ptype_abbr(bit64::as.integer64(1:10)), "int64") expect_equal(vec_ptype_full(bit64::as.integer64(1:10)), "integer64") }) test_that("can sort integer64", { x <- bit64::as.integer64(c(-1, -3, -2, 1)) expect_identical(vec_order(x), int(2, 3, 1, 4)) expect_identical(x[vec_order(x)], bit64::as.integer64(c(-3, -2, -1, 1))) }) test_that("can slice integer64 objects of all dimensions", { x <- bit64::as.integer64(1:8) expect <- bit64::as.integer64(c(1, 3)) expect_identical(vec_slice(x, c(1, 3)), expect) dim(x) <- c(4, 2) expect <- bit64::as.integer64(c(1, 3, 5, 7)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, c(1, 3)), expect) dim(x) <- c(2, 2, 2) expect <- bit64::as.integer64(c(2, 4, 6, 8)) dim(expect) <- c(1, 2, 2) expect_identical(vec_slice(x, 2), expect) }) test_that("can slice integer64 objects with `NA_integer_`", { idx <- c(NA_integer_, 1) x <- bit64::as.integer64(1:8) expect <- bit64::as.integer64(c(NA, 1)) expect_identical(vec_slice(x, idx), expect) dim(x) <- c(4, 2) expect <- bit64::as.integer64(c(NA, 1, NA, 5)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, idx), expect) dim(x) <- c(2, 2, 2) expect <- bit64::as.integer64(c(NA, 1, NA, 3, NA, 5, NA, 7)) dim(expect) <- c(2, 2, 2) expect_identical(vec_slice(x, idx), expect) }) test_that("can init integer64 objects", { idx <- c(NA_integer_, NA_integer_) x <- bit64::as.integer64(1:8) expect_identical(vec_init(x, 2), vec_slice(x, idx)) dim(x) <- c(4, 2) expect_identical(vec_init(x, 2), vec_slice(x, idx)) dim(x) <- c(2, 2, 2) expect_identical(vec_init(x, 2), vec_slice(x, idx)) }) test_that("can chop integer64 objects with `NA_integer_` indices", { idx <- list(NA_integer_, 1) x <- bit64::as.integer64(1:8) expect <- list( bit64::as.integer64(NA), bit64::as.integer64(1) ) expect_identical(vec_chop(x, idx), expect) dim(x) <- c(4, 2) expect <- list( bit64::as.integer64(c(NA, NA)), bit64::as.integer64(c(1, 5)) ) dim(expect[[1]]) <- c(1, 2) dim(expect[[2]]) <- c(1, 2) expect_identical(vec_chop(x, idx), expect) dim(x) <- c(2, 2, 2) expect <- list( bit64::as.integer64(c(NA, NA, NA, NA)), bit64::as.integer64(c(1, 3, 5, 7)) ) dim(expect[[1]]) <- c(1, 2, 2) dim(expect[[2]]) <- c(1, 2, 2) expect_identical(vec_chop(x, idx), expect) }) vctrs/tests/testthat/test-type-factor.txt0000644000176200001440000000027014042546250020371 0ustar liggesusers ordered<> factor<> character ordered<> "ordered<>" NA "character" factor<> NA "factor<>" "character" character "character" "character" "character" vctrs/tests/testthat/test-group.R0000644000176200001440000001347513723213047016665 0ustar liggesusers # group id ---------------------------------------------------------------- test_that("vec_group_id detects groups in order of appearance", { x <- c(2, 4, 2, 1, 4) expect <- structure(c(1L, 2L, 1L, 3L, 2L), n = 3L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works for size 0 input", { expect <- structure(integer(), n = 0L) expect_equal(vec_group_id(NULL), expect) expect_equal(vec_group_id(numeric()), expect) }) test_that("vec_group_id works on base S3 objects", { x <- factor(c("x", "y", "x")) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) x <- new_date(c(0, 1, 0)) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works row wise on data frames", { df <- data.frame(x = c(1, 2, 1, 1), y = c(2, 3, 2, 3)) expect <- structure(c(1L, 2L, 1L, 3L), n = 3L) expect_equal(vec_group_id(df), expect) }) test_that("vec_group_id works row wise on arrays", { x <- array(c(1, 1, 1, 2, 4, 2), c(3, 2)) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works with different encodings", { expect <- structure(c(1L, 1L, 1L), n = 1L) expect_equal(vec_group_id(encodings()), expect) }) test_that("vec_group_id takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), c(1, 1, 1, 2)) # Compares on only the first field expect <- structure(c(1L, 2L, 1L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- structure(c(1L, 2L, 1L, 1L), n = 2L) expect_equal(vec_group_id(df), expect) }) # group rle --------------------------------------------------------------- test_that("vec_group_rle returns a `vctrs_group_rle` object", { expect_s3_class(vec_group_rle(1), "vctrs_group_rle") }) test_that("vec_group_rle works with size 0 input", { expect <- new_group_rle(integer(), integer(), 0L) expect_equal(vec_group_rle(integer()), expect) expect_equal(vec_group_rle(NULL), expect) }) test_that("vec_group_rle detects groups in order of appearance", { x <- c(2, 2, 3, 1, 1) expect <- new_group_rle(1:3, c(2L, 1L, 2L), 3L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle can refer to groups it has already seen", { x <- c(2, 3, 2) expect <- new_group_rle(c(1L, 2L, 1L), rep(1L, 3), 2L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle works on base S3 objects", { expect <- new_group_rle(c(1L, 2L, 1L, 3L), c(1L, 2L, 1L, 1L), 3L) x <- factor(c("x", "y", "y", "x", "z")) expect_equal(vec_group_rle(x), expect) x <- new_date(c(0, 1, 1, 0, 2)) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), c(1, 1, 1, 2)) # Compares on only the first field expect <- new_group_rle(c(1L, 2L, 1L), c(1L, 1L, 2L), 2L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle works row wise on data frames", { df <- data.frame(x = c(1, 1, 2, 1), y = c(2, 2, 3, 2)) expect <- new_group_rle(c(1L, 2L, 1L), c(2L, 1L, 1L), 2L) expect_equal(vec_group_rle(df), expect) }) test_that("vec_group_rle takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- new_group_rle(c(1L, 2L, 1L), c(1L, 1L, 2L), 2L) expect_equal(vec_group_rle(df), expect) }) test_that("can access fields", { x <- vec_group_rle(c(1, 1, 2)) expect_equal(fields(x), c("group", "length")) expect_identical(field(x, "group"), c(1L, 2L)) expect_identical(field(x, "length"), c(2L, 1L)) }) test_that("can access number of groups", { x <- vec_group_rle(c(1, 1, 2)) expect_identical(attr(x, "n"), 2L) }) test_that("print method is useful", { x <- new_group_rle(c(1L, 2L, 1L), c(3L, 2L, 1L), 2L) expect_known_output(print(x), file = test_path("test-type-group-rle.txt")) }) # group loc -------------------------------------------------------------- test_that("can locate unique groups of an empty vector", { out <- vec_group_loc(integer()) expect_s3_class(out, "data.frame") expect_equal(out$key, integer()) expect_equal(out$loc, list()) }) test_that("can locate unique groups of a data frame", { df <- data_frame(x = c(1, 1, 1, 2, 2), y = c("a", "a", "b", "a", "b")) out <- vec_group_loc(df) expect_equal(nrow(out), 4L) expect_equal(out$key, vec_unique(df)) }) test_that("can locate unique groups of a data frame with a list column", { df <- data_frame(x = list(1:2, 1:2, "a", 5.5, "a")) out <- vec_group_loc(df) expect_equal(nrow(out), 3L) expect_equal(out$key, vec_unique(df)) }) test_that("`x` must be a vector", { expect_error(vec_group_loc(environment()), class = "vctrs_error_scalar_type") }) test_that("`key` column retains full type information", { x <- factor(letters[c(1, 2, 1)], levels = letters[1:3]) out <- vec_group_loc(x) expect_equal(levels(out$key), levels(x)) }) test_that("vec_group_loc takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_equal(vec_group_loc(x)$key, x[1:2]) expect_equal(vec_group_loc(x)$loc, list(c(1L, 3L), 2L)) x <- as.POSIXlt(new_datetime(c(1, 2, 1))) expect_equal(vec_group_loc(x)$key, x[1:2]) expect_equal(vec_group_loc(x)$loc, list(c(1L, 3L), 2L)) }) test_that("vec_group_loc takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- data_frame(key = vec_slice(df, c(1, 2)), loc = list(c(1L, 3L, 4L), 2L)) expect_equal(vec_group_loc(df), expect) }) test_that("vec_group_loc works with different encodings", { encs <- encodings() expect_identical(nrow(vec_group_loc(encs)), 1L) }) vctrs/tests/testthat/test-c.R0000644000176200001440000004012013753021253015735 0ustar liggesusers test_that("zero length input returns NULL", { expect_equal(vec_c(), NULL) expect_equal(vec_c(NULL), NULL) expect_equal(vec_c(NULL,), NULL) expect_equal(vec_c(NULL, NULL), NULL) }) test_that("NULL is idempotent", { expect_equal(vec_c(NULL, 1L), 1L) expect_equal(vec_c(1L, NULL), 1L) }) test_that("NA is idempotent", { expect_equal(vec_c(NA, 1L), c(NA, 1L)) expect_equal(vec_c(NA, "x"), c(NA, "x")) expect_equal(vec_c(NA, factor("x")), factor(c(NA, "x"))) expect_equal(vec_c(NA, new_date(0)), new_date(c(NA, 0))) expect_equal(vec_c(NA, new_datetime(0)), new_datetime(c(NA, 0))) expect_equal(vec_c(NA, new_duration(0)), new_duration(c(NA, 0))) }) test_that("NA is logical if no other types intervene", { expect_equal(vec_c(logical()), logical()) expect_equal(vec_c(NA), NA) expect_equal(vec_c(NA, NA), c(NA, NA)) }) test_that("different types are coerced to common", { expect_equal(vec_c(TRUE, 1L, 1), c(1, 1, 1)) expect_equal(vec_c(TRUE, 2:4), 1:4) }) test_that("specified .ptypes do not allow more casts", { expect_error( vec_c(TRUE, .ptype = character()), class = "vctrs_error_incompatible_type" ) }) test_that("combines outer an inner names", { expect_equal(vec_c(x = 1), c(x = 1)) expect_equal(vec_c(c(x = 1)), c(x = 1)) expect_equal(vec_c(c(x = 1:2)), c(x1 = 1, x2 = 2)) expect_error(vec_c(y = c(x = 1)), "Please supply") }) test_that("can bind data.frame columns", { df <- data.frame(x = NA, y = 1:2) df$x <- data.frame(a = 1:2) expected <- data.frame(x = NA, y = c(1:2, 1:2)) expected$x <- data.frame(a = c(1:2, 1:2)) expect_equal(vec_c(df, df), expected) }) test_that("vec_c() handles matrices", { m <- matrix(1:4, nrow = 2) dimnames(m) <- list(c("foo", "bar"), c("baz", "quux")) # FIXME: `vec_ptype_common(m, m)` doesn't return dimension names exp <- matrix(c(1:2, 1:2, 3:4, 3:4), nrow = 4) rownames(exp) <- c("foo", "bar", "foo", "bar") expect_identical(vec_c(m, m), exp) expect_error(vec_c(outer = m), "Please supply") }) test_that("vec_c() includes index in argument tag", { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) expect_known_output(file = test_path("test-type-vec-c-error.txt"), { try2(vec_c(df1, df2)) try2(vec_c(df1, df1, df2)) try2(vec_c(foo = df1, bar = df2)) }) }) test_that("vec_c() handles record classes", { local_rational_class() out <- vec_c(rational(1, 2), 1L, NA) expect_true(vec_is(out, rational(1, 2))) expect_size(out, 3) expect_identical(vec_proxy(out), data.frame(n = c(1L, 1L, NA), d = c(2L, 1L, NA))) }) test_that("can mix named and unnamed vectors (#271)", { expect_identical(vec_c(c(a = 1), 2), c(a = 1, 2)) expect_identical(vec_c(0, c(a = 1), 2, b = 3), c(0, a = 1, 2, b =3)) }) test_that("vec_c() repairs names", { # Default minimal repair expect_named(vec_c(a = 1, a = 2, `_` = 3), c("a", "a", "_")) out <- vec_c(!!!set_names(1, NA)) expect_named(out, "") expect_named(vec_c(a = 1, a = 2, `_` = 3, .name_repair = "unique"), c("a...1", "a...2", "_")) expect_error(vec_c(a = 1, a = 2, `_` = 3, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") expect_named(vec_c(a = 1, a = 2, `_` = 3, .name_repair = "universal"), c("a...1", "a...2", "._")) expect_named(vec_c(a = 1, a = 2, .name_repair = ~ toupper(.)), c("A", "A")) }) test_that("vec_c() doesn't use outer names for data frames (#524)", { x <- data.frame(inner = 1) expect_equal(vec_c(outer = x), x) a <- data.frame(x = 1L) b <- data.frame(x = 2L) expect_equal(vec_c(foo = a, bar = b), data.frame(x = 1:2)) }) test_that("vec_c() preserves row names and inner names", { x <- data.frame(a = 1, row.names = "r1") y <- data.frame(a = 2, row.names = "r2") expect_equal(rownames(vec_c(x, y)), c("r1", "r2")) expect_equal(rownames(vec_c(x, x)), c("r1...1", "r1...2")) vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) oo_x <- set_names(as.POSIXlt(c("2020-01-01", "2020-01-02", "2020-01-03")), letters[1:3]) oo_y <- as.POSIXlt(c(FOO = "2020-01-04")) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "d") mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("d")) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x, oo = oo_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y, oo = oo_y), row.names = c("quux") ) nested_out <- vec_c(nested_x, nested_y) expect_identical(row.names(nested_out), c("foo", "bar", "baz", "quux")) expect_identical(row.names(nested_out$df), c("a", "b", "c", "d")) expect_identical(row.names(nested_out$mat), c("a", "b", "c", "d")) expect_identical(names(nested_out$vec), c("a", "b", "c", "FOO")) expect_identical(names(nested_out$oo), c("a", "b", "c", "FOO")) }) test_that("vec_c() outer names work with proxied objects", { x <- as.POSIXlt(new_datetime(0)) exp <- set_names(x, "outer") expect_equal(vec_c(outer = x), exp) named_x <- set_names(x, "inner") exp <- set_names(named_x, "outer_inner") expect_error(vec_c(outer = named_x), "Please supply") expect_equal(vec_c(outer = named_x, .name_spec = "{outer}_{inner}"), exp) xs <- as.POSIXlt(new_datetime(c(0, 1))) exp <- set_names(xs, c("outer_1", "outer_2")) expect_error(vec_c(outer = xs), "Please supply") expect_equal(vec_c(outer = xs, .name_spec = "{outer}_{inner}"), exp) }) test_that("vec_c() works with simple homogeneous foreign S3 classes", { expect_identical(vec_c(foobar(1), foobar(2)), vec_c(foobar(c(1, 2)))) expect_identical(vec_c(NULL, foobar(1), foobar(2)), vec_c(foobar(c(1, 2)))) }) test_that("vec_c() works with simple homogeneous foreign S4 classes", { joe1 <- .Counts(c(1L, 2L), name = "Joe") joe2 <- .Counts(3L, name = "Joe") expect_identical(vec_c(joe1, joe2), .Counts(1:3, name = "Joe")) }) test_that("vec_c() fails with complex foreign S3 classes", { verify_errors({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type") }) }) test_that("vec_c() fails with complex foreign S4 classes", { verify_errors({ joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") expect_error(vec_c(joe, jane), class = "vctrs_error_incompatible_type") }) }) test_that("vec_c() falls back to c() if S3 method is available", { # Check off-by-one error expect_error( vec_c(foobar(1), "", foobar(2)), class = "vctrs_error_incompatible_type" ) # Fallback when the class implements `c()` method <- function(...) rep_along(list(...), "dispatched") local_methods( c.vctrs_foobar = method ) expect_identical( vec_c(foobar(1), foobar(2, class = "foo")), c("dispatched", "dispatched") ) expect_identical( vec_c(NULL, foobar(1), NULL, foobar(2, class = "foo")), c("dispatched", "dispatched") ) # Registered fallback s3_register("base::c", "vctrs_c_fallback", method) expect_identical( vec_c( structure(1, class = "vctrs_c_fallback"), structure(2, class = "vctrs_c_fallback") ), c("dispatched", "dispatched") ) # Don't fallback for S3 lists which are treated as scalars by default expect_error( vec_c(foobar(list(1)), foobar(list(2))), class = "vctrs_error_scalar_type" ) }) test_that("c() fallback is consistent (FIXME)", { out <- with_methods( c.vctrs_foobar = function(...) structure(NextMethod(), class = "dispatched"), list( direct = vec_c(foobar(1L), foobar(2L)), df = vec_c(data_frame(x = foobar(1L)), data_frame(x = foobar(2L))), tib = vec_c(tibble(x = foobar(1L)), tibble(x = foobar(2L))), foreign_df = vec_c(foobaz(data_frame(x = foobar(1L))), foobaz(data_frame(x = foobar(2L)))) ) ) # Proper `c()` dispatch: expect_identical(out$direct, structure(1:2, class = "dispatched")) # Inconsistent: expect_identical(out$df$x, foobar(1:2)) expect_identical(out$tib$x, foobar(1:2)) expect_identical(out$foreign_df$x, foobar(1:2)) }) test_that("vec_c() falls back to c() if S4 method is available", { joe1 <- .Counts(c(1L, 2L), name = "Joe") joe2 <- .Counts(3L, name = "Joe") c_counts <- function(x, ...) { xs <- list(x, ...) xs_data <- lapply(xs, function(x) x@.Data) new_data <- do.call(c, xs_data) .Counts(new_data, name = x@name) } local_s4_method("c", methods::signature(x = "vctrs_Counts"), c_counts) expect_identical( vec_c(joe1, joe2), .Counts(c(1L, 2L, 3L), name = "Joe") ) expect_identical( vec_c(NULL, joe1, joe2), .Counts(c(1L, 2L, 3L), name = "Joe") ) }) test_that("vec_c() fallback doesn't support `name_spec` or `ptype`", { verify_errors({ expect_error( with_c_foobar(vec_c(foobar(1), foobar(2), .name_spec = "{outer}_{inner}")), "name specification" ) # Used to be an error about `ptype` expect_error( with_c_foobar(vec_c(foobar(1), foobar(2), .ptype = "")), class = "vctrs_error_incompatible_type" ) }) }) test_that("vec_c() doesn't fall back when ptype2 is implemented", { new_quux <- function(x) structure(x, class = "vctrs_quux") with_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) new_quux(int()), vec_cast.vctrs_quux.vctrs_foobar = function(x, to, ...) new_quux(x), vec_restore.vctrs_quux = function(x, ...) new_quux(x), c.vctrs_foobar = function(...) foobar(NextMethod()), { expect_s3_class(c(foobar(1:3), foobar(4L)), "vctrs_foobar") expect_s3_class(vec_c(foobar(1:3), foobar(4L)), "vctrs_quux") } ) }) test_that("vec_c() falls back even when ptype is supplied", { expect_foobar(vec_c(foobar(1), foobar(2), .ptype = foobar(dbl()))) with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), { expect_quux(vec_c(foobar(1), foobar(2), .ptype = foobar(dbl()))) expect_quux(vec_c(foobar(1, foo = TRUE), foobar(2, bar = TRUE), .ptype = foobar(dbl()))) } ) }) test_that("vec_implements_ptype2() is FALSE for scalars", { expect_false(vec_implements_ptype2(quote(foo))) }) test_that("vec_implements_ptype2() and vec_c() fallback are compatible with old registration", { foo <- structure(NA, class = "vctrs_implements_ptype2_false") expect_false(vec_implements_ptype2(foo)) vec_ptype2.vctrs_implements_ptype2_true <- function(...) NULL s3_register( "vctrs::vec_ptype2", "vctrs_implements_ptype2_true", vec_ptype2.vctrs_implements_ptype2_true ) bar <- structure(NA, class = "vctrs_implements_ptype2_true") expect_true(vec_implements_ptype2(bar)) local_methods( `c.vctrs_implements_ptype2_true` = function(...) stop("never called") ) expect_identical(vec_c(bar), bar) }) test_that("can ignore names in `vec_c()` by providing a `zap()` name-spec (#232)", { expect_error(vec_c(a = c(b = 1:2))) expect_identical(vec_c(a = c(b = 1:2), b = 3L, .name_spec = zap()), 1:3) verify_errors({ expect_error( vec_c(a = c(b = letters), b = 1, .name_spec = zap()), class = "vctrs_error_incompatible_type" ) }) }) test_that("can concatenate subclasses of `vctrs_vctr` which don't have ptype2 methods", { x <- new_vctr(1, class = "vctrs_foo") expect_identical(vec_c(x, x), new_vctr(c(1, 1), class = "vctrs_foo")) }) test_that("base c() fallback handles unspecified chunks", { local_methods( c.vctrs_foobar = function(...) { x <- NextMethod() # Should not be passed any unspecified chunks if (anyNA(x)) { abort("tilt") } foobar(x) }, `[.vctrs_foobar` = function(x, i, ...) { # Return a quux to detect dispatch quux(NextMethod()) } ) out <- vec_c(foobar(1:2), rep(NA, 2)) expect_identical(out, quux(c(1:2, NA, NA))) out <- vec_c(rep(NA, 2), foobar(1:2), NA) expect_identical(out, quux(c(NA, NA, 1:2, NA))) }) test_that("can zap outer names from a name-spec (#1215)", { zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner expect_null( names(vec_c(a = 1:2, .name_spec = zap_outer_spec)) ) expect_identical( names(vec_c(a = 1:2, c(foo = 3L), .name_spec = zap_outer_spec)), c("", "", "foo") ) expect_null( names(vec_unchop(list(a = 1:2), indices = list(1:2), name_spec = zap_outer_spec)) ) expect_identical( names(vec_unchop(list(a = 1:2, c(foo = 3L)), indices = list(1:2, 3), name_spec = zap_outer_spec)), c("", "", "foo") ) }) test_that("named empty vectors force named output (#1263)", { x <- set_names(int(), chr()) expect_named(vec_c(x), chr()) expect_named(vec_c(x, x), chr()) expect_named(vec_c(x, 1L), "") expect_named(vec_c(x, 1), "") expect_named(vec_unchop(list(x), list(int())), chr()) expect_named(vec_unchop(list(x, x), list(int(), int())), chr()) expect_named(vec_unchop(list(x, 1L), list(int(), 1)), "") # FIXME: `vec_cast_common()` dropped names # https://github.com/r-lib/vctrs/issues/623 expect_failure( expect_named(vec_unchop(list(x, 1), list(int(), 1)), "") ) }) # Golden tests ------------------------------------------------------- test_that("vec_c() has informative error messages", { verify_output(test_path("error", "test-c.txt"), { "# vec_c() fails with complex foreign S3 classes" x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") vec_c(x, y) "# vec_c() fails with complex foreign S4 classes" joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") vec_c(joe, jane) "# vec_c() fallback doesn't support `name_spec` or `ptype`" with_c_foobar(vec_c(foobar(1), foobar(2), .name_spec = "{outer}_{inner}")) with_c_foobar(vec_c(foobar(1), foobar(2), .ptype = "")) "# can ignore names by providing a `zap()` name-spec (#232)" vec_c(a = c(b = letters), b = 1, .name_spec = zap()) }) }) test_that("concatenation performs expected allocations", { verify_output(test_path("performance", "test-c.txt"), { ints <- rep(list(1L), 1e2) dbls <- rep(list(1), 1e2) # Extra allocations from `list2()`, see r-lib/rlang#937 "# `vec_c()` " "Integers" with_memory_prof(vec_c(!!!ints)) "Doubles" with_memory_prof(vec_c(!!!dbls)) "Integers to integer" with_memory_prof(vec_c(!!!ints, ptype = int())) "Doubles to integer" with_memory_prof(vec_c(!!!dbls, ptype = int())) "# `vec_unchop()` " "Integers" with_memory_prof(vec_unchop(ints)) "Doubles" with_memory_prof(vec_unchop(dbls)) "Integers to integer" with_memory_prof(vec_unchop(ints, ptype = int())) "Doubles to integer" with_memory_prof(vec_unchop(dbls, ptype = int())) "# Concatenation with names" "Named integers" ints <- rep(list(set_names(1:3, letters[1:3])), 1e2) with_memory_prof(vec_unchop(ints)) "Named matrices" mat <- matrix(1:4, 2, dimnames = list(c("foo", "bar"))) mats <- rep(list(mat), 1e2) with_memory_prof(vec_unchop(mats)) "Data frame with named columns" df <- data_frame( x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c("A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb"))) ) dfs <- rep(list(df), 1e2) with_memory_prof(vec_unchop(dfs)) "Data frame with rownames (non-repaired, non-recursive case)" df <- data_frame(x = 1:2) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_unchop(dfs)) "Data frame with rownames (repaired, non-recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_unchop(dfs)) # FIXME: The following recursive cases duplicate rownames # excessively because df-cols are restored at each chunk # assignment, causing a premature name-repair "FIXME (#1217): Data frame with rownames (non-repaired, recursive case)" df <- data_frame( x = 1:2, y = data_frame(x = 1:2) ) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_unchop(dfs)) "FIXME (#1217): Data frame with rownames (repaired, recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_unchop(dfs)) }) }) vctrs/tests/testthat/test-type-date-time.txt0000644000176200001440000000200514042546246020767 0ustar liggesusers date datetime datetime POSIXlt duration duration date "date" "datetime" "datetime" "datetime" NA NA datetime "datetime" "datetime" "datetime" "datetime" NA NA datetime "datetime" "datetime" "datetime" "datetime" NA NA POSIXlt "datetime" "datetime" "datetime" "datetime" NA NA duration NA NA NA NA "duration" "duration" duration NA NA NA NA "duration" "duration" vctrs/tests/testthat/test-partial-factor-print-empty.txt0000644000176200001440000000002314042546241023326 0ustar liggesuserspartial_factor< > vctrs/tests/testthat/test-bind.R0000644000176200001440000007555014027045462016451 0ustar liggesusers # rows -------------------------------------------------------------------- test_that("empty inputs return an empty data frame", { expect_equal(vec_rbind(), data_frame()) expect_equal(vec_rbind(NULL, NULL), data_frame()) }) test_that("vec_rbind(): NULL is idempotent", { df <- data_frame(x = 1) expect_equal(vec_rbind(df, NULL), df) }) test_that("vec_rbind() output is tibble if any input is tibble", { df <- data_frame(x = 1) dt <- tibble::tibble(x = 1) expect_s3_class(vec_rbind(dt), "tbl_df") expect_s3_class(vec_rbind(dt, df), "tbl_df") expect_s3_class(vec_rbind(df, dt), "tbl_df") }) test_that("type of column is common type of individual columns", { x_int <- data_frame(x = 1L) x_dbl <- data_frame(x = 2.5) x_chr <- data_frame(x = "a") expect_equal(vec_rbind(x_int, x_int), data_frame(x = c(1L, 1L))) expect_equal(vec_rbind(x_int, x_dbl), data_frame(x = c(1, 2.5))) expect_error(vec_rbind(x_int, x_chr), class = "vctrs_error_incompatible_type") }) test_that("result contains union of columns", { expect_named( vec_rbind( data_frame(x = 1), data_frame(y = 1) ), c("x" , "y") ) expect_named( vec_rbind( data_frame(y = 1, x = 1), data_frame(y = 1, z = 2) ), c("y", "x", "z") ) }) test_that("all inputs coerced to data frames", { expect_equal( vec_rbind(data_frame(x = 1L), c(x = 1.5)), data_frame(x = c(1, 1.5)) ) }) test_that("names are supplied if needed", { expect_message(out <- vec_rbind(data_frame(...1 = 1), 1), "->") expect_equal(out, data_frame(...1 = c(1, 1))) }) test_that("matrix becomes data frame and has names properly repaired", { x <- matrix(1:4, nrow = 2) expect_equal(vec_rbind(x), data.frame(...1 = 1:2, ...2 = 3:4)) }) test_that("can bind data.frame columns", { df <- data.frame(x = NA, y = 1:2) df$x <- data.frame(a = 1:2) expected <- data.frame(x = NA, y = c(1:2, 1:2)) expected$x <- data.frame(a = c(1:2, 1:2)) expect_equal(vec_rbind(df, df), expected) }) test_that("can rbind unspecified vectors", { expect_identical(vec_rbind(NA), data_frame(...1 = NA)) expect_identical(vec_rbind(NA, NA), data_frame(...1 = lgl(NA, NA))) df <- data.frame(x = 1) expect_identical(vec_rbind(NA, df), data.frame(x = c(NA, 1))) expect_identical(vec_rbind(df, NA), data.frame(x = c(1, NA))) expect_identical(vec_rbind(NA, df, NA), data.frame(x = c(NA, 1, NA))) expect_identical(vec_rbind(c(x = NA), data.frame(x = 1)), data.frame(x = c(NA, 1))) expect_identical(vec_rbind(c(y = NA), df), data.frame(y = c(NA, NA), x = c(NA, 1))) out <- suppressMessages(vec_rbind(c(x = NA, x = NA), df)) exp <- data.frame(x...1 = c(NA, NA), x...2 = c(NA, NA), x = c(NA, 1)) expect_identical(out, exp) }) test_that("as_df_row() tidies the names of unspecified vectors", { expect_identical(as_df_row(c(NA, NA)), c(NA, NA)) expect_identical(as_df_row(unspecified(2)), unspecified(2)) expect_identical(as_df_row(c(a = NA, a = NA), quiet = TRUE), data.frame(a...1 = NA, a...2 = NA)) expect_identical(as_df_row(c(a = TRUE, a = TRUE), quiet = TRUE), data.frame(a...1 = TRUE, a...2 = TRUE)) }) test_that("can rbind spliced lists", { data <- list(c(a = 1, b = 2), c(a = TRUE, b = FALSE)) expect_identical(vec_rbind(!!!data), data_frame(a = c(1, 1), b = c(2, 0))) }) test_that("can rbind list columns", { out <- vec_rbind(data_frame(x = list(1, 2)), data_frame(x = list(3))) expect_identical(out, data_frame(x = list(1, 2, 3))) }) test_that("can rbind lists", { out <- vec_rbind(list(x = 1:2)) expect_identical(out, data_frame(x = list(c(1L, 2L)))) out <- vec_rbind(list(x = 1:2, y = 3L)) expect_identical(out, data_frame(x = list(c(1L, 2L)), y = list(3L))) out <- vec_rbind(list(x = 1, y = 2), list(y = "string")) expect_identical(out, data_frame(x = list(1, NULL), y = list(2, "string"))) }) test_that("can rbind factors", { fctr <- factor(c("a", "b")) expect_equal(vec_rbind(fctr), data_frame(...1 = fctr[1], ...2 = fctr[2])) fctr_named <- set_names(fctr) expect_equal(vec_rbind(fctr_named), data_frame(a = fctr[1], b = fctr[2])) }) test_that("can rbind dates", { date <- new_date(c(0, 1)) expect_equal(vec_rbind(date), data_frame(...1 = date[1], ...2 = date[2])) date_named <- set_names(date, c("a", "b")) expect_equal(vec_rbind(date_named), data_frame(a = date[1], b = date[2])) }) test_that("can rbind POSIXlt objects into POSIXct objects", { datetime <- as.POSIXlt(new_datetime(0)) expect_s3_class(vec_rbind(datetime, datetime)[[1]], "POSIXct") datetime_named <- set_names(datetime, "col") expect_named(vec_rbind(datetime_named, datetime_named), "col") }) test_that("can rbind table objects (#913)", { x <- new_table(1:4, c(2L, 2L)) y <- x colnames <- c("c1", "c2") rownames <- c("r1", "r2", "r3", "r4") dimnames(x) <- list(rownames[1:2], colnames) dimnames(y) <- list(rownames[3:4], colnames) expect <- data.frame(c1 = c(1:2, 1:2), c2 = c(3:4, 3:4), row.names = rownames) expect_identical(vec_rbind(x, y), expect) }) test_that("can rbind missing vectors", { expect_identical(vec_rbind(na_int), data_frame(...1 = na_int)) expect_identical(vec_rbind(na_int, na_int), data_frame(...1 = int(na_int, na_int))) }) test_that("vec_rbind() respects size invariants (#286)", { expect_identical(vec_rbind(), new_data_frame(n = 0L)) expect_identical(vec_rbind(int(), int()), new_data_frame(n = 2L)) expect_identical(vec_rbind(int(), TRUE), new_data_frame(list(...1 = lgl(NA, TRUE)))) expect_identical(vec_rbind(int(), new_data_frame(n = 2L), int()), new_data_frame(n = 4L)) }) test_that("can repair names in `vec_rbind()` (#229)", { expect_error(vec_rbind(.name_repair = "none"), "can't be `\"none\"`") expect_error(vec_rbind(.name_repair = "minimal"), "can't be `\"minimal\"`") expect_named(vec_rbind(list(a = 1, a = 2), .name_repair = "unique"), c("a...1", "a...2")) expect_error(vec_rbind(list(a = 1, a = 2), .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") expect_named(vec_rbind(list(`_` = 1)), "_") expect_named(vec_rbind(list(`_` = 1), .name_repair = "universal"), c("._")) expect_named(vec_rbind(list(a = 1, a = 2), .name_repair = ~ toupper(.)), c("A", "A")) }) test_that("can construct an id column", { df <- data.frame(x = 1) expect_named(vec_rbind(df, df, .names_to = "id"), c("id", "x")) expect_equal(vec_rbind(df, df, .names_to = "id")$id, c(1L, 2L)) expect_equal(vec_rbind(a = df, b = df, .names_to = "id")$id, c("a", "b")) expect_equal(vec_rbind(a = df, df, .names_to = "id")$id, c("a", "")) }) test_that("vec_rbind() fails with arrays of dimensionality > 3", { expect_error(vec_rbind(array(NA, c(1, 1, 1))), "Can't bind arrays") }) test_that("row names are preserved by vec_rbind()", { df1 <- mtcars[1:3, ] df2 <- mtcars[4:5, ] expect_identical(vec_rbind(df1, df2), mtcars[1:5, ]) row.names(df2) <- NULL out <- mtcars[1:5, ] row.names(out) <- c(row.names(df1), "...4", "...5") expect_identical(vec_rbind(df1, df2), out) }) test_that("can assign row names in vec_rbind()", { df1 <- mtcars[1:3, ] df2 <- mtcars[4:5, ] expect_error( vec_rbind( foo = df1, df2, .names_to = NULL ), "specification" ) # Combination out <- vec_rbind( foo = df1, df2, .names_to = NULL, .name_spec = "{outer}_{inner}" ) exp <- mtcars[1:5, ] row.names(exp) <- c(paste0("foo_", row.names(df1)), row.names(df2)) expect_identical(out, exp) out <- vec_rbind(foo = df1, df2, .names_to = "id") exp <- mtcars[1:5, ] exp <- vec_cbind(id = c(rep("foo", 3), rep("", 2)), exp) expect_identical(out, exp) # Sequence out <- vec_rbind( foo = unrownames(df1), df2, bar = unrownames(mtcars[6, ]), .names_to = NULL, .name_spec = "{outer}_{inner}" ) exp <- mtcars[1:6, ] row.names(exp) <- c(paste0("foo_", 1:3), row.names(df2), "bar") expect_identical(out, exp) out <- vec_rbind( foo = unrownames(df1), df2, bar = unrownames(mtcars[6, ]), .names_to = "id" ) exp <- mtcars[1:6, ] exp <- vec_cbind(id = c(rep("foo", 3), rep("", 2), "bar"), exp) row.names(exp) <- c(paste0("...", 1:3), row.names(df2), "...6") expect_identical(out, exp) }) test_that("vec_rbind() takes the proxy and restores", { df <- foobar(data.frame(x = 1)) # This data frame subclass has an identity proxy and the restore # method falls back to a bare data frame if `$x` has any missing values. # In `vec_rbind()`, the `vec_init()` call will create a bare data frame, # but at the end it is `vec_restore()`d to the right class. local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_proxy.vctrs_foobar = function(x, ...) { x }, vec_restore.vctrs_foobar = function(x, to, ...) { if (any(is.na(x$x))) { new_data_frame(x) } else { vec_restore_default(x, to) } } ) expect_identical( vec_rbind(df, df), foobar(data.frame(x = c(1, 1))) ) }) test_that("vec_rbind() proxies before initializing", { df <- foobar(data.frame(x = 1)) # This data frame subclass doesn't allow `NA`s in columns. # If initialization happened before proxying, it would try to # create `NA` rows with `vec_init()`. local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_proxy.vctrs_foobar = function(x, ...) { new_data_frame(x) }, vec_restore.vctrs_foobar = function(x, to, ...) { if (any(is.na(x$x))) { abort("`x` can't have NA values.") } vec_restore_default(x, to) } ) expect_identical( vec_rbind(df, df), foobar(data.frame(x = c(1, 1))) ) }) test_that("vec_rbind() requires a data frame proxy for data frame ptypes", { df <- foobar(data.frame(x = 1)) local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x, vec_proxy.vctrs_foobar = function(x, ...) 1 ) expect_error(vec_rbind(df, df), "Attempt to restore data frame from a double") }) test_that("monitoring: name repair while rbinding doesn't modify in place", { df <- new_data_frame(list(x = 1, x = 1)) expect <- new_data_frame(list(x = 1, x = 1)) # Name repair occurs expect_named(vec_rbind(df), c("x...1", "x...2")) # No changes to `df` expect_identical(df, expect) }) test_that("performance: Row binding with S3 columns doesn't duplicate on every assignment (#1151)", { skip_if_not_testing_performance() x <- as.Date("2000-01-01") x <- rep(x, 100) df <- data.frame(x = x) lst <- rep_len(list(df), 10000) expect_time_lt(vec_rbind(!!!lst), 5) }) test_that("performance: Row binding with df-cols doesn't duplicate on every assignment (#1122)", { skip_if_not_testing_performance() df_col <- new_data_frame(list(x = 1:1000)) df <- new_data_frame(list(y = df_col)) lst <- rep_len(list(df), 10000) expect_time_lt(vec_rbind(!!!lst), 5) }) # cols -------------------------------------------------------------------- test_that("empty inputs give data frame", { expect_equal(vec_cbind(), data_frame()) expect_equal(vec_cbind(NULL), data_frame()) expect_equal(vec_cbind(data.frame(a = 1), NULL), data_frame(a = 1)) }) test_that("number of rows is preserved with zero column data frames (#1281)", { df <- new_data_frame(n = 2L) expect_size(vec_cbind(df, df), 2L) }) test_that("vec_cbind(): NULL is idempotent", { df <- data_frame(x = 1) expect_equal(vec_cbind(df, NULL), df) }) test_that("outer names are respected", { expect_named(vec_cbind(x = 1, y = 4), c("x", "y")) expect_named(vec_cbind(a = data.frame(x = 1)), "a") }) test_that("inner names are respected", { expect_named(vec_cbind(data_frame(x = 1), data_frame(y = 1)), c("x", "y")) }) test_that("nameless vectors get tidy defaults", { expect_named(vec_cbind(1:2, 1), c("...1", "...2")) }) test_that("matrix becomes data frame", { x <- matrix(1:4, nrow = 2) expect_equal(vec_cbind(x), data.frame(...1 = 1:2, ...2 = 3:4)) # Packed if named expect_equal(vec_cbind(x = x), data_frame(x = x)) }) test_that("duplicate names are de-deduplicated", { expect_message( expect_named(vec_cbind(x = 1, x = 1), c("x...1", "x...2")), "x -> x...1", fixed = TRUE ) expect_named(vec_cbind(data.frame(x = 1), data.frame(x = 1)), c("x...1", "x...2")) }) test_that("rows recycled to longest", { df <- data.frame(x = 1:3) expect_dim(vec_cbind(df), c(3, 1)) expect_dim(vec_cbind(df, NULL), c(3, 1)) expect_dim(vec_cbind(df, y = 1), c(3, 2)) expect_dim(vec_cbind(data.frame(x = 1), y = 1:3), c(3, 2)) expect_dim( vec_cbind( data.frame(a = 1, b = 2), y = 1:3 ), c(3, 3) ) }) test_that("vec_cbind() output is tibble if any input is tibble", { df <- data.frame(x = 1) dt <- tibble::tibble(y = 2) expect_s3_class(vec_cbind(dt), "tbl_df") expect_s3_class(vec_cbind(df, dt), "tbl_df") expect_s3_class(vec_cbind(dt, df), "tbl_df") }) test_that("can override default .nrow", { expect_dim(vec_cbind(1, .size = 3), c(3, 1)) }) test_that("can repair names in `vec_cbind()` (#227)", { expect_error(vec_cbind(a = 1, a = 2, .name_repair = "none"), "can't be `\"none\"`") expect_named(vec_cbind(a = 1, a = 2, .name_repair = "unique"), c("a...1", "a...2")) expect_error(vec_cbind(a = 1, a = 2, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") expect_named(vec_cbind(`_` = 1, .name_repair = "universal"), "._") expect_named(vec_cbind(a = 1, a = 2, .name_repair = "minimal"), c("a", "a")) expect_named(vec_cbind(a = 1, a = 2, .name_repair = toupper), c("A", "A")) }) test_that("can supply `.names_to` to `vec_rbind()` (#229)", { expect_error(vec_rbind(.names_to = letters), "must be") expect_error(vec_rbind(.names_to = 10), "must be") x <- data_frame(foo = 1:2, bar = 3:4) y <- data_frame(foo = 5L, bar = 6L) expect_identical( vec_rbind(a = x, b = y, .names_to = "quux"), data_frame(quux = c("a", "a", "b"), foo = c(1L, 2L, 5L), bar = c(3L, 4L, 6L)) ) expect_identical( vec_rbind(a = x, b = y, .names_to = "foo"), data_frame(foo = c("a", "a", "b"), bar = c(3L, 4L, 6L)) ) # No names expect_identical( vec_rbind(x, y, .names_to = "quux"), data_frame(quux = c(1L, 1L, 2L), foo = c(1L, 2L, 5L), bar = c(3L, 4L, 6L)) ) expect_identical( vec_rbind(x, y, .names_to = "foo"), data_frame(foo = c(1L, 1L, 2L), bar = c(3L, 4L, 6L)) ) # Partial names expect_identical(vec_rbind(x, b = y, .names_to = "quux")$quux, c("", "", "b")) }) test_that("can supply existing `.names_to`", { x <- data.frame(a = 1, id = TRUE) expect_identical( vec_rbind(foo = x, bar = c(a = 2), .names_to = "id"), data_frame(a = c(1, 2), id = c("foo", "bar")) ) y <- data.frame(id = TRUE, a = 1) expect_identical( vec_rbind(foo = y, bar = c(a = 2), .names_to = "id"), data_frame(id = c("foo", "bar"), a = c(1, 2)) ) }) test_that("vec_cbind() returns visibly (#452)", { # Shouldn't be needed once `check_unique` is implemented in C expect_visible(vctrs::vec_cbind(x = 1, .name_repair = "check_unique")) }) test_that("vec_cbind() packs named data frames (#446)", { expect_identical(vec_cbind(data_frame(y = 1:3)), data_frame(y = 1:3)) expect_identical(vec_cbind(x = data_frame(y = 1:3)), data_frame(x = data_frame(y = 1:3))) }) test_that("vec_cbind() packs 1d arrays", { a <- array(1:2) expect_identical(vec_cbind(a), data_frame(...1 = 1:2)) expect_identical(vec_cbind(x = a), data_frame(x = a)) }) test_that("vec_cbind() packs named matrices", { m <- matrix(1:4, 2) expect_identical(vec_cbind(m), data_frame(...1 = 1:2, ...2 = 3:4)) expect_identical(vec_cbind(x = m), data_frame(x = m)) }) test_that("vec_cbind() never packs named vectors", { expect_identical(vec_cbind(1:2), data_frame(...1 = 1:2)) expect_identical(vec_cbind(x = 1:2), data_frame(x = 1:2)) }) test_that("names are repaired late if unpacked", { df <- data_frame(b = 2, b = 3, .name_repair = "minimal") out1 <- vec_cbind(a = 1, df) out2 <- vec_cbind(a = 1, as.matrix(df)) out3 <- vec_cbind(a = 1, matrix(1:2, nrow = 1)) expect_named(out1, c("a", "b...2", "b...3")) expect_named(out2, c("a", "b...2", "b...3")) expect_named(out3, c("a", "...2", "...3")) }) test_that("names are not repaired if packed", { df <- data_frame(b = 2, b = 3, .name_repair = "minimal") out1 <- vec_cbind(a = 1, packed = df) out2 <- vec_cbind(a = 1, packed = as.matrix(df)) out3 <- vec_cbind(a = 1, packed = matrix(1:2, nrow = 1)) expect_named(out1, c("a", "packed")) expect_named(out2, c("a", "packed")) expect_named(out3, c("a", "packed")) expect_named(out1$packed, c("b", "b")) expect_identical(colnames(out2$packed), c("b", "b")) expect_identical(colnames(out3$packed), NULL) }) test_that("vec_cbind() fails with arrays of dimensionality > 3", { a <- array(NA, c(1, 1, 1)) expect_error(vec_cbind(a), "Can't bind arrays") expect_error(vec_cbind(x = a), "Can't bind arrays") }) test_that("monitoring: name repair while cbinding doesn't modify in place", { df <- new_data_frame(list(x = 1, x = 1)) expect <- new_data_frame(list(x = 1, x = 1)) # Name repair occurs expect_named(vec_cbind(df), c("x...1", "x...2")) # No changes to `df` expect_identical(df, expect) }) test_that("vec_rbind() consistently handles unnamed outputs", { # Name repair of columns is a little weird but unclear we can do better expect_identical( vec_rbind(1, 2, .names_to = NULL), data.frame(...1 = c(1, 2)) ) expect_identical( vec_rbind(1, 2, ...10 = 3, .names_to = NULL), data.frame(...1 = c(1, 2, 3), row.names = c("...1", "...2", "...3")) ) expect_identical( vec_rbind(a = 1, b = 2, .names_to = NULL), data.frame(...1 = c(1, 2), row.names = c("a", "b")) ) expect_identical( vec_rbind(c(a = 1), c(b = 2), .names_to = NULL), data.frame(a = c(1, NA), b = c(NA, 2)) ) }) test_that("vec_rbind() ignores named inputs by default (#966)", { expect_identical( vec_rbind(foo = c(a = 1)), data.frame(a = 1) ) expect_identical( vec_rbind(foo = c(a = 1), .names_to = NULL), data.frame(a = 1, row.names = "foo") ) }) test_that("vec_cbind() consistently handles unnamed outputs", { expect_identical( vec_cbind(1, 2), data.frame(...1 = 1, ...2 = 2) ) expect_identical( vec_cbind(1, 2, ...10 = 3), data.frame(...1 = 1, ...2 = 2, ...3 = 3) ) expect_identical( vec_cbind(a = 1, b = 2), data.frame(a = 1, b = 2) ) expect_identical( vec_cbind(c(a = 1), c(b = 2)), new_data_frame(list(...1 = c(a = 1), ...2 = c(b = 2))) ) }) test_that("rbind() and cbind() have informative outputs when repairing names", { verify_output(test_path("output", "bind-name-repair.txt"), { "# vec_rbind()" vec_rbind(1, 2) vec_rbind(1, 2, .names_to = NULL) vec_rbind(1, 2, ...10 = 3) vec_rbind(1, 2, ...10 = 3, .names_to = NULL) vec_rbind(a = 1, b = 2) vec_rbind(a = 1, b = 2, .names_to = NULL) vec_rbind(c(a = 1), c(b = 2)) vec_rbind(c(a = 1), c(b = 2), .names_to = NULL) "Silent when assigning duplicate row names of df-cols" df <- new_data_frame(list(x = mtcars[1:3, 1, drop = FALSE])) vec_rbind(df, df) vec_rbind(mtcars[1:4, ], mtcars[1:3, ]) "# vec_cbind()" vec_cbind(1, 2) vec_cbind(1, 2, ...10 = 3) vec_cbind(a = 1, b = 2) vec_cbind(c(a = 1), c(b = 2)) }) }) test_that("cbind() deals with row names", { expect_identical( vec_cbind(mtcars[1:3], foo = 1), cbind(mtcars[1:3], foo = 1) ) expect_identical( vec_cbind(mtcars[1:3], mtcars[4]), cbind(mtcars[1:3], mtcars[4]) ) out <- vec_cbind( mtcars[1, 1, drop = FALSE], unrownames(mtcars[1:3, 2, drop = FALSE]) ) exp <- mtcars[1:3, c(1, 2)] exp[[1]] <- exp[[1, 1]] row.names(exp) <- paste0(c("Mazda RX4..."), 1:3) expect_identical(out, exp) }) test_that("prefer row names of first named input (#1058)", { df0 <- unrownames(mtcars[1:5, 1:3]) df1 <- mtcars[1:5, 4:6] df2 <- mtcars[5:1, 7:9] expect_identical( row.names(vec_cbind(df0, df1, df2)), row.names(df1) ) expect_identical( row.names(vec_cbind(df0, df2, df1)), row.names(df2) ) }) test_that("can rbind data frames with matrix columns (#625)", { df <- tibble(x = 1:2, y = matrix(1:4, nrow = 2)) expect_identical(vec_rbind(df, df), vec_slice(df, c(1, 2, 1, 2))) }) test_that("rbind repairs names of data frames (#704)", { df <- data_frame(x = 1, x = 2, .name_repair = "minimal") df_repaired <- data_frame(x...1 = 1, x...2 = 2) expect_identical(vec_rbind(df), df_repaired) expect_identical(vec_rbind(df, df), vec_rbind(df_repaired, df_repaired)) expect_error( vec_rbind(df, df, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique" ) }) test_that("vec_rbind() works with simple homogeneous foreign S3 classes", { expect_identical( vec_rbind(set_names(foobar(1), "x"), set_names(foobar(2), "x")), data_frame(x = foobar(c(1, 2))) ) }) test_that("vec_rbind() works with simple homogeneous foreign S4 classes", { skip_if_cant_set_names_on_s4() joe1 <- .Counts(1L, name = "Joe") joe2 <- .Counts(2L, name = "Joe") expect_identical( vec_rbind(set_names(joe1, "x"), set_names(joe2, "x")), data_frame(x = .Counts(1:2, name = "Joe")) ) }) test_that("vec_rbind() fails with complex foreign S3 classes", { verify_errors({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") expect_error( vec_rbind(set_names(x, "x"), set_names(y, "x")), class = "vctrs_error_incompatible_type" ) }) }) test_that("vec_rbind() fails with complex foreign S4 classes", { verify_errors({ joe <- .Counts(1L, name = "Joe") jane <- .Counts(2L, name = "Jane") expect_error(vec_rbind(joe, jane), class = "vctrs_error_incompatible_type") }) }) test_that("vec_rbind() falls back to c() if S3 method is available", { x <- foobar(1, foo = 1) y <- foobar(2, bar = 2) x_df <- data_frame(x = x) y_df <- data_frame(x = y) expect_error(vec_rbind(x_df, y_df), class = "vctrs_error_incompatible_type") out <- with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), vec_rbind(x_df, y_df) ) expect_identical(out, data_frame(x = quux(c(1, 2)))) # Fallback is used with data frame subclasses, with or without # ptype2 method foo_df <- foobaz(x_df) bar_df <- foobaz(y_df) out <- with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), vec_rbind(foo_df, bar_df) ) expect_identical(out, foobaz(data_frame(x = quux(c(1, 2))))) out <- with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), vec_ptype2.vctrs_foobaz.vctrs_foobaz = function(...) foobaz(df_ptype2(...)), vec_rbind(foo_df, bar_df) ) expect_identical(out, foobaz(data_frame(x = quux(c(1, 2))))) skip("FIXME: c() fallback with recursion through df-col") wrapper_x_df <- data_frame(x = x_df) wrapper_y_df <- data_frame(x = y_df) out <- with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), vec_rbind(wrapper_x_df, wrapper_y_df) ) expect_identical(out, data_frame(data_frame(x = quux(c(1, 2))))) }) test_that("c() fallback works with unspecified columns", { local_methods( c.vctrs_foobar = function(...) foobar(NextMethod()), `[.vctrs_foobar` = function(x, i, ...) foobar(NextMethod(), dispatched = TRUE) ) out <- vec_rbind( data_frame(x = foobar(1)), data_frame(y = foobar(2)) ) expect_identical(out, data_frame( x = foobar(c(1, NA), dispatched = TRUE), y = foobar(c(NA, 2), dispatched = TRUE) )) }) test_that("c() fallback works with vctrs-powered data frame subclass", { local_methods( c.vctrs_quux = function(...) quux(NextMethod(), c_dispatched = TRUE), `[.vctrs_quux` = function(x, i, ...) quux(NextMethod(), bracket_dispatched = TRUE) ) local_foobar_df_methods() ### Joint case df1 <- foobar(data_frame(x = quux(1:3))) df2 <- data_frame(x = quux(4:5)) out <- vctrs::vec_rbind(df1, df2) exp <- foobar(data_frame(x = quux(1:5, c_dispatched = TRUE))) expect_identical(out, exp) out <- vctrs::vec_rbind(df2, df1) exp <- foobar(data_frame(x = quux(c(4:5, 1:3), c_dispatched = TRUE))) expect_identical(out, exp) ### Disjoint case df1 <- foobar(data_frame(x = quux(1:3))) df2 <- data.frame(y = 4:5) out <- vctrs::vec_rbind(df1, df2) exp <- foobar(data_frame( x = quux(c(1:3, NA, NA), bracket_dispatched = TRUE), y = c(rep(NA, 3), 4:5) )) expect_identical(out, exp) out <- vctrs::vec_rbind(df2, df1) exp <- foobar(data_frame( y = c(4:5, rep(NA, 3)), x = quux(c(NA, NA, 1:3), bracket_dispatched = TRUE) )) expect_identical(out, exp) }) test_that("vec_rbind() falls back to c() if S3 method is available for S4 class", { joe <- data_frame(x = .Counts(c(1L, 2L), name = "Joe")) jane <- data_frame(x = .Counts(3L, name = "Jane")) expect_error(vec_rbind(joe, jane), class = "vctrs_error_incompatible_type") out <- with_methods( c.vctrs_Counts = function(...) .Counts(NextMethod(), name = "dispatched"), vec_rbind(joe, jane) ) expect_identical(out$x, .Counts(1:3, name = "dispatched")) }) test_that("vec_cbind() and vec_rbind() have informative error messages", { skip_if_cant_set_names_on_s4() verify_output(test_path("error", "test-bind.txt"), { "# vec_rbind() fails with complex foreign S3 classes" x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") vec_rbind(set_names(x, "x"), set_names(y, "x")) "# vec_rbind() fails with complex foreign S4 classes" joe <- .Counts(1L, name = "Joe") jane <- .Counts(2L, name = "Jane") vec_rbind(set_names(joe, "x"), set_names(jane, "x")) }) }) test_that("rbind supports names and inner names (#689)", { skip_if(getRversion() >= "4.1.0", "work around r-devel bug") # Introduced in # https://github.com/wch/r-source/commit/275bb3db02491899bbadc28fea69dcdd6fedf41e out <- vec_rbind( data_frame(x = list(a = 1, b = 2)), data_frame(x = list(3)), data_frame(x = list(d = 4)) ) expect_identical(out$x, list(a = 1, b = 2, 3, d = 4)) vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) oo_x <- set_names(as.POSIXlt(c("2020-01-01", "2020-01-02", "2020-01-03")), letters[1:3]) oo_y <- c(FOO = as.POSIXlt(c("2020-01-04"))) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "d") mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("d")) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x, oo = oo_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y, oo = oo_y), row.names = c("quux") ) nested_out <- vec_rbind(nested_x, nested_y) expect_identical(row.names(nested_out), c("foo", "bar", "baz", "quux")) expect_identical(row.names(nested_out$df), c("a", "b", "c", "d")) expect_identical(row.names(nested_out$mat), c("a", "b", "c", "d")) expect_identical(names(nested_out$vec), c("a", "b", "c", "FOO")) expect_identical(names(nested_out$oo), c("a", "b", "c", "FOO")) }) test_that("vec_rbind() doesn't fall back to c() with proxied classes (#1119)", { foobar_rcrd <- function(x, y) new_rcrd(list(x = x, y = y), class = "vctrs_foobar") x <- foobar_rcrd(x = 1:2, y = 3:4) y <- foobar_rcrd(x = 5L, y = 6L) out <- vec_rbind(x, x) exp <- data_frame( ...1 = foobar_rcrd(x = c(1L, 1L), y = c(3L, 3L)), ...2 = foobar_rcrd(x = c(2L, 2L), y = c(4L, 4L)) ) expect_identical(out, exp) out <- vec_rbind(data_frame(x = x), data_frame(x = x)) exp <- data_frame( x = foobar_rcrd(x = c(1L, 2L, 1L, 2L), y = c(3L, 4L, 3L, 4L)) ) expect_identical(out, exp) }) test_that("vec_rbind() fallback works with tibbles", { x <- foobar("foo") df <- data_frame(x = x) tib <- tibble(x = x) local_methods(c.vctrs_foobar = function(...) quux(NextMethod())) exp <- tibble(x = quux(c("foo", "foo"))) expect_identical(vec_rbind(tib, tib), exp) expect_identical(vec_rbind(df, tib), exp) expect_identical(vec_rbind(tib, df), exp) }) test_that("vec_rbind() zaps names when name-spec is zap() and names-to is NULL", { expect_identical( vec_rbind(foo = c(x = 1), .names_to = NULL, .name_spec = zap()), data.frame(x = 1) ) }) test_that("can't zap names when `.names_to` is supplied", { expect_identical( vec_rbind(foo = c(x = 1), .names_to = zap(), .name_spec = zap()), data.frame(x = 1) ) expect_error( vec_rbind(foo = c(x = 1), .names_to = "id", .name_spec = zap()), "Can't zap outer names when `.names_to` is supplied.", fixed = TRUE ) }) test_that("can zap outer names from a name-spec (#1215)", { zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner df <- data.frame(x = 1:2) df_named <- data.frame(x = 3L, row.names = "foo") expect_null( vec_names(vec_rbind(a = df, .names_to = NULL, .name_spec = zap_outer_spec)) ) expect_identical( vec_names(vec_rbind(a = df, df_named, .name_spec = zap_outer_spec)), c("...1", "...2", "foo") ) }) test_that("column names are treated consistently in vec_rbind()", { exp <- data.frame(a = c(1L, 1L), b = c(2L, 2L)) x <- c(a = 1L, b = 2L) expect_identical(vec_rbind(x, x), exp) x <- array(1:2, dimnames = list(c("a", "b"))) expect_identical(vec_rbind(x, x), exp) x <- matrix(1:2, nrow = 1, dimnames = list(NULL, c("a", "b"))) expect_identical(vec_rbind(x, x), exp) x <- array(1:6, c(1, 2, 1), dimnames = list(NULL, c("a", "b"), NULL)) expect_error(vec_rbind(x, x), "Can't bind arrays") }) # Golden tests ------------------------------------------------------- test_that("rows-binding performs expected allocations", { verify_output(test_path("performance", "test-bind.txt"), { ints <- rep(list(1L), 1e2) named_ints <- rep(list(set_names(1:3, letters[1:3])), 1e2) "Integers as rows" suppressMessages(with_memory_prof(vec_rbind(!!!ints))) suppressMessages(with_memory_prof(vec_rbind(!!!named_ints))) "Data frame with named columns" df <- data_frame( x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c("A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb"))) ) dfs <- rep(list(df), 1e2) with_memory_prof(vec_rbind(!!!dfs)) "Data frame with rownames (non-repaired, non-recursive case)" df <- data_frame(x = 1:2) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind(!!!dfs)) "Data frame with rownames (repaired, non-recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind(!!!dfs)) # FIXME: The following recursive cases duplicate rownames # excessively because df-cols are restored at each chunk # assignment, causing a premature name-repair "FIXME (#1217): Data frame with rownames (non-repaired, recursive case)" df <- data_frame( x = 1:2, y = data_frame(x = 1:2) ) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind(!!!dfs)) "FIXME (#1217): Data frame with rownames (repaired, recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind(!!!dfs)) }) }) vctrs/tests/testthat/test-print-str-mtcars.txt0000644000176200001440000000116214042546241021366 0ustar liggesusersdf[,11] [1:32] 'data.frame': 32 obs. of 11 variables: $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... $ cyl : num 6 6 4 6 8 6 8 4 4 6 ... $ disp: num 160 160 108 258 360 ... $ hp : num 110 110 93 110 175 105 245 62 95 123 ... $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... $ wt : num 2.62 2.88 2.32 3.21 3.44 ... $ qsec: num 16.5 17 18.6 19.4 17 ... $ vs : num 0 0 1 1 0 1 0 1 1 1 ... $ am : num 1 1 1 0 0 0 0 0 0 0 ... $ gear: num 4 4 4 3 3 3 3 4 4 4 ... $ carb: num 4 4 1 1 2 1 4 2 2 4 ... @ row.names: chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ... vctrs/tests/testthat/test-partial-factor-print-learned.txt0000644000176200001440000000003214042546241023602 0ustar liggesuserspartial_factor< fd1ad > vctrs/tests/testthat/test-type-dplyr.R0000644000176200001440000001157313663716767017663 0ustar liggesusers # `grouped_df` ------------------------------------------------------- bare_mtcars <- unrownames(mtcars) test_that("grouped-df is proxied and restored", { gdf <- dplyr::group_by(bare_mtcars, cyl) expect_identical(vec_proxy(gdf), gdf) expect_identical(vec_restore(bare_mtcars, gdf), gdf) expect_identical(vec_ptype(gdf), gdf[0, ]) gdf <- dplyr::group_by(bare_mtcars, cyl, am, vs) expect_identical(gdf[0, ], vec_ptype(gdf)) out <- vec_ptype(dplyr::group_by(bare_mtcars, cyl, .drop = FALSE)) expect_drop(out, FALSE) }) test_that("can take the common type of grouped tibbles and tibbles", { gdf <- dplyr::group_by(bare_mtcars, cyl) expect_identical(vec_ptype2(gdf, data.frame()), vec_ptype(gdf)) expect_identical(vec_ptype2(data.frame(), gdf), vec_ptype(gdf)) expect_identical(vec_ptype2(gdf, tibble()), vec_ptype(gdf)) expect_identical(vec_ptype2(tibble(), gdf), vec_ptype(gdf)) gdf_nodrop <- dplyr::group_by(bare_mtcars, cyl, .drop = FALSE) expect_drop(vec_ptype2(gdf, gdf_nodrop), FALSE) expect_drop(vec_ptype2(gdf_nodrop, gdf), FALSE) expect_drop(vec_ptype2(gdf_nodrop, bare_mtcars), FALSE) expect_drop(vec_ptype2(bare_mtcars, gdf_nodrop), FALSE) }) test_that("the common type of grouped tibbles includes the union of grouping variables", { gdf1 <- dplyr::group_by(bare_mtcars, cyl) gdf2 <- dplyr::group_by(bare_mtcars, am, vs) expect_identical( vec_ptype2(gdf1, gdf2), vec_ptype(dplyr::group_by(bare_mtcars, cyl, am, vs)) ) }) test_that("can cast to and from `grouped_df`", { gdf <- dplyr::group_by(unrownames(bare_mtcars), cyl) input <- bare_mtcars[10] cast_gdf <- dplyr::group_by(vec_cast(bare_mtcars[10], bare_mtcars), cyl) expect_error( vec_cast(input, dplyr::group_by(bare_mtcars["cyl"], cyl)), class = "vctrs_error_cast_lossy" ) expect_identical( vec_cast(input, gdf), cast_gdf ) expect_identical( vec_cast(gdf, bare_mtcars), unrownames(bare_mtcars) ) expect_identical( vec_cast(tibble::as_tibble(input), gdf), unrownames(cast_gdf) ) tib <- tibble::as_tibble(bare_mtcars) expect_identical( unrownames(vec_cast(gdf, tib)), tib ) }) test_that("casting to `grouped_df` doesn't require grouping variables", { expect_identical( vec_cast(bare_mtcars[10], dplyr::group_by(bare_mtcars, cyl)), dplyr::group_by(vec_cast(bare_mtcars[10], bare_mtcars), cyl) ) }) test_that("casting to `grouped_df` handles `drop`", { gdf_nodrop <- dplyr::group_by(bare_mtcars, cyl, .drop = FALSE) expect_identical(vec_cast(bare_mtcars, gdf_nodrop), gdf_nodrop) }) test_that("can cbind grouped data frames", { gdf <- dplyr::group_by(bare_mtcars[-10], cyl) df <- unrownames(bare_mtcars)[10] expect_identical( unrownames(vec_cbind(gdf, df)), tibble::as_tibble(bare_mtcars)[c(1:9, 11, 10)] ) gdf1 <- dplyr::group_by(bare_mtcars[2], cyl) gdf2 <- dplyr::group_by(bare_mtcars[8:9], vs, am) expect_identical( unrownames(vec_cbind(gdf1, gdf2)), tibble::as_tibble(bare_mtcars)[c(2, 8, 9)] ) }) # `rowwise` ---------------------------------------------------------- test_that("rowwise can be proxied and restored", { rww <- dplyr::rowwise(unrownames(bare_mtcars)) expect_identical(vec_proxy(rww), rww) expect_identical(vec_restore(unrownames(bare_mtcars), rww), rww) expect_identical(vec_ptype(rww), rww[0, ]) }) test_that("can take the common type of rowwise tibbles and tibbles", { rww <- dplyr::rowwise(bare_mtcars) expect_identical(vec_ptype2(rww, data.frame()), vec_ptype(rww)) expect_identical(vec_ptype2(data.frame(), rww), vec_ptype(rww)) expect_identical(vec_ptype2(rww, tibble()), vec_ptype(rww)) expect_identical(vec_ptype2(tibble(), rww), vec_ptype(rww)) }) test_that("can cast to and from `rowwise_df`", { rww <- unrownames(dplyr::rowwise(bare_mtcars)) input <- bare_mtcars[10] cast_rww <- dplyr::rowwise(vec_cast(bare_mtcars[10], bare_mtcars)) expect_error( vec_cast(input, dplyr::rowwise(bare_mtcars["cyl"])), class = "vctrs_error_cast_lossy" ) expect_identical( vec_cast(input, rww), cast_rww ) expect_identical( vec_cast(rww, bare_mtcars), unrownames(bare_mtcars) ) expect_identical( vec_cast(tibble::as_tibble(input), rww), unrownames(cast_rww) ) tib <- tibble::as_tibble(bare_mtcars) expect_identical( unrownames(vec_cast(rww, tib)), tib ) }) test_that("can cbind rowwise data frames", { df <- unrownames(bare_mtcars) rww <- dplyr::rowwise(df[-2]) gdf <- dplyr::group_by(df[2], cyl) exp <- dplyr::rowwise(df[c(1, 3:11, 2)]) expect_identical(vec_cbind(rww, df[2]), exp) # Suboptimal expect_identical(vec_cbind(rww, gdf), exp) }) test_that("no common type between rowwise and grouped data frames", { expect_df_fallback_warning( out <- vec_ptype_common_df_fallback(dplyr::rowwise(bare_mtcars), dplyr::group_by(bare_mtcars, cyl)) ) expect_identical(out, tibble::as_tibble(bare_mtcars[0, ])) }) vctrs/tests/testthat/helper-conditions.R0000644000176200001440000000175113712211241020164 0ustar liggesusers with_subscript_data <- function(expr, subscript_arg, subscript_elt = NULL, subscript_action = NULL) { local_options(rlang_force_unhandled_error = TRUE) tryCatch( expr, vctrs_error_subscript = function(cnd) { cnd$subscript_arg <- subscript_arg cnd$subscript_elt <- subscript_elt cnd$subscript_action <- subscript_action cnd_signal(cnd) } ) } with_tibble_cols <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "column", subscript_action = "rename" ) } with_tibble_rows <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "row", subscript_action = "remove" ) } with_dm_tables <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "table", subscript_action = "extract" ) } vctrs/tests/testthat/test-type2.txt0000644000176200001440000000070014042546252017177 0ustar liggesusers logical integer double character raw list logical "logical" "integer" "double" NA NA NA integer "integer" "integer" "double" NA NA NA double "double" "double" "double" NA NA NA character NA NA NA "character" NA NA raw NA NA NA NA "raw" NA list NA NA NA NA NA "list" vctrs/tests/testthat/test-partial-factor-print-partial.txt0000644000176200001440000000004414042546241023627 0ustar liggesuserspartial_factor< bf275 {partial} > vctrs/tests/testthat/helper-type-dplyr.R0000644000176200001440000000023413650511520020123 0ustar liggesusers expect_drop <- function(x, value) { drop <- dplyr::group_by_drop_default(x) if (value) { expect_true(drop) } else { expect_false(drop) } } vctrs/tests/testthat/test-type-misc.R0000644000176200001440000001106113712211241017420 0ustar liggesusers test_that("`numeric_version` is a vector (#723)", { x <- numeric_version("0.1.0") y <- numeric_version("0.2.0") z <- c(x, y) expect_true(vec_is(x)) expect_true(vec_equal(x, x)) expect_false(vec_equal(x, y)) expect_identical(vec_equal(y, z), c(FALSE, TRUE)) expect_identical(vec_unique(z), z) expect_identical(vec_unique(c(y, z, x)), z[2:1]) }) test_that("`numeric_version` falls back to base methods", { x <- utils::packageVersion("rlang") y <- utils::packageVersion("vctrs") z <- c(x, y) # `z` is a `list-of`-like type but slicing 1 element returns the # atomic type. To implement this in vctrs we'd need to provide a way # of customising the "wrapper" type for size > 1 vectors. expect_identical(vec_slice(z, 1:2), z) expect_identical(vec_slice(z, 1), x) expect_identical(vec_slice(z, 2), y) expect_identical(vec_c(x, y), z) }) test_that("common type of data.table and data.frame is data.table", { # As data.table is not in Suggests, these checks are only run on the # devs' machines testthat_import_from("data.table", "data.table") expect_identical( vec_ptype2(data.table(x = TRUE), data.table(y = 2)), data.table(x = lgl(), y = dbl()) ) expect_identical( vec_ptype2(data.table(x = TRUE), data.frame(y = 2)), data.table(x = lgl(), y = dbl()) ) expect_identical( vec_ptype2(data.frame(y = 2), data.table(x = TRUE)), data.table(y = dbl(), x = lgl()) ) expect_identical( vec_cast(data.table(y = 2), data.table(x = TRUE, y = 1L)), data.table(x = NA, y = 2L) ) expect_identical( vec_cast(data.frame(y = 2), data.table(x = TRUE, y = 1L)), data.table(x = NA, y = 2L) ) expect_identical( vec_cast(data.table(y = 2), data.frame(x = TRUE, y = 1L)), data.frame(x = NA, y = 2L) ) }) test_that("data.table and tibble do not have a common type", { testthat_import_from("data.table", "data.table") expect_incompatible_df( vec_ptype_common(data.table(x = TRUE), tibble(y = 2)), tibble(x = lgl(), y = dbl()) ) expect_incompatible_df( vec_ptype_common(tibble(y = 2), data.table(x = TRUE)), tibble(y = dbl(), x = lgl()) ) expect_incompatible_df_cast( vec_cast(tibble(y = 2), data.table(x = TRUE, y = 1L)), data.frame(x = NA, y = 2L) ) expect_incompatible_df_cast( vec_cast(data.table(y = 2), tibble(x = TRUE, y = 1L)), tibble(x = NA, y = 2L) ) }) test_that("can slice `ts` vectors", { x <- ts(1:3) expect_identical(vec_ptype(x), x[0]) expect_identical(vec_slice(x, 2), x[2]) }) test_that("can concatenate `ts` vectors", { x <- ts(1:3) expect_identical(vec_c(x, x), c(x, x)) df <- data_frame(x = x) expect_identical(vec_rbind(df, df), data_frame(x = c(x, x))) }) test_that("`omit` class is numeric (#1160)", { x <- c(NA, 1:3, NA) omit <- attr(na.omit(x), "na.action") expect_identical(vec_ptype_common(omit, omit), structure(int(), class = "omit")) expect_identical(vec_ptype_common(1.5, omit), dbl()) expect_identical(vec_ptype_common(omit, 1L), int()) expect_identical(vec_cast_common(omit, omit), list(omit, omit)) expect_identical(vec_cast_common(omit, 1L), list(unstructure(omit), 1L)) expect_identical(vec_cast_common(1.5, omit), list(1.5, unstructure(as.double(omit)))) expect_error(vec_cast(1L, omit), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1.0, omit), class = "vctrs_error_incompatible_type") expect_identical(vec_slice(omit, 1), structure(1L, class = "omit")) expect_identical(vec_c(omit, omit), structure(c(1L, 5L, 1L, 5L), class = "omit")) expect_identical(vec_c(omit, omit, 10L), c(1L, 5L, 1L, 5L, 10L)) expect_identical(vec_slice(x, omit), x[omit]) }) test_that("`exclude` class is numeric (#1160)", { x <- c(NA, 1:3, NA) exc <- attr(na.exclude(x), "na.action") expect_identical(vec_ptype_common(exc, exc), structure(int(), class = "exclude")) expect_identical(vec_ptype_common(1.5, exc), dbl()) expect_identical(vec_ptype_common(exc, 1L), int()) expect_identical(vec_cast_common(exc, exc), list(exc, exc)) expect_identical(vec_cast_common(exc, 1L), list(unstructure(exc), 1L)) expect_identical(vec_cast_common(1.5, exc), list(1.5, unstructure(as.double(exc)))) expect_error(vec_cast(1L, exc), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1.0, exc), class = "vctrs_error_incompatible_type") expect_identical(vec_slice(exc, 1), structure(1L, class = "exclude")) expect_identical(vec_c(exc, exc), structure(c(1L, 5L, 1L, 5L), class = "exclude")) expect_identical(vec_c(exc, exc, 10L), c(1L, 5L, 1L, 5L, 10L)) expect_identical(vec_slice(x, exc), x[exc]) }) vctrs/tests/testthat/helper-types.R0000644000176200001440000000405013723213047017162 0ustar liggesusers# Don't call tibble::tibble() to avoid catch-22, because tibble now uses vctrs bare_tibble <- structure(data.frame(), class = c("tbl_df", "tbl", "data.frame")) base_empty_types <- list( null = NULL, logical = lgl(), integer = int(), double = dbl(), complex = cpl(), character = chr(), raw = bytes(), list = list(), dataframe = data.frame() ) base_s3_empty_types <- list( bare_factor = new_factor(), bare_ordered = new_ordered(), bare_date = new_date(), bare_posixct = new_datetime(tzone = "UTC"), bare_posixlt = as.POSIXlt(new_datetime(tzone = "UTC")), bare_tibble = bare_tibble ) proxied_empty_types <- list( double = new_hidden(), dataframe = bare_tibble, dataframe = structure(data.frame(), class = c("vctrs_foobar", "data.frame")) ) empty_types <- c( base_empty_types, proxied_empty_types, # Non proxied type scalar = foobar(list()), scalar = function() NULL ) atomics <- list(TRUE, 1L, 1.0, 1i, "foo", bytes(1)) vectors <- c(atomics, list(list())) records <- list( df = data.frame(x = 1), rcrd = new_rcrd(list(x = 1)), posixlt = as.POSIXlt("2020-01-01") ) tuple <- function(x = integer(), y = integer()) { fields <- vec_recycle_common( x = vec_cast(x, integer()), y = vec_cast(y, integer()) ) new_rcrd(fields, class = "vctrs_tuple") } tuple_methods <- list( format.vctrs_tuple = function(x, ...) { paste0("(", field(x, "x"), ",", field(x, "y"), ")") }, vec_ptype2.vctrs_tuple.vctrs_tuple = function(x, y, ...) x, vec_cast.vctrs_tuple.vctrs_tuple = function(x, to, ...) x ) local_tuple_methods <- function(frame = caller_env()) { local_methods(.frame = frame, !!!tuple_methods) } set_tuple_methods <- function(env = global_env()) { env_bind(env, !!!tuple_methods) } local_comparable_tuple <- function(frame = caller_env()) { local_tuple_methods(frame = frame) # Compare only on first field local_methods( .frame = frame, vec_proxy_equal.vctrs_tuple = function(x, ...) field(x, "x") ) } c_na <- function(...) { x <- c(...) names(x)[names(x) == ""] <- NA_character_ x } vctrs/tests/testthat/test-arith.R0000644000176200001440000000146613723213047016635 0ustar liggesusers test_that("logical/integer/numeric works", { expect_equal(vec_arith("+", TRUE, TRUE), 2L) expect_equal(vec_arith("+", TRUE, 1L), 2L) expect_equal(vec_arith("+", TRUE, 1), 2) expect_equal(vec_arith("+", 1L, TRUE), 2L) expect_equal(vec_arith("+", 1L, 1L), 2L) expect_equal(vec_arith("+", 1L, 1), 2) expect_equal(vec_arith("+", 1, TRUE), 2L) expect_equal(vec_arith("+", 1, 1L), 2L) expect_equal(vec_arith("+", 1, 1), 2) }) test_that("default is error", { f <- new_vctr(1:10, class = "foo") expect_error(vec_arith("+", f, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", TRUE, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", 1L, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", 1, f), class = "vctrs_error_incompatible_op") }) vctrs/tests/testthat/out/0000755000176200001440000000000014024440767015234 5ustar liggesusersvctrs/tests/testthat/out/vec-ptype-0.txt0000644000176200001440000000002014042546252020032 0ustar liggesusersPrototype: NULL vctrs/tests/testthat/out/vec-ptype-1.txt0000644000176200001440000000002314042546252020036 0ustar liggesusersPrototype: integer vctrs/tests/testthat/out/vec-ptype-3.txt0000644000176200001440000000021714042546252020045 0ustar liggesusersPrototype: 0. ( , ) = 1. ( , ) = 2. ( , ) = vctrs/tests/testthat/out/vec-ptype-2.txt0000644000176200001440000000014614042546252020045 0ustar liggesusersPrototype: 0. ( , ) = 1. ( , ) = vctrs/tests/testthat/test-list_of-type.txt0000644000176200001440000000064414042546250020557 0ustar liggesusers list list_of list_of list_of list "list" NA NA NA list_of NA "list_of" "list_of" NA list_of NA "list_of" "list_of" NA list_of NA NA NA "list_of" vctrs/tests/testthat/test-type-asis.R0000644000176200001440000000540013650511520017430 0ustar liggesusers# ------------------------------------------------------------------------------ # Printing test_that("I() wraps contents", { f <- factor() expect_equal(vec_ptype_abbr(I(f)), "I") expect_equal(vec_ptype_full(I(f)), "I>") }) test_that("AsIs class stripped from I()", { df <- data.frame(x = 1, y = 1:2) class(df) <- c("myclass", "data.frame") expect_equal(vec_ptype_full(I(df)), "I>") expect_equal(vec_ptype_full(I(df[1])), "I>") expect_equal(vec_ptype_full(I(df[0])), "I>") }) # ------------------------------------------------------------------------------ # Proxy / restore test_that("can slice AsIs class", { df <- data.frame(x = I(1:3), y = I(list(4, 5, 6))) expect_identical(vec_slice(df, 2:3), unrownames(df[2:3, ])) }) # ------------------------------------------------------------------------------ # Coercion test_that("can take the common type of identical AsIs objects", { expect_identical(vec_ptype2(I(1), I(1)), I(numeric())) }) test_that("AsIs objects throw ptype2 errors with their underlying types", { verify_errors({ expect_error(vec_ptype2(I(1), I("x")), class = "vctrs_error_incompatible_type") }) }) test_that("AsIs always wraps the common type", { expect_identical(vec_ptype2(I(1L), 1), I(numeric())) expect_identical(vec_ptype2(1, I(1L)), I(numeric())) }) # ------------------------------------------------------------------------------ # Casting test_that("can cast one AsIs to another AsIs", { expect_identical(vec_cast(I(1), I(1)), I(1)) expect_identical(vec_cast(I(1), I(1L)), I(1L)) }) test_that("AsIs objects throw cast errors with their underlying types", { verify_errors({ expect_error(vec_cast(I(1), I(factor("x"))), class = "vctrs_error_incompatible_type") }) }) test_that("casting from an AsIs drops the AsIs class", { expect_identical(vec_cast(I(1), 1), 1) }) test_that("casting to an AsIs adds the AsIs class", { expect_identical(vec_cast(1, I(1)), I(1)) }) # ------------------------------------------------------------------------------ # Misc test_that("can `vec_c()` with only AsIs objects", { expect_identical(vec_c(I(1), I(2)), I(c(1, 2))) expect_identical(vec_c(I(1), I(2L)), I(c(1, 2))) }) test_that("can `vec_c()` with AsIs objects mixed with other types", { expect_identical(vec_c(I(1L), 1), I(c(1, 1))) }) # ------------------------------------------------------------------------------ # Errors test_that("AsIs handling has meaningful errors", { verify_output(test_path("error/test-type-asis.txt"), { "# AsIs objects throw ptype2 errors with their underlying types" vec_ptype2(I(1), I("x")) "# AsIs objects throw cast errors with their underlying types" vec_cast(I(1), I(factor("x"))) }) }) vctrs/tests/testthat/test-cast.R0000644000176200001440000002003413723213047016450 0ustar liggesusers test_that("vec_cast() has helpful error messages", { verify_output(test_path("error", "test-cast.txt"), { "# Casting to named argument mentions 'match type '" vec_cast(1, "", x_arg = "foo", to_arg = "bar") vec_cast(1, "", x_arg = "foo") }) }) # vec_cast --------------------------------------------------------------- test_that("new classes are uncoercible by default", { x <- structure(1:10, class = "vctrs_nonexistant") expect_error(vec_cast(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(x, 1), class = "vctrs_error_incompatible_type") }) test_that("casting requires vectors", { expect_error(vec_cast(NULL, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(NA, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(list(), quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), NULL), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), NA), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), list()), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), quote(name)), class = "vctrs_error_scalar_type") }) test_that("casting between `NULL` and partial types is allowed", { expect_identical(vec_cast(NULL, partial_factor()), NULL) expect_identical(vec_cast(partial_factor(), NULL), partial_factor()) }) test_that("dimensionality matches output" ,{ x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) x <- matrix(1, nrow = 2, ncol = 2) expect_error(vec_cast(x, logical()), class = "vctrs_error_incompatible_type") }) test_that("empty input to vec_cast_common() returns list()", { expect_equal(vec_cast_common(), list()) expect_equal(vec_cast_common(NULL, NULL), list(NULL, NULL)) }) test_that("identical structures can be cast to each other", { expect_identical(vec_cast(foobar("foo"), foobar("bar")), foobar("foo")) }) test_that("cast common preserves names", { expect_identical(vec_cast_common(foo = 1, bar = 2L), list(foo = 1, bar = 2)) }) test_that("cast errors create helpful messages (#57, #225)", { expect_known_output(file = test_path("test-cast-error-nested.txt"), { # Lossy cast try2(vec_cast("foo", 10)) # Incompatible cast try2(vec_cast(factor("foo"), 10)) ## Nested data frames # Lossy cast x <- tibble(a = tibble(b = "foo")) y <- tibble(a = tibble(b = 10)) try2(vec_cast(x, y)) # Incompatible cast x <- tibble(a = tibble(b = factor("foo"))) try2(vec_cast(x, y)) # Common cast error try2(vec_cast_common(x, y)) }) }) test_that("unspecified can be cast to shaped vectors", { x <- matrix(letters[1:4], 2) expect_identical(vec_cast(NA, x), matrix(chr(NA, NA), 1)) x <- foobar(c(1:4)) dim(x) <- c(2, 2) out <- vec_cast(NA, x) exp <- foobar(int(c(NA, NA))) dim(exp) <- c(1, 2) expect_identical(out, exp) }) test_that("vec_cast() only falls back when casting to base type", { expect_incompatible_df_cast(vec_cast(foobar(mtcars), mtcars), mtcars) expect_error( vec_cast(mtcars, foobar(mtcars)), class = "vctrs_error_incompatible_type" ) }) # Conditions -------------------------------------------------------------- test_that("can suppress cast errors selectively", { f <- function() vec_cast(factor("a"), to = factor("b")) expect_error(regexp = NA, allow_lossy_cast(f())) expect_error(regexp = NA, allow_lossy_cast(f(), x_ptype = factor("a"))) expect_error(regexp = NA, allow_lossy_cast(f(), to_ptype = factor("b"))) expect_error(regexp = NA, allow_lossy_cast(f(), x_ptype = factor("a"), to_ptype = factor("b"))) expect_error(allow_lossy_cast(f(), x_ptype = factor("c")), class = "vctrs_error_cast_lossy") expect_error(allow_lossy_cast(f(), x_ptype = factor("b"), to_ptype = factor("a")), class = "vctrs_error_cast_lossy") expect_error(allow_lossy_cast(f(), x_ptype = factor("a"), to_ptype = factor("c")), class = "vctrs_error_cast_lossy") }) test_that("can signal deprecation warnings for lossy casts", { local_lifecycle_warnings() lossy_cast <- function() { maybe_lossy_cast( TRUE, factor("foo"), factor("bar"), lossy = TRUE, .deprecation = TRUE, x_arg = "x", to_arg = "to" ) } expect_true(expect_warning(lossy_cast(), "detected a lossy transformation")) expect_true(expect_warning(regexp = NA, allow_lossy_cast(lossy_cast()))) expect_true(expect_warning(regexp = NA, allow_lossy_cast(lossy_cast(), factor("foo"), factor("bar")))) expect_true(expect_warning(allow_lossy_cast(lossy_cast(), factor("bar"), double()))) }) # vec_cast_common() ------------------------------------------------------- test_that("vec_ptype_common() optionally falls back to base class", { x <- foobar(NA, foo = 1) y <- foobaz(NA, bar = 2) x_df <- data_frame(x = x) y_df <- data_frame(x = y) expect_error( vec_ptype_common_opts(x, y, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype_common_opts(x_df, y_df, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast_common_opts(x, y, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast_common_opts(x_df, y_df, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) class(y) <- c("foo", class(x)) y_df <- data_frame(x = y) common_sentinel <- vec_ptype_common_opts(x, y, .opts = full_fallback_opts()) expect_true(is_common_class_fallback(common_sentinel)) expect_identical(fallback_class(common_sentinel), "vctrs_foobar") common_sentinel <- vec_ptype_common_opts(x_df, y_df, .opts = full_fallback_opts()) expect_true(is_common_class_fallback(common_sentinel$x)) expect_identical(fallback_class(common_sentinel$x), "vctrs_foobar") common <- vec_cast_common_opts(x = x, y = y, .opts = full_fallback_opts()) expect_identical(common, list(x = x, y = y)) common <- vec_cast_common_opts(x = x_df, y = y_df, .opts = full_fallback_opts()) expect_identical(common, list(x = x_df, y = y_df)) }) test_that("vec_ptype_common_fallback() collects common type", { x <- foobar(1, foo = 1, class = c("quux", "baz")) y <- foobar(2, bar = 2, class = "baz") x_df <- data_frame(x = x) y_df <- data_frame(x = y) out <- vec_ptype_common_fallback(x, y) expect_identical(typeof(out), "double") expect_true(is_common_class_fallback(out)) expect_identical(fallback_class(out), c("baz", "vctrs_foobar")) out <- vec_ptype_common_fallback(x_df, y_df) expect_identical(typeof(out$x), "double") expect_true(is_common_class_fallback(out$x)) expect_identical(fallback_class(out$x), c("baz", "vctrs_foobar")) # Different base types can't fall back to common class z <- foobar(3L, baz = 3) expect_error( vec_ptype_common_fallback(x, z), class = "vctrs_error_incompatible_type" ) z_df <- data_frame(x = z) expect_error( vec_ptype_common_fallback(x_df, z_df), class = "vctrs_error_incompatible_type" ) }) test_that("fallback sentinel is returned with unspecified inputs", { fallback <- vec_ptype_common_fallback(foobar(1), foobar(1)) expect_identical(vec_ptype_common_fallback(NA, foobar(1)), fallback) expect_identical(vec_ptype_common_fallback(foobar(1), NA), fallback) }) test_that("vec_ptype_common() supports subclasses of list", { x <- structure(list(1), class = c("vctrs_foo", "list")) y <- structure(list(2), class = c("bar", "vctrs_foo", "list")) expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type") out <- with_methods( c.vctrs_foo = function(...) quux(NextMethod()), vec_c(x, y) ) expect_identical(out, quux(list(1, 2))) }) test_that("vec_cast_common_fallback() works with tibbles", { x <- foobar("foo") df <- data_frame(x = x) tib <- tibble(x = x) exp <- list(tib, tib) expect_identical(vec_cast_common_fallback(tib, tib), exp) expect_identical(vec_cast_common_fallback(tib, df), exp) expect_identical(vec_cast_common_fallback(df, tib), exp) }) vctrs/tests/testthat/helper-s4.R0000644000176200001440000000340613656733470016364 0ustar liggesusers .rando <- setClass( "vctrs_rando", contains = "numeric", slots = list(.Data = "numeric") ) rando <- function(n = 0) { .rando(as.numeric(seq_len(n))) } as_rando <- function(x) { rando(length(x)) } setMethod("[", "vctrs_rando", function(x, i, j, ..., drop = TRUE) { new_n <- length(vec_as_location(i, length(x@.Data), names(x@.Data))) rando(new_n) }) .Counts <- methods::setClass( "vctrs_Counts", contains = "integer", slots = c(name = "character") ) local_c_counts <- function(frame = caller_env()) { c_counts <- function(x, ...) { xs <- list(x, ...) xs_data <- lapply(xs, function(x) x@.Data) new_data <- do.call(c, xs_data) .Counts(new_data, name = "Dispatched") } local_s4_method( frame = frame, "c", methods::signature(x = "vctrs_Counts"), c_counts ) } local_s4_method <- function(generic, signature, method, frame = caller_env()) { methods::setMethod(generic, signature, method) exit_expr <- call2(methods::removeMethod, generic, signature, where = topenv(frame)) local_exit(exit_expr, frame = frame) } with_s4_method <- function(generic, signature, method, expr) { local_s4_method(generic, signature, method) expr } local_exit <- function(expr, frame = caller_env()) { # We are at top-level when only one frame refers to the global environment if (is_reference(frame, global_env())) { is_global_frame <- sys.parents() == 0 if (sum(is_global_frame) == 1) { abort("Can't add an exit event at top-level") } } # Inline everything so the call will succeed in any environment expr <- call2(on.exit, expr, add = TRUE) eval_bare(expr, frame) invisible(expr) } skip_if_cant_set_names_on_s4 <- function() { skip_if(getRversion() < "3.5.0", message = "Can't set names on S4 objects") } vctrs/tests/testthat/output/0000755000176200001440000000000014024414711015753 5ustar liggesusersvctrs/tests/testthat/output/test-vec-as-names.txt0000644000176200001440000000063714042546240021761 0ustar liggesusers> vec_as_names(c("x", "x"), repair = "unique") Message: New names: * x -> x...1 * x -> x...2 [1] "x...1" "x...2" > vec_as_names(c("x", "x"), repair = "unique", quiet = TRUE) [1] "x...1" "x...2" > vec_as_names(c("x", "x"), repair = "check_unique", repair_arg = "repair") Error: Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. i Use argument `repair` to specify repair strategy. vctrs/tests/testthat/output/bind-name-repair.txt0000644000176200001440000000416314042546234021640 0ustar liggesusers vec_rbind() =========== > vec_rbind(1, 2) Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 ...1 1 1 2 2 > vec_rbind(1, 2, .names_to = NULL) Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 ...1 1 1 2 2 > vec_rbind(1, 2, ...10 = 3) Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 ...1 1 1 2 2 3 3 > vec_rbind(1, 2, ...10 = 3, .names_to = NULL) Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 ...1 ...1 1 ...2 2 ...3 3 > vec_rbind(a = 1, b = 2) Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 ...1 1 1 2 2 > vec_rbind(a = 1, b = 2, .names_to = NULL) Message: New names: * `` -> ...1 Message: New names: * `` -> ...1 ...1 a 1 b 2 > vec_rbind(c(a = 1), c(b = 2)) a b 1 1 NA 2 NA 2 > vec_rbind(c(a = 1), c(b = 2), .names_to = NULL) a b 1 1 NA 2 NA 2 > # Silent when assigning duplicate row names of df-cols > df <- new_data_frame(list(x = mtcars[1:3, 1, drop = FALSE])) > vec_rbind(df, df) mpg 1 21.0 2 21.0 3 22.8 4 21.0 5 21.0 6 22.8 > vec_rbind(mtcars[1:4, ], mtcars[1:3, ]) mpg cyl disp hp drat wt qsec vs am gear carb Mazda RX4...1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4 Wag...2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 Datsun 710...3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 Mazda RX4...5 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4 Wag...6 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 Datsun 710...7 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 vec_cbind() =========== > vec_cbind(1, 2) Message: New names: * `` -> ...1 * `` -> ...2 ...1 ...2 1 1 2 > vec_cbind(1, 2, ...10 = 3) Message: New names: * `` -> ...1 * `` -> ...2 * ...10 -> ...3 ...1 ...2 ...3 1 1 2 3 > vec_cbind(a = 1, b = 2) a b 1 1 2 > vec_cbind(c(a = 1), c(b = 2)) Message: New names: * `` -> ...1 * `` -> ...2 ...1 ...2 1 1 2 vctrs/tests/testthat/test-order-radix.R0000644000176200001440000010316114042540502017733 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(integer()), integer()) }) test_that("can order integers", { x <- c(2L, 3L, 1L, 5L) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:5 expect_identical(vec_order_radix(x), order(x)) }) test_that("orders correctly around the UINT8_MAX boundary", { x <- 251:255 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1L, 3L, 1L, 3L) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c(1L, NA_integer_, 3L) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("integer, small: `NA` order can be first", { x <- c(1L, NA_integer_, 3L) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("double: `direction` can be set to `desc`", { x <- c(1, 5, 3) expect_identical(vec_order_radix(x, direction = "desc"), c(2L, 3L, 1L)) x <- c(1L, .Machine$integer.max, 3L) expect_identical(vec_order_radix(x, direction = "desc"), c(2L, 3L, 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_integer_, NA_integer_) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order when in expected order", { x <- c(1L, 1L, 2L, NA, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5) x <- c(3L, 3L, 2L, NA, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5) x <- c(NA, NA, 1L, 1L, 2L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5) x <- c(NA, NA, 3L, 3L, 2L) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, 2L, 1L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1) x <- c(NA, 1L, 2L) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1) x <- c(2L, 1L, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1) x <- c(1L, 2L, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1) }) # ------------------------------------------------------------------------------ # vec_order_radix() - counting # To trigger counting ordering, get above the insertion order boundary and then # have a range less than the counting order range boundary. test_that("can order integers with counting order", { x <- (ORDER_INSERTION_BOUNDARY + 1L):1L expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:(ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L) expect_identical(vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L, 1:ORDER_INSERTION_BOUNDARY) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # To trigger radix ordering, get above the insertion order boundary and then # have a range greater than the counting order range boundary. test_that("can order integers with radix order", { x <- c(INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L, 1:ORDER_INSERTION_BOUNDARY) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- c(1:ORDER_INSERTION_BOUNDARY, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical(vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L, 1:ORDER_INSERTION_BOUNDARY, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) }) test_that("can order all 1 value", { x <- rep(1L, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical(vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE)) }) test_that("all `NA` values works - ensures that we can compute the 'range' of all NAs", { x <- rep(NA_integer_, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical(vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE)) }) test_that("can order with many NAs first", { x <- c(rep(NA_integer_, ORDER_INSERTION_BOUNDARY + 1L), 2L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical(vec_order_radix(x, na_value = "smallest"), base_order(x, na.last = FALSE)) }) # ------------------------------------------------------------------------------ # vec_order_radix() # Really this just goes through the integer infrastructure. Just checking that # it is working. test_that("can order size zero input", { expect_identical(vec_order_radix(logical()), integer()) }) test_that("can order logicals", { x <- c(FALSE, TRUE, FALSE) expect_identical(vec_order_radix(x), order(x)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(TRUE, NA, FALSE) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA, NA) expect_identical(vec_order_radix(x), order(x)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(double()), integer()) }) test_that("can order doubles", { x <- c(2, 3, 1, 5) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:5 + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1, 3, 1, 3) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c(1, NA_real_, 3) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("double: `NA` order can be first", { x <- c(1, NA_real_, 3) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3, NA_real_, 1, 2) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_real_, NA_real_) expect_identical(vec_order_radix(x), order(x)) }) test_that("NA_real_ and NaN look identical for ordering", { x <- c(NA_real_, NaN) expect_identical(vec_order_radix(x, na_value = "largest"), c(1L, 2L)) expect_identical(vec_order_radix(x, na_value = "smallest"), c(1L, 2L)) }) test_that("double: -Inf / Inf order correctly", { x <- c(0, -Inf, Inf) expect_identical(vec_order_radix(x, direction = "asc"), c(2L, 1L, 3L)) expect_identical(vec_order_radix(x, direction = "desc"), c(3L, 1L, 2L)) }) test_that("double: -0 and 0 order identically / stably", { x <- c(0, -0) expect_identical(vec_order_radix(x, direction = "desc"), c(1L, 2L)) expect_identical(vec_order_radix(x, direction = "asc"), c(1L, 2L)) }) test_that("can order when in expected order", { x <- c(1, 1, 2, NA, NaN) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5) x <- c(3, 3, 2, NA, NaN) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5) x <- c(NA, NaN, 1, 1, 2) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5) x <- c(NA, NaN, 3, 3, 2) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, 2, 1) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1) x <- c(NA, 1, 2) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1) x <- c(2, 1, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1) x <- c(1, 2, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # To trigger radix ordering, get above the insertion order boundary. There is # no intermediate counting sort for doubles. test_that("can order doubles with radix order", { x <- (ORDER_INSERTION_BOUNDARY + 1L):1L + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:(ORDER_INSERTION_BOUNDARY + 1L) + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L) + 0 expect_identical(vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3, NA_real_, 1, 2, 1:ORDER_INSERTION_BOUNDARY) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- rep(NA_real_, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("NA_real_ and NaN look identical for ordering", { x <- rep(c(NA_real_, NaN), ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x, na_value = "largest"), seq_along(x)) expect_identical(vec_order_radix(x, na_value = "smallest"), seq_along(x)) }) test_that("-Inf / Inf order correctly", { x <- c(rep(0, ORDER_INSERTION_BOUNDARY), -Inf, Inf) expect_identical(vec_order_radix(x, direction = "asc"), order(x, decreasing = FALSE)) expect_identical(vec_order_radix(x, direction = "desc"), order(x, decreasing = TRUE)) }) test_that("double, large: -0 and 0 order identically / stably", { x <- c(rep(0, ORDER_INSERTION_BOUNDARY), -0) expect_identical(vec_order_radix(x, direction = "desc"), order(x, decreasing = TRUE)) expect_identical(vec_order_radix(x, direction = "asc"), order(x, decreasing = FALSE)) }) # ------------------------------------------------------------------------------ # vec_order_radix() test_that("can order size zero input", { expect_identical(vec_order_radix(complex()), integer()) }) test_that("can order complex", { x <- complex(real = c(3, 1, 2)) expect_identical(vec_order_radix(x), c(2L, 3L, 1L)) }) test_that("ordering on ties is done stably", { x <- complex(real = c(1, 3, 1, 3)) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("imaginary section is used to break ties", { x <- complex( real = c(1L, 2L, 1L), imaginary = c(3L, 2L, 1L) ) expect_identical(vec_order_radix(x), c(3L, 1L, 2L)) }) test_that("can be used in a data frame", { x <- c(1L, 1L, 1L, 2L, 1L) y <- complex( real = c(1L, 2L, 1L, 3L, 1L), imaginary = c(3L, 2L, 1L, 4L, 1L) ) z <- c(1, 2, 5, 4, 3) # as second column df1 <- data.frame(x = x, y = y) # as first column df2 <- data.frame(y = y, x = x) # as second column with a third after it to break ties df3 <- data.frame(x = x, y = y, z = z) # Base R can't do radix sorting with complex expect_identical(vec_order_radix(df1), c(3L, 5L, 1L, 2L, 4L)) expect_identical(vec_order_radix(df2), c(3L, 5L, 1L, 2L, 4L)) expect_identical(vec_order_radix(df3), c(5L, 3L, 1L, 2L, 4L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- complex(real = c(3, NA, 1.5, 2, NA), imaginary = c(1, 1, 1, 1, 2)) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) # Base R is actually wrong here! It doesn't consider the imaginary part # when ordering in decreasing order with `NA` real. expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], #x[order(x, na.last = TRUE, decreasing = TRUE)] x[c(1L, 4L, 3L, 5L, 2L)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], #x[order(x, na.last = FALSE, decreasing = TRUE)] x[c(5L, 2L, 1L, 4L, 3L)] ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(character()), integer()) }) test_that("can order characters", { x <- c("xy", "x", "a", "bc") expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- c("a", "b", "c") expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c("ab", "ba", "ab", "ba") expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c("x", NA_character_, "y") expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("character, small: `NA` order can be first", { x <- c("x", NA_character_, "y") expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("character, small: `direction` can be set to `desc`", { x <- c("x", "abcde", "yz") expect_identical(vec_order_radix(x, direction = "desc"), c(3L, 1L, 2L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c("aaa", NA_character_, "a", "aa") expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_character_, NA_character_) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order empty string vs ASCII value 1 'Start of Header'", { x <- c("\001", "") expect_identical(vec_order_radix(x), c(2L, 1L)) }) test_that("can be used in a data frame", { x <- c(1L, 4L, 1L, 3L, 1L) y <- c("zy", "zz", "abcd", "gfa", "zy") z <- c(1, 2, 5, 4, 3) # as second column df1 <- data.frame(x = x, y = y) # as first column df2 <- data.frame(y = y, x = x) # as second column with a third after it to break ties df3 <- data.frame(x = x, y = y, z = z) expect_identical(vec_order_radix(df1), base_order(df1)) expect_identical(vec_order_radix(df2), base_order(df2)) expect_identical(vec_order_radix(df3), base_order(df3)) }) test_that("can have multiple character columns in a data frame", { df <- data.frame( x = c("def", "aba", "aba", "aba", "q"), y = c("zy", "zz", "zz", "gfa", "zy"), z = c("foo", "qux", "bar", "baz", "boo") ) expect_identical(vec_order_radix(df), base_order(df)) }) test_that("can order with varying encodings by converting to UTF-8", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1, "AC") expect_identical(vec_order_radix(x), c(4L, 1L, 2L, 3L)) expect_identical(vec_order_radix(x, direction = "desc"), c(1L, 2L, 3L, 4L)) }) test_that("can order when in expected order", { x <- c("a", "a", "b", NA, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5) x <- c("c", "c", "b", NA, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5) x <- c(NA, NA, "a", "a", "b") expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5) x <- c(NA, NA, "c", "c", "b") expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, "b", "a") expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1) x <- c(NA, "a", "b") expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1) x <- c("b", "a", NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1) x <- c("a", "b", NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # Have to get the number of unique strings above the ORDER_INSERTION_BOUNDARY # to trigger radix ordering. test_that("can order character vectors", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) expect_identical(vec_order_radix(x), base_order(x)) }) test_that("ordering on ties is done stably", { x <- c(paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)), "x1") expect_identical(vec_order_radix(x)[1:2], c(1L, length(x))) }) test_that("`NA` order defaults to last", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "y") expect_identical(vec_order_radix(x)[length(x)], length(x) - 1L) }) test_that("character, large: `NA` order can be first", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "y") expect_identical(vec_order_radix(x, na_value = "smallest")[[1L]], length(x) - 1L) }) test_that("character, large: `direction` can be set to `desc`", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) expect_identical(vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE)) }) test_that("all combinations of `direction` and `na_value` work", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "x", "aa", "x1") expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[base_order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[base_order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[base_order(x, na.last = TRUE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[base_order(x, na.last = FALSE, decreasing = TRUE)] ) }) # ------------------------------------------------------------------------------ # vec_order_radix() test_that("list elements are ordered by first appearance", { expect_identical(vec_order_radix(list(1:2, "a", 1:2)), c(1L, 3L, 2L)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("data frame with no columns and no rows returns integer()", { x <- data.frame() expect_identical(vec_order_radix(x), integer()) }) test_that("data frame with no columns and some rows returns sequential rows", { x <- new_data_frame(n = 5L) expect_identical(vec_order_radix(x), 1:5) }) test_that("can order with multiple pre-sorted keys", { df <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_order_radix(df), 1:2) }) test_that("first column has ordering presedence", { df <- data.frame(x = c(3L, 2L, 1L), y = c(1L, 2L, 3L)) expect_identical(vec_order_radix(df), 3:1) }) test_that("secondary columns break ties - integer", { df <- data.frame( x = c(1L, 2L, 1L), y = c(3L, 2L, 1L) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("secondary columns break ties - double", { df <- data.frame( x = c(1, 2, 1), y = c(3, 2, 1) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("secondary columns break ties - logical", { df <- data.frame( x = c(FALSE, TRUE, FALSE), y = c(TRUE, TRUE, FALSE) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("orders correctly when first column is already ordered but second isn't", { df <- data.frame( x = c(1L, 1L, 2L, 2L), y = c(3L, 2L, 4L, 1L) ) expect_identical(vec_order_radix(df), c(2L, 1L, 4L, 3L)) }) test_that("orders correctly when first column is already ordered but second isn't - character", { df <- data.frame( x = c("a", "a", "b", "b"), y = c("c", "b", "d", "a") ) expect_identical(vec_order_radix(df), c(2L, 1L, 4L, 3L)) }) test_that("`direction` is recycled", { df <- data.frame( x = c(1L, 1L, 2L, 2L), y = c(3L, 2L, 4L, 1L) ) expect_identical(vec_order_radix(df, direction = "desc"), c(3L, 4L, 1L, 2L)) }) test_that("`na_value` is recycled", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA), y = c(3L, 2L, 4L, 1L, NA) ) expect_identical(vec_order_radix(df, na_value = "smallest"), c(5L, 2L, 1L, 4L, 3L)) }) test_that("`direction` can be a vector", { df <- data.frame( x = c(1L, 1L, 2L, 2L), y = c(3L, 2L, 4L, 1L) ) expect_identical(vec_order_radix(df, direction = c("desc", "asc")), c(4L, 3L, 2L, 1L)) }) test_that("`na_value` can be a vector", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, NA), y = c(3L, 2L, 4L, 1L, NA, 2) ) expect_identical(vec_order_radix(df, na_value = c("smallest", "largest")), c(6L, 5L, 2L, 1L, 4L, 3L)) }) test_that("`na_value` and `direction` can both be vectors", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, NA), y = c(3L, 2L, 4L, 1L, NA, 2) ) expect_identical( vec_order_radix(df, direction = c("desc", "asc"), na_value = c("smallest", "largest")), 6:1 ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - counting test_that("can order 2+ integer column chunks with counting sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1L, y = c(rep(2L, quarter_low), rep(1L, quarter_high), rep(3L, half)) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix test_that("can order 2+ integer column chunks with radix sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1L, y = c(rep(2L, quarter_low), rep(1L, quarter_high), rep(3L, half), INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) ) expect_identical(vec_order_radix(df), base_order(df)) }) test_that("can order 2+ double column chunks with radix sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1, y = c(rep(2, quarter_low), rep(1, quarter_high), rep(3, half), INT_ORDER_COUNTING_RANGE_BOUNDARY + 1) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - chr_transform test_that("`chr_transform` transforms string input", { x <- c("b", "a", "A") expect_identical(vec_order_radix(x, chr_transform = tolower), c(2L, 3L, 1L)) expect_identical(vec_order_radix(x, chr_transform = ~tolower(.x)), c(2L, 3L, 1L)) }) test_that("`chr_transform` works with data frame columns and is applied to all string columns", { df <- data_frame(x = c(1, 1, 1), y = c("B", "a", "a"), z = c("a", "D", "c")) expect_identical(vec_order_radix(df, chr_transform = tolower), c(3L, 2L, 1L)) }) test_that("`chr_transform` is validated", { expect_error(vec_order_radix("x", chr_transform = 1), "Can't convert `chr_transform` to a function") expect_error(vec_order_radix("x", chr_transform = ~c("y", "z")), "1, not 2") expect_error(vec_order_radix("x", chr_transform = ~1), "character vector") expect_error(vec_order_radix("x", chr_transform = function() {"y"})) }) # ------------------------------------------------------------------------------ # vec_order_radix() - error checking test_that("`na_value` is checked", { expect_error(vec_order_radix(1L, na_value = "x"), "\"largest\" or \"smallest\"") expect_error(vec_order_radix(1L, na_value = c(TRUE, TRUE)), "must be a character vector") expect_error(vec_order_radix(1L, na_value = NA_character_), "can't be missing") }) test_that("`direction` is checked", { expect_error(vec_order_radix(1L, direction = "x"), "must contain only") expect_error(vec_order_radix(1L, direction = c("asc", "asc")), "single value") expect_error(vec_order_radix(1L, direction = NA_character_), "can't be missing") expect_error(vec_order_radix(data.frame(x = 1), direction = c("asc", "asc")), "length 1 or") }) test_that("`x` is checked", { expect_error(vec_order_radix(foobar()), class = "vctrs_error_scalar_type") }) # ------------------------------------------------------------------------------ # vec_order_radix() - groups test_that("groups can be reallocated if we exceed the max group data size", { set.seed(123) # The first column has all unique groups so 1 more than the default group # data size is needed and will be reallocated on the fly df <- data.frame( x = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE), y = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE), z = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - comparison proxy test_that("ordering works with rcrd types", { x <- tuple(c(1, 2, 1), c(3, 2, 1)) expect_identical(vec_order_radix(x), c(3L, 1L, 2L)) }) test_that("data frame comparison proxies don't allow vector `direction` or `na_value`", { x <- tuple(c(1, 2, 1), c(3, 2, 1)) expect_error(vec_order_radix(x, direction = c("desc", "asc")), "single value") expect_error(vec_order_radix(x, na_value = c("largest", "smallest")), "single value") }) test_that("ordering works with df-cols", { df_col <- new_data_frame(list(y = c(2, 1, 2), z = c(3, 3, 3))) df <- new_data_frame(list(x = c(1, 1, 1), y = df_col)) expect_identical(vec_order_radix(df), c(2L, 1L, 3L)) # Can only supply a max of 2 `direction` or `na_value` values which get internally # expanded to 3 to match the flattened df proxy expect_identical(vec_order_radix(df, direction = c("asc", "desc")), c(1L, 3L, 2L)) expect_error(vec_order_radix(df, direction = c("desc", "desc", "asc")), "or length equal to") }) test_that("ordering works with df-cols with 0 cols", { df_col <- new_data_frame(list(), n = 3L) df <- new_data_frame(list(x = c(1, 3, 1), y = df_col, z = c(2, 1, 1))) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) # Can supply 3 `direction` values even though the 0-col df-col gets dropped expect_identical(vec_order_radix(df, direction = c("asc", "desc", "desc")), c(1L, 3L, 2L)) expect_error(vec_order_radix(df, direction = c("desc", "asc")), "or length equal to") }) test_that("ordering works with rcrd cols", { y <- tuple(c(1, 2, 1), c(3, 2, 1)) df <- new_data_frame(list(z = c(1, 1, 1), y = y)) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) # Can only supply a max of 2 `direction` values which get internally # expanded to 3 to match the flattened df proxy expect_identical(vec_order_radix(df, direction = c("asc", "desc")), c(2L, 1L, 3L)) expect_error(vec_order_radix(df, direction = c("desc", "desc", "asc")), "or length equal to") }) # ------------------------------------------------------------------------------ # vec_order_locs() test_that("`vec_order_locs()` is working", { x <- c(1, 3, 1, 5, 2, 5, 1) expect <- new_data_frame( list( key = c(1, 2, 3, 5), loc = list(c(1L, 3L, 7L), 5L, 2L, c(4L, 6L)) ) ) expect_identical(vec_order_locs(x), expect) }) test_that("`chr_transform` can result in keys being seen as identical", { x <- c("b", "A", "a") y <- c("b", "a", "A") x_expect <- data_frame(key = c("A", "b"), loc = list(c(2L, 3L), 1L)) y_expect <- data_frame(key = c("a", "b"), loc = list(c(2L, 3L), 1L)) expect_identical(vec_order_locs(x, chr_transform = tolower), x_expect) expect_identical(vec_order_locs(y, chr_transform = tolower), y_expect) }) # ------------------------------------------------------------------------------ # `vec_order_radix()` - Pre-existing tests test_that("can request NAs sorted first", { expect_equal(vec_order_radix(c(1, NA), "asc", "largest"), 1:2) expect_equal(vec_order_radix(c(1, NA), "desc", "largest"), 1:2) expect_equal(vec_order_radix(c(1, NA), "asc", "smallest"), 2:1) expect_equal(vec_order_radix(c(1, NA), "desc", "smallest"), 2:1) }) test_that("can sort data frames", { df <- data.frame(x = c(1, 2, 1), y = c(1, 2, 2)) out1 <- vec_sort(df) expect_equal(out1, data.frame(x = c(1, 1, 2), y = c(1, 2, 2))) out2 <- vec_sort(df, "desc") expect_equal(out2, data.frame(x = c(2, 1, 1), y = c(2, 2, 1))) }) test_that("can sort empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_sort(df1), df1) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_sort(df2), df2) }) test_that("can order tibbles that contain non-comparable objects", { expect_equal(vec_order_radix(data_frame(x = list(10, 2, 1))), 1:3) }) test_that("can order matrices and arrays (#306)", { x <- matrix(c(1, 1, 1, 1, 2, 1), ncol = 2) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) x <- array(1:8, c(2, 2, 2)) x[2] <- 1 x[3] <- 5 expect_identical(vec_order_radix(x), 2:1) }) test_that("can order empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_order_radix(df1), integer()) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_order_radix(df2), integer()) }) test_that("can order data frames with data frame columns (#527)", { expect_equal( vec_order_radix(iris), vec_order_radix(data_frame(iris = iris)) ) }) test_that("can order data frames (and subclasses) with matrix columns", { df <- new_data_frame(n = 2L) df$x <- new_data_frame(list(y = matrix(1:2, 2))) expect_identical(vec_order_radix(df), 1:2) df$x <- tibble::tibble(y = matrix(1:2, 2)) expect_identical(vec_order_radix(df), 1:2) }) vctrs/tests/testthat/test-split.R0000644000176200001440000000225013723213047016651 0ustar liggesusers test_that("can split empty vector", { out <- vec_split(integer(), character()) expect_s3_class(out, "data.frame") expect_equal(out$key, character()) expect_equal(out$val, list()) }) test_that("split data frame with data frame", { df <- data.frame(x = c(1, 1, 2), y = c(1, 1, 1)) out <- vec_split(df, df) expect_s3_class(out, "data.frame") expect_equal(out$key, data.frame(x = c(1, 2), y = c(1, 1))) expect_equal(out$val, list( data.frame(x = c(1, 1), y = c(1, 1)), data.frame(x = 2, y = 1) )) }) test_that("x and by must be same size", { expect_error( vec_split(1:3, 1:2), "same size" ) }) test_that("split takes the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_identical(nrow(vec_split(1:3, x)), 2L) }) test_that("split works with different encodings", { encs <- encodings() expect_identical(nrow(vec_split(1:3, encs)), 1L) }) test_that("`key` and `value` retain names", { x <- c(a = 1, b = 2, c = 1, a = 1) split <- vec_split(x, x) expect_identical(split$key, c(a = 1, b = 2)) expect_identical(split$val[[1]], c(a = 1, c = 1, a = 1)) expect_identical(split$val[[2]], c(b = 2)) }) vctrs/tests/testthat/test-order.R0000644000176200001440000000541614027045462016642 0ustar liggesuserstest_that("can request NAs sorted first", { expect_equal(vec_order(c(1, NA), "asc", "largest"), 1:2) expect_equal(vec_order(c(1, NA), "desc", "largest"), 2:1) expect_equal(vec_order(c(1, NA), "asc", "smallest"), 2:1) expect_equal(vec_order(c(1, NA), "desc", "smallest"), 1:2) }) test_that("can order complex vectors", { x <- complex(real = c(1, 2, 2, 3, 3), imaginary = c(5, 4, 3, 2, NA)) expect_equal(vec_order(x, direction = "asc", na_value = "largest"), c(1, 3, 2, 4, 5)) expect_equal(vec_order(x, direction = "desc", na_value = "largest"), rev(c(1, 3, 2, 4, 5))) expect_equal(vec_order(x, direction = "asc", na_value = "smallest"), c(5, 1, 3, 2, 4)) expect_equal(vec_order(x, direction = "desc", na_value = "smallest"), rev(c(5, 1, 3, 2, 4))) }) test_that("can sort data frames", { df <- data.frame(x = c(1, 2, 1), y = c(1, 2, 2)) out1 <- vec_sort(df) expect_equal(out1, data.frame(x = c(1, 1, 2), y = c(1, 2, 2))) out2 <- vec_sort(df, "desc") expect_equal(out2, data.frame(x = c(2, 1, 1), y = c(2, 2, 1))) }) test_that("can sort empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_sort(df1), df1) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_sort(df2), df2) }) test_that("can order tibbles that contain non-comparable objects", { expect_equal(vec_order(data_frame(x = list(10, 2, 1))), 1:3) }) test_that("can order matrices and arrays (#306)", { x <- matrix(c(1, 1, 1, 1, 2, 1), ncol = 2) expect_identical(vec_order(x), c(1L, 3L, 2L)) x <- array(1:8, c(2, 2, 2)) x[2] <- 1 x[3] <- 5 expect_identical(vec_order(x), 2:1) }) test_that("can order empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_order(df1), integer()) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_order(df2), integer()) }) test_that("can order data frames with data frame columns (#527)", { expect_equal( vec_order(iris), vec_order(data_frame(iris = iris)) ) }) test_that("can order data frames (and subclasses) with matrix columns", { df <- new_data_frame(n = 2L) df$x <- new_data_frame(list(y = matrix(1:2, 2))) expect_identical(vec_order(df), 1:2) df$x <- tibble::tibble(y = matrix(1:2, 2)) expect_identical(vec_order(df), 1:2) }) test_that("classed proxies do not affect performance (tidyverse/dplyr#5423)", { skip_on_cran() x <- glue::glue("{1:10000}") expect_time_lt(vec_order(x), 0.2) }) test_that("can order data frames that don't allow removing the column names (#1298)", { skip_if_not_installed("withr") local_methods( `names<-.vctrs_foobar` = function(x, value) { if (is.null(value)) { abort("Cannot remove names.") } NextMethod() } ) df <- foobar(data.frame(x = 1, y = 2)) expect_silent(expect_identical(vec_order(df), 1L)) }) vctrs/tests/testthat/test-cast-error-nested.txt0000644000176200001440000000062514042546236021505 0ustar liggesusers vec_cast("foo", 10): Can't convert to . vec_cast(factor("foo"), 10): Can't convert > to . vec_cast(x, y): Can't convert `a$b` to match type of `a$b` . vec_cast(x, y): Can't convert `a$b` > to match type of `a$b` . vec_cast_common(x, y): Can't combine `..1$a$b` > and `..2$a$b` . vctrs/tests/testthat/test-hash.R0000644000176200001440000001116313723213047016444 0ustar liggesusers # Vectorised -------------------------------------------------------------- test_that("vec_hash() produces same hash for same values", { x <- vec_hash(1:3) y <- do.call(c, map(1:3, vec_hash)) expect_identical(x, y) }) test_that("F, T, and NA hash to different values", { x <- map(c(TRUE, FALSE, NA), vec_hash) expect_length(unique(x), 3) }) test_that("vec_hash of double produces different values", { x <- vec_hash(c(1, 1, 2)) expect_true(identical(x[1:4], x[5:8])) expect_false(identical(x[5:8], x[9:12])) }) test_that("NA and NaN hash to different values", { x <- vec_hash(c(NA, NaN)) expect_false(identical(x[1:4], x[5:8])) }) test_that("same string hashes to same value", { x <- vec_hash(c("1", "1", "2")) expect_true(identical(x[1:4], x[5:8])) expect_false(identical(x[5:8], x[9:12])) }) test_that("list hashes to values of individual values", { x <- vec_hash(list(1:3, letters)) expect_identical(x[1:4], obj_hash(1:3)) expect_identical(x[5:8], obj_hash(letters)) x <- map(list(list(1:3), list(letters)), vec_hash) expect_identical(x[[1]], obj_hash(1:3)) expect_identical(x[[2]], obj_hash(letters)) }) test_that("hash of data frame works down rows", { df <- data.frame(x = 1:3, y = 1:3) x <- vec_hash(df) expect_length(x, 4 * vec_size(df)) expect_identical(x[1:4], vec_hash(df[1, ])) }) test_that("hashes are consistent from run to run", { # no string, since we're currently hashing the address in string pool df <- list( lgl = c(TRUE, FALSE, NA), int = 1:100, dbl1 = as.double(1:100), dbl2 = seq(0, 1, length = 100) ) hash <- lapply(df, vec_hash) # Big-endian results are byte-swapped, but otherwise equivalent. # Swap results so that there's no need to save results twice. if (.Platform$endian == "big") { hash <- lapply( hash, function(x) { writeBin(readBin(x, "int", 100, endian = "big"), x, endian = "little") } ) } local_options(max.print = 99999) expect_known_output(print(hash), file = test_path("test-hash-hash.txt")) }) test_that("can hash list of non-vectors", { x <- list(quote(x), mean) expect_equal( vec_hash(x), c(obj_hash(x[[1]]), obj_hash(x[[2]])) ) }) test_that("can hash matrices", { x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2)) expect_identical( vec_hash(x), vec_hash(x) ) x <- matrix(c(1, 2, 3, 4), c(2, 2)) expect_identical( vec_hash(x), vec_hash(x) ) expect_false(identical( vec_hash(x), vec_hash(c(1, 2)) )) y <- matrix(c(1, 2, 3, 5), c(2, 2)) expect_false(identical( vec_hash(x), vec_hash(y) )) }) test_that("can hash NA", { expect_identical( vec_hash(NA), vec_hash(NA), ) }) test_that("can hash 1D arrays", { # 1D arrays are dispatched to `as.data.frame.vector()` which # currently does not strip dimensions. This caused an infinite # recursion. expect_length(vec_hash(array(1:2)), 8) expect_identical(vec_hash(array(1:2)), vec_hash(1:2)) }) test_that("can hash raw vectors", { expect_identical(vec_hash(0:255), vec_hash(as.raw(0:255))) }) test_that("can hash complex vectors", { expect_identical( vec_hash(c(1, 2) + 0i), c(obj_hash(c(1, 0)), obj_hash(c(2, 0))) ) }) test_that("hash treats positive and negative 0 as equivalent (#637)", { expect_equal(vec_hash(-0), vec_hash(0)) }) test_that("can hash lists of expressions", { expect_equal( vec_hash(list(expression(x), expression(y))), c(obj_hash(expression(x)), obj_hash(expression(y))) ) }) test_that("vec_hash() uses recursive equality proxy", { x <- new_data_frame(list(x = foobar(1:3))) default <- vec_hash(x) local_methods(vec_proxy_equal.vctrs_foobar = function(...) c(0, 0, 0)) overridden <- vec_hash(x) expect_false(identical(default, overridden)) }) # Object ------------------------------------------------------------------ test_that("equal objects hash to same value", { # just test function since they'll recurse through every other object type f1 <- function(x, y = NULL) x + y f2 <- function(x, y = NULL) x + y expect_false(identical(obj_hash(f1), obj_hash(f2))) expect_false(identical(vec_hash(data_frame(x = list(f1))), vec_hash(data_frame(x = list(f2))))) attr(f1, "srcref") <- NULL attr(f2, "srcref") <- NULL expect_equal(obj_hash(f1), obj_hash(f2)) expect_equal(vec_hash(data_frame(x = list(f1))), vec_hash(data_frame(x = list(f2)))) }) test_that("expression vectors hash to the same value as lists of calls/names", { expect_equal( obj_hash(expression(x, y)), obj_hash(list(as.name("x"), as.name("y"))) ) expect_equal( obj_hash(expression(mean(), sd())), obj_hash(list(call("mean"), call("sd"))) ) }) vctrs/tests/testthat/test-rep.R0000644000176200001440000000777513753021253016324 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_rep() test_that("`vec_rep()` can repeat vectors", { expect_identical(vec_rep(1:2, 5), rep(1:2, 5)) expect_identical(vec_rep(list(1, "x"), 5), rep(list(1, "x"), 5)) }) test_that("`vec_rep()` repeats data frames row wise", { x <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_rep(x, 2), vec_slice(x, c(1:2, 1:2))) }) test_that("`vec_rep()` can repeat 0 `times`", { expect_identical(vec_rep(1, 0), numeric()) }) test_that("`vec_rep()` errors on long vector output", { # Exact error message may be platform specific expect_error(vec_rep(1:2, .Machine$integer.max), "output size must be less than") }) test_that("`vec_rep()` validates `times`", { expect_error(vec_rep(1, "x"), class = "vctrs_error_incompatible_type") expect_error(vec_rep(1, c(1, 2))) expect_error(vec_rep(1, -1)) expect_error(vec_rep(1, NA_integer_)) }) # ------------------------------------------------------------------------------ # vec_rep_each() test_that("`vec_rep_each()` can repeat each element of vectors", { expect_identical(vec_rep_each(1:2, 5), rep(1:2, each = 5)) expect_identical(vec_rep_each(list(1, "x"), 5), rep(list(1, "x"), each = 5)) }) test_that("`vec_rep_each()` `times` is vectorized", { expect_identical(vec_rep_each(1:2, 1:2), rep(1:2, times = 1:2)) }) test_that("`vec_rep_each()` repeats data frames row wise", { x <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_rep_each(x, c(2, 1)), vec_slice(x, c(1, 1, 2))) }) test_that("`vec_rep_each()` can repeat 0 `times`", { expect_identical(vec_rep_each(1:2, 0), integer()) }) test_that("`vec_rep_each()` errors on long vector output", { # Exact error message may be platform specific expect_error(vec_rep_each(1:2, .Machine$integer.max), "output size must be less than") }) test_that("`vec_rep_each()` validates `times`", { expect_error(vec_rep_each(1, "x"), class = "vctrs_error_incompatible_type") expect_error(vec_rep_each(1, -1)) expect_error(vec_rep_each(c(1, 2), c(1, -1))) expect_error(vec_rep_each(1, NA_integer_)) expect_error(vec_rep_each(c(1, 2), c(1, NA_integer_))) }) test_that("`vec_rep_each()` uses recyclying errors", { expect_error(vec_rep_each(1:2, 1:3), class = "vctrs_error_recycle_incompatible_size") }) # ------------------------------------------------------------------------------ test_that("rep functions generate informative error messages", { verify_output(test_path("error", "test-rep.txt"), { "# `vec_rep()` validates `times`" vec_rep(1, "x") vec_rep(1, c(1, 2)) vec_rep(1, -1) vec_rep(1, NA_integer_) "# `vec_rep_each()` validates `times`" vec_rep_each(1, "x") vec_rep_each(1, -1) vec_rep_each(c(1, 2), c(1, -1)) vec_rep_each(1, NA_integer_) vec_rep_each(c(1, 2), c(1, NA_integer_)) "# `vec_rep_each()` uses recyclying errors" vec_rep_each(1:2, 1:3) }) }) # vec_unrep -------------------------------------------------------------------- test_that("can unrep a vector", { x <- c(1, 3, 3, 1, 5, 5, 6) expect <- data_frame( key = c(1, 3, 1, 5, 6), times = c(1L, 2L, 1L, 2L, 1L) ) expect_identical(vec_unrep(x), expect) }) test_that("can unrep a data frame", { df <- data_frame( x = c(1, 1, 2, 2, 2), y = c(1, 1, 1, 1, 2) ) expect <- data_frame( key = vec_slice(df, c(1, 3, 5)), times = c(2L, 2L, 1L) ) expect_identical(vec_unrep(df), expect) }) test_that("works with size zero input", { expect_identical(vec_unrep(integer()), data_frame(key = integer(), times = integer())) }) test_that("can roundtrip empty input", { x <- integer() compressed <- vec_unrep(x) expect_identical(vec_rep_each(compressed$key, compressed$times), x) x <- data_frame() compressed <- vec_unrep(x) expect_identical(vec_rep_each(compressed$key, compressed$times), x) }) test_that("works with data frames with rows but no columns", { x <- data_frame(.size = 5) expect <- data_frame(key = data_frame(.size = 1L), times = 5L) expect_identical(vec_unrep(x), expect) }) vctrs/tests/testthat/test-translate.R0000644000176200001440000001034313761207671017525 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_normalize_encoding() test_that("can translate a character vector of various encodings (#553)", { x <- unlist(encodings(), use.names = FALSE) results <- vec_normalize_encoding(x) expect_equal_encoding(results, encodings()$utf8) }) test_that("translates all encodings to UTF-8", { encs <- encodings() for (enc in encs) { expect_equal_encoding(vec_normalize_encoding(enc), encodings()$utf8) } }) test_that("can translate a list containing character vectors with different encodings", { results <- vec_normalize_encoding(encodings()) results <- unlist(results) expect_equal_encoding(results, encodings()$utf8) }) test_that("translation fails purposefully with any bytes", { expect_error( vec_normalize_encoding(encoding_bytes()), "translating strings with \"bytes\" encoding" ) }) test_that("translation fails purposefully when mixing with bytes with other encodings", { for (enc in encodings()) { x <- c(encoding_bytes(), enc) expect_error(vec_normalize_encoding(x), "translating strings with \"bytes\" encoding") } }) test_that("attributes are kept on translation (#599)", { encs <- encodings() x <- c(encs$utf8, encs$latin1) x <- structure(x, names = c("a", "b"), extra = 1) expect_equal(attributes(vec_normalize_encoding(x)), attributes(x)) }) test_that("translation is robust against scalar types contained in lists (#633)", { x <- list(a = z ~ y, b = z ~ z) expect_equal(vec_normalize_encoding(x), x) }) test_that("translation can still occur even if a scalar type is in a list", { encs <- encodings() x <- list(a = z ~ y, b = encs$latin1) result <- vec_normalize_encoding(x) expect_equal_encoding(result$b, encs$utf8) }) test_that("translation occurs inside scalars contained in a list", { encs <- encodings() scalar <- structure(list(x = encs$latin1), class = "scalar_list") lst <- list(scalar) result <- vec_normalize_encoding(lst) expect_equal_encoding(result[[1]]$x, encs$utf8) }) test_that("translation treats data frames elements of lists as lists (#1233)", { encs <- encodings() field <- c(encs$utf8, encs$latin1) a <- new_rcrd(list(field = field)) df <- data.frame(a = a, b = 1:2) x <- list(df) # Recursive proxy won't proxy list elements, # so the rcrd column in the data frame won't get proxied proxy <- vec_proxy_equal(x) result <- vec_normalize_encoding(proxy) expect_identical(result, x) result_field <- field(result[[1]]$a, "field") expect_field <- c(encs$utf8, encs$utf8) expect_equal_encoding(result_field, expect_field) }) test_that("attributes are translated", { utf8 <- encodings()$utf8 latin1 <- encodings()$latin1 a <- structure(1, enc = utf8) b <- structure(1, enc = latin1) c <- structure(1, enc1 = utf8, enc2 = list(latin1), enc3 = latin1) x <- list(a, b, c) result <- vec_normalize_encoding(x) a_enc <- attr(result[[1]], "enc") b_enc <- attr(result[[2]], "enc") c_enc1 <- attr(result[[3]], "enc1") c_enc2 <- attr(result[[3]], "enc2")[[1]] c_enc3 <- attr(result[[3]], "enc3") expect_equal_encoding(a_enc, utf8) expect_equal_encoding(b_enc, utf8) expect_equal_encoding(c_enc1, utf8) expect_equal_encoding(c_enc2, utf8) expect_equal_encoding(c_enc3, utf8) expect <- list( structure(1, enc = utf8), structure(1, enc1 = utf8, enc2 = list(utf8), enc3 = utf8) ) expect_identical(vec_unique(x), expect) }) test_that("attributes are translated recursively", { utf8 <- encodings()$utf8 latin1 <- encodings()$latin1 nested <- structure(1, latin1 = latin1) x <- structure(2, nested = nested, foo = 1, latin1 = latin1) result <- vec_normalize_encoding(x) attrib <- attributes(result) attrib_nested <- attributes(attrib$nested) expect_equal_encoding(attrib$latin1, utf8) expect_equal_encoding(attrib_nested$latin1, utf8) }) test_that("NAs aren't converted to 'NA' (#1291)", { utf8 <- c(NA, encodings()$utf8) latin1 <- c(NA, encodings()$latin1) result1 <- vec_normalize_encoding(utf8) result2 <- vec_normalize_encoding(latin1) expect_equal_encoding(result1, utf8) expect_equal_encoding(result2, utf8) expect_identical(result1[[1]], NA_character_) expect_identical(result2[[1]], NA_character_) }) vctrs/tests/testthat/test-partial-frame.R0000644000176200001440000000544213723213047020250 0ustar liggesusers test_that("has ok print method", { pf <- vec_ptype2(partial_frame(x = 1L), data.frame(y = 2)) expect_known_output( print(pf), test_path("test-partial-frame-print.txt") ) expect_equal(vec_ptype_abbr(pf), "prtl") }) test_that("order of variables comes from data", { pf <- partial_frame(y = 1, x = 2) df <- data.frame(x = 1, y = 2) expect_named(vec_ptype_common(pf, df), c("x", "y")) expect_named(vec_ptype_common(df, pf), c("x", "y")) }) test_that("partial variables added to end if not in data", { pf <- partial_frame(y = 1) df <- data.frame(x = 1) expect_named(vec_ptype_common(pf, df), c("x", "y")) expect_named(vec_ptype_common(df, pf), c("x", "y")) }) test_that("can assert partial frames based on column presence", { pf <- partial_frame(y = 1) expect_true(vec_is(data.frame(y = 2), pf)) expect_false(vec_is(data.frame(x = 1), pf)) expect_true(vec_is(data.frame(x = 1, y = 2), pf)) expect_true(vec_is(data.frame(x = 1, y = 2, z = 3), pf)) pf <- partial_frame(y = 1, z = 3) expect_false(vec_is(data.frame(y = 2), pf)) expect_false(vec_is(data.frame(x = 1), pf)) expect_false(vec_is(data.frame(x = 1, y = 2), pf)) expect_true(vec_is(data.frame(x = 1, y = 2, z = 3), pf)) }) test_that("can assert partial frames based on column type", { pf <- partial_frame(y = 1) expect_false(vec_is(data.frame(y = "2"), pf)) }) test_that("incompatible data frames are an error", { df <- data.frame(y = 1) expect_error(vec_ptype2(df, partial_frame(y = chr())), class = "vctrs_error_incompatible_type") expect_error(new_partial_frame(df, data.frame(y = chr())), class = "vctrs_error_incompatible_type") }) test_that("dispatch is symmetric with tibbles", { # Now causes infloop with the new double-dispatch mechanism because # of the way we call vec_ptype2() from is_same_type() for partial # types return(expect_true(TRUE)) left <- vec_ptype2(partial_frame(x = 1), tibble::tibble(x = 1)) right <- vec_ptype2(tibble::tibble(x = 1), partial_frame(x = 1)) expect_identical(left, right) }) test_that("can take the common type with partial frames", { exp <- data.frame(x = dbl(), y = chr(), a = chr()) out <- vec_ptype_common( partial_frame(x = double(), a = character()), data.frame(x = 1L, y = "a") ) expect_identical(out, exp) out <- vec_ptype_common( data.frame(x = 1L, y = "a"), partial_frame(x = double(), a = character()) ) expect_identical(out, data.frame(x = dbl(), y = chr(), a = chr())) }) test_that("can rbind with a partial frame prototype", { out <- vec_rbind( data.frame(x = 1L, y = "a"), data.frame(x = FALSE, z = 10), .ptype = partial_frame(x = double(), a = character()) ) exp <- data.frame( x = dbl(1, 0), y = chr("a", NA), z = dbl(NA, 10), a = chr(NA, NA) ) expect_identical(out, exp) }) vctrs/tests/testthat/test-type2.R0000644000176200001440000003154013723213047016565 0ustar liggesusers test_that("base coercions are symmetric and unchanging", { types <- list( logical(), integer(), double(), character(), raw(), list() ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) expect_known_output(mat, test_path("test-type2.txt"), print = TRUE) }) test_that("new classes are uncoercible by default", { x <- structure(1:10, class = "vctrs_nonexistant") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") }) test_that("vec_typeof2() returns common type", { nms <- names(base_empty_types) for (i in seq_along(base_empty_types)) { this <- nms[[i]] for (j in seq_along(base_empty_types)) { that <- nms[[j]] if (i <= j) { exp <- paste0("vctrs_type2_", this, "_", that) } else { exp <- paste0("vctrs_type2_", that, "_", this) } out <- vec_typeof2(base_empty_types[[this]], base_empty_types[[that]]) expect_identical(out, exp) } } }) test_that("vec_typeof2_s3() returns common type", { all_base_empty_types <- c(base_empty_types, base_s3_empty_types) nms_s3 <- names(base_s3_empty_types) nms <- names(all_base_empty_types) for (i in seq_along(all_base_empty_types)) { this <- nms[[i]] for (j in seq_along(all_base_empty_types)) { that <- nms[[j]] # Skip when we have two non s3 objects if (!(this %in% nms_s3) & !(that %in% nms_s3)) { next } if (i <= j) { exp <- paste0("vctrs_type2_s3_", this, "_", that) } else { exp <- paste0("vctrs_type2_s3_", that, "_", this) } out <- vec_typeof2_s3(all_base_empty_types[[this]], all_base_empty_types[[that]]) expect_identical(out, exp) } } }) test_that("vec_ptype2() dispatches when inputs have shape", { expect_identical(dim(vec_ptype2(int(), matrix(nrow = 3, ncol = 4))), c(0L, 4L)) expect_identical(dim(vec_ptype2(matrix("", nrow = 3), c("", "", ""))), c(0L, 1L)) }) test_that("vec_ptype2() requires vectors", { expect_error(vec_ptype2(NULL, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(NA, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(list(), quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), NULL), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), NA), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), list()), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), quote(name)), class = "vctrs_error_scalar_type") }) test_that("vec_ptype2() with unspecified requires vectors", { expect_error(vec_ptype2(unspecified(), quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), unspecified()), class = "vctrs_error_scalar_type") }) test_that("vec_ptype2() forwards argument tag", { expect_error(vec_ptype2(quote(name), list(), x_arg = "foo"), "`foo`", class = "vctrs_error_scalar_type") expect_error(vec_ptype2(list(), quote(name), y_arg = "foo"), "`foo`", class = "vctrs_error_scalar_type") }) test_that("stop_incompatible_type() checks for scalars", { expect_error(stop_incompatible_type(NA, foobar(), x_arg = "x", y_arg = "y"), class = "vctrs_error_scalar_type") expect_error(vec_ptype_common(NA, foobar()), class = "vctrs_error_scalar_type") expect_error(vec_ptype_common(foobar(), list()), class = "vctrs_error_scalar_type") }) test_that("vec_ptype2() methods forward args to stop_incompatible_type()", { expect_args(new_hidden(), lgl(), x_arg = "foo", y_arg = "bar") expect_args(lgl(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(int(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(dbl(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(chr(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(list(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(new_rcrd(list(x = NA)), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(data.frame(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(Sys.Date(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(as.difftime(1, units = "hours"), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(factor(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(ordered(""), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(ordered(""), factor(), x_arg = "foo", y_arg = "bar") expect_args(bit64::as.integer64(1), new_hidden(), x_arg = "foo", y_arg = "bar") }) test_that("vec_ptype2() data frame methods builds argument tags", { expect_known_output(file = test_path("test-type2-error-messages.txt"), { cat_line("Bare objects:") try2(vec_ptype2("foo", 10)) cat_line("Nested dataframes:") df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) try2(vec_ptype2(df1, df2)) }) }) test_that("stop_incompatible_type() can be called without argument tags", { expect_error(stop_incompatible_type(1, 2, x_arg = "", y_arg = ""), " and ", class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2() returns empty prototype when other input is NULL", { expect_identical(vec_ptype2(1:5, NULL), int()) expect_identical(vec_ptype2(NULL, 1:5), int()) }) test_that("Subclasses of data.frame dispatch to `vec_ptype2()` methods", { local_methods( vec_ptype2.quuxframe = function(x, y, ...) UseMethod("vec_ptype2.quuxframe"), vec_ptype2.quuxframe.data.frame = function(x, y, ...) "dispatched!", vec_ptype2.data.frame.quuxframe = function(x, y, ...) "dispatched!" ) quux <- structure(data.frame(), class = c("quuxframe", "data.frame")) expect_identical(vec_ptype2(quux, mtcars), "dispatched!") expect_identical(vec_ptype2(mtcars, quux), "dispatched!") quux <- structure(data.frame(), class = c("quuxframe", "tbl_df", "data.frame")) expect_identical(vec_ptype2(quux, mtcars), "dispatched!") expect_identical(vec_ptype2(mtcars, quux), "dispatched!") }) test_that("Subclasses of `tbl_df` do not have `tbl_df` common type (#481)", { quux <- tibble() quux <- foobar(quux) expect_incompatible_df( vec_ptype_common(quux, tibble()), tibble() ) expect_incompatible_df( vec_ptype_common(tibble(), quux), tibble() ) expect_df_fallback_warning( expect_identical( vec_ptype_common_df_fallback(quux, tibble()), tibble() ) ) expect_df_fallback_warning( expect_identical( vec_ptype_common_df_fallback(tibble(), quux), tibble() ) ) }) test_that("Column name encodings are handled correctly in the common type (#553)", { encs <- encodings() data <- list(chr()) df_utf8 <- tibble::as_tibble(set_names(data, encs$utf8)) df_unknown <- tibble::as_tibble(set_names(data, encs$unknown)) expect_identical(vec_ptype2(df_utf8, df_unknown), df_utf8) }) test_that("vec_is_subtype() determines subtyping relationship", { expect_true(vec_is_subtype(lgl(), int())) expect_false(vec_is_subtype(int(), lgl())) expect_false(vec_is_subtype(lgl(), chr())) expect_false(vec_is_subtype(chr(), lgl())) local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) UseMethod("vec_ptype2.vctrs_foobar"), vec_ptype2.vctrs_foobar.logical = function(x, y, ...) logical(), vec_ptype2.logical.vctrs_foobar = function(x, y, ...) logical() ) expect_true(vec_is_subtype(foobar(TRUE), lgl())) expect_false(vec_is_subtype(lgl(), foobar(TRUE))) }) test_that("can override scalar vector error message for base scalar types", { verify_errors({ expect_error(vec_ptype2(NULL, quote(x), y_arg = "foo"), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(x), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type") }) }) test_that("can override scalar vector error message for S3 types", { verify_errors({ expect_error(vec_ptype2(NULL, foobar(), y_arg = "foo"), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(foobar(), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type") }) }) test_that("ptype2 and cast errors when same class fallback is impossible are informative", { verify_errors({ expect_error( vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type" ) "Incompatible attributes bullets are not show when methods are implemented" with_foobar_cast <- function(expr ) { with_methods( vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) vec_default_cast(x, to, ...), expr ) } with_foobar_ptype2 <- function(expr ) { with_methods( vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) vec_default_ptype2(x, y, ...), expr ) } expect_error( with_foobar_cast(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type" ) expect_error( with_foobar_ptype2(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type" ) }) }) test_that("common type errors don't mention columns if they are compatible", { verify_errors({ df <- data.frame(x = 1, y = "") foo <- structure(df, class = c("vctrs_foo", "data.frame")) bar <- structure(df, class = c("vctrs_bar", "data.frame")) expect_error( vec_cast_no_fallback(foo, bar), class = "vctrs_error_incompatible_type" ) }) }) test_that("common type warnings for data frames take attributes into account", { verify_errors({ foobar_bud <- foobar(mtcars, bud = TRUE) foobar_boo <- foobar(mtcars, boo = TRUE) expect_df_fallback_warning(vec_ptype2_fallback(foobar_bud, foobar_boo)) "For reference, warning for incompatible classes" expect_df_fallback_warning(vec_ptype2_fallback(foobar(mtcars), foobaz(mtcars))) "For reference, error when fallback is disabled" expect_error( vec_ptype2_no_fallback(foobar(mtcars), foobaz(mtcars)), class = "vctrs_error_incompatible_type" ) }) }) test_that("vec_ptype2() methods get prototypes", { x <- NULL y <- NULL local_methods(vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x <<- x y <<- y NULL }) vec_ptype2(foobar(1:3), foobar(letters)) expect_identical(x, foobar(int())) expect_identical(y, foobar(chr())) vec_ptype2(foobar(mtcars), foobar(iris)) expect_identical(x, foobar(mtcars[0, , drop = FALSE])) expect_identical(y, foobar(iris[0, , drop = FALSE])) }) test_that("vec_ptype2() allows vec_ptype() to return another type", { out <- with_methods( vec_restore.vctrs_foobar = function(x, to, ...) unstructure(x), vec_ptype2(foobar(1), foobar(2)) ) expect_identical(out, dbl()) }) test_that("vec_ptype2() errors have informative output", { verify_output(test_path("error", "test-type2.txt"), { "# can override scalar vector error message for base scalar types" vec_ptype2(NULL, quote(x), y_arg = "foo") vec_ptype2(quote(x), NULL, x_arg = "foo") "# can override scalar vector error message for S3 types" vec_ptype2(NULL, foobar(), y_arg = "foo") vec_ptype2(foobar(), NULL, x_arg = "foo") "# ptype2 and cast errors when same class fallback is impossible are informative" vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE)) vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE)) "Incompatible attributes bullets are not show when methods are implemented" with_foobar_cast <- function(expr ) { with_methods( vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) vec_default_cast(x, to, ...), expr ) } with_foobar_ptype2 <- function(expr ) { with_methods( vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) vec_default_ptype2(x, y, ...), expr ) } with_foobar_cast(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE))) with_foobar_ptype2(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE))) "# common type errors don't mention columns if they are compatible" df <- data.frame(x = 1, y = "") foo <- structure(df, class = c("vctrs_foo", "data.frame")) bar <- structure(df, class = c("vctrs_bar", "data.frame")) vec_cast_no_fallback(foo, bar) "# common type warnings for data frames take attributes into account" foobar_bud <- foobar(mtcars, bud = TRUE) foobar_boo <- foobar(mtcars, boo = TRUE) vec_ptype2_fallback(foobar_bud, foobar_boo) "For reference, warning for incompatible classes" vec_ptype2_fallback(foobar(mtcars), foobaz(mtcars)) "For reference, error when fallback is disabled" vec_ptype2_no_fallback(foobar(mtcars), foobaz(mtcars)) }) }) vctrs/tests/testthat/helper-cast.R0000644000176200001440000000034713723213047016755 0ustar liggesusers expect_lossy_cast <- function(expr) { cnd <- NULL out <- with_handlers( warning = calling(function(x) { cnd <<- x cnd_muffle(x) }), expr ) expect_s3_class(cnd, "vctrs_warning_cast_lossy") out } vctrs/tests/testthat/test-type-vctr.R0000644000176200001440000003771414042540502017462 0ustar liggesusers test_that("constructor sets attributes", { x <- new_vctr(1:4, class = "x", x = 1) expect_equal(x, structure(1:4, class = c("x", "vctrs_vctr"), x = 1)) x <- new_vctr(1:4, class = "x", x = 1, inherit_base_type = TRUE) expect_equal(x, structure(1:4, class = c("x", "vctrs_vctr", "integer"), x = 1)) }) test_that(".data must be a vector", { expect_error(new_vctr(mean), "vector type") }) test_that("attributes other than names are ignored", { out <- new_vctr(structure(1, a = 1)) expect_null(attributes(out)$a) }) test_that("default format method is internal", { x <- new_vctr(1, class = "x") expect_equal(format(x), format(x)) }) test_that("vctr class is proxied", { expect_identical(vec_proxy(new_vctr(1:3)), new_vctr(1:3)) expect_identical(vec_proxy(new_vctr(as.list(1:3))), unclass(new_vctr(as.list(1:3)))) expect_true(vec_is(new_vctr(as.list(1:3)))) }) test_that("Can opt out of base type", { x <- new_vctr(1, class = "x", inherit_base_type = FALSE) expect_s3_class(x, c("x", "vctrs_vctr"), exact = TRUE) }) test_that("base type is always set for lists", { expect_s3_class(new_vctr(list()), "list") }) test_that("cannot opt out of the base type with lists", { expect_error(new_vctr(list(), inherit_base_type = FALSE), "must inherit from the base type") }) test_that("data frames are not allowed", { expect_error(new_vctr(mtcars), "can't be a data frame") }) test_that("attributes must be named", { expect_error(vec_set_attributes(1, list(1)), "must be named") expect_error(vec_set_attributes(1, list(y = 1, 2)), "2 does not") }) test_that("can strip all attributes without adding new ones", { expect_equal(vec_set_attributes(structure(1, a = 1), NULL), 1) }) test_that("`c.vctrs_vctr()` checks for default method arguments (#791)", { expect_error(c(new_vctr(1), recursive = TRUE), "`recursive` must be") expect_error(c(new_vctr(1), use.names = FALSE), "`use.names` must be") }) # Cast/restore ------------------------------------------------------------ test_that("cast to NULL returns x", { x <- new_vctr(1, class = "x") expect_equal(vec_cast(NULL, x), NULL) }) test_that("cast succeeds if attributes equal", { x1 <- new_vctr(1, class = "x", a = 1, b = 2) x2 <- new_vctr(2, class = "x", a = 1, b = 2) expect_equal(vec_cast(x1, x2), x1) expect_equal(vec_cast(x2, x1), x2) }) test_that("and fails if attributes are different", { x1 <- new_vctr(1, class = "x", a = 1, b = 2) x2 <- new_vctr(2, class = "x", a = 2, b = 2) expect_error(vec_cast(x1, x2), class = "vctrs_error_incompatible_type") }) test_that("restoring to atomic vector of same type preserves attributes", { x1 <- new_vctr(1, class = "x") x2 <- new_vctr(2, class = "x") expect_equal(vec_restore(2, x1), x2) }) test_that("restoring to atomic vector of different type throws error", { x1 <- new_vctr(1, class = "x") expect_error(vec_restore("x", x1), class = "vctrs_error_incompatible_type") }) test_that("base coercion methods mapped to vec_cast", { x <- new_vctr(1, inherit_base_type = FALSE) expect_error(as.logical(x), class = "vctrs_error_incompatible_type") expect_error(as.integer(x), class = "vctrs_error_incompatible_type") expect_error(as.logical(x), class = "vctrs_error_incompatible_type") expect_error(as.double(x), class = "vctrs_error_incompatible_type") expect_error(as.character(x), class = "vctrs_error_incompatible_type") expect_error(as.Date(x), class = "vctrs_error_incompatible_type") expect_error(as.POSIXct(x), class = "vctrs_error_incompatible_type") expect_error(as.POSIXlt(x), class = "vctrs_error_incompatible_type") expect_equal(as.list(x), list(x)) }) test_that("as.data.frame creates data frame", { x <- new_vctr(1:3) df <- as.data.frame(x) expect_s3_class(df, "data.frame") expect_equal(nrow(df), 3) expect_named(df, "x") }) test_that("as.data.frame on shaped vctrs doesn't bring along extra attributes", { x <- new_vctr(1:3, foo = "bar", dim = c(3L, 1L)) df <- as.data.frame(x) expect_null(attr(df, "foo", exact = TRUE)) }) test_that("as.data.frame2() unclasses input to avoid dispatch on as.data.frame()", { x <- structure(1:2, dim = c(1L, 2L), dimnames = list("r1", c("c1", "c2")), class = "foo") expect <- data.frame(c1 = 1L, c2 = 2L, row.names = "r1") local_methods(as.data.frame.foo = function(x, ...) "dispatched!") expect_identical(as.data.frame2(x), expect) }) test_that("as.list() chops vectors", { expect_identical( as.list(new_vctr(1:3)), list(new_vctr(1L), new_vctr(2L), new_vctr(3L)) ) x <- new_vctr(as.list(1:3)) expect_identical(as.list(x), as.list(1:3)) }) # equality + comparison + arith + math --------------------------------------- test_that("equality functions remapped", { x <- new_vctr(c(1, 1, NA), inherit_base_type = FALSE) expect_error(x == 1, class = "vctrs_error_incompatible_type") expect_error(x != 1, class = "vctrs_error_incompatible_type") expect_equal(is.na(x), c(FALSE, FALSE, TRUE)) expect_true(anyNA(x)) expect_equal(unique(x), new_vctr(c(1, NA), inherit_base_type = FALSE)) expect_equal(duplicated(x), c(FALSE, TRUE, FALSE)) expect_true(anyDuplicated(x)) }) test_that("is.na<-() supported", { x <- new_vctr(1:4) is.na(x) <- c(FALSE, FALSE, TRUE, NA) expect_identical(x, new_vctr(c(1:2, NA, 4L))) x <- new_vctr(1:4) is.na(x) <- TRUE expect_identical(x, new_vctr(rep(NA_integer_, 4))) x <- new_vctr(1:4) is.na(x) <- c(2, 3) expect_identical(x, new_vctr(c(1L, NA, NA, 4L))) names <- c("a", "b", "c", "d") x <- set_names(new_vctr(1:4), names) is.na(x) <- c("d", "b", "b") expect_identical(x, set_names(new_vctr(c(1L, NA, 3L, NA)), names)) x <- new_vctr(1:4) expect_error(is.na(x) <- "x", "character names to index an unnamed vector") expect_error(is.na(x) <- 5, class = "vctrs_error_subscript_oob") }) test_that("comparison functions remapped", { local_methods( vec_proxy_compare.bizzaro = function(x, ...) -vec_data(x) ) x1 <- new_vctr(c(1, 2), class = "bizzaro") x2 <- new_vctr(2, class = "bizzaro") expect_equal(order(x1), c(2L, 1L)) expect_equal(x1 < x2, c(FALSE, FALSE)) expect_equal(x1 <= x2, c(FALSE, TRUE)) expect_equal(x1 > x2, c(TRUE, FALSE)) expect_equal(x1 >= x2, c(TRUE, TRUE)) }) test_that("operators remapped", { local_methods( vec_arith.bizzaro = function(op, x, y) 1L ) x <- new_vctr(c(1, 2), class = "bizzaro") expect_equal(x + 1, 1L) expect_equal(x - 1, 1L) expect_equal(x * 1, 1L) expect_equal(x / 1, 1L) expect_equal(x ^ 1, 1L) expect_equal(x %% 1, 1L) expect_equal(x %/% 1, 1L) expect_equal(x & 1, 1L) expect_equal(x | 1, 1L) expect_equal(!x, 1L) expect_equal(+x, 1L) expect_equal(-x, 1L) }) test_that("math functions overridden", { local_methods( vec_math.bizzaro = function(fn, x, ...) vec_math_base(fn, 2L) ) x <- new_vctr(c(1, NA), class = "bizzaro") expect_equal(mean(x), 2L) expect_equal(sum(x), 2L) expect_equal(is.finite(x), TRUE) expect_equal(is.infinite(x), FALSE) expect_equal(is.nan(x), FALSE) }) test_that("diff matches base R", { local_methods( vec_arith.vctrs_minus = function(op, x, y) vec_arith_base(op, x, y) ) x1 <- cumsum(cumsum(1:10)) x2 <- new_vctr(x1, class = "vctrs_minus") expect_equal(diff(x2), diff(x1)) expect_equal(diff(x2, lag = 2L), diff(x1, lag = 2L)) expect_equal(diff(x2, differences = 2L), diff(x1, differences = 2L)) expect_equal(diff(x2, lag = 11), x2[0L]) expect_equal(diff(x2, differences = 11), x2[0L]) }) # names ------------------------------------------------------------------- test_that("all elements must be named if any are named", { expect_error(new_vctr(setNames(1:2, c("a", NA))), "named") expect_error(new_vctr(setNames(1:2, c("a", ""))), "named") }) test_that("can not provide invalid names", { x <- new_vctr(c(a = 1, b = 2)) expect_error(names(x) <- "x", "length") expect_error(names(x) <- c("x", NA), "named") expect_error(names(x) <- c("x", ""), "named") expect_error(names(x) <- c("x", "y", "z"), "length") expect_error(names(x) <- NULL, NA) }) test_that("can use [ and [[ with names", { local_methods( vec_ptype2.vctrs_vctr.double = function(...) dbl(), vec_ptype2.double.vctrs_vctr = function(...) dbl() ) x <- new_vctr(c(a = 1, b = 2)) expect_equal(x["b"], new_vctr(c(b = 2))) expect_equal(x[["b"]], new_vctr(2)) # [[ drops names x[["c"]] <- 3 expect_equal(x[["c"]], new_vctr(3)) x["d"] <- 4 expect_equal(x[["d"]], new_vctr(4)) }) test_that("can use [ and [[ with names - list vctr", { local_methods( vec_ptype2.vctrs_vctr = function(...) list(), vec_ptype2.list.vctrs_vctr = function(...) list() ) y <- new_vctr(list(a = 1, b = 2)) y[["c"]] <- 3 expect_equal(y[["c"]], 3) y["d"] <- list(4) expect_equal(y[["d"]], 4) }) test_that("can use [[<- to replace n-dimensional elements", { local_methods( vec_restore.vctrs_mtrx = function(x, to, ...) x, vec_ptype2.double.vctrs_mtrx = function(...) dbl(), vec_ptype2.vctrs_mtrx = function(...) dbl() ) x <- new_vctr(rep(1, times = 4), dim = c(2, 2), class = "vctrs_mtrx") x[[2, 2]] <- 4 expect_equal(x[[2, 2]], 4) }) test_that("subsetting preserves attributes", { x <- new_vctr(c(a = 1, b = 2)) attr(x, "extra") <- TRUE y <- x[1] expect_equal(attr(x, "extra"), TRUE) }) test_that("$ inherits from underlying vector", { x1 <- new_vctr(c(a = 1, b = 2)) expect_error(x1$a, "atomic vectors") expect_error(x1$a <- 2, "atomic vectors") x2 <- new_vctr(list(a = 1, b = 2)) expect_equal(x2$a, 1) x2$a <- 10 expect_equal(x2$a, 10) }) # unsupported/unimplemented operations -------------------------------------- test_that("can't touch protected attributes", { x <- new_vctr(1:4) expect_error(dim(x) <- c(2, 2), class = "vctrs_error_unsupported") expect_error(dimnames(x) <- list("x"), class = "vctrs_error_unsupported") expect_error(levels(x), class = "vctrs_error_unsupported") expect_error(levels(x) <- "x", class = "vctrs_error_unsupported") # but it's ok to set names to NULL; this happens at least in vec_c # and maybe elsewhere. We may need to back off on this level of # strictness in the future expect_error(names(x) <- NULL, NA) }) test_that("summary is unimplemented", { x <- new_vctr(1:4) expect_error(summary(x), class = "vctrs_error_unimplemented") }) # hidden class ------------------------------------------------------------ # We can't construct classes in test because the methods are not found # when vctr generics call other generics. Instead we rely on a very simple # class implemented in vctr.R test_that("class preserved when subsetting", { h <- new_hidden(1:4) expect_s3_class(h, "hidden") expect_s3_class(h[1], "hidden") expect_s3_class(h[[1]], "hidden") expect_s3_class(rep(h[1], 2), "hidden") expect_s3_class(as.list(h)[[1]], "hidden") length(h) <- 3 expect_s3_class(h, "hidden") }) test_that("RHS cast when using subset assign", { local_hidden() h <- new_hidden(1) expect_error(h[[1]] <- "x", class = "vctrs_error_incompatible_type") expect_error(h[1] <- "x", class = "vctrs_error_incompatible_type") h[2] <- 1 expect_equal(h, new_hidden(c(1, 1))) h[[2]] <- 2 expect_equal(h, new_hidden(c(1, 2))) }) test_that("c passes on to vec_c", { local_hidden() h <- new_hidden(1) expect_equal(c(h), h) expect_equal(c(h, NULL), h) expect_equal(c(h, 1), rep(h, 2)) expect_equal(c(h, h), rep(h, 2)) }) test_that("summaries preserve class", { h <- new_hidden(c(1, 2)) expect_equal(sum(h), new_hidden(3)) expect_equal(mean(h), new_hidden(1.5)) }) test_that("methods using vec_proxy_compare agree with base", { h <- new_hidden(c(1:10)) h_na <- new_hidden(c(NA, 1:10)) expect_agree <- function(f, x, na.rm = FALSE) { f <- enexpr(f) expect_equal(vec_data((!!f)(x, na.rm = na.rm)), (!!f)(vec_data(x), na.rm = na.rm)) } expect_agree(min, h) expect_agree(max, h) expect_agree(range, h) expect_agree(min, h_na) expect_agree(max, h_na) expect_agree(range, h_na) expect_agree(min, h_na, na.rm = TRUE) expect_agree(max, h_na, na.rm = TRUE) expect_agree(range, h_na, na.rm = TRUE) }) test_that("can put in data frame", { h <- new_hidden(1:4) expect_named(as.data.frame(h), "h") expect_named(data.frame(x = h), "x") }) test_that("base coercions default to vec_cast", { local_hidden() h <- new_hidden(1) expect_error(as.character(h), class = "vctrs_error_incompatible_type") expect_error(as.integer(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.factor(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.ordered(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.difftime(h), class = "vctrs_error_incompatible_type") expect_equal(as.logical(h), TRUE) expect_equal(as.double(h), 1) }) test_that("default print and str methods are useful", { h <- new_hidden(1:4) expect_known_output( { print(h) cat("\n") print(h[0]) cat("\n") str(h) }, file = "test-vctr-print.txt", ) }) test_that("default print method shows names", { h <- new_hidden(c(A = 1, B = 2, C = 3)) expect_known_output( { print(h) }, file = "test-vctr-print-names.txt", ) }) test_that("can't transpose", { h <- new_hidden(1:4) expect_error(t(h), class = "vctrs_error_unsupported") }) test_that("shaped vctrs can be cast to data frames", { x <- new_vctr(1:4, dim = 4) expect_identical(as.data.frame(x), data.frame(V1 = 1:4)) x <- new_vctr(1:4, dim = c(2, 2)) expect_identical(as.data.frame(x), data.frame(V1 = 1:2, V2 = 3:4)) }) # slicing ----------------------------------------------------------------- test_that("additional subscripts are handled (#269)", { new_2d <- function(.data, dim) { vctrs::new_vctr(.data, dim = dim, class = "vctrs_2d") } x <- new_2d(c(1, 2), dim = c(2L, 1L)) expect_identical(x[1], new_2d(1, dim = c(1, 1))) expect_identical(x[1, 1], new_2d(1, dim = c(1, 1))) expect_identical(x[, 1], new_2d(c(1, 2), dim = c(2, 1))) }) # summary generics -------------------------------------------------------- test_that("na.rm is forwarded to summary generics", { x <- new_vctr(dbl(1, 2, NA)) expect_identical(mean(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(mean(x, na.rm = TRUE), new_vctr(1.5)) expect_identical(min(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(min(x, na.rm = TRUE), new_vctr(1)) expect_identical(max(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(max(x, na.rm = TRUE), new_vctr(2)) x <- new_vctr(lgl(TRUE, NA)) expect_identical(all(x, na.rm = FALSE), lgl(NA)) expect_identical(all(x, na.rm = TRUE), TRUE) }) test_that("Summary generics behave identically to base for empty vctrs (#88)", { expect_warning( expect_identical( new_vctr(max(numeric())), max(new_vctr(numeric())) ) ) expect_warning( expect_identical( new_vctr(min(numeric())), min(new_vctr(numeric())) ) ) expect_identical( new_vctr(range(1)), range(new_vctr(1)) ) expect_identical( new_vctr(prod(numeric())), prod(new_vctr(numeric())) ) expect_identical( new_vctr(sum(numeric())), sum(new_vctr(numeric())) ) expect_identical( new_vctr(cummax(numeric())), cummax(new_vctr(numeric())) ) expect_identical( new_vctr(cummin(numeric())), cummin(new_vctr(numeric())) ) expect_identical( new_vctr(cumprod(numeric())), cumprod(new_vctr(numeric())) ) expect_identical( new_vctr(cumsum(numeric())), cumsum(new_vctr(numeric())) ) expect_identical( new_vctr(mean(numeric())), mean(new_vctr(numeric())) ) }) test_that("generic predicates return logical vectors (#251)", { x <- new_vctr(c(1, 2)) expect_identical(is.finite(x), c(TRUE, TRUE)) expect_identical(is.infinite(x), c(FALSE, FALSE)) expect_identical(is.nan(x), c(FALSE, FALSE)) x <- new_vctr(TRUE) expect_identical(any(x), TRUE) expect_identical(all(x), TRUE) }) test_that("xtfrm() returns a bare vector", { expect_identical(xtfrm(new_vctr(1:3, foo = "bar")), 1:3) }) test_that("xtfrm() works with character subclass", { expect_identical(xtfrm(new_vctr(chr())), int()) }) vctrs/tests/testthat/test-hash-hash.txt0000644000176200001440000000756314042546237020021 0ustar liggesusers$lgl [1] 70 a2 85 ef b9 79 37 9e 59 df 73 0b $int [1] 70 a2 85 ef bf 3c 2c cf e0 2d 28 24 3e 2c d4 c2 86 cd 44 6a c1 c6 22 fb 7d [26] 28 01 b7 c4 de 70 e7 cc a2 b3 60 49 7e 5c 87 bd 77 d8 31 2c 27 c0 1a 4b f7 [51] f2 33 aa 4d 23 5e fe 51 52 6a 75 cd 5c f3 a1 18 08 77 d0 cc 3b 74 cd ce 28 [76] 80 da 82 82 70 1e db 49 88 ef 6a 83 16 45 b6 ef 6f a8 63 fc 59 f3 50 ab 1d [101] 56 e3 06 58 61 10 57 e3 99 3e c1 e0 53 77 22 cd 86 b9 75 87 14 33 2e e9 31 [126] 21 82 48 cb 87 24 99 c7 b1 e2 a0 e3 2a 76 8c 99 50 f3 0c 08 81 8d 36 aa b5 [151] 72 f0 41 78 9f 1a fc 7b 81 05 51 8d 37 0d 15 47 b7 a6 7b c3 7b 7b ce e5 26 [176] 1d 43 60 95 97 d2 f2 a8 41 23 3c 26 82 d1 13 cb 66 41 e5 1d 8b 2e 28 1f 9d [201] f1 6f 67 ae 0c 1b 2f a0 3a 45 d9 12 0a 93 29 eb 4d a4 f9 d8 72 99 a2 b1 2e [226] 0e 12 21 ee 74 0d fc 0f f3 a9 bf 4c f9 bf c1 ee b1 42 35 70 ec 24 34 41 bf [251] 2e 12 41 72 24 81 68 83 4e 10 76 9a c5 56 d1 1d 56 95 9d 60 e6 31 5f d2 48 [276] 68 0d dc b3 7a 3a b1 0c 83 22 aa b9 cc 3b 72 d4 6c 58 88 e4 ce 6d 3f 7b f9 [301] 13 5b 06 d1 b5 d1 03 4e 4b 89 b8 59 fe ca aa 94 e7 03 74 de fe d4 11 15 e9 [326] a0 37 7c e0 61 8b 3c da f6 91 e3 3d 55 02 7f 33 24 73 1b bb 11 65 50 e3 51 [351] 16 9c a2 dc 9b 79 e0 27 fc e1 fe c4 5a 7e 4c e4 cd a7 43 f9 d4 82 f5 8e 6b [376] f4 1a a5 4f 41 4b 7a 10 f2 d2 98 6e 7e 7f 11 0f c9 93 eb 84 3a a3 d6 05 9c $dbl1 [1] 00 c1 04 29 59 03 e1 f3 6a 26 91 1e a7 0a 94 81 d0 c2 58 ef 6a b0 2f 59 c2 [26] f7 fe 63 c3 ce 8a 90 1d 1d ce 14 05 c8 d4 e8 2f cf 98 46 e2 99 af e3 b9 cf [51] 1b 4b 39 8b 47 f3 e3 49 63 d1 0a af 42 9d 48 fe c7 7b ab b4 ab 5b a8 d7 94 [76] 6e 0c 82 24 08 fc 20 d8 cb 7a bb c7 ba 9e 3b 0d d5 d6 8c 9e 2b 48 97 9e c8 [101] f0 c9 4b ad da 6b 9e 41 8b 82 e8 82 ba 6a f2 99 a1 81 82 d5 c0 ca 3d fb ce [126] 54 60 0e 7d 93 bd 32 8e 02 d8 db e0 cb f9 e7 f5 ae e1 b9 73 17 5d ed ff a7 [151] 43 68 23 ee 2d 63 73 d9 a0 43 f6 72 2d 68 19 ff fc 46 0b 49 69 3e 1f bc 03 [176] 1d 1a ab de c9 b0 4a ed 85 af 2f e3 fc 80 99 b7 c4 7f 65 ac c1 96 17 5b 86 [201] 78 2e c8 67 65 45 5d 95 77 3d 3a 9b d8 c7 01 fd e7 1d 5b 2d f6 19 7b 18 20 [226] 67 7c af 43 01 54 b6 2c 99 10 85 28 c4 68 e1 99 67 d9 ae f4 5a e2 04 87 14 [251] 63 8a 89 96 5e f6 fc 9c a3 6a f1 23 29 09 82 c9 2a cc 51 b7 8f 27 53 47 a0 [276] a6 fa 73 2d 07 86 f5 cc 07 98 1a f2 6c d4 e0 f2 43 31 90 98 2c 94 28 25 40 [301] 23 1f 2f 2f 41 50 c3 d2 65 6f 01 32 a1 7f 7a 40 a8 5c 11 db 88 7d 7e a4 1d [326] 7d 43 c3 17 cd 2e ca dc 56 aa c3 74 8d 37 1a 16 07 d4 c5 0a f9 03 8f a7 6d [351] 23 c5 b8 05 ad 7c e1 33 d1 0e 0e 8c 19 72 0d 80 3e c7 80 5c 77 58 b1 ef 89 [376] 39 a5 a2 72 0a c4 c4 c5 16 47 90 da 47 49 a1 b1 81 70 ce cf cf b3 5c ca 3a $dbl2 [1] b9 79 37 9e 87 e6 d0 04 ff 57 34 47 e9 da db 08 35 18 46 24 36 7b 48 f3 75 [26] 78 db 2d 6c 77 76 c5 67 0a 4b b9 64 57 19 b8 f4 69 d5 5d c2 04 8f 91 55 6a [51] d5 2f 33 2d 76 e9 9d f6 f8 5e cf c6 06 5d 6a 73 96 c6 21 e5 ca ad 3f d6 4c [76] 1d 3c 72 9b b3 3a 66 7a e4 91 0d 96 82 63 22 50 12 56 6b 1a 7f 0f 3e 5b e8 [101] bc 28 55 9c 90 22 9c f7 9a f9 60 5f 64 8c d0 c8 c4 cd fa 2d 20 1a db 65 c0 [126] 7e 70 5f 2d 59 57 6f cc 52 4f 75 87 f9 6a 7a e8 1e 2c 5c e7 1b 72 ea 69 8a [151] 6b fb a5 7d 41 05 1a e3 ed e6 ea 50 2b 06 3f 39 e8 a1 e0 69 29 56 5e 0e 72 [176] 29 b2 4c 65 ae 76 d2 cd 18 6c 74 3d f4 62 1f 67 51 37 e4 1f eb 54 0f 00 71 [201] 5b da 32 37 d5 fc 9d f3 c3 0e a6 81 09 19 25 1e 06 61 2f 2c 62 7a 33 fc 41 [226] 13 fc f9 44 55 57 9d d6 4b a7 ba 87 fe 1e 00 d7 1c 0c 34 ba 25 ce 21 7f 97 [251] d5 11 f5 cf 2f a4 34 2f 60 a7 b2 a5 2c 8f 90 29 c5 55 06 c6 6a 19 e4 bc 46 [276] 24 c0 43 8c bf ac d9 db 43 ea 2f 29 da 48 64 fa 51 55 85 81 40 cd ac bc 6e [301] 9b d2 07 e2 94 df 2a 7d c2 bd 63 87 5e 5e 47 8d 2d 21 ac 92 0a b8 52 e4 e5 [326] fb ec d6 5b 17 34 01 02 21 78 b2 83 28 f9 f0 f5 ae 90 27 30 aa 4b 5c 6a d2 [351] da 71 2e 3c c6 68 a7 9f a9 36 16 81 cb cb d8 6d 07 96 6e f4 f9 29 8d 61 f7 [376] 86 51 af f2 7e cc 3d 4f 24 8e e0 9f 70 be b1 af c9 73 f2 4b c9 00 c1 04 29 vctrs/tests/testthat/test-compare.R0000644000176200001440000002073513753021253017153 0ustar liggesusers test_that("inputs must be vectors", { expect_error(vec_compare(NULL, 1), class = "vctrs_error_scalar_type") expect_error(vec_compare(1, NULL), class = "vctrs_error_scalar_type") }) test_that("matches R ordering", { expect_same <- function(x, y) { expect_equal(vec_compare(!!x, !!y), cmp(!!x, !!y)) } expect_same(c(NA, FALSE, TRUE), FALSE) expect_same(c(NA, -100L, 0L, 100L), 0L) expect_same(c(NA, -Inf, -100, 100, Inf), 0L) expect_same(c(NA, NaN, 0), NA) expect_same(c(NA, "a", "b", "c"), "b") expect_same(as.raw(2:5), as.raw(4)) }) test_that("NAs equal when requested", { expect_value <- function(x, y, val, .ptype = NULL) { expect_equal(vec_compare(!!x, !!y, .ptype = .ptype, na_equal = TRUE), !!val) } expect_value(NA, NA, 0L) expect_value(NA, FALSE, -1L) expect_value(FALSE, NA, 1L) expect_value(NA_integer_, NA_integer_, 0L) expect_value(NA_integer_, 0L, -1L) expect_value(0L, NA_integer_, 1L) expect_value(NA_character_, NA_character_, 0L) expect_value(NA_character_, "", -1L) expect_value("", NA_character_, 1L) expect_value(0, NA_real_, 1L) expect_value(0, NaN, 1L) expect_value(0, 0, 0L) expect_value(NA_real_, NA_real_, 0L) expect_value(NA_real_, NaN, 1L) expect_value(NA_real_, 0, -1L) expect_value(NaN, NA_real_, -1L) expect_value(NaN, NaN, 0L) expect_value(NaN, 0, -1L) }) test_that("data frames are compared column by column", { df1 <- data.frame(x = c(1, 1, 1), y = c(-1, 0, 1)) expect_equal(vec_compare(df1, df1[2, ]), c(-1, 0, 1)) expect_equal(vec_compare(df1[1], df1[2, 1, drop = FALSE]), c(0, 0, 0)) expect_equal(vec_compare(df1[2], df1[2, 2, drop = FALSE]), c(-1, 0, 1)) expect_equal(vec_compare(df1[2:1], df1[2, 2:1]), c(-1, 0, 1)) }) test_that("can compare data frames with various types of columns", { x1 <- data_frame(x = 1, y = 2) y1 <- data_frame(x = 2, y = 1) x2 <- data_frame(x = "a") y2 <- data_frame(x = "b") x3 <- data_frame(x = FALSE) y3 <- data_frame(x = TRUE) x4 <- data_frame(x = 1L) y4 <- data_frame(x = 2L) expect_equal(vec_compare(x1, y1), -1) expect_equal(vec_compare(x2, y2), -1) expect_equal(vec_compare(x3, y3), -1) expect_equal(vec_compare(x4, y4), -1) }) test_that("can compare data frames with data frame columns", { df1 <- data_frame(x = data_frame(a = 1)) df2 <- data_frame(x = data_frame(a = 2)) expect_equal(vec_compare(df1, df1), 0) expect_equal(vec_compare(df1, df2), -1) }) test_that("C code doesn't crash with bad inputs", { df <- data.frame(x = c(1, 1, 1), y = c(-1, 0, 1)) expect_error(.Call(vctrs_compare, df, df[1], TRUE), "not comparable") # Names are not checked, as `vec_cast_common()` should take care of the type. # So if `vec_cast_common()` is not called, or is improperly specified, then # this could result in false equality. expect_equal(.Call(vctrs_compare, df, setNames(df, c("x", "z")), TRUE), c(0, 0, 0)) df1 <- new_data_frame(list(x = 1:3, y = c(1, 1, 1))) df2 <- new_data_frame(list(y = 1:2, x = 1:2)) expect_error(.Call(vctrs_compare, df1, df2, TRUE), "must have the same types and lengths") }) test_that("xtfrm.vctrs_vctr works for variety of base classes", { df <- data.frame(x = c(NA, 1, 1), y = c(1, 2, 1)) expect_equal(xtfrm.vctrs_vctr(df), c(3, 2, 1)) x <- c(2, 3, 1) expect_equal(xtfrm.vctrs_vctr(x), x) expect_equal(xtfrm.vctrs_vctr(letters[x]), x) }) test_that("vec_proxy_order() orders list using order of appearance", { x <- 1:2 y <- 2:4 z <- "a" lst <- list(x, y, x, y, z) expect_identical(vec_proxy_order(lst), c(1L, 2L, 1L, 2L, 5L)) }) test_that("vec_compare() calls vec_proxy_compare()", { local_methods( vec_proxy_compare.vctrs_foobar = function(x, ...) rev(x), vec_ptype2.integer.vctrs_foobar = function(...) foobar(int()), vec_ptype2.vctrs_foobar = function(...) foobar(int()), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.integer = function(x, ...) x, ) expect_identical(vec_compare(1:3, 1:3), int(0, 0, 0)) expect_identical(vec_compare(1:3, foobar(1:3)), int(-1, 0, 1)) }) test_that("vec_proxy_compare() preserves data frames and vectors", { df <- data_frame(x = 1:2, y = c("a", "b")) expect_identical(vec_proxy_compare(df), df) x <- c(NA, "a", "b", "c") expect_identical(vec_proxy_compare(x), x) }) test_that("vec_proxy_compare() handles data frame with a POSIXlt column", { df <- data.frame(times = 1:5, x = 1:5) df$times <- as.POSIXlt(seq.Date(as.Date("2019-12-30"), as.Date("2020-01-03"), by = "day")) df2 <- df df2$times <- vec_proxy_compare(df$times) expect_identical( vec_proxy_compare(df), vec_proxy_compare(df2) ) }) test_that("vec_proxy_compare.POSIXlt() correctly orders (#720)", { dates <- as.POSIXlt(seq.Date(as.Date("2019-12-30"), as.Date("2020-01-03"), by = "day")) expect_equal(vec_order(dates), 1:5) }) test_that("vec_proxy_compare.POSIXlt() correctly orders around DST", { # 1am in EDT x <- as.POSIXlt("2020-11-01 01:00:00", tz = "America/New_York") # "falls back" to 1am again, but in EST y <- as.POSIXlt(x + 3600) expect_equal(vec_order(c(y, x)), c(2, 1)) }) test_that("vec_proxy_compare() flattens df-cols", { df_col <- data_frame(z = 3:4, w = 4:5) df <- data_frame(x = 1:2, y = df_col) expect <- data_frame(x = 1:2, z = 3:4, w = 4:5) expect_identical(vec_proxy_compare(df), expect) }) test_that("vec_proxy_compare() unwraps 1 col dfs", { df <- data_frame(x = 1:2) expect_identical(vec_proxy_compare(df), 1:2) df_col <- data_frame(y = 1:2) df <- data_frame(x = df_col) expect_identical(vec_proxy_compare(df), 1:2) }) test_that("vec_proxy_order() works on deeply nested lists", { df_col <- data_frame(z = list("b", "a", "b")) # Relaxed and unwrapped df1 <- data_frame(x = df_col) expect_identical(vec_proxy_order(df1), c(1L, 2L, 1L)) df2 <- data_frame(x = df_col, y = 1:3) expect_identical(vec_proxy_order(df2), data_frame(x = c(1L, 2L, 1L), y = 1:3)) }) test_that("error is thrown with data frames with 0 columns", { x <- new_data_frame(n = 1L) expect_error(vec_compare(x, x), "data frame with zero columns") }) test_that("error is thrown when comparing lists", { expect_error(vec_compare(list(), list()), class = "vctrs_error_unsupported") expect_error(.Call(vctrs_compare, list(), list(), FALSE), "Can't compare lists") }) test_that("error is thrown when comparing data frames with list columns", { df <- data_frame(x = list()) expect_error(vec_compare(df, df), class = "vctrs_error_unsupported") expect_error(.Call(vctrs_compare, df, df, FALSE), "Can't compare lists") }) test_that("error is thrown when comparing scalars", { x <- new_sclr(x = 1) expect_error(vec_compare(x, x), class = "vctrs_error_scalar_type") expect_error(.Call(vctrs_compare, x, x, FALSE), class = "vctrs_error_scalar_type") }) test_that("`na_equal` is validated", { expect_error(vec_compare(1, 1, na_equal = 1), class = "vctrs_error_assert_ptype") expect_error(vec_compare(1, 1, na_equal = c(TRUE, FALSE)), class = "vctrs_error_assert_size") }) test_that("can compare equal strings with different encodings", { for (x_encoding in encodings()) { for (y_encoding in encodings()) { expect_equal(vec_compare(x_encoding, y_encoding), 0L) } } }) test_that("can compare non-equal strings with different encodings", { x <- "x" y <- encodings()$latin1 expect_equal(vec_compare(x, y), -1L) }) test_that("comparison can be determined when strings have identical encodings", { encs <- encodings() for (enc in encs) { expect_equal(vec_compare(enc, enc), 0L) } }) test_that("comparison is known to always fail with bytes", { enc <- encoding_bytes() error <- "translating strings with \"bytes\" encoding" expect_error(vec_compare(enc, enc), error) }) test_that("comparison is known to fail when comparing bytes to other encodings", { error <- "translating strings with \"bytes\" encoding" for (enc in encodings()) { expect_error(vec_compare(encoding_bytes(), enc), error) expect_error(vec_compare(enc, encoding_bytes()), error) } }) test_that("can compare unspecified", { expect_equal(vec_compare(NA, NA), NA_integer_) expect_equal(vec_compare(NA, NA, na_equal = TRUE), 0) expect_equal(vec_compare(c(NA, NA), unspecified(2)), c(NA_integer_, NA_integer_)) }) test_that("can't supply NA as `na_equal`", { expect_error(vec_compare(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`") }) test_that("vec_compare() silently falls back to base data frame", { expect_silent(expect_identical( vec_compare(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(0L, 32) )) }) vctrs/tests/testthat/helper-c.R0000644000176200001440000000007413622451540016242 0ustar liggesusers class_type <- function(x) { .Call(vctrs_class_type, x) } vctrs/tests/testthat/test-type-tibble.R0000644000176200001440000000624113723213047017742 0ustar liggesusers test_that("tibble beats data frame", { df <- new_data_frame() dt <- tibble::tibble() expect_s3_class(vec_ptype_common(dt, df), "tbl_df") expect_s3_class(vec_ptype_common(df, dt), "tbl_df") }) test_that("can cast tibble to df and vice versa", { df <- new_data_frame() tib <- tibble::tibble() expect_identical(vec_cast(df, tib), tib) expect_identical(vec_cast(tib, df), df) expect_identical(vec_cast(tib, tib), tib) }) test_that("can't cast vector to tibble", { dt <- tibble::tibble() v <- logical() expect_error(vec_ptype2(v, dt), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(dt, v), class = "vctrs_error_incompatible_type") expect_error(vec_cast(v, dt), class = "vctrs_error_incompatible_type") }) test_that("casting to and from tibble preserves row names", { out <- vec_cast(mtcars, tibble::as_tibble(mtcars)) expect_identical(row.names(out), row.names(mtcars)) out <- vec_cast(out, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) }) test_that("no common type between list and tibble", { dt <- tibble::tibble() l <- list() expect_error(vec_ptype2(l, dt), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(dt, l), class = "vctrs_error_incompatible_type") }) test_that("vec_restore restores tibbles", { df1 <- tibble::tibble(x = 1:4) df2 <- vec_restore(vec_data(df1), df1) expect_s3_class(df2, "tbl_df") }) test_that("the type of a tibble with an unspecified column retains unspecifiedness", { df1 <- tibble::tibble(x = 1, y = NA) df2 <- tibble::tibble(x = 1, y = unspecified(1)) expect <- tibble::tibble(x = numeric(), y = unspecified()) expect_identical(vec_ptype(df1), expect) expect_identical(vec_ptype(df2), expect) }) test_that("vec_ptype_finalise() works recursively over tibbles", { df <- tibble(x = numeric(), y = unspecified()) expect <- tibble(x = numeric(), y = logical()) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() can handle tibble df columns", { df <- tibble(x = numeric(), y = tibble(z = unspecified())) expect <- tibble(x = numeric(), y = tibble(z = logical())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("can use ptype2 and cast with tibble that has incorrect class vector", { tib1 <- structure(data.frame(x = 1), class = c("tbl_df", "data.frame")) tib2 <- structure(data.frame(y = 2), class = c("tbl_df", "data.frame")) exp <- structure(data.frame(x = dbl(), y = dbl()), class = c("tbl_df", "data.frame")) requireNamespace("tibble") expect_identical( vec_ptype_common(tib1, tib2), exp ) expect_identical( vec_ptype_common(tib1, data.frame(y = 2)), tibble::new_tibble(exp, nrow = nrow(exp)) ) expect_identical( vec_ptype_common(data.frame(x = 1), tib2), tibble::new_tibble(exp, nrow = nrow(exp)) ) expect_identical( vec_cast(tib1, tib1), tib1 ) expect_error( vec_cast(tib1, tib2), class = "vctrs_error_cast_lossy_dropped" ) expect_error( vec_cast(tib1, data.frame(y = 2)), class = "vctrs_error_cast_lossy_dropped" ) expect_error( vec_cast(data.frame(x = 1), tib2), class = "vctrs_error_cast_lossy_dropped" ) }) vctrs/tests/testthat/test-type-sf.R0000644000176200001440000001466413671672047017134 0ustar liggesusers # Avoids adding `sf` to Suggests testthat_import_from("sf", c( "st_sf", "st_sfc", "st_point", "st_bbox", "st_precision", "st_crs", "st_linestring", "st_as_sf", "st_multipoint" )) # Need recent version to work around restore bug for sfc lists skip_if_not_installed("sf", "0.9-4") test_that("sf has a ptype2 method", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) out = vctrs::vec_ptype2(sf1, sf2) exp = st_sf( x = double(), y = character(), geo1 = sfc1[0], geo2 = sfc2[0], stringsAsFactors = FALSE ) expect_identical(out, exp) out = vctrs::vec_ptype2(sf1, new_data_frame(sf2)) expect_identical(out, exp) out = vctrs::vec_ptype2(new_data_frame(sf1), sf2) exp_rhs = st_sf( x = double(), y = character(), geo1 = sfc1[0], geo2 = sfc2[0], stringsAsFactors = FALSE, sf_column_name = "geo2" ) expect_identical(out, exp_rhs) }) test_that("sf has a cast method", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) expect_error( vctrs::vec_cast(sf1, sf2), class = "vctrs_error_cast_lossy" ) expect_error( vctrs::vec_cast(sf2, sf1), class = "vctrs_error_cast_lossy" ) common = vec_ptype2(sf1, sf2) out = vctrs::vec_cast(sf1, common) exp = st_sf( x = c(1, 0), y = character(2)[NA], geo1 = sfc1, geo2 = sfc2[c(NA, NA) + 0L], stringsAsFactors = FALSE ) expect_identical(out, exp) out = vctrs::vec_cast(new_data_frame(sf1), common) expect_identical(out, exp) out = vctrs::vec_cast(sf1, new_data_frame(common)) expect_identical(out, new_data_frame(exp)) out = vctrs::vec_cast(sf2, common) exp = st_sf( x = 0, y = "", geo1 = sfc1[NA + 0L], geo2 = sfc2, stringsAsFactors = FALSE ) expect_identical(out, exp) }) # https://github.com/r-lib/vctrs/issues/1136 test_that("can combine sf data frames", { testthat_import_from("dplyr", "bind_rows") sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) # FIXME: Currently `vec_rbind()` returns a data frame because we # are temporarily working around bugs due to bad interaction of # different fallbacks. `bind_rows()` returns an `sf` data frame as # expected because of `dplyr_reconstruct()`. exp = data_frame( x = c(1, 0, 0), geo1 = sfc1[c(1:2, NA)], y = c(NA, NA, ""), geo2 = sfc2[c(NA, NA, 1)] ) expect_identical(vctrs::vec_rbind(sf1, sf2), exp) expect_identical(bind_rows(sf1, sf2), st_as_sf(exp)) exp = data_frame( y = c("", NA, NA, ""), x = c(0, 1, 0, 0), geo2 = sfc2[c(1, NA, NA, 1)], geo1 = sfc1[c(NA, 1:2, NA)] ) expect_identical(vctrs::vec_rbind(sf2, sf1, sf2), exp) expect_identical(bind_rows(sf2, sf1, sf2), st_as_sf(exp)) }) test_that("can combine sf and tibble", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) out = vctrs::vec_rbind(sf2, data.frame(x = 1)) exp = data_frame( y = c("", NA), x = c(0, 1), geo2 = sfc2[c(1L, NA)] ) expect_identical(out, exp) out = vctrs::vec_rbind(sf2, tibble::tibble(x = 1)) expect_identical(out, exp) out = vctrs::vec_rbind(tibble::tibble(x = 1), sf2) exp = data_frame( x = c(1, 0), y = c(NA, ""), geo2 = sfc2[c(NA, 1L)] ) expect_identical(out, exp) }) # https://github.com/r-spatial/sf/issues/1390 test_that("can combine sfc lists", { ls <- st_linestring(matrix(1:3, ncol = 3)) sfc <- st_sfc(ls) expect_identical(vec_c(sfc, sfc), c(sfc, sfc)) sf <- st_as_sf(data.frame(id = 1, geometry = sfc)) # Currently returns a bare data frame because of the workaround for # the `c()` fallback sentinels expect_identical(vec_rbind(sf, sf), new_data_frame(rbind(sf, sf))) expect_identical(vec_rbind(sf, sf, sf), new_data_frame(rbind(sf, sf, sf))) }) test_that("can combine sfc lists with unspecified chunks", { point <- st_point(1:2) out <- vec_c(c(NA, NA), st_sfc(point), NA) expect_identical(out, st_sfc(NA, NA, point, NA)) multipoint <- st_multipoint(matrix(1:4, 2)) x <- st_sfc(point) y <- st_sfc(multipoint, multipoint) out <- vec_rbind( data_frame(x = x), data_frame(y = y) ) expect_identical(out, data_frame( x = st_sfc(point, NA, NA), y = st_sfc(NA, multipoint, multipoint) )) }) test_that("`n_empty` attribute of `sfc` vectors is restored", { pt1 = st_sfc(st_point(c(NA_real_, NA_real_))) pt2 = st_sfc(st_point(0:1)) x = c(pt1, pt2) expect_identical(attr(vctrs::vec_slice(x, 1), "n_empty"), 1L) expect_identical(attr(vctrs::vec_slice(x, 2), "n_empty"), 0L) combined = vctrs::vec_c(pt1, pt2, pt1) expect_length(combined, 3) expect_identical(attr(combined, "n_empty"), 2L) }) test_that("bbox attributes of `sfc` vectors are restored", { pt1 = st_sfc(st_point(c(1L, 2L))) pt2 = st_sfc(st_point(c(10L, 20L))) x = c(pt1, pt2) expect_identical(st_bbox(vctrs::vec_slice(x, 1)), st_bbox(pt1)) expect_identical(st_bbox(vctrs::vec_slice(x, 2)), st_bbox(pt2)) combined = vctrs::vec_c(pt1, pt2) expect_identical(st_bbox(x), st_bbox(combined)) }) test_that("`precision` and `crs` attributes of `sfc` vectors are restored", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) out = vctrs::vec_slice(x, 1) expect_identical(st_precision(x), st_precision(out)) expect_identical(st_crs(x), st_crs(out)) }) test_that("`precision` and `crs` attributes of `sfc` vectors are combined", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 3857) out = vctrs::vec_c(x, y) expect_identical(st_precision(x), st_precision(out)) expect_identical(st_crs(x), st_crs(out)) # These used to be errors before we fell back to c() y = st_sfc(st_point(c(0, 0)), precision = 1e-2, crs = 3857) expect_identical(vctrs::vec_c(x, y), c(x, y)) # expect_error(vctrs::vec_c(x, y), "precisions not equal") y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 4326) expect_identical(vctrs::vec_c(x, y), c(x, y)) # expect_error(vctrs::vec_c(x, y), "coordinate reference systems not equal") }) # Local Variables: # indent-tabs-mode: t # ess-indent-offset: 4 # tab-width: 4 # End: vctrs/tests/testthat/test-assert-explanations.txt0000644000176200001440000000472314042546233022150 0ustar liggesusers> vec_assert(lgl(), chr()): Error: `lgl()` must be a vector with type . Instead, it has type . > vec_assert(lgl(), factor()): Error: `lgl()` must be a vector with type >. Instead, it has type . > vec_assert(lgl(), factor(levels = "foo")): Error: `lgl()` must be a vector with type >. Instead, it has type . > vec_assert(factor(levels = "bar"), factor(levels = "foo")): Error: `factor(levels = "bar")` must be a vector with type >. Instead, it has type >. > vec_assert(factor(), chr()): Error: `factor()` must be a vector with type . Instead, it has type >. > vec_assert(lgl(), data.frame()): Error: `lgl()` must be a vector with type >. Instead, it has type . > vec_assert(lgl(), data.frame(x = 1)): Error: `lgl()` must be a vector with type >. Instead, it has type . > vec_assert(lgl(), data.frame(x = 1, y = 2)): Error: `lgl()` must be a vector with type: > Instead, it has type . > vec_assert(data.frame(), chr()): Error: `data.frame()` must be a vector with type . Instead, it has type >. > vec_assert(data.frame(x = 1), chr()): Error: `data.frame(x = 1)` must be a vector with type . Instead, it has type >. > vec_assert(data.frame(x = 1), data.frame(x = "foo")): Error: `data.frame(x = 1)` must be a vector with type >. Instead, it has type >. > vec_assert(data.frame(x = 1), data.frame(x = "foo", y = 2)): Error: `data.frame(x = 1)` must be a vector with type: > Instead, it has type >. > vec_assert(data.frame(x = 1, y = 2), chr()): Error: `data.frame(x = 1, y = 2)` must be a vector with type . Instead, it has type: > > vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo")): Error: `data.frame(x = 1, y = 2)` must be a vector with type >. Instead, it has type: > > vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo", y = 2)): Error: `data.frame(x = 1, y = 2)` must be a vector with type: > Instead, it has type: > vctrs/tests/testthat/test-slice.R0000644000176200001440000006033413753021002016613 0ustar liggesusers test_that("vec_slice throws error with non-vector inputs", { expect_error(vec_slice(environment(), 1L), class = "vctrs_error_scalar_type") }) test_that("vec_slice throws error with non-vector subscripts", { verify_errors({ expect_error(vec_slice(1:3, Sys.Date()), class = "vctrs_error_subscript_type") expect_error(vec_slice(1:3, matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type") }) }) test_that("can subset base vectors", { i <- 2:3 expect_identical(vec_slice(lgl(1, 0, 1), i), lgl(0, 1)) expect_identical(vec_slice(int(1, 2, 3), i), int(2, 3)) expect_identical(vec_slice(dbl(1, 2, 3), i), dbl(2, 3)) expect_identical(vec_slice(cpl(1, 2, 3), i), cpl(2, 3)) expect_identical(vec_slice(chr("1", "2", "3"), i), chr("2", "3")) expect_identical(vec_slice(bytes(1, 2, 3), i), bytes(2, 3)) expect_identical(vec_slice(list(1, 2, 3), i), list(2, 3)) }) test_that("can subset shaped base vectors", { i <- 2:3 mat <- as.matrix expect_identical(vec_slice(mat(lgl(1, 0, 1)), i), mat(lgl(0, 1))) expect_identical(vec_slice(mat(int(1, 2, 3)), i), mat(int(2, 3))) expect_identical(vec_slice(mat(dbl(1, 2, 3)), i), mat(dbl(2, 3))) expect_identical(vec_slice(mat(cpl(1, 2, 3)), i), mat(cpl(2, 3))) expect_identical(vec_slice(mat(chr("1", "2", "3")), i), mat(chr("2", "3"))) expect_identical(vec_slice(mat(bytes(1, 2, 3)), i), mat(bytes(2, 3))) expect_identical(vec_slice(mat(list(1, 2, 3)), i), mat(list(2, 3))) }) test_that("can subset with missing indices", { for (i in list(int(2L, NA), lgl(FALSE, TRUE, NA))) { expect_identical(vec_slice(lgl(1, 0, 1), i), lgl(0, NA)) expect_identical(vec_slice(int(1, 2, 3), i), int(2, NA)) expect_identical(vec_slice(dbl(1, 2, 3), i), dbl(2, NA)) expect_identical(vec_slice(cpl(1, 2, 3), i), cpl(2, NA)) expect_identical(vec_slice(chr("1", "2", "3"), i), c("2", NA)) expect_identical(vec_slice(bytes(1, 2, 3), i), bytes(2, 0)) expect_identical(vec_slice(list(1, 2, 3), i), list(2, NULL)) } }) test_that("can subset with a recycled NA", { expect_identical(vec_slice(1:3, NA), int(NA, NA, NA)) expect_identical(vec_slice(new_vctr(1:3), NA), new_vctr(int(NA, NA, NA))) rownames <- rep_len("", nrow(mtcars)) rownames <- vec_as_names(rownames, repair = "unique") expect_identical(vec_slice(mtcars, NA), structure(mtcars[NA, ], row.names = rownames)) }) test_that("can subset with a recycled TRUE", { expect_identical(vec_slice(1:3, TRUE), 1:3) expect_identical(vec_slice(mtcars, TRUE), mtcars) expect_identical(vec_slice(new_vctr(1:3), TRUE), new_vctr(1:3)) expect_identical(vec_as_location(TRUE, 2), 1:2) }) test_that("can subset with a recycled FALSE", { expect_identical(vec_slice(1:3, FALSE), int()) expect_identical(vec_slice(mtcars, FALSE), mtcars[NULL, ]) expect_identical(vec_slice(new_vctr(1:3), FALSE), new_vctr(integer())) }) test_that("can't index beyond the end of a vector", { verify_errors({ expect_error(vec_slice(1:2, 3L), class = "vctrs_error_subscript_oob") expect_error(vec_slice(1:2, -3L), class = "vctrs_error_subscript_oob") }) }) test_that("slicing non existing elements fails", { expect_error(vec_as_location("foo", 1L, "f"), class = "vctrs_error_subscript_oob") expect_error(vec_slice(c(f = 1), "foo"), class = "vctrs_error_subscript_oob") }) test_that("can subset object of any dimensionality", { x0 <- c(1, 1) x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_equal(vec_slice(x0, 1L), 1) expect_identical(vec_slice(x1, 1L), ones(1)) expect_identical(vec_slice(x2, 1L), ones(1, 3)) expect_identical(vec_slice(x3, 1L), ones(1, 3, 4)) expect_identical(vec_slice(x4, 1L), ones(1, 3, 4, 5)) }) test_that("can subset using logical subscript", { x0 <- c(1, 1) expect_identical(vec_slice(x0, TRUE), x0) expect_identical(vec_slice(x0, c(TRUE, FALSE)), 1) expect_error( vec_slice(x0, c(TRUE, FALSE, TRUE)), class = "vctrs_error_subscript_size" ) expect_error( vec_slice(x0, lgl()), class = "vctrs_error_subscript_size" ) expect_error( vec_slice(mtcars, c(TRUE, FALSE)), class = "vctrs_error_subscript_size" ) }) test_that("can subset data frame columns", { df <- data.frame(x = 1:2) df$y <- data.frame(a = 2:1) expect_equal(vec_slice(df, 1L)$y, vec_slice(df$y, 1L)) }) test_that("can subset empty data frames", { df <- new_data_frame(n = 3L) expect_equal(vec_size(vec_slice(df, integer())), 0) expect_equal(vec_size(vec_slice(df, 1L)), 1) expect_equal(vec_size(vec_slice(df, 1:3)), 3) df$df <- df expect_equal(vec_size(vec_slice(df, integer())), 0) expect_equal(vec_size(vec_slice(df, 1L)), 1) expect_equal(vec_size(vec_slice(df, 1:3)), 3) }) test_that("ignores NA in logical subsetting", { x <- c(NA, 1, 2) expect_equal(vec_slice(x, x > 0), c(NA, 1, 2)) }) test_that("ignores NA in integer subsetting", { expect_equal(vec_slice(0:2, c(NA, 2:3)), c(NA, 1, 2)) }) test_that("can't slice with missing argument", { expect_error(vec_slice(1:3)) expect_error(vec_slice(mtcars)) expect_error(vec_slice(new_vctr(1:3))) }) test_that("can slice with NULL argument", { expect_identical(vec_slice(1:3, NULL), integer()) expect_identical(vec_slice(iris, NULL), iris[0, ]) expect_identical(vec_slice(new_vctr(1:3), NULL), new_vctr(integer())) }) test_that("slicing unclassed structures preserves attributes", { x <- structure(1:3, foo = "bar") expect_identical(vec_slice(x, 1L), structure(1L, foo = "bar")) }) test_that("can slice with negative indices", { expect_identical(vec_slice(1:3, -c(1L, 3L)), 2L) expect_identical(vec_slice(mtcars, -(1:30)), vec_slice(mtcars, 31:32)) expect_error(vec_slice(1:3, -c(1L, NA)), class = "vctrs_error_subscript_type") expect_error(vec_slice(1:3, c(-1L, 1L)), class = "vctrs_error_subscript_type") }) test_that("0 is ignored in negative indices", { expect_identical(vec_slice(1:3, c(-2L, 0L)), int(1L, 3L)) expect_identical(vec_slice(1:3, c(0L, -2L)), int(1L, 3L)) }) test_that("0 is ignored in positive indices", { expect_identical(vec_slice(1:3, 0L), int()) expect_identical(vec_slice(1:3, c(0L, 0L)), int()) expect_identical(vec_slice(1:3, c(0L, 2L, 0L)), 2L) }) test_that("can slice with double indices", { expect_identical(vec_slice(1:3, dbl(2, 3)), 2:3) err <- expect_error(vec_as_location(2^31, 3L), class = "vctrs_error_subscript_type") expect_s3_class(err$parent, "vctrs_error_cast_lossy") }) test_that("can slice with symbols", { expect_identical(vec_as_location(quote(b), 26, letters), 2L) }) test_that("can `vec_slice()` S3 objects without dispatch infloop", { expect_identical(new_vctr(1:3)[1], new_vctr(1L)) expect_identical(new_vctr(as.list(1:3))[1], new_vctr(list(1L))) }) test_that("can `vec_slice()` records", { out <- vec_slice(new_rcrd(list(a = 1L, b = 2L)), rep(1, 3)) expect_size(out, 3) out <- vec_init(new_rcrd(list(a = 1L, b = 2L)), 2) expect_size(out, 2) }) test_that("vec_restore() is called after proxied slicing", { local_methods( vec_proxy.vctrs_foobar = identity, vec_restore.vctrs_foobar = function(x, to, ...) "dispatch" ) expect_identical(vec_slice(foobar(1:3), 2), "dispatch") }) test_that("vec_slice() is proxied", { local_proxy() x <- vec_slice(new_proxy(1:3), 2:3) expect_identical(proxy_deref(x), 2:3) }) test_that("dimensions are preserved by vec_slice()", { # Fallback case x <- foobar(1:4) dim(x) <- c(2, 2) dimnames(x) <- list(a = c("foo", "bar"), b = c("quux", "hunoz")) out <- vec_slice(x, 1) exp <- foobar( c(1L, 3L), dim = c(1, 2), dimnames = list(a = "foo", b = c("quux", "hunoz") )) expect_identical(out, exp) # Native case attrib <- NULL local_methods( vec_proxy.vctrs_foobar = identity, vec_restore.vctrs_foobar = function(x, to, ...) attrib <<- attributes(x) ) vec_slice(x, 1) exp <- list(dim = 1:2, dimnames = list(a = "foo", b = c("quux", "hunoz"))) expect_identical(attrib, exp) }) test_that("can slice shaped objects by name", { x <- matrix(1:2) expect_error(vec_slice(x, "foo"), "unnamed") dimnames(x) <- list(c("foo", "bar")) expect_equal(vec_slice(x, "foo"), vec_slice(x, 1L)) expect_error(vec_slice(x, "baz"), class = "vctrs_error_subscript_oob") }) test_that("vec_slice() unclasses input before calling `vec_restore()`", { oo <- NULL local_methods( vec_proxy.vctrs_foobar = identity, vec_restore.vctrs_foobar = function(x, ...) oo <<- is.object(x) ) x <- foobar(1:4) dim(x) <- c(2, 2) vec_slice(x, 1) expect_false(oo) }) test_that("can call `vec_slice()` from `[` methods with shaped objects without infloop", { local_methods( `[.vctrs_foobar` = function(x, i, ...) vec_slice(x, i) ) x <- foobar(1:4) dim(x) <- c(2, 2) exp <- foobar(c(1L, 3L)) dim(exp) <- c(1, 2) expect_identical(x[1], exp) }) test_that("vec_slice() restores attributes on shaped S3 objects correctly", { x <- factor(c("a", "b", "c", "d", "e", "f")) dim(x) <- c(3, 2) expect <- factor(c("a", "c", "d", "f"), levels = levels(x)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, c(1, 3)), expect) }) test_that("vec_slice() falls back to `[` with S3 objects", { local_methods( `[.vctrs_foobar` = function(x, i, ...) "dispatched" ) expect_identical(vec_slice(foobar(NA), 1), "dispatched") expect_error(vec_slice(foobar(list(NA)), 1), class = "vctrs_error_scalar_type") local_methods( vec_proxy.vctrs_foobar = identity ) expect_identical(vec_slice(foobar(list(NA)), 1), foobar(list(NA))) }) test_that("vec_slice() doesn't restore when attributes have already been restored", { local_methods( `[.vctrs_foobar` = function(x, i, ...) structure("dispatched", foo = "bar"), vec_restore.vctrs_foobar = function(...) stop("not called") ) expect_error(vec_slice(foobar(NA), 1), NA) }) test_that("vec_slice() doesn't restore when `[` method intentionally dropped attributes", { local_methods( `[.vctrs_foobar` = function(x, i, ...) unstructure(NextMethod()), vec_restore.vctrs_foobar = function(...) stop("not called") ) expect_identical(vec_slice(foobar(NA), 1), NA) }) test_that("can vec_slice() without inflooping when restore calls math generics", { local_methods( new_foobar = function(x) { new_vctr(as.double(x), class = "vctrs_foobar") }, vec_restore.vctrs_foobar = function(x, ...) { abs(x) sum(x) mean(x) is.finite(x) is.infinite(x) is.nan(x) new_foobar(x) } ) expect_identical(new_foobar(1:10)[1:2], new_foobar(1:2)) }) test_that("vec_restore() is called after slicing data frames", { local_methods( vec_restore.vctrs_tabble = function(...) "dispatched" ) df <- structure(mtcars, class = c("vctrs_tabble", "data.frame")) expect_identical(vec_slice(df, 1), "dispatched") }) test_that("additional subscripts are forwarded to `[`", { local_methods( `[.vctrs_foobar` = function(x, i, ...) vec_index(x, i, ...) ) x <- foobar(c("foo", "bar", "quux", "hunoz")) dim(x) <- c(2, 2) exp <- foobar("quux") dim(exp) <- c(1, 1) expect_identical(x[1, 2], exp) }) test_that("can use names to vec_slice() a named object", { x0 <- c(a = 1, b = 2) x1 <- c(a = 1, a = 2) expect_identical(vec_slice(x0, letters[1]), c(a = 1)) expect_identical(vec_slice(x0, letters[2:1]), c(b = 2, a = 1)) expect_identical(vec_slice(x1, letters[1]), c(a = 1)) expect_error(vec_slice(x0, letters[3:1]), class = "vctrs_error_subscript_oob") expect_error(vec_slice(x1, letters[2]), class = "vctrs_error_subscript_oob") }) test_that("can't use names to vec_slice() an unnamed object", { expect_error( vec_slice(1:3, letters[1]), "Can't use character names to index an unnamed vector.", fixed = TRUE ) expect_error( vec_slice(1:3, letters[25:27]), "Can't use character names to index an unnamed vector.", fixed = TRUE ) }) test_that("can slice with missing character indices (#244)", { expect_identical(vec_as_location(na_chr, 2L, c("x", "")), na_int) expect_identical(vec_slice(c(x = 1), na_chr), set_names(na_dbl, "")) expect_identical(vec_slice(c(x = "foo"), na_chr), set_names(na_chr, "")) }) test_that("can slice with numerics (#577)", { expect_identical(vec_as_location(1:2, 3), 1:2) expect_error(vec_as_location(1:2, 3.5), class = "vctrs_error_cast_lossy") }) test_that("missing indices don't create NA names", { x <- set_names(letters) expect_identical(vec_slice(x, na_int), set_names(na_chr, "")) expect_identical(vec_slice(x, int(1, NA, 3, NA)), chr(a = "a", NA, c = "c", NA)) # Preserves existing NA names x <- set_names(1:2, c(NA, "foo")) expect_identical(vec_slice(x, 1:2), x) }) test_that("vec_slice() asserts vectorness (#301)", { expect_error(vec_slice(NULL, 1), class = "vctrs_error_scalar_type") }) test_that("slicing an unspecified logical vector returns a logical vector", { expect_identical(vec_slice(NA, integer()), logical()) expect_identical(vec_slice(NA, c(1, 1)), c(NA, NA)) }) test_that("slicing an unspecified() object returns an unspecified()", { expect_identical(vec_slice(unspecified(1), integer()), unspecified()) expect_identical(vec_slice(unspecified(1), c(1, 1)), unspecified(2)) }) test_that("vec_slice() works with Altrep classes with custom extract methods", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) idx <- c(9, 10, 11) expect_equal(vec_slice(x, idx), c("foo", "foo", "bar")) }) test_that("slice has informative error messages", { verify_output(test_path("error", "test-slice.txt"), { "# Unnamed vector with character subscript" vec_slice(1:3, letters[1]) "# Negative subscripts are checked" vec_slice(1:3, -c(1L, NA)) vec_slice(1:3, c(-1L, 1L)) "# oob error messages are properly constructed" vec_slice(c(bar = 1), "foo") "Multiple OOB indices" vec_slice(letters, c(100, 1000)) vec_slice(letters, c(1, 100:103, 2, 104:110)) vec_slice(set_names(letters), c("foo", "bar")) vec_slice(set_names(letters), toupper(letters)) "# Can't index beyond the end of a vector" vec_slice(1:2, 3L) vec_slice(1:2, -3L) "# vec_slice throws error with non-vector subscripts" vec_slice(1:3, Sys.Date()) vec_slice(1:3, matrix(TRUE, ncol = 1)) }) }) # vec_init ---------------------------------------------------------------- test_that("na of atomic vectors is as expected", { expect_equal(vec_init(TRUE), NA) expect_equal(vec_init(1L), NA_integer_) expect_equal(vec_init(1), NA_real_) expect_equal(vec_init("x"), NA_character_) expect_equal(vec_init(1i), NA_complex_) }) test_that("na of factor preserves levels", { f1 <- factor("a", levels = c("a", "b")) f2 <- vec_init(f1) expect_equal(levels(f1), levels(f2)) }) test_that("na of POSIXct preserves tz", { dt1 <- as.POSIXct("2010-01-01", tz = "America/New_York") dt2 <- vec_init(dt1) expect_equal(attr(dt2, "tzone"), "America/New_York") }) test_that("na of list is list(NULL)", { expect_equal(vec_init(list()), list(NULL)) }) test_that("na of array is 1d slice", { x1 <- array(1:12, c(2, 3, 4)) x2 <- vec_init(x1) expect_equal(x2, array(NA_integer_, c(1, 3, 4))) }) test_that("na of list-array is 1d slice", { x1 <- array(as.list(1:12), c(2, 3, 4)) x2 <- vec_init(x1) expect_equal(x2, array(list(), c(1, 3, 4))) }) test_that("vec_init() asserts vectorness (#301)", { expect_error(vec_init(NULL, 1L), class = "vctrs_error_scalar_type") }) test_that("vec_init() works with Altrep classes", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 1L, bar = 2L)) expect_equal(vec_init(x, 2), rep(NA_character_, 2)) }) # vec_slice + compact_rep ------------------------------------------------- # `i` is 1-based test_that("names are repaired correctly with compact reps and `NA_integer_`", { x <- list(a = 1L, b = 2L) expect <- set_names(list(NULL, NULL), c("", "")) expect_equal(vec_slice_rep(x, NA_integer_, 2L), expect) }) test_that("names are recycled correctly with compact reps", { expect_named(vec_slice_rep(c(x = 1L), 1L, 3L), c("x", "x", "x")) }) test_that("vec_slice() with compact_reps work with Altrep classes", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) expect_equal(vec_slice_rep(x, 10L, 3L), rep("foo", 3)) }) # vec_slice + compact_seq ------------------------------------------------- # `start` is 0-based test_that("can subset base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE expect_identical(vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl(0, 1)) expect_identical(vec_slice_seq(int(1, 2, 3), start, size, increasing), int(2, 3)) expect_identical(vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl(2, 3)) expect_identical(vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl(2, 3)) expect_identical(vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr("2", "3")) expect_identical(vec_slice_seq(bytes(1, 2, 3), start, size, increasing), bytes(2, 3)) expect_identical(vec_slice_seq(list(1, 2, 3), start, size, increasing), list(2, 3)) }) test_that("can subset base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE expect_identical(vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl(1, 0)) expect_identical(vec_slice_seq(int(1, 2, 3), start, size, increasing), int(3, 2)) expect_identical(vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl(3, 2)) expect_identical(vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl(3, 2)) expect_identical(vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr("3", "2")) expect_identical(vec_slice_seq(bytes(1, 2, 3), start, size, increasing), bytes(3, 2)) expect_identical(vec_slice_seq(list(1, 2, 3), start, size, increasing), list(3, 2)) }) test_that("can subset base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE expect_identical(vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl()) expect_identical(vec_slice_seq(int(1, 2, 3), start, size, increasing), int()) expect_identical(vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl()) expect_identical(vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl()) expect_identical(vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr()) expect_identical(vec_slice_seq(bytes(1, 2, 3), start, size, increasing), bytes()) expect_identical(vec_slice_seq(list(1, 2, 3), start, size, increasing), list()) }) test_that("can subset shaped base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical(vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl(0, 1))) expect_identical(vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int(2, 3))) expect_identical(vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl(2, 3))) expect_identical(vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl(2, 3))) expect_identical(vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr("2", "3"))) expect_identical(vec_slice_seq(mat(bytes(1, 2, 3)), start, size, increasing), mat(bytes(2, 3))) expect_identical(vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list(2, 3))) }) test_that("can subset shaped base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE mat <- as.matrix expect_identical(vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl(1, 0))) expect_identical(vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int(3, 2))) expect_identical(vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl(3, 2))) expect_identical(vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl(3, 2))) expect_identical(vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr("3", "2"))) expect_identical(vec_slice_seq(mat(bytes(1, 2, 3)), start, size, increasing), mat(bytes(3, 2))) expect_identical(vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list(3, 2))) }) test_that("can subset shaped base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE mat <- as.matrix expect_identical(vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl())) expect_identical(vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int())) expect_identical(vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl())) expect_identical(vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl())) expect_identical(vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr())) expect_identical(vec_slice_seq(mat(bytes(1, 2, 3)), start, size, increasing), mat(bytes())) expect_identical(vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list())) }) test_that("can subset object of any dimensionality with compact seqs", { x0 <- c(1, 1) x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_equal(vec_slice_seq(x0, 0L, 1L), 1) expect_identical(vec_slice_seq(x1, 0L, 1L), ones(1)) expect_identical(vec_slice_seq(x2, 0L, 1L), ones(1, 3)) expect_identical(vec_slice_seq(x3, 0L, 1L), ones(1, 3, 4)) expect_identical(vec_slice_seq(x4, 0L, 1L), ones(1, 3, 4, 5)) }) test_that("can subset data frames with compact seqs", { df <- data_frame(x = 1:5, y = letters[1:5]) expect_equal(vec_slice_seq(df, 0L, 0L), vec_slice(df, integer())) expect_equal(vec_slice_seq(df, 0L, 1L), vec_slice(df, 1L)) expect_equal(vec_slice_seq(df, 0L, 3L), vec_slice(df, 1:3)) expect_equal(vec_slice_seq(df, 2L, 3L, FALSE), vec_slice(df, 3:1)) df$df <- df expect_equal(vec_slice_seq(df, 0L, 0L), vec_slice(df, integer())) expect_equal(vec_slice_seq(df, 0L, 1L), vec_slice(df, 1L)) expect_equal(vec_slice_seq(df, 0L, 3L), vec_slice(df, 1:3)) expect_equal(vec_slice_seq(df, 2L, 3L, FALSE), vec_slice(df, 3:1)) }) test_that("can subset S3 objects using the fallback method with compact seqs", { x <- factor(c("a", "b", "c", "d")) expect_equal(vec_slice_seq(x, 0L, 0L), vec_slice(x, integer())) expect_equal(vec_slice_seq(x, 0L, 1L), vec_slice(x, 1L)) expect_equal(vec_slice_seq(x, 2L, 2L), vec_slice(x, 3:4)) expect_equal(vec_slice_seq(x, 3L, 2L, FALSE), vec_slice(x, 4:3)) }) test_that("vec_slice() with compact_seqs work with Altrep classes", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 2L, bar = 3L)) expect_equal(vec_slice_seq(x, 1L, 3L), c("foo", "bar", "bar")) }) test_that("vec_slice() handles symbols and OO objects", { expect_identical(vec_slice(c(a = 1, b = 2), quote(b)), c(b = 2)) expect_identical(vec_slice(c(a = 1, b = 2), factor("b")), c(b = 2)) expect_error(vec_slice(c(a = 1, b = 2), foobar("b")), class = "vctrs_error_subscript_type") }) test_that("vec_init() handles names in columns", { expect_identical( vec_init(data_frame(x = c(a = 1, b = 2)))$x, named(na_dbl) ) expect_identical( vec_init(data_frame(x = c(1, 2)))$x, na_dbl ) }) test_that("vec_slice() restores unrestored but named foreign classes", { x <- foobar(c(x = 1)) expect_identical(vec_slice(x, 1), x) expect_identical(vec_chop(x), list(x)) expect_identical(vec_chop(x, list(1)), list(x)) expect_identical(vec_ptype(x), foobar(named(dbl()))) expect_identical(vec_ptype(x), foobar(named(dbl()))) expect_identical(vec_ptype_common(x, x), foobar(named(dbl()))) out <- vec_ptype_common_fallback(x, x) expect_true(is_common_class_fallback(out)) expect_identical(fallback_class(out), "vctrs_foobar") }) test_that("scalar type error is thrown when `vec_slice_impl()` is called directly (#1139)", { x <- foobar(as.list(1:3)) expect_error(vec_slice_seq(x, 1L, 1L), class = "vctrs_error_scalar_type") }) test_that("column sizes are checked before slicing (#552)", { x <- structure(list(a = 1, b = 2:3), row.names = 1:2, class = "data.frame") expect_error(vctrs::vec_slice(x, 2), "must match the data frame size") }) test_that("base_vec_rep() slices data frames with the base::rep() UI", { df <- data_frame(x = data_frame(y = 1:2)) expect_identical( base_vec_rep(df, length.out = 4), vec_slice(df, c(1:2, 1:2)) ) }) test_that("vec_size_assign() slices data frames with the base::rep() UI", { df <- data_frame(x = data_frame(y = 1:3)) expect_identical( vec_size_assign(df, 2), vec_slice(df, 1:2) ) expect_identical( vec_size_assign(df, 4), vec_slice(df, c(1:3, NA)) ) }) vctrs/tests/testthat/helper-encoding.R0000644000176200001440000000137413637142417017620 0ustar liggesusersencodings <- function(bytes = FALSE) { string <- "\u00B0C" utf8 <- iconv(string, from = Encoding(string), to = "UTF-8") unknown <- iconv(string, from = Encoding(string), to = "", mark = FALSE) latin1 <- iconv(string, from = Encoding(string), to = "latin1") out <- list(utf8 = utf8, unknown = unknown, latin1 = latin1) if (bytes) { out <- list2(!!! out, bytes = encoding_bytes()) } out } encoding_bytes <- function() { string <- "\u00B0C" unknown <- iconv(string, from = Encoding(string), to = "", mark = FALSE) bytes <- unknown Encoding(bytes) <- "bytes" bytes } expect_equal_encoding <- function(object, expected) { args <- vec_recycle_common(object, expected) expect_equal(Encoding(args[[1L]]), Encoding(args[[2L]])) } vctrs/tests/testthat/test-type-vec-type-common-error.txt0000644000176200001440000000352214042546252023271 0ustar liggesusers vec_ptype_common(df1, df2): Can't combine `..1$x$y$z` and `..2$x$y$z` . vec_ptype_common(df1, df1, df2): Can't combine `..1$x$y$z` and `..3$x$y$z` . vec_ptype_common(large_df1, large_df2): Can't combine `..1$foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar$y$z` and `..2$foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar$y$z` . vec_ptype_common(foo = TRUE, bar = "foo"): Can't combine `foo` and `bar` . vec_ptype_common(foo = TRUE, baz = FALSE, bar = "foo"): Can't combine `foo` and `bar` . vec_ptype_common(foo = df1, bar = df2): Can't combine `foo$x$y$z` and `bar$x$y$z` . vec_ptype_common(df1, df1, bar = df2): Can't combine `..1$x$y$z` and `bar$x$y$z` . vec_ptype_common(TRUE, !!!list(1, "foo")): Can't combine `..2` and `..3` . vec_ptype_common(TRUE, !!!list(1, 2), "foo"): Can't combine `..2` and `..5` . vec_ptype_common(1, !!!list(TRUE, FALSE), "foo"): Can't combine `..1` and `..5` . vec_ptype_common(foo = TRUE, !!!list(FALSE, FALSE), bar = "foo"): Can't combine `foo` and `bar` . vec_ptype_common(foo = TRUE, !!!list(bar = 1, "foo")): Can't combine `bar` and `..3` . vec_ptype_common(foo = TRUE, !!!list(bar = "foo")): Can't combine `foo` and `bar` . vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), baz = "chr"): Can't combine `foo` and `baz` . vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), !!!list(baz = "chr")): Can't combine `foo` and `baz` . vctrs/tests/testthat/test-slice-chop.R0000644000176200001440000006020514027045462017552 0ustar liggesusers test_that("vec_chop() throws error with non-vector inputs", { expect_error(vec_chop(NULL), class = "vctrs_error_scalar_type") expect_error(vec_chop(environment()), class = "vctrs_error_scalar_type") }) test_that("atomics are split into a list", { x <- 1:5 expect_equal(vec_chop(x), as.list(x)) x <- letters[1:5] expect_equal(vec_chop(x), as.list(x)) }) test_that("atomic names are kept", { x <- set_names(1:5) result <- lapply(vec_chop(x), names) expect_equal(result, as.list(names(x))) }) test_that("base R classed objects are split into a list", { fctr <- factor(c("a", "b")) expect <- lapply(vec_seq_along(fctr), vec_slice, x = fctr) expect_equal(vec_chop(fctr), expect) date <- new_date(c(0, 1)) expect <- lapply(vec_seq_along(date), vec_slice, x = date) expect_equal(vec_chop(date), expect) }) test_that("base R classed object names are kept", { fctr <- set_names(factor(c("a", "b"))) result <- lapply(vec_chop(fctr), names) expect_equal(result, as.list(names(fctr))) }) test_that("list elements are split", { x <- list(1, 2) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("data frames are split rowwise", { x <- data_frame(x = 1:2, y = c("a", "b")) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("vec_chop() keeps data frame row names", { x <- data_frame(x = 1:2, y = c("a", "b")) rownames(x) <- c("r1", "r2") result <- lapply(vec_chop(x), rownames) expect_equal(result, list("r1", "r2")) }) test_that("matrices / arrays are split rowwise", { x <- array(1:12, c(2, 2, 2)) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("matrix / array row names are kept", { x <- array(1:12, c(2, 2, 2), dimnames = list(c("r1", "r2"), c("c1", "c2"))) result <- lapply(vec_chop(x), rownames) expect_equal(result, list("r1", "r2")) }) test_that("matrices / arrays without row names have other dimension names kept", { x <- array(1:12, c(2, 2, 2), dimnames = list(NULL, c("c1", "c2"))) result <- lapply(vec_chop(x), colnames) expect_equal(result, list(c("c1", "c2"), c("c1", "c2"))) }) test_that("vec_chop() doesn't restore when attributes have already been restored", { local_methods( `[.vctrs_foobar` = function(x, i, ...) structure("dispatched", foo = "bar"), vec_restore.vctrs_foobar = function(...) structure("dispatched-and-restored", foo = "bar") ) result <- vec_chop(foobar(NA))[[1]] expect_equal(result, structure("dispatched", foo = "bar")) }) test_that("vec_chop() does not restore when attributes have not been restored by `[`", { local_methods( `[.vctrs_foobar` = function(x, i, ...) "dispatched", vec_restore.vctrs_foobar = function(...) "dispatched-and-restored" ) result <- vec_chop(foobar(NA))[[1]] expect_equal(result, "dispatched") }) test_that("vec_chop() falls back to `[` for shaped objects with no proxy", { x <- foobar(1) dim(x) <- c(1, 1) result <- vec_chop(x)[[1]] expect_equal(result, x) }) test_that("`indices` are validated", { expect_error(vec_chop(1, 1), "`indices` must be a list of index values, or `NULL`") expect_error(vec_chop(1, list(1.5)), class = "vctrs_error_subscript_type") expect_error(vec_chop(1, list(2)), class = "vctrs_error_subscript_oob") }) test_that("size 0 `indices` list is allowed", { expect_equal(vec_chop(1, list()), list()) }) test_that("individual index values of size 0 are allowed", { expect_equal(vec_chop(1, list(integer())), list(numeric())) df <- data.frame(a = 1, b = "1") expect_equal(vec_chop(df, list(integer())), list(vec_ptype(df))) }) test_that("data frame row names are kept when `indices` are used", { x <- data_frame(x = 1:2, y = c("a", "b")) rownames(x) <- c("r1", "r2") result <- lapply(vec_chop(x, list(1, 1:2)), rownames) expect_equal(result, list("r1", c("r1", "r2"))) }) test_that("vec_chop(, indices =) can be equivalent to the default", { x <- 1:5 indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices), vec_chop(x)) }) test_that("vec_chop(, indices =) can be equivalent to the default", { x <- data.frame(x = 1:5) indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices), vec_chop(x)) }) test_that("vec_chop(, indices =) can be equivalent to the default", { x <- array(1:8, c(2, 2, 2)) indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices), vec_chop(x)) }) test_that("`indices` cannot use names", { x <- set_names(1:3, c("a", "b", "c")) expect_error(vec_chop(x, list("a", c("b", "c"))), class = "vctrs_error_subscript_type") x <- array(1:4, c(2, 2), dimnames = list(c("r1", "r2"))) expect_error(vec_chop(x, list("r1")), class = "vctrs_error_subscript_type") x <- data.frame(x = 1, row.names = "r1") expect_error(vec_chop(x, list("r1")), class = "vctrs_error_subscript_type") }) test_that("fallback method with `indices` works", { fctr <- factor(c("a", "b")) indices <- list(1, c(1, 2)) expect_equal( vec_chop(fctr, indices), map(indices, vec_slice, x = fctr) ) }) test_that("vec_chop() falls back to `[` for shaped objects with no proxy when indices are provided", { x <- foobar(1) dim(x) <- c(1, 1) result <- vec_chop(x, list(1))[[1]] expect_equal(result, x) }) test_that("vec_chop() with data frame proxies always uses the proxy's length info", { local_methods( vec_proxy.vctrs_proxy = function(x) { x <- proxy_deref(x) new_data_frame(list(x = x$x, y = x$y)) }, vec_restore.vctrs_proxy = function(x, to, ...) { new_proxy(list(x = x$x, y = x$y)) } ) x <- new_proxy(list(x = 1:2, y = 3:4)) result <- vec_chop(x) result1 <- result[[1]] result2 <- result[[2]] expect1 <- new_proxy(list(x = 1L, y = 3L)) expect2 <- new_proxy(list(x = 2L, y = 4L)) expect_identical(proxy_deref(result1), proxy_deref(expect1)) expect_identical(proxy_deref(result2), proxy_deref(expect2)) }) # vec_chop + compact_seq -------------------------------------------------- # `start` is 0-based test_that("can chop base vectors with compact seqs", { start <- 1L size <- 2L expect_identical(vec_chop_seq(lgl(1, 0, 1), start, size), list(lgl(0, 1))) expect_identical(vec_chop_seq(int(1, 2, 3), start, size), list(int(2, 3))) expect_identical(vec_chop_seq(dbl(1, 2, 3), start, size), list(dbl(2, 3))) expect_identical(vec_chop_seq(cpl(1, 2, 3), start, size), list(cpl(2, 3))) expect_identical(vec_chop_seq(chr("1", "2", "3"), start, size), list(chr("2", "3"))) expect_identical(vec_chop_seq(bytes(1, 2, 3), start, size), list(bytes(2, 3))) expect_identical(vec_chop_seq(list(1, 2, 3), start, size), list(list(2, 3))) }) test_that("can chop with a decreasing compact seq", { expect_equal(vec_chop_seq(int(1, 2, 3), 1L, 2L, FALSE), list(int(2, 1))) }) test_that("can chop with multiple compact seqs", { start <- c(1L, 0L) size <- c(1L, 3L) expect_equal( vec_chop_seq(int(1, 2, 3), start, size), list(int(2), int(1, 2, 3)) ) }) test_that("can chop S3 objects using the fallback method with compact seqs", { x <- factor(c("a", "b", "c", "d")) expect_equal(vec_chop_seq(x, 0L, 0L), list(vec_slice(x, integer()))) expect_equal(vec_chop_seq(x, 0L, 1L), list(vec_slice(x, 1L))) expect_equal(vec_chop_seq(x, 2L, 2L), list(vec_slice(x, 3:4))) }) # vec_unchop -------------------------------------------------------------- test_that("`x` must be a list", { expect_error(vec_unchop(1, list(1)), "`x` must be a list") expect_error(vec_unchop(data.frame(x=1), list(1)), "`x` must be a list") }) test_that("`indices` must be a list", { expect_error(vec_unchop(list(1), 1), "`indices` must be a list of integers, or `NULL`") expect_error(vec_unchop(list(1), data.frame(x=1)), "`indices` must be a list of integers, or `NULL`") }) test_that("`indices` must be a list of integers", { expect_error(vec_unchop(list(1), list("x")), class = "vctrs_error_subscript_type") expect_error(vec_unchop(list(1), list(TRUE)), class = "vctrs_error_subscript_type") expect_error(vec_unchop(list(1), list(quote(name))), class = "vctrs_error_subscript_type") }) test_that("`x` and `indices` must be lists of the same size", { expect_error(vec_unchop(list(1, 2), list(1)), "`x` and `indices` must be lists of the same size") }) test_that("can unchop empty vectors", { expect_null(vec_unchop(list())) expect_null(vec_unchop(list(), list())) expect_identical(vec_unchop(list(), list(), ptype = numeric()), numeric()) }) test_that("can unchop a list of NULL", { expect_null(vec_unchop(list(NULL), list(integer()))) expect_identical(vec_unchop(list(NULL), list(integer()), ptype = numeric()), numeric()) expect_identical(vec_unchop(list(NULL, NULL), list(integer(), integer()), ptype = numeric()), numeric()) }) test_that("NULLs are ignored when unchopped with other vectors", { expect_identical(vec_unchop(list("a", NULL, "b")), c("a", "b")) expect_identical(vec_unchop(list("a", NULL, "b"), list(2, integer(), 1)), c("b", "a")) }) test_that("can unchop atomic vectors", { expect_identical(vec_unchop(list(1, 2), list(2, 1)), c(2, 1)) expect_identical(vec_unchop(list("a", "b"), list(2, 1)), c("b", "a")) }) test_that("can unchop lists", { x <- list(list("a", "b"), list("c")) indices <- list(c(2, 3), 1) expect_identical(vec_unchop(x, indices), list("c", "a", "b")) }) test_that("can unchop data frames", { df1 <- data_frame(x = 1:2) df2 <- data_frame(x = 3:4) x <- list(df1, df2) indices <- list(c(3, 1), c(2, 4)) expect <- vec_slice(vec_c(df1, df2), vec_order(vec_c(!!! indices))) expect_identical(vec_unchop(x, indices), expect) }) test_that("can unchop factors", { fctr1 <- factor("z") fctr2 <- factor(c("x", "y")) x <- list(fctr1, fctr2) indices <- list(2, c(3, 1)) # levels are in the order they are seen! expect <- factor(c("y", "z", "x"), levels = c("z", "x", "y")) expect_identical(vec_unchop(x, indices), expect) }) test_that("can fallback when unchopping matrices", { mat1 <- matrix(1:4, nrow = 2, ncol = 2) mat2 <- matrix(5:10, nrow = 3, ncol = 2) x <- list(mat1, mat2) indices <- list(c(4, 1), c(2, 3, 5)) expect <- vec_slice(vec_c(mat1, mat2), vec_order(vec_c(!!! indices))) expect_identical(vec_unchop(x, indices), expect) expect_identical(vec_unchop(x), vec_c(mat1, mat2)) }) test_that("can fallback when unchopping arrays of >2D", { arr1 <- array(1:8, c(2, 2, 2)) arr2 <- matrix(9:10, c(1, 2)) x <- list(arr1, arr2) indices <- list(c(3, 1), 2) expect <- vec_slice(vec_c(arr1, arr2), vec_order(vec_c(!!! indices))) expect_identical(vec_unchop(x, indices), expect) expect_identical(vec_unchop(x), vec_c(arr1, arr2)) }) test_that("can unchop with all size 0 elements and get the right ptype", { x <- list(integer(), integer()) indices <- list(integer(), integer()) expect_identical(vec_unchop(x, indices), integer()) }) test_that("can unchop with some size 0 elements", { x <- list(integer(), 1:2, integer()) indices <- list(integer(), 2:1, integer()) expect_identical(vec_unchop(x, indices), 2:1) }) test_that("NULL is a valid index", { expect_equal(vec_unchop(list(1, 2), list(NULL, 1)), 2) expect_error(vec_unchop(list(1, 2), list(NULL, 2)), class = "vctrs_error_subscript_oob") }) test_that("unchopping recycles elements of x to the size of the index", { x <- list(1, 2) indices <- list(c(3, 4, 5), c(2, 1)) expect_identical(vec_unchop(x, indices), c(2, 2, 1, 1, 1)) }) test_that("unchopping takes the common type", { x <- list(1, "a") indices <- list(1, 2) expect_error(vec_unchop(x, indices), class = "vctrs_error_incompatible_type") x <- list(1, 2L) expect_type(vec_unchop(x, indices), "double") }) test_that("can specify a ptype to override common type", { x <- list(1, 2L) indices <- list(1, 2) expect_identical(vec_unchop(x, indices, ptype = integer()), c(1L, 2L)) }) test_that("leaving `indices = NULL` unchops sequentially", { x <- list(1:2, 3:5, 6L) expect_identical(vec_unchop(x), 1:6) }) test_that("outer names are kept", { x <- list(x = 1, y = 2) expect_named(vec_unchop(x), c("x", "y")) expect_named(vec_unchop(x, list(2, 1)), c("y", "x")) }) test_that("outer names are recycled in the right order", { x <- list(x = 1, y = 2) expect_error(vec_unchop(x, list(c(1, 2), 3)), "Can't merge") expect_named(vec_unchop(x, list(c(1, 3), 2), name_spec = "{outer}_{inner}"), c("x_1", "y", "x_2")) expect_named(vec_unchop(x, list(c(3, 1), 2), name_spec = "{outer}_{inner}"), c("x_2", "y", "x_1")) }) test_that("outer names can be merged with inner names", { x <- list(x = c(a = 1), y = c(b = 2)) expect_error(vec_unchop(x), "Can't merge") expect_named(vec_unchop(x, name_spec = "{outer}_{inner}"), c("x_a", "y_b")) expect_named(vec_unchop(x, list(2, 1), name_spec = "{outer}_{inner}"), c("y_b", "x_a")) }) test_that("not all inputs have to be named", { x <- list(c(a = 1), 2, c(c = 3)) indices <- list(2, 1, 3) expect_named(vec_unchop(x, indices), c("", "a", "c")) }) test_that("vec_unchop() keeps data frame row names", { df1 <- data.frame(x = 1:2, row.names = c("r1", "r2")) df2 <- data.frame(x = 3:4, row.names = c("r3", "r4")) x <- list(df1, df2) indices <- list(c(3, 1), c(2, 4)) result <- vec_unchop(x, indices) expect <- c("r2", "r3", "r1", "r4") expect_identical(vec_names(result), expect) }) test_that("individual data frame columns retain vector names", { df1 <- data_frame(x = c(a = 1, b = 2)) df2 <- data_frame(x = c(c = 3)) x <- list(df1, df2) indices <- list(c(1, 2), 3) result <- vec_unchop(x, indices = indices) expect_named(result$x, c("a", "b", "c")) # Names should be identical to equivalent `vec_c()` call expect_identical(vec_unchop(x, indices = indices), vec_c(!!!x)) }) test_that("df-col row names are repaired silently", { df1 <- data_frame(x = new_data_frame(list(a = 1), row.names = "inner")) df2 <- data_frame(x = new_data_frame(list(a = 2), row.names = "inner")) x <- list(df1, df2) indices <- list(1, 2) expect_silent({ result <- vec_unchop(x, indices = indices) }) expect_identical(vec_names(result$x), c("inner...1", "inner...2")) }) test_that("monitoring - can technically assign to the same location twice", { x <- list(1:2, 3L) indices <- list(1:2, 1L) expect_identical(vec_unchop(x, indices), c(3L, 2L, NA)) }) test_that("index values are validated", { x <- list(1, 2) indices1 <- list(4, 1) indices2 <- list(c(1, 4), 2) indices3 <- list(c(1, 3, 4), 2) expect_error(vec_unchop(x, indices1), class = "vctrs_error_subscript_oob") expect_error(vec_unchop(x, indices2), class = "vctrs_error_subscript_oob") expect_identical(vec_unchop(x, indices3), c(1, 2, 1, 1)) }) test_that("name repair is respected and happens after ordering according to `indices`", { x <- list(c(a = 1), c(a = 2)) indices <- list(2, 1) expect_named(vec_unchop(x, indices), c("a", "a")) expect_named(vec_unchop(x, indices, name_repair = "unique"), c("a...1", "a...2")) }) test_that("vec_unchop() errors on unsupported location values", { verify_errors({ expect_error( vec_unchop(list(1, 2), list(c(1, 2), 0)), class = "vctrs_error_subscript_type" ) expect_error( vec_unchop(list(1), list(-1)), class = "vctrs_error_subscript_type" ) }) }) test_that("missing values propagate", { expect_identical( vec_unchop(list(1, 2), list(c(NA_integer_, NA_integer_), c(NA_integer_, 3))), c(NA, NA, 2, NA) ) }) test_that("vec_unchop() works with simple homogeneous foreign S3 classes", { expect_identical(vec_unchop(list(foobar(1), foobar(2))), vec_c(foobar(c(1, 2)))) }) test_that("vec_unchop() fails with complex foreign S3 classes", { verify_errors({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") expect_error(vec_unchop(list(x, y)), class = "vctrs_error_incompatible_type") }) }) test_that("vec_unchop() fails with complex foreign S4 classes", { verify_errors({ joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") expect_error(vec_unchop(list(joe, jane)), class = "vctrs_error_incompatible_type") }) }) test_that("vec_unchop() falls back to c() if S3 method is available", { # Check off-by-one error expect_error( vec_unchop(list(foobar(1), "", foobar(2)), list(1, 2, 3)), class = "vctrs_error_incompatible_type" ) # Fallback when the class implements `c()` method_foobar <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) foobar(res) } local_methods( c.vctrs_foobar = method_foobar ) expect_identical( vec_unchop(list(foobar(1), foobar(2))), foobar(c(1, 2)) ) expect_identical( vec_unchop(list(foobar(1), foobar(2)), list(1, 2)), foobar(c(1, 2)) ) expect_identical( vec_unchop(list(foobar(1), foobar(2)), list(2, 1)), foobar(c(2, 1)) ) expect_identical( vec_unchop(list(NULL, foobar(1), NULL, foobar(2))), foobar(c(1, 2)) ) # OOB error is respected expect_error( vec_unchop(list(foobar(1), foobar(2)), list(1, 3)), class = "vctrs_error_subscript_oob" ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( vec_unchop(list(foobar(c(1, 2)), foobar(3)), list(c(1, 3), 1)), foobar(c(3, NA, 2)) ) expect_identical( vec_unchop(list(foobar(c(1, 2)), foobar(3)), list(c(2, NA), NA)), foobar(c(NA, 1, NA)) ) # Names are kept expect_identical( vec_unchop(list(foobar(c(x = 1, y = 2)), foobar(c(x = 1))), list(c(2, 1), 3)), foobar(c(y = 2, x = 1, x = 1)) ) # Recycles to the size of index expect_identical( vec_unchop(list(foobar(1), foobar(2)), list(c(1, 3), 2)), foobar(c(1, 2, 1)) ) expect_identical( vec_unchop(list(foobar(1), foobar(2)), list(c(1, 2), integer())), foobar(c(1, 1)) ) expect_error( vec_unchop(list(foobar(1), foobar(2)), list(c(1, 3), integer())), class = "vctrs_error_subscript_oob" ) method_vctrs_c_fallback <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) structure(res, class = "vctrs_c_fallback") } # Registered fallback s3_register("base::c", "vctrs_c_fallback", method_vctrs_c_fallback) expect_identical( vec_unchop( list( structure(1, class = "vctrs_c_fallback"), structure(2, class = "vctrs_c_fallback") ), list(2, 1) ), structure(c(2, 1), class = "vctrs_c_fallback") ) # Don't fallback for S3 lists which are treated as scalars by default expect_error( vec_unchop(list(foobar(list(1)), foobar(list(2)))), class = "vctrs_error_scalar_type" ) }) test_that("vec_unchop() falls back for S4 classes with a registered c() method", { joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") verify_errors({ expect_error( vec_unchop(list(joe, 1, jane), list(c(1, 2), 3, 4)), class = "vctrs_error_incompatible_type" ) }) local_c_counts() expect_identical( vec_unchop(list(joe, jane), list(c(1, 3), 2)), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) expect_identical( vec_unchop(list(NULL, joe, jane), list(integer(), c(1, 3), 2)), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( vec_unchop(list(joe, jane), list(c(1, 3), 1)), .Counts(c(3L, NA, 2L), name = "Dispatched") ) expect_identical( vec_unchop(list(joe, jane), list(c(2, NA), NA)), .Counts(c(NA, 1L, NA), name = "Dispatched") ) }) test_that("vec_unchop() fallback doesn't support `name_spec` or `ptype`", { verify_errors({ foo <- structure(foobar(1), foo = "foo") bar <- structure(foobar(2), bar = "bar") expect_error( with_c_foobar(vec_unchop(list(foo, bar), name_spec = "{outer}_{inner}")), "name specification" ) # Used to be an error about `ptype` expect_error( with_c_foobar(vec_unchop(list(foobar(1)), ptype = "")), class = "vctrs_error_incompatible_type" ) }) }) test_that("vec_unchop() supports numeric S3 indices", { local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) UseMethod("vec_ptype2.vctrs_foobar"), vec_ptype2.vctrs_foobar.integer = function(x, y, ...) foobar(integer()), vec_cast.integer.vctrs_foobar = function(x, to, ...) vec_data(x) ) expect_identical(vec_unchop(list(1), list(foobar(1L))), 1) }) test_that("vec_unchop() does not support non-numeric S3 indices", { verify_errors({ expect_error( vec_unchop(list(1), list(factor("x"))), class = "vctrs_error_subscript_type" ) expect_error( vec_unchop(list(1), list(foobar(1L))), class = "vctrs_error_subscript_type" ) }) }) test_that("can ignore names in `vec_unchop()` by providing a `zap()` name-spec (#232)", { expect_error(vec_unchop(list(a = c(b = 1:2)))) expect_identical( vec_unchop(list(a = c(b = 1:2), b = 3L), name_spec = zap()), 1:3 ) expect_identical( vec_unchop( list(a = c(foo = 1:2), b = c(bar = 3L)), indices = list(2:1, 3), name_spec = zap() ), c(2L, 1L, 3L) ) verify_errors({ expect_error( vec_unchop(list(a = c(b = letters), b = 3L), name_spec = zap()), class = "vctrs_error_incompatible_type" ) expect_error( vec_unchop( list(a = c(foo = 1:2), b = c(bar = "")), indices = list(2:1, 3), name_spec = zap() ), class = "vctrs_error_incompatible_type" ) }) }) test_that("vec_unchop() falls back to c() methods (#1120)", { expect_error( vec_unchop(list(foobar(1), foobar(2, class = "foo"))), class = "vctrs_error_incompatible_type" ) local_methods( c.vctrs_foobar = function(...) { out <- NextMethod() paste0(rep_along(out, "dispatched"), seq_along(out)) } ) # Homogeneous subclasses xs <- list(foobar(1), foobar(2, class = "foo")) expect_identical( vec_unchop(xs), c("dispatched1", "dispatched2") ) expect_identical( vec_unchop(xs, indices = list(2, 1)), c("dispatched2", "dispatched1") ) # Different subclasses xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_identical( vec_unchop(xs), c("dispatched1", "dispatched2", "dispatched3") ) expect_identical( vec_unchop(xs, list(c(2, 1), 3)), c("dispatched2", "dispatched1", "dispatched3") ) }) test_that("vec_unchop() fails if foreign classes are not homogeneous and there is no c() method", { xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_error( vec_unchop(xs), class = "vctrs_error_incompatible_type" ) expect_error( vec_unchop(xs, list(c(2, 1), 3)), class = "vctrs_error_incompatible_type" ) }) test_that("vec_unchop() has informative error messages", { verify_output(test_path("error", "test-unchop.txt"), { "# vec_unchop() errors on unsupported location values" vec_unchop(list(1, 2), list(c(1, 2), 0)) vec_unchop(list(1), list(-1)) "# vec_unchop() fails with complex foreign S3 classes" x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") vec_unchop(list(x, y)) "# vec_unchop() fails with complex foreign S4 classes" joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") vec_unchop(list(joe, jane)) "# vec_unchop() falls back for S4 classes with a registered c() method" joe1 <- .Counts(c(1L, 2L), name = "Joe") joe2 <- .Counts(3L, name = "Joe") vec_unchop(list(joe1, 1, joe2), list(c(1, 2), 3, 4)) "# vec_unchop() fallback doesn't support `name_spec` or `ptype`" foo <- structure(foobar(1), foo = "foo") bar <- structure(foobar(2), bar = "bar") with_c_foobar(vec_unchop(list(foo, bar), name_spec = "{outer}_{inner}")) with_c_foobar(vec_unchop(list(foobar(1)), ptype = "")) "# vec_unchop() does not support non-numeric S3 indices" vec_unchop(list(1), list(factor("x"))) vec_unchop(list(1), list(foobar(1L))) "# can ignore names in `vec_unchop()` by providing a `zap()` name-spec (#232)" vec_unchop(list(a = c(b = letters), b = 3L), name_spec = zap()) vec_unchop( list(a = c(foo = 1:2), b = c(bar = "")), indices = list(2:1, 3), name_spec = zap() ) }) }) vctrs/tests/testthat/test-shape-print.txt0000644000176200001440000000002613473164157020376 0ustar liggesusersshape: [1] shape: [1] vctrs/tests/testthat/test-partial-factor.R0000644000176200001440000000334113723213047020430 0ustar liggesusers test_that("has ok print method", { partial <- partial_factor("x") expect_known_output( print(partial), test_path("test-partial-factor-print-partial.txt") ) both <- vec_ptype2(partial, factor("y")) expect_known_output( print(both), test_path("test-partial-factor-print-both.txt") ) empty <- partial_factor() expect_known_output( print(empty), test_path("test-partial-factor-print-empty.txt") ) learned <- vec_ptype2(empty, factor("y")) expect_known_output( print(learned), test_path("test-partial-factor-print-learned.txt") ) expect_equal(vec_ptype_abbr(partial), "prtl_fctr") }) test_that("order of levels comes from data", { pfctr <- partial_factor(c("y", "x")) fctr <- factor(levels = c("x", "y")) expect_equal(levels(vec_ptype_common(pfctr, fctr)), c("x", "y")) expect_equal(levels(vec_ptype_common(fctr, pfctr)), c("x", "y")) }) test_that("partial levels added to end if not in data", { pfctr <- partial_factor("y") fctr <- factor(levels = "x") expect_equal(levels(vec_ptype_common(pfctr, fctr)), c("x", "y")) expect_equal(levels(vec_ptype_common(fctr, pfctr)), c("x", "y")) }) test_that("can assert partial factors based on level presence", { pfctr <- partial_factor("y") expect_true(vec_is(factor("y"), pfctr)) expect_false(vec_is(factor("x"), pfctr)) expect_true(vec_is(factor(c("x", "y")), pfctr)) pfctr <- partial_factor(c("y", "z")) expect_false(vec_is(factor("y"), pfctr)) expect_true(vec_is(factor(c("y", "z")), pfctr)) expect_true(vec_is(factor(c("x", "y", "z")), pfctr)) }) # TODO - why is this not working? # test_that("can assert partial factor based on factor type", { # pfctr <- partial_factor() # expect_false(vec_is(1, pfctr)) # }) vctrs/tests/testthat/test-list_of-str-empty.txt0000644000176200001440000000026614042546250021542 0ustar liggesuserslist [1:0] list() @ ptype: num(0) List of 1 $ :List of 2 ..$ : list [1:2] .. ..$ : num 1 .. ..$ : num [1:2] 2 3 .. ..@ ptype: num(0) ..$ y: int [1:2] 2 1 vctrs/tests/testthat/test-type-factor.R0000644000176200001440000001577314027045462017773 0ustar liggesusers test_that("ptype methods are descriptive", { f <- factor() o <- ordered(character()) expect_equal(vec_ptype_abbr(f), "fct") expect_equal(vec_ptype_abbr(o), "ord") expect_equal(vec_ptype_full(f), "factor<>") expect_equal(vec_ptype_full(o), "ordered<>") }) test_that("slicing factors uses a proxy to not go through `[.factor`", { x <- factor("x") y <- ordered("y") local_methods( `[.factor` = function(x, ...) abort("should not be called") ) expect_identical(vec_slice(x, 1), x) expect_identical(vec_slice(y, 1), y) }) test_that("`vec_c()` throws the right error with subclassed factors (#1015)", { skip("Factors now have a `c()` method") a <- subclass(factor("a")) b <- subclass(factor("b")) # We used to return a subclass expect_identical(vec_c(a, a), subclass(factor(c("a", "a")))) # We used to fail if attributes were incoPatible expect_error(vec_c(a, b), class = "vctrs_error_incompatible_type") }) # Coercion ---------------------------------------------------------------- test_that("factor/character coercions are symmetric and unchanging", { types <- list( ordered(character()), factor(), character() ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) expect_known_output( mat, print = TRUE, test_path("test-type-factor.txt"), width = 200 ) }) test_that("factors level are unioned", { # This is technically incorrect, but because of R's existing behaviour # anything else will cause substantial friction. fa <- factor(levels = "a") fb <- factor(levels = "b") expect_equal(vec_ptype_common(fa, fb), factor(levels = c("a", "b"))) expect_equal(vec_ptype_common(fb, fa), factor(levels = c("b", "a"))) }) test_that("coercion errors with factors", { f <- factor(levels = "a") expect_error(vec_ptype_common(f, logical()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype_common(logical(), f), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2(, NA) is symmetric (#687)", { fct <- new_factor() expect_identical( vec_ptype2(fct, NA), vec_ptype2(NA, fct) ) fct <- new_ordered() expect_identical( vec_ptype2(fct, NA), vec_ptype2(NA, fct) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { i64 <- bit64::integer64() expect_identical( vec_ptype2(i64, NA), vec_ptype2(NA, i64) ) }) test_that("vec_ptype2() errors with malformed factors", { x <- structure(1, class = "factor") y <- factor("x") expect_error(vec_ptype2(x, y, x_arg = "z"), "`z` is a corrupt factor") expect_error(vec_ptype2(y, x, y_arg = "z"), "`z` is a corrupt factor") }) test_that("vec_ptype2() errors with malformed ordered factors", { x <- structure(1, class = c("ordered", "factor")) y <- as.ordered(factor("x")) expect_error(vec_ptype2(x, y, x_arg = "z"), "`z` is a corrupt ordered factor") expect_error(vec_ptype2(y, x, y_arg = "z"), "`z` is a corrupt ordered factor") }) test_that("ordered factors with different levels are not compatible", { expect_error( vec_ptype2(ordered("a"), ordered("b")), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(ordered("a"), ordered(c("a", "b"))), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), ordered("b")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), ordered(c("a", "b"))), class = "vctrs_error_incompatible_type" ) }) test_that("factors and ordered factors are not compatible", { expect_error( vec_ptype2(factor("a"), ordered("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(ordered("a"), factor("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(factor("a"), ordered("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), factor("a")), class = "vctrs_error_incompatible_type" ) }) # Casting ----------------------------------------------------------------- test_that("safe casts work as expected", { fa <- factor("a") fab <- factor(c("a", "b")) expect_equal(vec_cast(NULL, fa), NULL) expect_equal(vec_cast(fa, fa), fa) expect_equal(vec_cast(fa, fab), fab[1]) expect_equal(vec_cast("a", fab), fab[1]) expect_equal(vec_cast("a", factor()), fa) expect_equal(vec_cast(fa, factor()), fa) # This used to be allowed expect_error(vec_cast(list("a", "b"), fab), class = "vctrs_error_incompatible_type") }) test_that("can cast to character", { expect_equal(vec_cast(factor("X"), character()), "X") expect_equal(vec_cast(ordered("X"), character()), "X") }) test_that("can cast NA and unspecified to factor", { expect_identical(vec_cast(NA, new_factor()), factor(NA)) expect_identical(vec_cast(NA, new_ordered()), ordered(NA)) expect_identical(vec_cast(unspecified(2), new_factor()), factor(c(NA, NA))) expect_identical(vec_cast(unspecified(2), new_ordered()), ordered(c(NA, NA))) }) test_that("lossy factor casts fail", { fa <- factor("a") fb <- factor("b") expect_lossy(vec_cast(fa, fb), factor(NA, levels = "b"), x = fa, to = fb) expect_lossy(vec_cast("a", fb), factor(NA, levels = "b"), x = chr(), to = fb) }) test_that("invalid casts generate error", { expect_error(vec_cast(double(), factor("a")), class = "vctrs_error_incompatible_type") expect_error(vec_cast(factor("a"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(ordered("a"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(logical(), factor("a")), class = "vctrs_error_incompatible_type") expect_error(vec_cast(logical(), ordered("a")), class = "vctrs_error_incompatible_type") }) test_that("orderedness of factor is preserved", { ord <- ordered(c("a", "b"), levels = c("b", "a")) expect_equal(vec_cast("a", ord), ordered("a", levels = c("b", "a"))) }) test_that("NA are not considered lossy in factor cast (#109)", { f <- factor(c("itsy", "bitsy", NA, "spider", "spider")) expect_warning(vec_cast(f, f[1]), NA) }) test_that("Casting to a factor with explicit NA levels retains them", { f <- factor(c("x", NA), exclude = NULL) expect_identical(vec_cast(f, f), f) expect_identical(vec_cast(f, factor()), f) }) test_that("characters can be cast to ordered", { expect_identical(vec_cast("a", ordered("a")), ordered("a")) expect_error(vec_cast(c("a", "b"), ordered("a")), class = "vctrs_error_cast_lossy") }) # Proxy / restore --------------------------------------------------------- test_that("subclassed factors / ordered factors can be restored (#1015)", { x <- subclass(factor("a")) proxy <- vec_proxy(x) expect_identical(vec_restore(proxy, x), x) y <- subclass(ordered("a")) proxy <- vec_proxy(y) expect_identical(vec_restore(proxy, y), y) }) # Arithmetic and factor --------------------------------------------------- test_that("factors don't support math or arthimetic", { f <- factor("x") expect_error(vec_math("sum", f), class = "vctrs_error_unsupported") expect_error(vec_arith("+", f, f), class = "vctrs_error_unsupported") }) vctrs/tests/testthat/test-subscript-loc.R0000644000176200001440000005522313712211241020307 0ustar liggesusers test_that("vec_as_location2() returns a position", { expect_identical(vec_as_location2(2, 2L), 2L) expect_identical(vec_as_location2("foo", 2L, c("bar", "foo")), 2L) expect_identical(vec_as_location2("0", 4L, as.character(-1:2)), 2L) }) test_that("vec_as_location2() requires integer or character inputs", { verify_errors({ expect_error(vec_as_location2(TRUE, 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(mtcars, 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(env(), 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(foobar(), 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(2.5, 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(Inf, 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(-Inf, 10L), class = "vctrs_error_subscript_type") "Idem with custom `arg`" expect_error(vec_as_location2(foobar(), 10L, arg = "foo"), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(2.5, 3L, arg = "foo"), class = "vctrs_error_subscript_type") expect_error(with_tibble_rows(vec_as_location2(TRUE)), class = "vctrs_error_subscript_type") }) }) test_that("vec_as_location() requires integer, character, or logical inputs", { verify_errors({ expect_error(vec_as_location(mtcars, 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(env(), 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(foobar(), 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(2.5, 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(list(), 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(function() NULL, 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(Sys.Date(), 3L), class = "vctrs_error_subscript_type") "Idem with custom `arg`" expect_error(vec_as_location(env(), 10L, arg = "foo"), class = "vctrs_error_subscript_type") expect_error(vec_as_location(foobar(), 10L, arg = "foo"), class = "vctrs_error_subscript_type") expect_error(vec_as_location(2.5, 3L, arg = "foo"), class = "vctrs_error_subscript_type") }) }) test_that("vec_as_location2() and vec_as_location() require integer- or character-like OO inputs", { expect_identical(vec_as_location2(factor("foo"), 2L, c("bar", "foo")), 2L) expect_identical(vec_as_location(factor("foo"), 2L, c("bar", "foo")), 2L) expect_error(vec_as_location2(foobar(1L), 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(foobar(1L), 10L), class = "vctrs_error_subscript_type") # Define subtype of logical and integer local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) UseMethod("vec_ptype2.vctrs_foobar"), vec_ptype2.vctrs_foobar.logical = function(x, y, ...) logical(), vec_ptype2.vctrs_foobar.integer = function(x, y, ...) integer(), vec_ptype2.logical.vctrs_foobar = function(x, y, ...) logical(), vec_ptype2.integer.vctrs_foobar = function(x, y, ...) integer(), vec_cast.vctrs_foobar = function(x, to, ...) UseMethod("vec_cast.vctrs_foobar"), vec_cast.vctrs_foobar.integer = function(x, to, ...) foobar(x), vec_cast.integer.vctrs_foobar = function(x, to, ...) vec_cast(unclass(x), int()), vec_cast.logical.vctrs_foobar = function(x, to, ...) vec_cast(unclass(x), lgl()) ) expect_error(vec_as_location2(foobar(TRUE), 10L), class = "vctrs_error_subscript_type") expect_identical(vec_as_location(foobar(TRUE), 10L), 1:10) expect_identical(vec_as_location(foobar(FALSE), 10L), int()) }) test_that("vec_as_location() and variants check for OOB elements", { verify_errors({ "Numeric indexing" expect_error(vec_as_location(10L, 2L), class = "vctrs_error_subscript_oob") expect_error(vec_as_location(-10L, 2L), class = "vctrs_error_subscript_oob") expect_error(vec_as_location2(10L, 2L), class = "vctrs_error_subscript_oob") "Character indexing" expect_error(vec_as_location("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob") expect_error(vec_as_location2("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob") }) expect_error(num_as_location(10L, 2L), class = "vctrs_error_subscript_oob") expect_error(num_as_location2(10L, 2L), class = "vctrs_error_subscript_oob") }) test_that("vec_as_location() doesn't require `n` for character indexing", { expect_identical(vec_as_location("b", NULL, names = letters), 2L) }) test_that("vec_as_location2() requires length 1 inputs", { verify_errors({ expect_error(vec_as_location2(1:2, 2L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(c("foo", "bar"), 2L, c("foo", "bar")), class = "vctrs_error_subscript_type") "Idem with custom `arg`" expect_error(vec_as_location2(1:2, 2L, arg = "foo"), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(mtcars, 10L, arg = "foo"), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(1:2, 2L, arg = "foo"), class = "vctrs_error_subscript_type") }) }) test_that("vec_as_location2() requires positive integers", { verify_errors({ expect_error(vec_as_location2(0, 2L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(-1, 2L), class = "vctrs_error_subscript_type") "Idem with custom `arg`" expect_error(vec_as_location2(0, 2L, arg = "foo"), class = "vctrs_error_subscript_type") }) }) test_that("vec_as_location2() fails with NA", { verify_errors({ expect_error(vec_as_location2(na_int, 2L), class = "vctrs_error_subscript_type") expect_error(vec_as_location2(na_chr, 1L, names = "foo"), class = "vctrs_error_subscript_type") "Idem with custom `arg`" expect_error(vec_as_location2(na_int, 2L, arg = "foo"), class = "vctrs_error_subscript_type") }) }) test_that("vec_as_location2() doesn't allow lossy casts", { expect_error(vec_as_location2(2^31, 3L), class = "vctrs_error_subscript_type") # Lossy casts generate missing values, which are disallowed expect_error(allow_lossy_cast(vec_as_location2(2^31, 3L)), class = "vctrs_error_subscript_type") }) test_that("all subscript errors inherit from `vctrs_error_subscript`", { expect_error(vec_as_location(100, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location("foo", 2L, names = c("bar", "baz")), class = "vctrs_error_subscript") expect_error(vec_as_location(foobar(1L), 2L), class = "vctrs_error_subscript") expect_error(vec_as_location(1.5, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location2(TRUE, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location2(1.5, 2L), class = "vctrs_error_subscript") }) test_that("all OOB errors inherit from `vctrs_error_subscript_oob`", { expect_error(vec_as_location(100, 2L), class = "vctrs_error_subscript_oob") expect_error(vec_as_location("foo", 2L, names = c("bar", "baz")), class = "vctrs_error_subscript_oob") }) test_that("vec_as_location() preserves names if possible", { expect_identical(vec_as_location(c(a = 1L, b = 3L), 3L), c(a = 1L, b = 3L)) expect_identical(vec_as_location(c(a = 1, b = 3), 3L), c(a = 1L, b = 3L)) expect_identical(vec_as_location(c(a = "z", b = "y"), 26L, letters), c(a = 26L, b = 25L)) expect_identical(vec_as_location(c(foo = TRUE, bar = FALSE, baz = TRUE), 3L), c(foo = 1L, baz = 3L)) expect_identical(vec_as_location(c(foo = TRUE), 3L), c(foo = 1L, foo = 2L, foo = 3L)) expect_identical(vec_as_location(c(foo = NA), 3L), c(foo = na_int, foo = na_int, foo = na_int)) # Names of negative selections are dropped expect_identical(vec_as_location(c(a = -1L, b = -3L), 3L), 2L) }) test_that("vec_as_location2() optionally allows missing values", { expect_identical(vec_as_location2(NA, 2L, missing = "propagate"), na_int) expect_error(vec_as_location2(NA, 2L, missing = "error"), class = "vctrs_error_subscript_type") }) test_that("num_as_location2() optionally allows missing and negative locations", { expect_identical(num_as_location2(na_dbl, 2L, missing = "propagate"), na_int) expect_identical(num_as_location2(-1, 2L, negative = "ignore"), -1L) expect_error(num_as_location2(-3, 2L, negative = "ignore"), class = "vctrs_error_subscript_oob") expect_error(num_as_location2(0, 2L, negative = "ignore"), class = "vctrs_error_subscript_type") }) test_that("num_as_location() optionally allows negative indices", { expect_identical(num_as_location(dbl(1, -1), 2L, negative = "ignore"), int(1L, -1L)) expect_error(num_as_location(c(1, -10), 2L, negative = "ignore"), class = "vctrs_error_subscript_oob") }) test_that("num_as_location() optionally forbids negative indices", { verify_errors({ expect_error(num_as_location(dbl(1, -1), 2L, negative = "error"), class = "vctrs_error_subscript_type") }) expect_error(num_as_location(c(1, -10), 2L, negative = "error"), class = "vctrs_error_subscript_type") }) test_that("num_as_location() optionally ignores zero indices", { expect_identical(num_as_location(c(1, 0), 2L, zero = "ignore"), c(1L, 0L)) }) test_that("num_as_location() optionally forbids zero indices", { verify_errors({ expect_error( num_as_location(0L, 1L, zero = "error"), class = "vctrs_error_subscript_type" ) expect_error( num_as_location(c(0, 0, 0, 0, 0, 0), 1, zero = "error"), class = "vctrs_error_subscript_type" ) }) }) test_that("vec_as_location() handles NULL", { expect_identical( vec_as_location(NULL, 10), vec_as_location(int(), 10), ) }) test_that("vec_as_location() checks for mix of negative and missing locations", { verify_errors({ expect_error( vec_as_location(-c(1L, NA), 30), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location(-c(1L, rep(NA, 10)), 30), class = "vctrs_error_subscript_type" ) }) }) test_that("vec_as_location() checks for mix of negative and positive locations", { verify_errors({ expect_error( vec_as_location(c(-1L, 1L), 30), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location(c(-1L, rep(1L, 10)), 30), class = "vctrs_error_subscript_type" ) }) }) test_that("logical subscripts must match size of indexed vector", { verify_errors({ expect_error( vec_as_location(c(TRUE, FALSE), 3), class = "vctrs_error_subscript_size" ) }) }) test_that("character subscripts require named vectors", { verify_errors({ expect_error(vec_as_location(letters[1], 3), "unnamed vector") }) }) test_that("can optionally extend beyond the end", { expect_error(num_as_location(1:5, 3), class = "vctrs_error_subscript_oob") expect_identical(num_as_location(1:5, 3, oob = "extend"), 1:5) expect_identical(num_as_location(4:5, 3, oob = "extend"), 4:5) verify_errors({ expect_error( num_as_location(3, 1, oob = "extend"), class = "vctrs_error_subscript_oob" ) expect_error( num_as_location(c(1, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob" ) expect_error( num_as_location(c(1:5, 7), 3, oob = "extend"), class = "vctrs_error_subscript_oob" ) expect_error( class = "vctrs_error_subscript_oob", num_as_location(c(1:5, 7, 1), 3, oob = "extend") ) expect_error( class = "vctrs_error_subscript_oob", num_as_location(c(1:5, 7, 1, 10), 3, oob = "extend") ) }) }) test_that("can extend beyond the end consecutively but non-monotonically (#1166)", { expect_identical(num_as_location(6:4, 3, oob = "extend"), 6:4) expect_identical(num_as_location(c(1:5, 7, 6), 3, oob = "extend"), c(1:5, 7L, 6L)) expect_identical(num_as_location(c(1, NA, 4, 3), 2, oob = "extend"), c(1L, NA, 4L, 3L)) }) test_that("missing values are supported in error formatters", { verify_errors({ expect_error( num_as_location(c(1, NA, 2, 3), 1), class = "vctrs_error_subscript_oob" ) expect_error( num_as_location(c(1, NA, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob" ) }) }) test_that("can disallow missing values", { verify_errors({ expect_error( vec_as_location(c(1, NA), 2, missing = "error"), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location(c(1, NA, 2, NA), 2, missing = "error", arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(vec_as_location(c(1, NA, 2, NA), 2, missing = "error")), class = "vctrs_error_subscript_type" ) }) }) test_that("can customise subscript type errors", { verify_errors({ "With custom `arg`" expect_error( num_as_location(-1, 2, negative = "error", arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( num_as_location2(-1, 2, negative = "error", arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location2(0, 2, arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location2(na_dbl, 2, arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location2(c(1, 2), 2, arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location(c(TRUE, FALSE), 3, arg = "foo"), class = "vctrs_error_subscript_size" ) expect_error( vec_as_location(c(-1, NA), 3, arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location(c(-1, 1), 3, arg = "foo"), class = "vctrs_error_subscript_type" ) expect_error( num_as_location(c(1, 4), 2, oob = "extend", arg = "foo"), class = "vctrs_error_subscript_oob" ) expect_error( num_as_location(0, 1, zero = "error", arg = "foo"), class = "vctrs_error_subscript_type" ) "With tibble columns" expect_error( with_tibble_cols(num_as_location(-1, 2, negative = "error")), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(num_as_location2(-1, 2, negative = "error")), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(vec_as_location2(0, 2)), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(vec_as_location2(na_dbl, 2)), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(vec_as_location2(c(1, 2), 2)), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(vec_as_location(c(TRUE, FALSE), 3)), class = "vctrs_error_subscript_size" ) expect_error( with_tibble_cols(vec_as_location(c(-1, NA), 3)), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(vec_as_location(c(-1, 1), 3)), class = "vctrs_error_subscript_type" ) expect_error( with_tibble_cols(num_as_location(c(1, 4), 2, oob = "extend")), class = "vctrs_error_subscript_oob" ) expect_error( with_tibble_cols(num_as_location(0, 1, zero = "error")), class = "vctrs_error_subscript_type" ) }) }) test_that("can customise OOB errors", { verify_errors({ expect_error( vec_slice(set_names(letters), "foo"), class = "vctrs_error_subscript_oob" ) "With custom `arg`" expect_error( vec_as_location(30, length(letters), arg = "foo"), class = "vctrs_error_subscript_oob" ) expect_error( vec_as_location("foo", NULL, letters, arg = "foo"), class = "vctrs_error_subscript_oob" ) "With tibble columns" expect_error( with_tibble_cols(vec_slice(set_names(letters), "foo")), class = "vctrs_error_subscript_oob" ) expect_error( with_tibble_cols(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob" ) expect_error( with_tibble_cols(vec_slice(set_names(letters), -30)), class = "vctrs_error_subscript_oob" ) "With tibble rows" expect_error( with_tibble_rows(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob" ) expect_error( with_tibble_rows(vec_slice(set_names(letters), 1:30)), class = "vctrs_error_subscript_oob" ) expect_error( with_tibble_rows(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob" ) }) }) test_that("num_as_location() requires non-S3 inputs", { expect_error(num_as_location(factor("foo"), 2), "must be a numeric vector") }) test_that("vec_as_location() checks dimensionality", { verify_errors({ expect_error(vec_as_location(matrix(TRUE, nrow = 1), 3L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(array(TRUE, dim = c(1, 1, 1)), 3L), class = "vctrs_error_subscript_type") expect_error(with_tibble_rows(vec_as_location(matrix(TRUE, nrow = 1), 3L)), class = "vctrs_error_subscript_type") }) }) test_that("vec_as_location() works with vectors of dimensionality 1", { expect_identical(vec_as_location(array(TRUE, dim = 1), 3L), 1:3) }) test_that("conversion to locations has informative error messages", { verify_output(test_path("error", "test-subscript-loc.txt"), { "# vec_as_location() UI" vec_as_location(1, 1L, missing = "bogus") "# vec_as_location() checks for mix of negative and missing locations" vec_as_location(-c(1L, NA), 30) vec_as_location(-c(1L, rep(NA, 10)), 30) "# vec_as_location() checks for mix of negative and positive locations" vec_as_location(c(-1L, 1L), 30) vec_as_location(c(-1L, rep(1L, 10)), 30) "# num_as_location() UI" num_as_location(1, 1L, missing = "bogus") num_as_location(1, 1L, negative = "bogus") num_as_location(1, 1L, oob = "bogus") num_as_location(1, 1L, zero = "bogus") "# num_as_location() optionally forbids negative indices" num_as_location(dbl(1, -1), 2L, negative = "error") "# num_as_location() optionally forbids zero indices" num_as_location(0L, 1L, zero = "error") num_as_location(c(0, 0, 0, 0, 0, 0), 1, zero = "error") "# logical subscripts must match size of indexed vector" vec_as_location(c(TRUE, FALSE), 3) "# character subscripts require named vectors" vec_as_location(letters[1], 3) "# vec_as_location() requires integer, character, or logical inputs" vec_as_location(mtcars, 10L) vec_as_location(env(), 10L) vec_as_location(foobar(), 10L) vec_as_location(2.5, 3L) vec_as_location(list(), 10L) vec_as_location(function() NULL, 10L) vec_as_location(Sys.Date(), 3L) "Idem with custom `arg`" vec_as_location(env(), 10L, arg = "foo") vec_as_location(foobar(), 10L, arg = "foo") vec_as_location(2.5, 3L, arg = "foo") "# vec_as_location2() UI" vec_as_location2(1, 1L, missing = "bogus") "# vec_as_location2() requires integer or character inputs" vec_as_location2(TRUE, 10L) vec_as_location2(mtcars, 10L) vec_as_location2(env(), 10L) vec_as_location2(foobar(), 10L) vec_as_location2(2.5, 3L) vec_as_location2(Inf, 10L) vec_as_location2(-Inf, 10L) "Idem with custom `arg`" vec_as_location2(foobar(), 10L, arg = "foo") vec_as_location2(2.5, 3L, arg = "foo") with_tibble_rows(vec_as_location2(TRUE)) "# vec_as_location2() requires length 1 inputs" vec_as_location2(1:2, 2L) vec_as_location2(mtcars, 10L) "Idem with custom `arg`" vec_as_location2(1:2, 2L, arg = "foo") vec_as_location2(mtcars, 10L, arg = "foo") vec_as_location2(1:2, 2L, arg = "foo") "# vec_as_location2() requires positive integers" vec_as_location2(0, 2L) vec_as_location2(-1, 2L) "Idem with custom `arg`" vec_as_location2(0, 2L, arg = "foo") "vec_as_location2() fails with NA" vec_as_location2(na_int, 2L) vec_as_location2(na_chr, 1L, names = "foo") "Idem with custom `arg`" vec_as_location2(na_int, 2L, arg = "foo") "# vec_as_location() and variants check for OOB elements" "Numeric subscripts" vec_as_location(10L, 2L) vec_as_location(-10L, 2L) vec_as_location2(10L, 2L) "Character subscripts" vec_as_location("foo", 1L, names = "bar") vec_as_location2("foo", 1L, names = "bar") "# num_as_location() UI" num_as_location(1, 1L, missing = "bogus") num_as_location(1, 1L, negative = "bogus") "# can optionally extend beyond the end" num_as_location(c(1, 3), 1, oob = "extend") num_as_location(c(1:5, 7), 3, oob = "extend") num_as_location(c(1:5, 7, 1), 3, oob = "extend") num_as_location(c(1:5, 7, 1, 10), 3, oob = "extend") "# missing values are supported in error formatters" num_as_location(c(1, NA, 2, 3), 1) num_as_location(c(1, NA, 3), 1, oob = "extend") "# can customise subscript type errors" "With custom `arg`" num_as_location(-1, 2, negative = "error", arg = "foo") num_as_location2(-1, 2, negative = "error", arg = "foo") vec_as_location2(0, 2, arg = "foo") vec_as_location2(na_dbl, 2, arg = "foo") vec_as_location2(c(1, 2), 2, arg = "foo") vec_as_location(c(TRUE, FALSE), 3, arg = "foo") vec_as_location(c(-1, NA), 3, arg = "foo") vec_as_location(c(-1, 1), 3, arg = "foo") num_as_location(c(1, 4), 2, oob = "extend", arg = "foo") num_as_location(0, 1, zero = "error", arg = "foo") "With tibble columns" with_tibble_cols(num_as_location(-1, 2, negative = "error")) with_tibble_cols(num_as_location2(-1, 2, negative = "error")) with_tibble_cols(vec_as_location2(0, 2)) with_tibble_cols(vec_as_location2(na_dbl, 2)) with_tibble_cols(vec_as_location2(c(1, 2), 2)) with_tibble_cols(vec_as_location(c(TRUE, FALSE), 3)) with_tibble_cols(vec_as_location(c(-1, NA), 3)) with_tibble_cols(vec_as_location(c(-1, 1), 3)) with_tibble_cols(num_as_location(c(1, 4), 2, oob = "extend")) with_tibble_cols(num_as_location(0, 1, zero = "error")) "# can customise OOB errors" vec_slice(set_names(letters), "foo") "With custom `arg`" vec_as_location(30, length(letters), arg = "foo") vec_as_location("foo", NULL, letters, arg = "foo") "With tibble columns" with_tibble_cols(vec_slice(set_names(letters), "foo")) with_tibble_cols(vec_slice(set_names(letters), 30)) with_tibble_cols(vec_slice(set_names(letters), -30)) "With tibble rows" with_tibble_rows(vec_slice(set_names(letters), c("foo", "bar"))) with_tibble_rows(vec_slice(set_names(letters), 1:30)) with_tibble_rows(vec_slice(set_names(letters), -(1:30))) "# can disallow missing values" vec_as_location(c(1, NA), 2, missing = "error") vec_as_location(c(1, NA, 2, NA), 2, missing = "error", arg = "foo") with_tibble_cols(vec_as_location(c(1, NA, 2, NA), 2, missing = "error")) "# vec_as_location() checks dimensionality" vec_as_location(matrix(TRUE, nrow = 1), 3L) vec_as_location(array(TRUE, dim = c(1, 1, 1)), 3L) with_tibble_rows(vec_as_location(matrix(TRUE, nrow = 1), 3L)) }) }) vctrs/tests/testthat/test-rcrd-format.txt0000644000176200001440000000216414042546250020360 0ustar liggesusers [1] (1,1) (1,2) (1,3) (1,4) (1,5) (1,6) (1,7) (1,8) (1,9) [10] (1,10) (1,11) (1,12) (1,13) (1,14) (1,15) (1,16) (1,17) (1,18) [19] (1,19) (1,20) (1,21) (1,22) (1,23) (1,24) (1,25) (1,26) (1,27) [28] (1,28) (1,29) (1,30) (1,31) (1,32) (1,33) (1,34) (1,35) (1,36) [37] (1,37) (1,38) (1,39) (1,40) (1,41) (1,42) (1,43) (1,44) (1,45) [46] (1,46) (1,47) (1,48) (1,49) (1,50) (1,51) (1,52) (1,53) (1,54) [55] (1,55) (1,56) (1,57) (1,58) (1,59) (1,60) (1,61) (1,62) (1,63) [64] (1,64) (1,65) (1,66) (1,67) (1,68) (1,69) (1,70) (1,71) (1,72) [73] (1,73) (1,74) (1,75) (1,76) (1,77) (1,78) (1,79) (1,80) (1,81) [82] (1,82) (1,83) (1,84) (1,85) (1,86) (1,87) (1,88) (1,89) (1,90) [91] (1,91) (1,92) (1,93) (1,94) (1,95) (1,96) (1,97) (1,98) (1,99) [100] (1,100) vctrs_tp [1:10] (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7), (1,8), (1... List of 1 $ :List of 1 ..$ :List of 2 .. ..$ : vctrs_tp [1:100] (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7)... .. ..$ : int [1:100] 1 2 3 4 5 6 7 8 9 10 ... vctrs/tests/testthat/test-type-bare.R0000644000176200001440000003453713723213047017423 0ustar liggesusers test_that("ptype2 base methods are not inherited", { ptypes <- vec_remove(base_empty_types, c("null", "dataframe")) for (ptype in ptypes) { x <- new_vctr(ptype, class = "foobar", inherit_base_type = TRUE) expect_s3_class(vec_ptype2(x, x), "foobar") expect_error(vec_ptype2(x, ptype), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(ptype, x), class = "vctrs_error_incompatible_type") } }) test_that("cast base methods are not inherited", { ptypes <- vec_remove(base_empty_types, c("null", "dataframe")) for (ptype in ptypes) { x <- new_vctr(ptype, class = "foobar", inherit_base_type = TRUE) expect_s3_class(vec_cast(ptype, x), "foobar") expect_error(vec_cast(x, ptype), class = "vctrs_error_incompatible_type") } }) test_that("default cast allows objects with the same type", { x <- structure(1, class = c("foo", "double")) expect_equal(vec_cast(x, x), x) }) # vec_shaped_ptype ------------------------------------------------------- test_that("array dimensions are preserved", { mat1 <- matrix(lgl(), nrow = 1, ncol = 1) mat2 <- matrix(lgl(), nrow = 2, ncol = 2) mat3 <- matrix(lgl(), nrow = 2, ncol = 3) expect_equal(vec_ptype2(mat1, mat1), matrix(lgl(), nrow = 0, ncol = 1)) expect_equal(vec_ptype2(mat1, mat2), matrix(lgl(), nrow = 0, ncol = 2)) expect_error(vec_ptype2(mat2, mat3), class = "vctrs_error_incompatible_type") }) test_that("vec_shaped_ptype()", { int <- function(...) array(NA_integer_, c(...)) expect_identical(vec_shaped_ptype(integer(), int(5), int(10)), new_shape(integer())) expect_identical(vec_shaped_ptype(integer(), int(5, 1), int(10, 1)), new_shape(integer(), 1)) expect_identical(vec_shaped_ptype(integer(), int(5, 1, 2), int(10, 1, 2)), new_shape(integer(), 1:2)) }) # vec_cast() -------------------------------------------------------------- # NULL test_that("NULL is idempotent", { expect_equal(vec_cast(NULL, NULL), NULL) expect_equal(vec_cast(list(1:3), NULL), list(1:3)) }) # Logical test_that("safe casts work as expected", { exp <- lgl(TRUE, FALSE) expect_equal(vec_cast(NULL, logical()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), logical()), exp) expect_equal(vec_cast(int(1L, 0L), logical()), exp) expect_equal(vec_cast(dbl(1, 0), logical()), exp) # These used to be allowed expect_error(vec_cast(chr("T", "F"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("TRUE", "FALSE"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("true", "false"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(1, 0), logical()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- lgl(NA) to <- lgl() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(lgl(NA)) to_mat <- matrix(lgl()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("lossy casts generate warning", { expect_lossy(vec_cast(int(2L, 1L), lgl()), lgl(TRUE, TRUE), x = int(), to = lgl()) expect_lossy(vec_cast(dbl(2, 1), lgl()), lgl(TRUE, TRUE), x = dbl(), to = lgl()) # These used to be allowed expect_error(vec_cast(chr("x", "TRUE"), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("t", "T"), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("f", "F"), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(c(TRUE, FALSE), TRUE), lgl()), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), logical()), class = "vctrs_error_incompatible_type") }) test_that("dimensionality matches output" ,{ x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) x <- matrix(1, nrow = 2, ncol = 2) expect_error(vec_cast(x, logical()), class = "vctrs_error_incompatible_type") }) test_that("the common type of two `NA` vectors is unspecified", { expect_equal(vec_ptype2(NA, NA), unspecified()) }) # Integer test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, integer()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), integer()), int(1L, 0L)) expect_equal(vec_cast(int(1L, 2L), integer()), int(1L, 2L)) expect_equal(vec_cast(dbl(1, 2), integer()), int(1L, 2L)) # These used to be allowed expect_error(vec_cast(chr("1", "2"), integer()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(1L, 2L), integer()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- int(NA) to <- int() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(int(NA)) to_mat <- matrix(int()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("lossy casts generate error", { expect_lossy(vec_cast(c(2.5, 2), int()), int(2, 2), x = dbl(), to = int()) expect_lossy(vec_cast(c(.Machine$integer.max + 1, 1), int()), int(NA, 1L), x = dbl(), to = int()) expect_lossy(vec_cast(c(-.Machine$integer.max - 1, 1), int()), int(NA, 1L), x = dbl(), to = int()) # These used to be allowed expect_error(vec_cast(c("2.5", "2"), int()), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), integer()), class = "vctrs_error_incompatible_type") }) # Double test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, double()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), double()), dbl(1, 0)) expect_equal(vec_cast(int(1, 0), double()), dbl(1, 0)) expect_equal(vec_cast(dbl(1, 1.5), double()), dbl(1, 1.5)) # These used to be allowed expect_error(vec_cast(chr("1", "1.5"), double()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(1, 1.5), double()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- dbl(NA) to <- dbl() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(dbl(NA)) to_mat <- matrix(dbl()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), double()), class = "vctrs_error_incompatible_type") }) # Complex test_that("safe casts to complex works", { expect_identical(vec_cast(NULL, cpl()), NULL) expect_identical(vec_cast(lgl(TRUE, FALSE), cpl()), cpl(1, 0)) expect_identical(vec_cast(int(1, 0), cpl()), cpl(1, 0)) expect_identical(vec_cast(dbl(1, 1.5), cpl()), cpl(1, 1.5)) # This used to be allowed expect_error(vec_cast(list(1, 1.5), cpl()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- cpl(NA) to <- cpl() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # This used to be allowed expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(cpl(NA)) to_mat <- matrix(cpl()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # This used to be allowed expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("complex is coercible to numeric types", { expect_identical(vec_ptype2(cpl(), NULL), cpl()) expect_identical(vec_ptype2(NULL, cpl()), cpl()) expect_identical(vec_ptype2(cpl(), int()), cpl()) expect_identical(vec_ptype2(int(), cpl()), cpl()) expect_identical(vec_ptype2(cpl(), dbl()), cpl()) expect_identical(vec_ptype2(dbl(), cpl()), cpl()) expect_identical(vec_ptype2(cpl(), cpl()), cpl()) expect_identical(vec_c(0, 1i), cpl(0i, 1i)) }) test_that("complex is not coercible to logical", { expect_error(vec_ptype2(cpl(), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(lgl(), cpl()), class = "vctrs_error_incompatible_type") }) # Character test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, character()), NULL) expect_equal(vec_cast(NA, character()), NA_character_) # These used to be allowed expect_error(vec_cast(lgl(TRUE, FALSE), character()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list("x", "y"), character()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- chr(NA) to <- chr() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(chr(NA), to), exp) # These used to be allowed expect_error(vec_cast(int(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(dbl(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(chr(NA)) to_mat <- matrix(chr()) expect_equal(vec_cast(mat(chr(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(lgl(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(int(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(dbl(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("difftime does not get special treatment", { dt1 <- as.difftime(600, units = "secs") # This used to be allowed expect_error(vec_cast(dt1, character()), class = "vctrs_error_incompatible_type") }) # Raw test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, raw()), NULL) # This used to be allowed expect_error(vec_cast(list(raw(1)), raw()), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(raw(1), double()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(double(1), raw()), class = "vctrs_error_incompatible_type") }) test_that("can sort raw", { x <- as.raw(c(3, 1, 2, 4)) expect_identical(vec_order(x), int(2, 3, 1, 4)) expect_identical(x[vec_order(x)], as.raw(1:4)) }) test_that("raw has informative type summaries", { expect_equal(vec_ptype_abbr(raw()), "raw") expect_equal(vec_ptype_full(raw()), "raw") }) test_that("can provide common type with raw", { local_methods( vec_ptype2.raw.vctrs_foobar = function(...) "dispatched-left", vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.raw = function(...) "dispatched-right" ) expect_identical(vec_ptype2(raw(), foobar("")), "dispatched-left") expect_identical(vec_ptype2(foobar(""), raw()), "dispatched-right") }) # Lists test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, list()), NULL) expect_equal(vec_cast(NA, list()), list(NULL)) expect_equal(vec_cast(list(1L, 2L), list()), list(1L, 2L)) # This used to be allowed expect_error(vec_cast(1:2, list()), class = "vctrs_error_incompatible_type") }) test_that("dimensionality matches to" ,{ x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1L, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) }) test_that("data frames are cast to list row wise (#639)", { x <- data.frame(x = 1:2, row.names = c("a", "b")) expect <- list(data.frame(x = 1L), data.frame(x = 2L)) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) test_that("data frames can be cast to shaped lists", { to <- array(list(), dim = c(0, 2, 1)) x <- data.frame(x = 1:2, y = 3:4) expect <- list(vec_slice(x, 1), vec_slice(x, 2)) expect <- array(expect, dim = c(2, 2, 1)) # This used to be allowed expect_error(vec_cast(x, to), class = "vctrs_error_incompatible_type") }) test_that("Casting atomic `NA` values to list results in a `NULL`", { x <- c(NA, 1) expect <- list(NULL, 1) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) test_that("Casting data frame `NA` rows to list results in a `NULL`", { x <- data.frame(x = c(NA, NA, 1), y = c(NA, 1, 2)) expect <- list(NULL, vec_slice(x, 2), vec_slice(x, 3)) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) # Unspecified test_that("unspecified can be cast to bare methods", { for (x in vectors[-4]) { expect_identical(vec_cast(unspecified(3), x), vec_init(x, 3)) } }) vctrs/tests/testthat/test-print-str.R0000644000176200001440000000043613723213047017464 0ustar liggesusers test_that("show attributes", { x <- structure(1:100, x = "a string", y = 1:20, z = data.frame(x = 1:3)) expect_known_output( obj_str(x), test_path("test-print-str-attr.txt") ) expect_known_output( obj_str(mtcars), test_path("test-print-str-mtcars.txt") ) }) vctrs/tests/testthat/helper-order.R0000644000176200001440000000123513753021253017132 0ustar liggesusers# Keep in sync with macros in `order.c` GROUP_DATA_SIZE_DEFAULT <- 100000L ORDER_INSERTION_BOUNDARY <- 128L INT_ORDER_COUNTING_RANGE_BOUNDARY <- 100000L # Force radix method for character comparisons base_order <- function(x, na.last = TRUE, decreasing = FALSE) { if (is.data.frame(x)) { x <- unname(x) } else { x <- list(x) } args <- list(na.last = na.last, decreasing = decreasing) # `method` didn't exist on R < 3.3. # It would sometimes use radix sorting automatically. if (getRversion() < "3.3.0") { method <- list() } else { method <- list(method = "radix") } args <- c(x, args, method) rlang::exec("order", !!!args) } vctrs/tests/testthat/test-lifecycle-deprecated.R0000644000176200001440000000074213650511520021553 0ustar liggesusers test_that("vec_as_index() still works", { local_lifecycle_silence() expect_identical(vec_as_index(-2, 10), vec_as_location(-2, 10)) expect_identical( vec_as_index("cyl", length(mtcars), names(mtcars)), vec_as_location("cyl", length(mtcars), names(mtcars)) ) }) test_that("vec_repeat() still works", { local_lifecycle_silence() expect_identical(vec_repeat(1:2, times = 2), vec_rep(1:2, 2)) expect_identical(vec_repeat(1:2, each = 2), vec_rep_each(1:2, 2)) }) vctrs/tests/testthat/helper-output.R0000644000176200001440000000042413650511520017353 0ustar liggesusers try_cat <- function(expr) { cat(paste0("> ", as_label(substitute(expr)), ":\n\n")) out <- tryCatch(expr, error = function(err) { cat(paste0("Error: ", err$message, "\n")) }) cat("\n\n\n") out } # Dummy until implemented in testthat verify_errors <- identity vctrs/tests/testthat/helper-memory.R0000644000176200001440000000022713653027721017334 0ustar liggesusersmaybe_shared_col <- function(x, i) { .Call(vctrs_maybe_shared_col, x, i) } new_df_unshared_col <- function() { .Call(vctrs_new_df_unshared_col) } vctrs/tests/testthat/helper-expectations.R0000644000176200001440000000632413712211241020522 0ustar liggesusersexpect_dim <- function(x, shape) { dim <- dim2(x) expect_equal(dim, !!shape) } expect_lossy <- function(expr, result, x = NULL, to = NULL) { expr <- enquo(expr) expect_error(eval_tidy(expr), class = "vctrs_error_cast_lossy") out <- allow_lossy_cast(eval_tidy(expr), x_ptype = x, to_ptype = to) expect_identical(!!out, !!result) } expect_args <- function(x, y, x_arg, y_arg) { err <- catch_cnd(vec_ptype2(x, y, x_arg = x_arg, y_arg = y_arg), classes = "vctrs_error_incompatible_type") expect_true(!is_null(err)) expect_true(grepl(paste0("combine `", x_arg, "`"), err$message, fixed = TRUE)) expect_true(grepl(paste0("and `", y_arg, "`"), err$message, fixed = TRUE)) expect_identical(list(err$x_arg, err$y_arg), list(x_arg, y_arg)) } # Work around deparsing of !! on old versions of R as_label2 <- function(expr) { expr <- duplicate(expr, shallow = FALSE) label <- as_label(fix_bang(expr)) label <- gsub("+++", "!!!", label, fixed = TRUE) label <- gsub("++", "!!", label, fixed = TRUE) label } fix_bang <- function(expr) { curr <- expr while (!is_null(curr)) { car <- node_car(curr) if (is_triple_bang(car)) { replace_triple_bang(car) } else if (is_double_bang(car)) { replace_double_bang(car) } else if (is_call(car)) { node_poke_car(curr, fix_bang(car)) } curr <- node_cdr(curr) } expr } is_double_bang <- function(expr) { is_call(expr, "!") && is_call(node_cadr(expr), "!") } is_triple_bang <- function(expr) { is_double_bang(expr) && is_call(node_cadr(node_cadr(expr)), "!") } replace_double_bang <- function(expr) { node_poke_car(expr, sym("+")) node_poke_car(node_cadr(expr), sym("+")) } replace_triple_bang <- function(expr) { replace_double_bang(expr) node_poke_car(node_cadr(node_cadr(expr)), sym("+")) } try2 <- function(expr) { cat(paste0("\n", as_label2(substitute(expr)), ":\n\n")) cat(conditionMessage(catch_cnd(expr, classes = "error")), "\n\n") } expect_known_output_nobang <- function(object, file, ...) { expect_known_output(object, file, ...) } expect_syntactic <- function(name, exp_syn_name) { expect_identical( syn_name <- make_syntactic(name), exp_syn_name ) expect_identical(syn_name, make.names(syn_name)) } expect_error_cnd <- function(object, class, message = NULL, ..., .fixed = TRUE) { cnd <- expect_error(object, regexp = message, class = class[[1]], fixed = .fixed) expect_true(inherits_all(cnd, class)) exp_fields <- list2(...) expect_true(is_empty(setdiff(!!names(exp_fields), names(cnd)))) expect_equal(cnd[names(exp_fields)], exp_fields) } expect_incompatible_df <- function(x, fallback) { if (is_true(peek_option("vctrs:::warn_on_fallback"))) { x <- expect_df_fallback_warning(x) } expect_identical(x, fallback) } # Never warns so we don't get repeat warnings expect_incompatible_df_cast <- function(x, fallback) { expect_identical(x, fallback) } expect_df_fallback_warning <- function(expr) { suppressWarnings(expect_warning({{ expr }}, "falling back to (|)")) } expect_df_fallback_warning_maybe <- function(expr) { if (is_true(peek_option("vctrs:::warn_on_fallback"))) { expect_warning({{ expr }}, "falling back to (|)") } else { expr } } vctrs/tests/testthat/helper-size.R0000644000176200001440000000024413723213047016771 0ustar liggesusers expect_size <- function(object, n) { expect_identical(vec_size(object), vec_cast(n, int())) } zap_dimnames <- function(x) { attr(x, "dimnames") <- NULL x } vctrs/tests/testthat/test-type-vec-size-common-error.txt0000644000176200001440000000027414042546242023262 0ustar liggesusers vec_size_common(1:2, 1, 1:4): Can't recycle `..1` (size 2) to match `..3` (size 4). vec_size_common(foo = 1:2, 1, bar = 1:4): Can't recycle `foo` (size 2) to match `bar` (size 4). vctrs/tests/testthat/test-assert.R0000644000176200001440000002257113723213047017027 0ustar liggesusers test_that("basic assert is idempotent", { x <- new_vctr(1:4) expect_true(vec_is(x)) expect_identical(vec_assert(x), x) expect_identical(vec_assert(x), new_vctr(1:4)) expect_false(withVisible(vec_assert(x))$visible) expect_true(vec_is(1:4)) expect_identical(vec_assert(1:4), 1:4) }) test_that("asserting ptype", { x <- new_vctr(1:4) good <- new_vctr(integer()) expect_true(vec_is(x, good)) expect_error(vec_assert(x, good), NA) # Is this the correct error message? bad <- new_vctr(double()) expect_false(vec_is(x, bad)) expect_error(vec_assert(x, bad), class = "vctrs_error_assert_ptype") }) test_that("asserting size", { x <- new_vctr(1:4) expect_true(vec_is(x, size = 4)) expect_error(vec_assert(x, size = 4), NA) expect_false(vec_is(x, size = 5)) expect_error(vec_assert(x, size = 5), class = "vctrs_error_assert_size") }) test_that("vec_assert() labels input", { expect_error( vec_assert(new_vctr(1:4), size = 5), regexp = "`new_vctr\\(1:4\\)` must have", class = "vctrs_error_assert_size" ) expect_error( vec_assert(new_vctr(1:4), size = 5, arg = "foobar"), regexp = "`foobar` must have", class = "vctrs_error_assert_size" ) }) test_that("bare atomic vectors are vectors but not recursive", { expect_true(vec_is_vector(TRUE)) expect_true(vec_is_vector(1L)) expect_true(vec_is_vector(1)) expect_true(vec_is_vector(1i)) expect_true(vec_is_vector("foo")) expect_true(vec_is_vector(as.raw(1))) }) test_that("S3 atomic vectors are vectors", { expect_true(vec_is_vector(foobar(TRUE))) expect_true(vec_is_vector(foobar(1L))) expect_true(vec_is_vector(foobar(1))) expect_true(vec_is_vector(foobar(1i))) expect_true(vec_is_vector(foobar("foo"))) expect_true(vec_is_vector(foobar(as.raw(1)))) }) test_that("bare lists are vectors", { expect_true(vec_is_vector(list())) }) test_that("S3 lists are not vectors by default", { expect_false(vec_is_vector(foobar())) local_foobar_proxy() expect_true(vec_is_vector(foobar())) }) test_that("data frames and records are vectors", { expect_true(vec_is_vector(mtcars)) expect_true(vec_is_vector(new_rcrd(list(x = 1, y = 2)))) }) test_that("non-vector base types are scalars", { expect_identical(vec_typeof(quote(foo)), "scalar") expect_identical(vec_typeof(pairlist("")), "scalar") expect_identical(vec_typeof(function() NULL), "scalar") expect_identical(vec_typeof(env()), "scalar") expect_identical(vec_typeof(quote(foo)), "scalar") expect_identical(vec_typeof(~foo), "scalar") expect_identical(vec_typeof(base::`{`), "scalar") expect_identical(vec_typeof(base::c), "scalar") expect_identical(vec_typeof(expression()), "scalar") expect_false(vec_is_vector(quote(foo))) expect_false(vec_is_vector(pairlist(""))) expect_false(vec_is_vector(function() NULL)) expect_false(vec_is_vector(env())) expect_false(vec_is_vector(~foo)) expect_false(vec_is_vector(base::`{`)) expect_false(vec_is_vector(base::c)) expect_false(vec_is_vector(expression())) expect_false(vec_is(quote(foo))) expect_false(vec_is(pairlist(""))) expect_false(vec_is(function() NULL)) expect_false(vec_is(env())) expect_false(vec_is(~foo)) expect_false(vec_is(base::`{`)) expect_false(vec_is(base::c)) expect_false(vec_is(expression())) expect_error(vec_assert(quote(foo)), class = "vctrs_error_scalar_type") expect_error(vec_assert(pairlist("")), class = "vctrs_error_scalar_type") expect_error(vec_assert(function() NULL), class = "vctrs_error_scalar_type") expect_error(vec_assert(env()), class = "vctrs_error_scalar_type") expect_error(vec_assert(~foo), class = "vctrs_error_scalar_type") expect_error(vec_assert(base::`{`), class = "vctrs_error_scalar_type") expect_error(vec_assert(base::c), class = "vctrs_error_scalar_type") expect_error(vec_assert(expression()), class = "vctrs_error_scalar_type") }) test_that("non-vector types can be proxied", { x <- new_proxy(1:3) expect_identical(vec_typeof(x), "scalar") expect_false(vec_is_vector(x)) expect_false(vec_is(x)) expect_error(vec_assert(x), class = "vctrs_error_scalar_type") local_env_proxy() expect_identical(vec_typeof(x), "integer") expect_true(vec_is_vector(x)) expect_true(vec_is(x)) expect_error(regexp = NA, vec_assert(x)) }) test_that("vec_assert() uses friendly type in error messages", { # Friendly type will be generated in rlang in the future. Upstream # changes should not cause CRAN failures. skip_on_cran() expect_error(vec_assert(function() NULL), class = "vctrs_error_scalar_type") }) test_that("vec_typeof() handles all types", { for (i in seq_along(empty_types)) { expect_identical(vec_typeof(!!empty_types[[i]]), !!names(empty_types)[[i]]) } }) test_that("bare prototypes don't act as partial types", { expect_false(vec_is(foobar(1), dbl())) expect_error(vec_assert(foobar(1), dbl()), class = "vctrs_error_assert_ptype") }) test_that("data frames are always classified as such even when dispatch is off", { expect_identical(vec_typeof_bare(mtcars), "dataframe") }) test_that("assertion is not applied on proxy", { local_methods( vec_proxy.vctrs_foobar = unclass, vec_restore.vctrs_foobar = function(x, ...) foobar(x), `[.vctrs_foobar` = function(x, i) vec_slice(x, i) ) x <- foobar(list()) expect_true(vec_is(x, x)) expect_false(vec_is(x, list())) expect_error(vec_assert(x, list()), class = "vctrs_error_assert_ptype") expect_error(vec_assert(x, x), regexp = NA) }) test_that("attributes of unclassed vectors are asserted", { x <- structure(FALSE, foo = "bar") y <- structure(TRUE, foo = "bar") expect_false(vec_is(x, FALSE)) expect_false(vec_is(FALSE, x)) expect_true(vec_is(y, x)) expect_true(vec_is(x, y)) }) test_that("unspecified is finalised before assertion", { expect_true(vec_is(NA, TRUE)) expect_true(vec_is(data.frame(x = NA), data.frame(x = lgl()))) expect_error(regexp = NA, vec_assert(NA, TRUE)) expect_error(regexp = NA, vec_assert(data.frame(x = NA), data.frame(x = lgl()))) }) test_that("assertion failures are explained", { expect_known_output(file = test_path("test-assert-explanations.txt"), { local_no_stringsAsFactors() local_options(rlang_backtrace_on_error = "none") try_cat(vec_assert(lgl(), chr())) try_cat(vec_assert(lgl(), factor())) try_cat(vec_assert(lgl(), factor(levels = "foo"))) try_cat(vec_assert(factor(levels = "bar"), factor(levels = "foo"))) try_cat(vec_assert(factor(), chr())) try_cat(vec_assert(lgl(), data.frame())) try_cat(vec_assert(lgl(), data.frame(x = 1))) try_cat(vec_assert(lgl(), data.frame(x = 1, y = 2))) try_cat(vec_assert(data.frame(), chr())) try_cat(vec_assert(data.frame(x = 1), chr())) try_cat(vec_assert(data.frame(x = 1), data.frame(x = "foo"))) try_cat(vec_assert(data.frame(x = 1), data.frame(x = "foo", y = 2))) try_cat(vec_assert(data.frame(x = 1, y = 2), chr())) try_cat(vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo"))) try_cat(vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo", y = 2))) }) }) test_that("NULL is not a vector", { expect_false(vec_is_vector(NULL)) expect_false(vec_is(NULL)) }) test_that("names and row names do not influence type identity (#707)", { expect_true(vec_is(c(a = TRUE), logical())) expect_true(vec_is(TRUE, c(a = TRUE))) expect_true(vec_is(structure(mtcars, row.names = 1:32), mtcars)) expect_true(vec_is(mtcars, structure(mtcars, row.names = 1:32))) }) # vec_is_list ----------------------------------------------------------- test_that("bare lists are lists", { expect_true(vec_is_list(list())) }) test_that("list_of are lists", { expect_true(vec_is_list(new_list_of())) }) test_that("Vectors with a non-VECSXP type are not lists", { expect_false(vec_is_list(1)) expect_false(vec_is_list("a")) expect_false(vec_is_list(quote(name))) }) test_that("explicitly classed lists are lists", { x <- structure(list(), class = "list") expect_true(vec_is_list(x)) expect_true(vec_is_list(subclass(x))) }) test_that("explicit inheritance must be in the base class", { x <- structure(1:2, class = c("list", "foobar")) expect_false(vec_is_list(x)) }) test_that("POSIXlt are not considered a list", { expect_false(vec_is_list(as.POSIXlt(new_datetime()))) }) test_that("rcrd types are not lists", { expect_false(vec_is_list(new_rcrd(list(x = 1)))) }) test_that("scalars are not lists", { expect_false(vec_is_list(foobar())) }) test_that("S3 types can't lie about their internal representation", { x <- structure(1:2, class = c("foobar", "list")) expect_false(vec_is_list(x)) }) test_that("data frames of all types are not lists", { expect_false(vec_is_list(data.frame())) expect_false(vec_is_list(subclass(data.frame()))) expect_false(vec_is_list(tibble::tibble())) }) test_that("S3 list with non-list proxy is still a list (#1208)", { x <- structure(list(), class = c("foobar", "list")) local_methods(vec_proxy.foobar = function(x) 1) # This used to be an error (#1003) # expect_error(vec_is_list(x), "`x` inherits") expect_true(vec_is_list(x)) }) test_that("list-rcrds with data frame proxies are considered lists (#1208)", { x <- structure( list(1:2, "x"), special = c("a", "b"), class = c("list_rcrd", "list") ) local_methods( vec_proxy.list_rcrd = function(x) { special <- attr(x, "special") data <- unstructure(x) new_data_frame(list(data = data, special = special)) } ) expect_true(vec_is_list(x)) }) vctrs/tests/testthat/test-slice-assign.R0000644000176200001440000005667314027045462020123 0ustar liggesusers test_that("slice-assign throws error with non-vector inputs", { x <- environment() expect_error(vec_slice(x, 1L) <- 1L, class = "vctrs_error_scalar_type") }) test_that("slice-assign throws error with non-vector `value`", { x <- 1L expect_error(vec_slice(x, 1L) <- NULL, class = "vctrs_error_scalar_type") expect_error(vec_slice(x, 1L) <- environment(), class = "vctrs_error_scalar_type") }) test_that("can slice-assign NULL", { x <- NULL vec_slice(x, 1L) <- 1 expect_identical(x, NULL) }) test_that("can slice-assign base vectors", { x <- rep(FALSE, 3) vec_slice(x, 2) <- TRUE expect_identical(x, lgl(FALSE, TRUE, FALSE)) x <- rep(0L, 3) vec_slice(x, 2) <- 1L expect_identical(x, int(0L, 1L, 0L)) x <- rep(0., 3) vec_slice(x, 2) <- 1 expect_identical(x, dbl(0, 1, 0)) x <- rep(0i, 3) vec_slice(x, 2) <- 1i expect_identical(x, cpl(0i, 1i, 0i)) x <- rep("", 3) vec_slice(x, 2) <- "foo" expect_identical(x, chr("", "foo", "")) x <- as.raw(rep(0, 3)) vec_slice(x, 2) <- as.raw(1) expect_identical(x, as.raw(c(0, 1, 0))) }) test_that("can assign base vectors", { x <- rep(FALSE, 3) expect_identical(vec_assign(x, 2, TRUE), lgl(FALSE, TRUE, FALSE)) expect_identical(x, rep(FALSE, 3)) x <- rep(0L, 3) expect_identical(vec_assign(x, 2, 1L), int(0L, 1L, 0L)) expect_identical(x, rep(0L, 3)) x <- rep(0., 3) expect_identical(vec_assign(x, 2, 1), dbl(0, 1, 0)) expect_identical(x, rep(0., 3)) x <- rep(0i, 3) expect_identical(vec_assign(x, 2, 1i), cpl(0i, 1i, 0i)) expect_identical(x, rep(0i, 3)) x <- rep("", 3) expect_identical(vec_assign(x, 2, "foo"), chr("", "foo", "")) expect_identical(x, rep("", 3)) x <- as.raw(rep(0, 3)) expect_identical(vec_assign(x, 2, as.raw(1)), as.raw(c(0, 1, 0))) expect_identical(x, as.raw(rep(0, 3))) }) test_that("can assign shaped base vectors", { mat <- as.matrix x <- mat(rep(FALSE, 3)) expect_identical(vec_assign(x, 2, TRUE), mat(lgl(FALSE, TRUE, FALSE))) expect_identical(x, mat(rep(FALSE, 3))) x <- mat(rep(0L, 3)) expect_identical(vec_assign(x, 2, 1L), mat(int(0L, 1L, 0L))) expect_identical(x, mat(rep(0L, 3))) x <- mat(rep(0, 3)) expect_identical(vec_assign(x, 2, 1), mat(dbl(0, 1, 0))) expect_identical(x, mat(rep(0, 3))) x <- mat(rep(0i, 3)) expect_identical(vec_assign(x, 2, 1i), mat(cpl(0i, 1i, 0i))) expect_identical(x, mat(rep(0i, 3))) x <- mat(rep("", 3)) expect_identical(vec_assign(x, 2, "foo"), mat(chr("", "foo", ""))) expect_identical(x, mat(rep("", 3))) x <- mat(as.raw(rep(0, 3))) expect_identical(vec_assign(x, 2, as.raw(1)), mat(as.raw(c(0, 1, 0)))) expect_identical(x, mat(as.raw(rep(0, 3)))) }) test_that("can slice-assign lists", { x <- rep(list(NULL), 3) vec_slice(x, 2) <- list(NA) expect_identical(x, list(NULL, NA, NULL)) }) test_that("can slice-assign shaped lists", { mat <- as.matrix x <- mat(rep(list(NULL), 3)) vec_slice(x, 2) <- list(NA) expect_identical(x, mat(list(NULL, NA, NULL))) }) test_that("can assign lists", { x <- rep(list(NULL), 3) expect_identical(vec_assign(x, 2, list(NA)), list(NULL, NA, NULL)) expect_identical(x, rep(list(NULL), 3)) }) test_that("can assign shaped lists", { mat <- as.matrix x <- mat(rep(list(NULL), 3)) expect_identical(vec_assign(x, 2, list(NA)), mat(list(NULL, NA, NULL))) expect_identical(x, mat(rep(list(NULL), 3))) }) test_that("can assign object of any dimensionality", { x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_identical(vec_assign(x1, 1L, 2L), array(rep(c(2, 1), 1), dim = 2)) expect_identical(vec_assign(x2, 1L, 2L), array(rep(c(2, 1), 3), dim = c(2, 3))) expect_identical(vec_assign(x3, 1L, 2L), array(rep(c(2, 1), 12), dim = c(2, 3, 4))) expect_identical(vec_assign(x4, 1L, 2L), array(rep(c(2, 1), 60), dim = c(2, 3, 4, 5))) }) test_that("atomics can't be assigned in lists", { x <- list(NULL) expect_error(vec_slice(x, 1) <- 1, class = "vctrs_error_incompatible_type") expect_error(vec_assign(x, 1, 2), class = "vctrs_error_incompatible_type") expect_error(vec_slice(x, 1) <- "foo", class = "vctrs_error_incompatible_type") expect_error(vec_assign(x, 1, "foo"), class = "vctrs_error_incompatible_type") }) test_that("Unspecified `NA` vector can be assigned into lists", { x <- list(1, 2) vec_slice(x, 1) <- NA expect_identical(x, list(NULL, 2)) }) test_that("monitoring test - unspecified() can be assigned in lists", { x <- list(1, 2) expect_error(vec_slice(x, 1) <- unspecified(1), NA) expect_equal(x, list(NULL, 2)) }) test_that("can assign and slice-assign data frames", { df <- data.frame(x = 1:2) df$y <- data.frame(a = 2:1) orig <- duplicate(df, shallow = FALSE) other <- data.frame(x = 3) other$y <- data.frame(a = 3) exp <- data.frame(x = int(3, 2)) exp$y <- data.frame(a = int(3, 1)) expect_identical(vec_assign(df, 1, other), exp) expect_identical(df, orig) vec_slice(df, 1) <- other expect_identical(df, exp) }) test_that("can slice-assign using logical index", { x <- c(2, 1) vec_slice(x, TRUE) <- 3 expect_equal(x, c(3, 3)) vec_slice(x, c(TRUE, FALSE)) <- 4 expect_equal(x, c(4, 3)) expect_error( vec_assign(x, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size" ) expect_error( vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size" ) }) test_that("slice-assign ignores NA in logical subsetting", { x <- c(NA, 1, 2) expect_equal(`vec_slice<-`(x, x > 0, 1), c(NA, 1, 1)) expect_equal(`vec_slice<-`(x, x > 0, c(NA, 2:1)), c(NA, 2, 1)) }) test_that("slice-assign with arrays ignores NA in logical subsetting", { mat <- as.matrix x <- c(NA, 1, 2) expect_equal(`vec_slice<-`(mat(x), x > 0, 1), mat(c(NA, 1, 1))) expect_equal(`vec_slice<-`(mat(x), x > 0, c(NA, 2:1)), mat(c(NA, 2, 1))) }) test_that("slice-assign ignores NA in integer subsetting", { x <- 0:2 expect_equal(`vec_slice<-`(x, c(NA, 2:3), 1), c(0, 1, 1)) expect_equal(`vec_slice<-`(x, c(NA, 2:3), c(NA, 2:1)), c(0, 2, 1)) }) test_that("slice-assign with arrays ignores NA in integer subsetting", { mat <- as.matrix x <- mat(0:2) expect_equal(`vec_slice<-`(x, c(NA, 2:3), 1), mat(c(0, 1, 1))) expect_equal(`vec_slice<-`(x, c(NA, 2:3), c(NA, 2:1)), mat(c(0, 2, 1))) }) test_that("can't modify subset with missing argument", { x <- 1:3 expect_error(vec_slice(x, ) <- 2L) }) test_that("can modify subset with recycled NA argument", { x <- 1:3 vec_slice(x, NA) <- 2L expect_identical(x, 1:3) }) test_that("can modify subset with recycled TRUE argument", { x <- 1:3 vec_slice(x, TRUE) <- 2L expect_identical(x, rep(2L, 3)) }) test_that("can modify subset with recycled FALSE argument", { x <- 1:3 vec_slice(x, FALSE) <- 2L expect_identical(x, 1:3) }) test_that("can modify subset with NULL argument", { x <- 1:3 vec_slice(x, NULL) <- 2L expect_identical(x, 1:3) }) test_that("can slice-assign with missing indices", { x <- 1:3 y <- 4:6 test <- c(NA, TRUE, FALSE) vec_slice(x, test) <- vec_slice(y, test) expect_identical(x, int(1, 5, 3)) }) test_that("slice-assign checks vectorness", { x <- foobar(list(1)) expect_error(vec_slice(x, 1) <- 10, class = "vctrs_error_scalar_type") }) test_that("a coercible RHS is cast to LHS before assignment (#140)", { x <- 1:2 expect_error(vec_slice(x, 1) <- "1", class = "vctrs_error_incompatible_type") x <- c("foo", "bar") expect_error(vec_slice(x, 1) <- 1, class = "vctrs_error_incompatible_type") x <- 1:2 expect_error(vec_slice(x, 1) <- 3.5, class = "vctrs_error_cast_lossy") allow_lossy_cast(vec_slice(x, 1) <- 3.5) expect_identical(x, int(3, 2)) x <- matrix(1:4, 2) vec_slice(x, 1) <- matrix(c(FALSE, FALSE), 1) expect_identical(x, matrix(int(0, 2, 0, 4), 2)) expect_error(vec_assign(x, 1, matrix(c("", ""), 1)), class = "vctrs_error_incompatible_type") }) test_that("slice-assign takes the proxy", { local_proxy() x <- new_proxy(1:3) y <- new_proxy(20:21) vec_slice(x, 2:3) <- y expect_identical(proxy_deref(x), int(1, 20, 21)) }) test_that("can use names to vec_slice<-() a named object", { x0 <- c(a = 1, b = 2) x1 <- c(a = 1, a = 2) vec_slice(x0, "b") <- 3 expect_identical(x0, c(a = 1, b = 3)) vec_slice(x1, "a") <- 3 expect_identical(x1, c(a = 3, a = 2)) }) test_that("can't use names to vec_slice<-() an unnamed object", { x0 <- 1:3 expect_error( vec_slice(x0, letters[1]) <- 4L, "Can't use character names to index an unnamed vector.", fixed = TRUE ) expect_error( vec_slice(x0, letters[25:27]) <- 5L, "Can't use character names to index an unnamed vector.", fixed = TRUE ) }) test_that("slice-assign falls back to `[<-` when proxy is not implemented", { obj <- foobar(c("foo", "bar", "baz")) expect_error(vec_slice(obj, 1:2) <- TRUE, class = "vctrs_error_incompatible_type") vec_slice(obj, 1:2) <- foobar("quux") vec_ptype2(foobar(""), foobar("")) vec_cast(foobar(""), foobar("")) #> Error: Can't cast to local_methods( `[<-.vctrs_foobar` = function(x, i, value) { x <- unclass(x) x[i] <- "dispatched" x }, vec_ptype2.logical.vctrs_foobar = function(...) foobar(""), vec_ptype2.vctrs_foobar = function(...) foobar(""), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.logical = function(x, to, ...) foobar(rep("", length(x))) ) obj <- foobar(c("foo", "bar", "baz")) vec_slice(obj, 1:2) <- TRUE expect_identical(obj, foobar(c("dispatched", "dispatched", "baz"))) }) test_that("vec_assign() can always assign unspecified values into foreign vector types", { obj <- foobar(c("foo", "bar", "baz")) expect <- foobar(c(NA, "bar", "baz")) expect_identical(vec_assign(obj, 1, NA), expect) expect_identical(vec_assign(obj, 1, unspecified(1)), expect) }) test_that("slice-assign casts to `to` before falling back to `[<-` (#443)", { called <- FALSE local_methods( vec_proxy.vctrs_proxy = proxy_deref, vec_ptype2.vctrs_proxy = function(...) NULL, vec_ptype2.vctrs_proxy.vctrs_foobar = function(...) new_proxy(NA), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_proxy = function(x, ...) foobar(proxy_deref(x)), `[<-.vctrs_foobar` = function(x, i, value) { called <<- TRUE expect_identical(value, foobar(10)) } ) x <- foobar(1) y <- new_proxy(10) vec_slice(x, 1) <- y expect_true(called) }) test_that("index and value are sliced before falling back", { # Work around a bug in base R `[<-` lhs <- foobar(c(NA, 1:4)) rhs <- foobar(int(0L, 10L)) exp <- foobar(int(10L, 1:4)) expect_identical(vec_assign(lhs, c(NA, 1), rhs), exp) }) test_that("can assign to data frame", { x <- data_frame(x = 1:3) y <- data_frame(x = 20) expect_identical(vec_assign(x, 2, y), data_frame(x = int(1, 20, 3))) }) test_that("can assign to a data frame with matrix columns (#625)", { df <- tibble(x = 1:2, y = matrix(1:4, nrow = 2)) expect_identical(vec_assign(df, 2L, df[1,]), vec_slice(df, c(1, 1))) }) test_that("assigning to a factor doesn't produce corrupt levels (#853)", { x <- factor(c("a", NA), levels = c("a", "b")) value <- factor("b", levels = "b") res <- vec_assign(x, 2, value) expect_identical(res, factor(c("a", "b"))) res <- vec_assign(x, 1:2, value) expect_identical(res, factor(c("b", "b"), levels = c("a", "b"))) }) test_that("can slice-assign unspecified vectors with default type2 method", { local_rational_class() x <- rational(1:2, 2:3) x[[1]] <- NA expect_identical(x, rational(c(NA, 2L), c(NA, 3L))) }) test_that("`vec_assign()` validates `x_arg`", { expect_error(vec_assign(1, 1, 1, x_arg = 1), "must be a string") expect_error(vec_assign(1, 1, 1, x_arg = c("x", "y")), "must be a string") expect_error(vec_assign(1, 1, 1, x_arg = NA_character_), "must be a string") }) test_that("`vec_assign()` validates `value_arg`", { expect_error(vec_assign(1, 1, 1, value_arg = 1), "must be a string") expect_error(vec_assign(1, 1, 1, value_arg = c("x", "y")), "must be a string") expect_error(vec_assign(1, 1, 1, value_arg = NA_character_), "must be a string") }) test_that("`vec_assign()` requires recyclable value", { verify_errors({ expect_error( vec_assign(1:3, 1:3, 1:2), class = "vctrs_error_recycle_incompatible_size" ) }) }) test_that("logical subscripts must match size of indexed vector", { verify_errors({ expect_error( vec_assign(1:2, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size" ) }) }) test_that("must assign existing elements", { verify_errors({ expect_error( vec_assign(1:3, 5, 10), class = "vctrs_error_subscript_oob" ) expect_error( vec_assign(1:3, "foo", 10), "unnamed vector" ) expect_error( vec_slice(letters, -100) <- "foo", class = "vctrs_error_subscript_oob" ) expect_error( vec_assign(set_names(letters), "foo", "bar"), class = "vctrs_error_subscript_oob" ) }) }) test_that("must assign with proper negative locations", { verify_errors({ expect_error( vec_assign(1:3, c(-1, 1), 1:2), class = "vctrs_error_subscript_type" ) expect_error( vec_assign(1:3, c(-1, NA), 1:2), class = "vctrs_error_subscript_type" ) }) }) test_that("`vec_assign()` error args can be overridden", { verify_errors({ expect_error( vec_assign(1:2, 1L, "x", x_arg = "foo", value_arg = "bar"), class = "vctrs_error_incompatible_type" ) expect_error( vec_assign(1:2, 1L, 1:2, value_arg = "bar"), class = "vctrs_error_recycle_incompatible_size" ) }) }) test_that("names are not assigned by default", { vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) vec_out <- c(a = 1L, b = 4L, c = 3L) expect_identical( vec_assign(vec_x, 2, vec_y), vec_out ) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "FOO") df_out <- new_data_frame(list(x = c(1L, 4L, 3L)), row.names = letters[1:3]) expect_identical( vec_assign(df_x, 2, df_y), df_out ) mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("FOO")) mat_out <- matrix(c(1L, 4L, 3L), dimnames = list(letters[1:3])) expect_identical( vec_assign(mat_x, 2, mat_y), mat_out ) nested_x <- new_data_frame(list(df = df_x, mat = mat_x, vec = vec_x), row.names = c("foo", "bar", "baz")) nested_y <- new_data_frame(list(df = df_y, mat = mat_y, vec = vec_y), row.names = c("quux")) nested_out <- new_data_frame(list(df = df_out, mat = mat_out, vec = vec_out), row.names = c("foo", "bar", "baz")) expect_identical( vec_assign(nested_x, 2, nested_y), nested_out ) }) test_that("can optionally assign names", { vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) vec_out <- c(a = 1L, FOO = 4L, c = 3L) expect_identical( vec_assign_params(vec_x, 2, vec_y, assign_names = TRUE), vec_out ) oo_x <- set_names(as_posixlt(c("2020-01-01", "2020-01-02", "2020-01-03")), letters[1:3]) oo_y <- as_posixlt(c(FOO = "2020-01-04")) oo_out <- as_posixlt(c(a = "2020-01-01", FOO = "2020-01-04", c = "2020-01-03")) expect_identical( vec_assign_params(oo_x, 2, oo_y, assign_names = TRUE), oo_out ) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "FOO") df_out <- new_data_frame(list(x = c(1L, 4L, 3L)), row.names = c("a", "FOO", "c")) expect_identical( vec_assign_params(df_x, 2, df_y, assign_names = TRUE), df_out ) mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("FOO")) mat_out <- matrix(c(1L, 4L, 3L), dimnames = list(c("a", "FOO", "c"))) expect_identical( vec_assign_params(mat_x, 2, mat_y, assign_names = TRUE), mat_out ) nested_x <- new_data_frame(list(df = df_x, mat = mat_x, vec = vec_x, oo = oo_x), row.names = c("foo", "bar", "baz")) nested_y <- new_data_frame(list(df = df_y, mat = mat_y, vec = vec_y, oo = oo_y), row.names = c("quux")) nested_out <- new_data_frame(list(df = df_out, mat = mat_out, vec = vec_out, oo = oo_out), row.names = c("foo", "quux", "baz")) expect_identical( vec_assign_params(nested_x, 2, nested_y, assign_names = TRUE), nested_out ) }) test_that("assignment requires that the value proxy is the same type as the output proxy", { x <- foobar(1) y <- foobar("a") local_foobar_proxy() local_methods( vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) x ) expect_error( vec_assign(x, 1, y), "`double` incompatible with `value` proxy of type `character`" ) }) test_that("assignment allows a df `value`'s column to be a different type than its proxy (#1082)", { x <- new_data_frame(list(x = foobar(1))) y <- new_data_frame(list(x = foobar(2))) local_methods( # proxying foobar wraps it in a 1 col df vec_proxy.vctrs_foobar = function(x, ...) { attributes(x) <- NULL new_data_frame(list(vec = x)) }, # restoring extracts the column vec_restore.vctrs_foobar = function(x, to, ...) { foobar(x$vec) }, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x ) expect1 <- new_data_frame(list(x = foobar(c(1, 1)))) expect2 <- new_data_frame(list(x = foobar(2))) expect_identical(vec_rbind(x, x), expect1) expect_identical(vec_assign(x, 1, y), expect2) }) test_that("monitoring: assignment to a data frame with unshared columns doesn't overwrite (#986)", { x <- new_df_unshared_col() value <- new_data_frame(list(x = 2)) expect <- new_data_frame(list(x = 1L)) # - On R < 4.0.0, the NAMED value of the column is 0. # - On R >= 4.0.0, the refcnt of the column is 1 from the call to # `SET_VECTOR_ELT()` in `new_df_unshared_col()`. expect_false(maybe_shared_col(x, 1L)) new <- vec_assign(x, 1, value) # On R < 4.0.0, `vec_assign()` shallow duplicates `x`, which recursively # bumps the NAMED-ness of each column of `x` to the max value of 7 by # calling `ENSURE_NAMEDMAX()` on it. So the columns of `x` are all considered # shared from that. # On R >= 4.0.0, references are tracked more precisely. # - `new_df_unshared_col()` calls `SET_VECTOR_ELT()` when setting the # column into `x`, bumping the column's namedness to 1. # - Then, at the start of `df_assign()`, `x` is shallow duplicated and # assigned to `out`. This calls `ENSURE_NAMEDMAX()` on each column, # however this does nothing on R 4.0.0. The refcnt of each column is instead # incremented by 1 by calls to `SET_VECTOR_ELT()` in `duplicate1()`. # So now it is at 2. # - But then in `df_assign()` we use `SET_VECTOR_ELT()` on `out`, overwriting # each column. This actually decrements the refcnt on the value that was # in `out` before the column was overwritten. The column of `out` that it # decrements the refcnt for is the same SEXP as that column in `x`, so now # it is back to 1, and it is not considered shared. if (getRversion() >= "4.0.0") { expect_false(maybe_shared_col(x, 1L)) } else { expect_true(maybe_shared_col(x, 1L)) } # Expect no changes to `x`! expect_identical(x, expect) }) test_that("monitoring: assignment to atomic vectors doesn't modify by reference", { x <- c(1, 2, 3) expect <- c(1, 2, 3) vec_assign(x, 2, 3) expect_identical(x, expect) }) # vec_assign + compact_seq ------------------------------------------------- # `start` is 0-based test_that("can assign shaped base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical(vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(lgl(1, NA, NA))) expect_identical(vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, NA, NA))) expect_identical(vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, NA, NA))) expect_identical(vec_assign_seq(mat(cpl(1, 2, 3)), NA, start, size, increasing), mat(cpl(1, NA, NA))) expect_identical(vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", NA, NA))) expect_identical(vec_assign_seq(mat(bytes(1, 2, 3)), bytes(1), start, size, increasing), mat(bytes(1, 1, 1))) expect_identical(vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, NULL, NULL))) }) test_that("can assign shaped base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE mat <- as.matrix expect_identical(vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(lgl(1, NA, NA))) expect_identical(vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, NA, NA))) expect_identical(vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, NA, NA))) expect_identical(vec_assign_seq(mat(cpl(1, 2, 3)), NA, start, size, increasing), mat(cpl(1, NA, NA))) expect_identical(vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", NA, NA))) expect_identical(vec_assign_seq(mat(bytes(1, 2, 3)), bytes(1), start, size, increasing), mat(bytes(1, 1, 1))) expect_identical(vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, NULL, NULL))) }) test_that("can assign shaped base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE mat <- as.matrix expect_identical(vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(mat(lgl(1, 0, 1)))) expect_identical(vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, 2, 3))) expect_identical(vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, 2, 3))) expect_identical(vec_assign_seq(mat(cpl(1, 2, 3)), NA, start, size, increasing), mat(cpl(1, 2, 3))) expect_identical(vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", "2", "3"))) expect_identical(vec_assign_seq(mat(bytes(1, 2, 3)), bytes(1), start, size, increasing), mat(bytes(1, 2, 3))) expect_identical(vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, 2, 3))) }) test_that("can assign object of any dimensionality with compact seqs", { x1 <- ones(3) x2 <- ones(3, 4) x3 <- ones(3, 4, 5) x4 <- ones(3, 4, 5, 6) start <- 0L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical(vec_assign_seq(x1, 2, start, size, increasing), array(rep(c(2, 2, 1), 1), dim = 3)) expect_identical(vec_assign_seq(x2, 2, start, size, increasing), array(rep(c(2, 2, 1), 4), dim = c(3, 4))) expect_identical(vec_assign_seq(x3, 2, start, size, increasing), array(rep(c(2, 2, 1), 20), dim = c(3, 4, 5))) expect_identical(vec_assign_seq(x4, 2, start, size, increasing), array(rep(c(2, 2, 1), 120), dim = c(3, 4, 5, 6))) }) # Golden tests ------------------------------------------------------------ test_that("slice and assign have informative errors", { verify_output(test_path("error", "test-slice-assign.txt"), { "# `vec_assign()` requires recyclable value" vec_assign(1:3, 1:3, 1:2) "# logical subscripts must match size of indexed vector" vec_assign(1:2, c(TRUE, FALSE, TRUE), 5) vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]) "# must assign existing elements" vec_assign(1:3, 5, 10) vec_assign(1:3, "foo", 10) vec_slice(letters, -100) <- "foo" vec_assign(set_names(letters), "foo", "bar") "# must assign with proper negative locations" vec_assign(1:3, c(-1, 1), 1:2) vec_assign(1:3, c(-1, NA), 1:2) "# `vec_assign()` error args can be overridden" vec_assign(1:2, 1L, "x", x_arg = "foo", value_arg = "bar") vec_assign(1:2, 1L, 1:2, value_arg = "bar") }) }) vctrs/tests/testthat/test-size.R0000644000176200001440000000721713723213047016500 0ustar liggesusers # vec_size ----------------------------------------------------------------- test_that("vec_size must be called with vector", { expect_error(vec_size(mean), class = "vctrs_error_scalar_type") }) test_that("length is number of rows", { expect_equal(vec_size(integer()), 0) expect_equal(vec_size(array(integer())), 0) expect_equal(vec_size(1:2), 2) expect_equal(vec_size(array(dim = 2)), 2) expect_equal(vec_size(matrix(nrow = 2, ncol = 3)), 2) expect_equal(vec_size(array(dim = c(2, 1, 5))), 2) }) test_that("length of record is number of rows, not fields", { r <- new_rcrd(list(x = 1:10)) expect_equal(vec_size(r), 10) }) test_that("handles three types of data frame rownames", { df1 <- df2 <- df3 <- data.frame(x = 1:3) rownames(df1) <- NULL rownames(df2) <- 3:1 rownames(df3) <- letters[1:3] expect_equal(vec_size(df1), 3) expect_equal(vec_size(df2), 3) expect_equal(vec_size(df3), 3) }) test_that("handles positive short row names (#220)", { data <- structure(mtcars, row.names = c(NA, 32)) expect_identical(vec_size(data), 32L) }) test_that("size is proxied", { local_env_proxy() expect_size(new_proxy(1:3), 3) expect_size(new_proxy(list(1, 2, 3)), 3) expect_size(new_proxy(foobar(list(1, 2, 3))), 3) }) test_that("`NULL` has size zero", { expect_identical(vec_size(NULL), 0L) }) test_that("can take the size of unspecified objects", { expect_size(NA, 1) expect_size(c(NA, NA), 2) expect_size(unspecified(2), 2) }) # vec_size_common --------------------------------------------------------- test_that("vec_size_common with no input is 0L unless `.absent` is provided", { expect_identical(vec_size_common(), 0L) expect_identical(vec_size_common(NULL), 0L) expect_equal(vec_size_common(.absent = na_int), na_int) }) test_that("`.absent` must be a length 1 integer if provided", { expect_error(vec_size_common(.absent = 1), "must be a single integer") expect_error(vec_size_common(.absent = c(1L, 2L)), "must be a single integer") }) test_that("`NULL` is treated as the absence of input", { expect_equal(vec_size_common(1:5, NULL), vec_size_common(1:5)) }) test_that("size 1 is overshadowed by any other size", { expect_equal(vec_size_common(1, integer()), 0) expect_equal(vec_size_common(1, 1:5), 5) }) test_that("if not size 1, sizes must be identical", { expect_equal(vec_size_common(integer(), integer()), 0) expect_error(vec_size_common(1:2, integer()), class = "vctrs_error_incompatible_size") expect_error(vec_size_common(1:2, 1:3), class = "vctrs_error_incompatible_size") }) test_that("argument tags are forwarded", { expect_known_output_nobang(file = test_path("test-type-vec-size-common-error.txt"), { try2(vec_size_common(1:2, 1, 1:4)) try2(vec_size_common(foo = 1:2, 1, bar = 1:4)) }) }) test_that("can pass size", { expect_identical(vec_size_common(1:2, 1:3, .size = 5L), 5L) }) test_that("provided size is cast to an integer", { expect_identical(vec_size_common(.size = 1), 1L) }) # list_sizes -------------------------------------------------------------- test_that("only lists are allowed", { expect_error(list_sizes(mtcars), "must be a list") expect_error(list_sizes(1), "must be a list") }) test_that("computes element sizes", { expect_identical(list_sizes(list(1, 1:3, c("a", "b"))), c(1L, 3L, 2L)) }) # sequences --------------------------------------------------------------- test_that("vec_seq_along returns size-0 output for size-0 input", { expect_equal(vec_seq_along(character()), integer()) expect_equal(vec_seq_along(data.frame()), integer()) }) test_that("vec_init_along can be called with single argument", { expect_equal(vec_init_along(1:3), rep(NA_integer_, 3)) }) vctrs/tests/testthat/test-vctr-print-names.txt0000644000176200001440000000004614042546252021350 0ustar liggesusers A B C xxx xxx xxx vctrs/tests/testthat/test-type-unspecified.R0000644000176200001440000000661014027045462021001 0ustar liggesusers test_that("unknown type is idempotent", { types <- list( unspecified(), logical(), integer(), double(), character(), list(), new_list_of(ptype = integer()), new_factor(), new_ordered(), new_date(), new_datetime(), new_duration(), vec_ptype(matrix(1:4, 2)), vec_ptype(array(1:5, c(1, 5))) ) lhs <- map(types, vec_ptype2, x = unspecified()) expect_identical(types, lhs) lhs <- map(types, vec_ptype2, x = NA) expect_identical(types, lhs) rhs <- map(types, vec_ptype2, y = unspecified()) expect_identical(types, rhs) rhs <- map(types, vec_ptype2, y = NA) expect_identical(types, rhs) }) test_that("common type of unspecified and NULL is unspecified", { expect_identical(vec_ptype2(unspecified(), NULL), unspecified()) expect_identical(vec_ptype2(NULL, unspecified()), unspecified()) expect_identical(vec_ptype2(NA, NULL), unspecified()) expect_identical(vec_ptype2(NULL, NA), unspecified()) }) test_that("cannot take the common type of unspecified and a scalar list", { expect_error(vec_ptype2(unspecified(), foobar()), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(foobar(), unspecified()), class = "vctrs_error_scalar_type") }) test_that("subsetting works", { expect_identical(unspecified(4)[2:3], unspecified(2)) }) test_that("has useful print method", { expect_known_output(unspecified(), print = TRUE, file = test_path("test-type-unspecified.txt")) }) test_that("can finalise data frame containing unspecified columns", { df <- data.frame(y = NA, x = c(1, 2, NA)) ptype <- vec_ptype(df) expect_identical(ptype$y, unspecified()) finalised <- vec_ptype_finalise(ptype) expect_identical(finalised$y, lgl()) common <- vec_ptype_common(df, df) expect_identical(common$y, lgl()) }) test_that("can cast to common type data frame containing unspecified columns", { df <- data.frame(y = NA, x = c(1, 2, NA)) expect_identical(vec_cast_common(df, df), list(df, df)) }) test_that("unspecified vectors are always unspecified (#222)", { expect_true(is_unspecified(unspecified())) expect_true(is_unspecified(unspecified(1))) }) test_that("S3 vectors and shaped vectors are never unspecified", { expect_false(is_unspecified(foobar(NA))) expect_false(is_unspecified(foobar(lgl(NA, NA)))) expect_false(is_unspecified(matrix(NA, 2))) }) test_that("can finalise lengthy unspecified vectors", { expect_identical(vec_ptype_finalise(unspecified(3)), rep(NA, 3)) expect_identical(ununspecify(unspecified(3)), rep(NA, 3)) }) test_that("unspecified() validates input", { expect_identical(unspecified(1), unspecified(1L)) expect_error(unspecified(1:3), "must be a single number") }) test_that("tibble::type_sum() knows about unspecified", { expect_identical(tibble::type_sum(unspecified(3)), "???") }) test_that("casting to a scalar type errors", { expect_error(vec_cast(NA, quote(x)), class = "vctrs_error_scalar_type") expect_error(vec_cast(unspecified(1), quote(x)), class = "vctrs_error_scalar_type") }) test_that("monitoring test - can cast to unspecified from unspecified", { expect_identical(vec_cast(NA, unspecified()), unspecified(1)) expect_identical(vec_cast(unspecified(1), unspecified()), unspecified(1)) }) test_that("monitoring test - casting unspecified input to NA unspecified results in NA vector", { expect_identical(vec_cast(unspecified(1), NA), NA) expect_identical(vec_cast(NA, NA), NA) }) vctrs/tests/testthat/test-empty.R0000644000176200001440000000014313723213047016653 0ustar liggesusers test_that("uses y when x is empty", { expect_equal(1 %0% 2, 1) expect_equal(1[0] %0% 2, 2) }) vctrs/tests/testthat/test-shape.R0000644000176200001440000000604514027045462016626 0ustar liggesusers # common shape ------------------------------------------------------------ test_that("vec_shape2() applies recycling rules", { expect_equal(vec_shape2(shaped_int(1, 5, 5), shaped_int(1)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1, 5), shaped_int(1, 5, 1)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 5, 1), shaped_int(1, 1, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 1, 1)), c(0L, 0L, 5L)) }) test_that("incompatible shapes throw errors", { verify_errors({ expect_error(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1)), class = "vctrs_error_incompatible_type") expect_error(vec_shape2(shaped_int(1, 5, 0), shaped_int(1, 1, 5)), class = "vctrs_error_incompatible_type") }) }) test_that("can override error args", { verify_errors({ expect_error( vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1), x_arg = "foo", y_arg = "bar"), class = "vctrs_error_incompatible_type" ) }) }) # broadcasting ------------------------------------------------------------- test_that("can broadcast to higher dimension, but not lower", { expect_identical(shape_broadcast_(1, NULL), 1) expect_null(shape_broadcast_(NULL, 1)) expect_equal( shape_broadcast_(1, shaped_int(0, 4)), array(1, c(1, 4)) ) expect_error( shape_broadcast_(shaped_int(1, 1, 1), shaped_int(4, 4)), class = "vctrs_error_incompatible_type" ) expect_error( shape_broadcast_(shaped_int(3, 2), shaped_int(3, 3)), class = "vctrs_error_incompatible_type" ) }) test_that("shape_broadcast_() applies recycling rules", { expect_equal( shape_broadcast_(array(1:4, c(1, 1, 4)), shaped_int(0, 4, 4))[1, , ], matrix(1:4, 4, 4, byrow = TRUE) ) expect_equal( shape_broadcast_(array(1:4, c(1, 4, 1)), shaped_int(0, 4, 4))[1, , ], matrix(1:4, 4, 4) ) expect_equal( shape_broadcast_(array(1L, c(1, 1)), shaped_int(1, 0)), matrix(integer(), nrow = 1) ) expect_error( shape_broadcast_(array(1L, c(1, 2)), shaped_int(1, 0)), "Non-recyclable dimensions", class = "vctrs_error_incompatible_type" ) expect_error( shape_broadcast_(array(1L, c(1, 0)), shaped_int(1, 1)), "Non-recyclable dimensions", class = "vctrs_error_incompatible_type" ) }) # -------------------------------------------------------------------------- test_that("shape errors have informative output", { verify_output(test_path("error", "test-shape.txt"), { "# incompatible shapes throw errors" vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1)) vec_shape2(shaped_int(1, 5, 0), shaped_int(1, 1, 5)) "# can override error args" vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1), x_arg = "foo", y_arg = "bar") }) }) vctrs/tests/testthat/test-vctr-print.txt0000644000176200001440000000011714042546252020246 0ustar liggesusers [1] xxx xxx xxx xxx hidden [1:4] xxx, xxx, xxx, xxx vctrs/tests/testthat/test-subscript.R0000644000176200001440000000561113712211241017530 0ustar liggesusers test_that("vec_as_subscript() coerces unspecified vectors", { expect_identical( vec_as_subscript(NA), NA ) expect_identical( vec_as_subscript(NA, logical = "error"), na_int ) expect_identical( vec_as_subscript(NA, logical = "error", numeric = "error"), na_chr ) }) test_that("vec_as_subscript() coerces subtypes and supertypes", { expect_identical(vec_as_subscript(factor("foo")), "foo") with_lgl_subtype({ expect_identical(vec_as_subscript(new_lgl_subtype(TRUE)), TRUE) }) with_lgl_supertype({ expect_identical(vec_as_subscript(new_lgl_supertype(TRUE)), TRUE) }) }) test_that("vec_as_subscript() handles NULL", { expect_identical(vec_as_subscript(NULL), int()) expect_error( vec_as_subscript(NULL, numeric = "error"), class = "vctrs_error_subscript_type" ) }) test_that("vec_as_subscript() handles symbols", { expect_identical(vec_as_subscript(quote(foo)), "foo") expect_identical(vec_as_subscript(quote(``)), "\u5e78") expect_error( vec_as_subscript(quote(foo), character = "error"), class = "vctrs_error_subscript_type" ) }) test_that("can customise subscript errors", { verify_errors({ expect_error( with_tibble_cols(vec_as_subscript(env())), class = "vctrs_error_subscript_type" ) }) }) test_that("vec_as_subscript() checks dimensionality", { verify_errors({ expect_error(vec_as_subscript(matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type") expect_error(vec_as_subscript(array(TRUE, dim = c(1, 1, 1))), class = "vctrs_error_subscript_type") expect_error(with_tibble_rows(vec_as_subscript(matrix(TRUE, nrow = 1))), class = "vctrs_error_subscript_type") }) }) test_that("vec_as_subscript() works with vectors of dimensionality 1", { arr <- array(TRUE, dim = 1) expect_identical(vec_as_subscript(arr), arr) }) test_that("subscript functions have informative error messages", { verify_output(test_path("error", "test-subscript.txt"), { "# vec_as_subscript() forbids subscript types" vec_as_subscript(1L, logical = "error", numeric = "error") vec_as_subscript("foo", logical = "error", character = "error") vec_as_subscript(TRUE, logical = "error") vec_as_subscript("foo", character = "error") vec_as_subscript(NULL, numeric = "error") vec_as_subscript(quote(foo), character = "error") "# vec_as_subscript2() forbids subscript types" vec_as_subscript2(1L, numeric = "error", logical = "error") vec_as_subscript2("foo", character = "error", logical = "error") vec_as_subscript2(TRUE, logical = "error") "# can customise subscript errors" with_tibble_cols(vec_as_subscript(env())) with_dm_tables(vec_as_subscript(env())) "# vec_as_subscript() checks dimensionality" vec_as_subscript(matrix(TRUE, nrow = 1)) vec_as_subscript(array(TRUE, dim = c(1, 1, 1))) with_tibble_rows(vec_as_subscript(matrix(TRUE, nrow = 1))) }) }) vctrs/tests/testthat/test-utils.R0000644000176200001440000000431713723213047016664 0ustar liggesusers test_that("names preserved if outer name is missing", { x <- c("a", "z", "") expect_equal(outer_names(x, NULL, 3), x) expect_equal(outer_names(x, "", 3), x) expect_equal(outer_names(x, na_chr, 3), x) }) test_that("outer name vectorised if needed", { expect_equal(outer_names(NULL, "x", 1L), c("x")) expect_equal(outer_names(NULL, "x", 2L), c("x1", "x2")) }) test_that("outer and inner names are combined", { expect_equal(outer_names("x", "y", 1), c("y..x")) }) test_that("options are created", { expect_identical( unclass(new_opts(c("a", "c"), letters[1:4])), c(a = TRUE, b = FALSE, c = TRUE, d = FALSE) ) }) test_that("can't supply unknown option", { expect_error( new_opts(c("a", "foo"), letters[1:4]), "Argument must be one of \"a\", \"b\", \"c\" or \"d\"" ) expect_error( new_opts(c("a", "foo"), letters[1:4], arg = "foo"), "`foo` must be one of \"a\", \"b\", \"c\" or \"d\"" ) }) test_that("`has_dim()` doesn't partial match on the `dim` attribute (#948)", { x <- structure(1, dimB = 1) expect_false(has_dim(x)) }) test_that("df_has_base_subset() detects `[` methods", { expect_true(df_has_base_subset(foobar(mtcars))) out <- with_methods( `[.vctrs_foobar` = function(x, i, ...) structure(NextMethod(), dispatched = TRUE), df_has_base_subset(foobar(mtcars)) ) expect_false(out) }) test_that("vec_common_suffix() finds common suffix", { x <- c("foo", "bar", "baz") y <- c("quux", "foo", "hop", "baz") expect_identical(vec_common_suffix(x, y), "baz") x <- c("foo", "bar", "baz") y <- c("quux", "foo", "bar", "baz") expect_identical(vec_common_suffix(x, y), x) x <- letters y <- chr() expect_identical(vec_common_suffix(x, y), chr()) x <- data.frame(x = 1:3, y = c("foo", "bar", "baz")) y <- data.frame(x = 0:3, y = c("foo", "hop", "bar", "baz")) exp <- data.frame(x = 2:3, y = c("bar", "baz")) expect_identical(vec_common_suffix(x, y), exp) }) test_that("fast_c() concatenates", { expect_identical(fast_c(character(), "foo"), "foo") expect_identical(fast_c("foo", character()), "foo") expect_identical(fast_c("foo", c("bar", "baz")), c("foo", "bar", "baz")) expect_identical(fast_c(c("bar", "baz"), "foo"), c("bar", "baz", "foo")) }) vctrs/tests/testthat/test-runs.R0000644000176200001440000001242414027045462016513 0ustar liggesusers# vec_identify_runs ------------------------------------------------------------ test_that("vec_identify_runs() works with size zero input", { expect <- structure(integer(), n = 0L) expect_identical(vec_identify_runs(integer()), expect) expect_identical(vec_identify_runs(data.frame()), expect) }) test_that("works with atomic input of various types", { expect <- structure(c(1L, 1L, 2L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(c(TRUE, TRUE, FALSE, FALSE, TRUE)), expect) expect_identical(vec_identify_runs(c(1L, 1L, 2L, 2L, 3L)), expect) expect_identical(vec_identify_runs(c(1, 1, 2, 2, 3)), expect) expect_identical(vec_identify_runs(complex(real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3))), expect) expect_identical(vec_identify_runs(c("a", "a", "b", "b", "c")), expect) expect_identical(vec_identify_runs(as.raw(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_identify_runs(list(1, 1, 2, 2, 3)), expect) }) test_that("NA values are identical", { expect <- structure(c(1L, 1L), n = 1L) expect_identical(vec_identify_runs(c(NA, NA)), expect) expect_identical(vec_identify_runs(c(NA_integer_, NA_integer_)), expect) expect_identical(vec_identify_runs(c(NA_real_, NA_real_)), expect) expect_identical(vec_identify_runs(c(NA_complex_, NA_complex_)), expect) expect_identical(vec_identify_runs(c(NA_character_, NA_character_)), expect) # No NA type for raw expect_identical(vec_identify_runs(list(NULL, NULL)), expect) }) test_that("NA and NaN are different", { expect <- structure(c(1L, 2L), n = 2L) expect_identical(vec_identify_runs(c(NA_real_, NaN)), expect) }) test_that("normalizes character encodings", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1) expect_identical(vec_identify_runs(x), structure(rep(1L, 3), n = 1L)) }) test_that("errors on scalars", { expect_error(vec_identify_runs(foobar()), class = "vctrs_error_scalar_type") }) test_that("works with data frames rowwise", { df <- data_frame(x = c(1, 1, 1, 2), y = c(1, 1, 2, 3)) expect <- structure(c(1L, 1L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(df), expect) df <- data_frame(x = c(1, 1, 1), y = c(2, 2, 2), z = c("b", "a", "a")) expect <- structure(c(1L, 2L, 2L), n = 2L) expect_identical(vec_identify_runs(df), expect) }) test_that("works with data frames with rows but no columns", { expect <- structure(rep(1L, 5), n = 1L) expect_identical(vec_identify_runs(new_data_frame(n = 5L)), expect) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, 1, 2, 2), b = c(1, 2, 3, 3)) df <- data_frame(x = rep(1, 4), y = col) expect <- structure(c(1L, 2L, 3L, 3L), n = 3L) expect_identical(vec_identify_runs(df), expect) }) test_that("works with columns of various types", { # Use two columns to keep the data frame from being squashed to a vector add_col <- function(col) { x <- rep(1L, 5) data_frame(x = x, y = col) } expect <- structure(c(1L, 1L, 2L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(add_col(c(TRUE, TRUE, FALSE, FALSE, TRUE))), expect) expect_identical(vec_identify_runs(add_col(c(1L, 1L, 2L, 2L, 3L))), expect) expect_identical(vec_identify_runs(add_col(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_identify_runs(add_col(complex(real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_identify_runs(add_col(c("a", "a", "b", "b", "c"))), expect) expect_identical(vec_identify_runs(add_col(as.raw(c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_identify_runs(add_col(list(1, 1, 2, 2, 3))), expect) }) # vec_locate_runs -------------------------------------------------------------- test_that("can locate run starts", { expect_identical( vec_locate_runs(c(1, 3, 3, 1, 5, 5, 6)), c(1L, 2L, 4L, 5L, 7L) ) }) test_that("can locate run ends", { expect_identical( vec_locate_runs(c(1, 3, 3, 1, 5, 5, 6), start = FALSE), c(1L, 3L, 4L, 6L, 7L) ) }) test_that("vec_locate_runs() works with size zero input", { expect_identical(vec_locate_runs(integer(), start = TRUE), integer()) expect_identical(vec_locate_runs(integer(), start = FALSE), integer()) }) test_that("vec_locate_runs() validates `start`", { expect_error(vec_locate_runs(1, start = "x"), "single `TRUE` or `FALSE`") expect_error(vec_locate_runs(1, start = NA), "single `TRUE` or `FALSE`") expect_error(vec_locate_runs(1, start = c(TRUE, TRUE)), "single `TRUE` or `FALSE`") }) # vec_detect_runs -------------------------------------------------------------- test_that("can detect run starts", { expect_identical( vec_detect_runs(c(1, 3, 3, 1, 5, 5, 6)), c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE) ) }) test_that("can detect run ends", { expect_identical( vec_detect_runs(c(1, 3, 3, 1, 5, 5, 6), start = FALSE), c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE) ) }) test_that("vec_detect_runs() works with size zero input", { expect_identical(vec_detect_runs(integer(), start = TRUE), logical()) expect_identical(vec_detect_runs(integer(), start = FALSE), logical()) }) test_that("vec_detect_runs() validates `start`", { expect_error(vec_detect_runs(1, start = "x"), "single `TRUE` or `FALSE`") expect_error(vec_detect_runs(1, start = NA), "single `TRUE` or `FALSE`") expect_error(vec_detect_runs(1, start = c(TRUE, TRUE)), "single `TRUE` or `FALSE`") }) vctrs/tests/testthat/test-type-data-frame.txt0000644000176200001440000000060314042546246021121 0ustar liggesusers> vec_ptype_show(mtcars) Prototype: data.frame< mpg : double cyl : double disp: double hp : double drat: double wt : double qsec: double vs : double am : double gear: double carb: double > > vec_ptype_show(iris) Prototype: data.frame< Sepal.Length: double Sepal.Width : double Petal.Length: double Petal.Width : double Species : factor > vctrs/tests/testthat/test-equal.R0000644000176200001440000003103613753021253016630 0ustar liggesusers # vectorised -------------------------------------------------------------- test_that("throws error for unsuported type", { expect_error(.Call(vctrs_equal, expression(x), expression(x), TRUE), class = "vctrs_error_scalar_type") }) test_that("C wrapper throws error if length or type doesn't match", { expect_error(.Call(vctrs_equal, 1:2, 1L, TRUE), "same types and lengths") expect_error(.Call(vctrs_equal, 1, 1L, TRUE), "same types and lengths") }) test_that("correct behaviour for basic vectors", { expect_equal(vec_equal(c(TRUE, FALSE), TRUE), c(TRUE, FALSE)) expect_equal(vec_equal(c(1L, 2L), 1L), c(TRUE, FALSE)) expect_equal(vec_equal(c(1, 2), 1), c(TRUE, FALSE)) expect_equal(vec_equal(c("1", "2"), "1"), c(TRUE, FALSE)) expect_equal(vec_equal(as.raw(1:2), as.raw(1L)), c(TRUE, FALSE)) expect_equal(vec_equal(list(1:3, 1:2), list(1:3)), c(TRUE, FALSE)) expect_equal(vec_equal(list(1:3, 1.5), list(1:3)), c(TRUE, FALSE)) expect_equal(vec_equal(list(as.raw(1:3), as.raw(1.5)), list(as.raw(1:3))), c(TRUE, FALSE)) expect_equal(vec_equal(list(1+1i, 1+0i), list(1+1i)), c(TRUE, FALSE)) expect_equal(vec_equal(c(1, 2) + 1i, 1+1i), c(TRUE, FALSE)) }) test_that("NAs are equal", { expect_true(vec_equal(NA, NA, na_equal = TRUE)) expect_true(vec_equal(NA_integer_, NA_integer_, na_equal = TRUE)) expect_true(vec_equal(NA_real_, NA_real_, na_equal = TRUE)) expect_true(vec_equal(NA_character_, NA_character_, na_equal = TRUE)) expect_true(vec_equal(list(NULL), list(NULL), na_equal = TRUE)) }) test_that("double special values", { expect_equal(vec_equal(c(NaN, NA), NaN, na_equal = TRUE), c(TRUE, FALSE)) expect_equal(vec_equal(c(NA, NaN), NA, na_equal = TRUE), c(TRUE, FALSE)) expect_true(vec_equal(Inf, Inf)) expect_true(vec_equal(-Inf, -Inf)) }) test_that("`list(NULL)` is considered a missing value (#653)", { expect_equal(vec_equal(list(NULL), list(NULL)), NA) expect_equal(vec_equal(list(NULL), list(1)), NA) }) test_that("can compare data frames", { df <- data.frame(x = 1:2, y = letters[2:1], stringsAsFactors = FALSE) expect_equal(vec_equal(df, df[1, ]), c(TRUE, FALSE)) }) test_that("can compare data frames with various types of columns", { x1 <- data_frame(x = 1, y = 2) y1 <- data_frame(x = 2, y = 1) x2 <- data_frame(x = "a") y2 <- data_frame(x = "b") x3 <- data_frame(x = FALSE) y3 <- data_frame(x = TRUE) x4 <- data_frame(x = 1L) y4 <- data_frame(x = 2L) x5 <- data_frame(x = as.raw(0)) y5 <- data_frame(x = as.raw(1)) x6 <- data_frame(x = 1+0i) y6 <- data_frame(x = 1+1i) expect_false(vec_equal(x1, y1)) expect_false(vec_equal(x2, y2)) expect_false(vec_equal(x3, y3)) expect_false(vec_equal(x4, y4)) expect_false(vec_equal(x5, y5)) expect_false(vec_equal(x6, y6)) }) test_that("can compare data frames with data frame columns", { df1 <- data_frame(x = data_frame(a = 1)) df2 <- data_frame(x = data_frame(a = 2)) expect_true(vec_equal(df1, df1)) expect_false(vec_equal(df1, df2)) }) test_that("can compare data frames with list columns", { df1 <- data_frame(x = list(a = 1, b = 2), y = c(1, 1)) df2 <- data_frame(x = list(a = 0, b = 2), y = c(1, 1)) expect_equal(vec_equal(df1, df2), c(FALSE, TRUE)) }) test_that("data frames must have same size and columns", { expect_error(.Call(vctrs_equal, data.frame(x = 1), data.frame(x = 1, y = 2), TRUE ), "must have same types and lengths" ) expect_error(.Call(vctrs_equal, data.frame(x = 1, y = 2, z = 2), data.frame(x = 1, y = 2), TRUE ), "must have the same number of columns" ) # Names are not checked, as `vec_cast_common()` should take care of the type. # So if `vec_cast_common()` is not called, or is improperly specified, then # this could result in false equality. expect_true(.Call(vctrs_equal, data.frame(x = 1), data.frame(y = 1), TRUE )) expect_error(.Call(vctrs_equal, data.frame(x = 1:2, y = 3:4), data.frame(x = 1, y = 2), TRUE ), "must have same types and lengths" ) expect_false(.Call(vctrs_equal, data.frame(x = 1), data.frame(x = 2), TRUE )) expect_false(.Call(vctrs_equal, list(data.frame(x = 1)), list(10), TRUE )) }) test_that("can compare data frames with 0 columns", { x <- new_data_frame(n = 1L) expect_true(vec_equal(x, x)) }) test_that("can compare lists of scalars (#643)", { lst <- list(new_sclr(x = 1)) expect_true(vec_equal(lst, lst)) # NA does not propagate lst <- list(new_sclr(y = NA)) expect_true(vec_equal(lst, lst)) df <- data.frame(x = c(1, 4, 3), y = c(2, 8, 9)) model <- lm(y ~ x, df) lst <- list(model) expect_true(vec_equal(lst, lst)) }) test_that("can determine equality of strings with different encodings (#553)", { for (x_encoding in encodings()) { for (y_encoding in encodings()) { expect_equal(vec_equal(x_encoding, y_encoding), TRUE) expect_equal(vec_equal(x_encoding, y_encoding), x_encoding == y_encoding) } } }) test_that("equality can be determined when strings have identical encodings", { encs <- encodings() for (enc in encs) { expect_true(vec_equal(enc, enc)) expect_equal(vec_equal(enc, enc), enc == enc) } }) test_that("equality is known to always fail with bytes", { enc <- encoding_bytes() error <- "translating strings with \"bytes\" encoding" expect_error(vec_equal(enc, enc), error) }) test_that("equality is known to fail when comparing bytes to other encodings", { error <- "translating strings with \"bytes\" encoding" for (enc in encodings()) { expect_error(vec_equal(encoding_bytes(), enc), error) expect_error(vec_equal(enc, encoding_bytes()), error) } }) test_that("`na_equal` is validated", { expect_error(vec_equal(1, 1, na_equal = 1), class = "vctrs_error_assert_ptype") expect_error(vec_equal(1, 1, na_equal = c(TRUE, FALSE)), class = "vctrs_error_assert_size") }) test_that("can compare lists of expressions", { x <- list(expression(x), expression(y)) y <- list(expression(x)) expect_equal(vec_equal(x, y), c(TRUE, FALSE)) }) test_that("vec_equal() silently falls back to base data frame", { expect_silent(expect_identical( vec_equal(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(TRUE, 32) )) }) # object ------------------------------------------------------------------ test_that("can compare NULL",{ expect_true(obj_equal(NULL, NULL)) }) test_that("can compare objects with reference semantics", { expect_true(obj_equal(globalenv(), globalenv())) expect_false(obj_equal(globalenv(), environment())) expect_true(obj_equal(quote(x), quote(x))) expect_false(obj_equal(quote(x), quote(y))) }) test_that("can compare pairlists", { expect_true(obj_equal(quote(x + y), quote(x + y))) expect_true(obj_equal(pairlist(x = 1, y = 2), pairlist(x = 1, y = 2))) }) test_that("can compare functions", { f1 <- function(x, y) x + y f2 <- function(x, y) x + y expect_false(obj_equal(f2, f1)) attr(f1, "srcref") <- NULL attr(f2, "srcref") <- NULL expect_true(obj_equal(f2, f1)) f3 <- f1 formals(f3) <- alist(x = 1) expect_false(obj_equal(f3, f1)) f4 <- f1 body(f4) <- quote(x) expect_false(obj_equal(f4, f2)) }) test_that("not equal if different types or lengths", { expect_false(obj_equal(1, 2)) expect_false(obj_equal(1:2, 1)) }) test_that("not equal if attributes not equal", { x1 <- structure(1:10, x = 1, y = 2) x2 <- structure(1:10, x = 1, y = 3) expect_false(obj_equal(x1, x2)) }) test_that("can compare expressions", { expect_true(obj_equal(expression(x), expression(x))) expect_false(obj_equal(expression(x), expression(y))) }) # na ---------------------------------------------------------------------- test_that("can detect different types of NA", { expect_true(vec_equal_na(NA)) expect_true(vec_equal_na(NA_integer_)) expect_true(vec_equal_na(NA_real_)) expect_true(vec_equal_na(NA_complex_)) expect_true(vec_equal_na(complex(real = NA, imaginary = 1))) expect_true(vec_equal_na(NaN)) expect_true(vec_equal_na(NA_character_)) expect_true(vec_equal_na(list(NULL))) }) test_that("can detect different types of NA in data frames", { # using multiple columns to prevent proxy unwrapping expect_true(vec_equal_na(data.frame(x = NA, y = NA))) expect_true(vec_equal_na(data.frame(x = NA_integer_, y = NA_integer_))) expect_true(vec_equal_na(data.frame(x = NA_real_, y = NaN))) expect_true(vec_equal_na(data.frame(x = NA_complex_, y = NA_complex_))) expect_true(vec_equal_na(data.frame(x = complex(real = NA, imaginary = 1), y = complex(real = 1, imaginary = NA)))) expect_true(vec_equal_na(data.frame(x = NA_character_, y = NA_character_))) expect_true(vec_equal_na(new_data_frame(list(x = list(NULL), y = list(NULL))))) }) test_that("raw vectors can never be NA", { expect_false(vec_equal_na(raw(1))) expect_false(vec_equal_na(data.frame(x = raw(1), y = raw(1)))) }) test_that("vectorised over rows of a data frame", { df <- data.frame(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA)) expect_equal(vec_equal_na(df), c(FALSE, FALSE, FALSE, TRUE)) }) test_that("works recursively with data frame columns", { df <- data.frame(x = c(1, 1, NA, NA)) df$df <- data.frame(y = c(NA, 1, 1, NA), z = c(1, NA, 1, NA)) expect_equal(vec_equal_na(df), c(FALSE, FALSE, FALSE, TRUE)) }) test_that("NA propagate symmetrically (#204)", { exp <- c(NA, NA) expect_identical(vec_equal(c(TRUE, FALSE), NA), exp) expect_identical(vec_equal(1:2, NA), exp) expect_identical(vec_equal(c(1, 2), NA), exp) expect_identical(vec_equal(letters[1:2], NA), exp) expect_identical(vec_equal(NA, c(TRUE, FALSE)), exp) expect_identical(vec_equal(NA, 1:2), exp) expect_identical(vec_equal(NA, c(1, 2)), exp) expect_identical(vec_equal(NA, letters[1:2]), exp) }) test_that("NA propagate from data frames columns", { x <- data.frame(x = 1:3) y <- data.frame(x = c(1L, NA, 2L)) expect_identical(vec_equal(x, y), c(TRUE, NA, FALSE)) expect_identical(vec_equal(y, x), c(TRUE, NA, FALSE)) expect_identical(vec_equal(x, y, na_equal = TRUE), c(TRUE, FALSE, FALSE)) expect_identical(vec_equal(y, x, na_equal = TRUE), c(TRUE, FALSE, FALSE)) x <- data.frame(x = 1:3, y = 1:3) y <- data.frame(x = c(1L, NA, 2L), y = c(NA, 2L, 3L)) expect_identical(vec_equal(x, y), c(NA, NA, FALSE)) expect_identical(vec_equal(y, x), c(NA, NA, FALSE)) expect_identical(vec_equal(x, y, na_equal = TRUE), c(FALSE, FALSE, FALSE)) expect_identical(vec_equal(y, x, na_equal = TRUE), c(FALSE, FALSE, FALSE)) }) test_that("NA do not propagate from list components (#662)", { expect_true(obj_equal(NA, NA)) expect_true(vec_equal(list(NA), list(NA))) }) test_that("NA do not propagate from names when comparing objects", { x <- set_names(1:3, c("a", "b", NA)) y <- set_names(1:3, c("a", NA, NA)) expect_true(obj_equal(x, x)) expect_false(obj_equal(x, y)) expect_equal(vec_equal(list(x, x, y), list(x, y, y)), c(TRUE, FALSE, TRUE)) }) test_that("NA do not propagate from attributes", { x <- structure(1:3, foo = NA) y <- structure(1:3, foo = "") expect_true(obj_equal(x, x)) expect_false(obj_equal(x, y)) }) test_that("NA do not propagate from function bodies or formals", { fn <- other <- function() NA body(other) <- TRUE expect_true(vec_equal(list(fn), list(fn))) expect_false(vec_equal(list(fn), list(other))) expect_true(obj_equal(fn, fn)) expect_false(obj_equal(fn, other)) fn <- other <- function(x = NA) NULL formals(other) <- list(x = NULL) expect_true(vec_equal(list(fn), list(fn))) expect_false(vec_equal(list(fn), list(other))) }) test_that("can check equality of unspecified objects", { expect_equal(vec_equal(NA, NA), NA) expect_true(vec_equal(NA, NA, na_equal = TRUE)) expect_equal(vec_equal(unspecified(1), unspecified(1)), NA) expect_true(vec_equal(unspecified(1), unspecified(1), na_equal = TRUE)) expect_equal(vec_equal(NA, unspecified(1)), NA) expect_true(vec_equal(NA, unspecified(1), na_equal = TRUE)) }) test_that("can't supply NA as `na_equal`", { expect_error(vec_equal(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`") }) # proxy ------------------------------------------------------------------- test_that("vec_equal() takes vec_proxy() by default", { local_env_proxy() x <- new_proxy(1:3) y <- new_proxy(3:1) expect_identical(vec_equal(x, y), lgl(FALSE, TRUE, FALSE)) }) test_that("vec_equal() takes vec_proxy_equal() if implemented", { local_comparable_tuple() x <- tuple(1:3, 1:3) y <- tuple(1:3, 4:6) expect_identical(x == y, rep(TRUE, 3)) expect_identical(vec_equal(x, y), rep(TRUE, 3)) # Recursive case foo <- data_frame(x = x) bar <- data_frame(x = y) expect_identical(vec_equal(foo, bar), rep(TRUE, 3)) }) vctrs/tests/testthat/performance/0000755000176200001440000000000014024411013016704 5ustar liggesusersvctrs/tests/testthat/performance/test-bind.txt0000644000176200001440000000246314042546235021362 0ustar liggesusers> ints <- rep(list(1L), 100) > named_ints <- rep(list(set_names(1:3, letters[1:3])), 100) > # Integers as rows > suppressMessages(with_memory_prof(vec_rbind(!!!ints))) [1] 2.53KB > suppressMessages(with_memory_prof(vec_rbind(!!!named_ints))) [1] 3.41KB > # Data frame with named columns > df <- data_frame(x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c( + "A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb")))) > dfs <- rep(list(df), 100) > with_memory_prof(vec_rbind(!!!dfs)) [1] 10.2KB > # Data frame with rownames (non-repaired, non-recursive case) > df <- data_frame(x = 1:2) > dfs <- rep(list(df), 100) > dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) > with_memory_prof(vec_rbind(!!!dfs)) [1] 7.42KB > # Data frame with rownames (repaired, non-recursive case) > dfs <- map(dfs, set_rownames_recursively) > with_memory_prof(vec_rbind(!!!dfs)) [1] 14.8KB > # FIXME (#1217): Data frame with rownames (non-repaired, recursive case) > df <- data_frame(x = 1:2, y = data_frame(x = 1:2)) > dfs <- rep(list(df), 100) > dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) > with_memory_prof(vec_rbind(!!!dfs)) [1] 1MB > # FIXME (#1217): Data frame with rownames (repaired, recursive case) > dfs <- map(dfs, set_rownames_recursively) > with_memory_prof(vec_rbind(!!!dfs)) [1] 1.02MB vctrs/tests/testthat/performance/test-c.txt0000644000176200001440000000405514042546236020670 0ustar liggesusers> ints <- rep(list(1L), 100) > dbls <- rep(list(1), 100) `vec_c()` ========== > # Integers > with_memory_prof(vec_c(!!!ints)) [1] 1.7KB > # Doubles > with_memory_prof(vec_c(!!!dbls)) [1] 2.09KB > # Integers to integer > with_memory_prof(vec_c(!!!ints, ptype = int())) [1] 3.38KB > # Doubles to integer > with_memory_prof(vec_c(!!!dbls, ptype = int())) [1] 3.77KB `vec_unchop()` =============== > # Integers > with_memory_prof(vec_unchop(ints)) [1] 896B > # Doubles > with_memory_prof(vec_unchop(dbls)) [1] 1.27KB > # Integers to integer > with_memory_prof(vec_unchop(ints, ptype = int())) [1] 896B > # Doubles to integer > with_memory_prof(vec_unchop(dbls, ptype = int())) [1] 896B Concatenation with names ======================== > # Named integers > ints <- rep(list(set_names(1:3, letters[1:3])), 100) > with_memory_prof(vec_unchop(ints)) [1] 4.05KB > # Named matrices > mat <- matrix(1:4, 2, dimnames = list(c("foo", "bar"))) > mats <- rep(list(mat), 100) > with_memory_prof(vec_unchop(mats)) [1] 3.66KB > # Data frame with named columns > df <- data_frame(x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c( + "A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb")))) > dfs <- rep(list(df), 100) > with_memory_prof(vec_unchop(dfs)) [1] 8.53KB > # Data frame with rownames (non-repaired, non-recursive case) > df <- data_frame(x = 1:2) > dfs <- rep(list(df), 100) > dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) > with_memory_prof(vec_unchop(dfs)) [1] 5.77KB > # Data frame with rownames (repaired, non-recursive case) > dfs <- map(dfs, set_rownames_recursively) > with_memory_prof(vec_unchop(dfs)) [1] 13.1KB > # FIXME (#1217): Data frame with rownames (non-repaired, recursive case) > df <- data_frame(x = 1:2, y = data_frame(x = 1:2)) > dfs <- rep(list(df), 100) > dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) > with_memory_prof(vec_unchop(dfs)) [1] 1MB > # FIXME (#1217): Data frame with rownames (repaired, recursive case) > dfs <- map(dfs, set_rownames_recursively) > with_memory_prof(vec_unchop(dfs)) [1] 1.02MB vctrs/tests/testthat/helper-s3.R0000644000176200001440000001212313723213047016343 0ustar liggesusers new_ctor <- function(base_class) { function(x = list(), ..., class = NULL) { if (inherits(x, "tbl_df")) { tibble::new_tibble(x, class = c(class, base_class), nrow = nrow(x)) } else if (is.data.frame(x)) { structure(x, class = c(class, base_class, "data.frame"), ...) } else { structure(x, class = c(class, base_class), ...) } } } foobar <- new_ctor("vctrs_foobar") foobaz <- new_ctor("vctrs_foobaz") quux <- new_ctor("vctrs_quux") expect_foobar <- function(x) expect_s3_class({{ x }}, "vctrs_foobar") expect_foobaz <- function(x) expect_s3_class({{ x }}, "vctrs_foobaz") expect_quux <- function(x) expect_s3_class({{ x }}, "vctrs_quux") with_c_foobar <- function(expr) { with_methods( expr, c.vctrs_foobar = function(...) foobar(NextMethod()) ) } unrownames <- function(x) { row.names(x) <- NULL x } local_methods <- function(..., .frame = caller_env()) { local_bindings(..., .env = global_env(), .frame = .frame) } with_methods <- function(.expr, ...) { local_methods(...) .expr } local_proxy <- function(frame = caller_env()) { local_methods(.frame = frame, vec_proxy.vctrs_proxy = function(x, ...) proxy_deref(x), vec_restore.vctrs_proxy = function(x, to, ...) new_proxy(x), vec_ptype2.vctrs_proxy = function(x, y, ...) UseMethod("vec_ptype2.vctrs_proxy"), vec_ptype2.vctrs_proxy.vctrs_proxy = function(x, y, ...) new_proxy(vec_ptype(proxy_deref(x))), vec_cast.vctrs_proxy = function(x, to, ...) UseMethod("vec_cast.vctrs_proxy"), vec_cast.vctrs_proxy.vctrs_proxy = function(x, to, ...) x ) } new_proxy <- function(x) { structure(list(env(x = x)), class = "vctrs_proxy") } proxy_deref <- function(x) { x[[1]]$x } local_env_proxy <- function(frame = caller_env()) { local_methods(.frame = frame, vec_proxy.vctrs_proxy = proxy_deref, vec_restore.vctrs_proxy = function(x, ...) new_proxy(x), vec_cast.vctrs_proxy = function(x, to, ...) UseMethod("vec_cast.vctrs_proxy"), vec_cast.vctrs_proxy.vctrs_proxy = function(x, to, ...) x, vec_ptype2.vctrs_proxy = function(x, y, ...) UseMethod("vec_ptype2.vctrs_proxy"), vec_ptype2.vctrs_proxy.vctrs_proxy = function(x, y, ...) new_proxy(proxy_deref(x)[0]) ) } local_no_stringsAsFactors <- function(frame = caller_env()) { local_options(.frame = frame, stringsAsFactors = FALSE) } tibble <- function(...) { tibble::tibble(...) } local_foobar_proxy <- function(frame = caller_env()) { local_methods(.frame = frame, vec_proxy.vctrs_foobar = identity) } subclass <- function(x) { class(x) <- c("vctrs_foo", "vctrs_foobar", class(x)) x } # Subclass promoted to logical new_lgl_subtype <- function(x) { stopifnot(is_logical(x)) structure(x, class = "vctrs_lgl_subtype") } local_lgl_subtype <- function(frame = caller_env()) { local_methods(.frame = frame, vec_ptype2.vctrs_lgl_subtype = function(x, y, ...) UseMethod("vec_ptype2.vctrs_lgl_subtype"), vec_ptype2.vctrs_lgl_subtype.vctrs_lgl_subtype = function(x, y, ...) x, vec_ptype2.vctrs_lgl_subtype.logical = function(x, y, ...) y, vec_ptype2.logical.vctrs_lgl_subtype = function(x, y, ...) x, vec_cast.vctrs_lgl_subtype = function(x, to, ...) UseMethod("vec_cast.vctrs_lgl_subtype"), vec_cast.vctrs_lgl_subtype.vctrs_lgl_subtype = function(x, to, ...) x, vec_cast.vctrs_lgl_subtype.logical = function(x, to, ...) new_lgl_subtype(x), vec_cast.logical.vctrs_lgl_subtype = function(x, to, ...) unstructure(x) ) } with_lgl_subtype <- function(expr) { local_lgl_subtype() expr } # Logical promoted to subclass new_lgl_supertype <- function(x) { stopifnot(is_logical(x)) structure(x, class = "vctrs_lgl_supertype") } local_lgl_supertype <- function(frame = caller_env()) { local_methods(.frame = frame, vec_ptype2.vctrs_lgl_supertype = function(x, y, ...) UseMethod("vec_ptype2.vctrs_lgl_supertype"), vec_ptype2.vctrs_lgl_supertype.vctrs_lgl_supertype = function(x, y, ...) x, vec_ptype2.vctrs_lgl_supertype.logical = function(x, y, ...) x, vec_ptype2.logical.vctrs_lgl_supertype = function(x, y, ...) y, vec_cast.vctrs_lgl_supertype = function(x, to, ...) UseMethod("vec_cast.vctrs_lgl_supertype"), vec_cast.vctrs_lgl_supertype.vctrs_lgl_supertype = function(x, to, ...) x, vec_cast.vctrs_lgl_supertype.logical = function(x, to, ...) new_lgl_subtype(x), vec_cast.logical.vctrs_lgl_supertype = function(x, to, ...) unstructure(x) ) } with_lgl_supertype <- function(expr) { local_lgl_supertype() expr } foobar_df_ptype2 <- function(x, y, ...) { foobar(df_ptype2(x, y, ...)) } foobar_df_cast <- function(x, y, ...) { foobar(df_cast(x, y, ...)) } local_foobar_df_methods <- function(expr, frame = caller_env()) { local_methods( .frame = frame, vec_ptype2.vctrs_foobar.vctrs_foobar = foobar_df_ptype2, vec_ptype2.data.frame.vctrs_foobar = foobar_df_ptype2, vec_ptype2.vctrs_foobar.data.frame = foobar_df_ptype2, vec_cast.vctrs_foobar.vctrs_foobar = foobar_df_cast, vec_cast.data.frame.vctrs_foobar = foobar_df_cast, vec_cast.vctrs_foobar.data.frame = foobar_df_cast ) } with_foobar_df_methods <- function(expr) { local_foobar_df_methods() expr } vctrs/tests/testthat/helper-vctrs.R0000644000176200001440000000351213753006726017170 0ustar liggesusers testthat_import_from <- function(ns, names, env = caller_env()) { skip_if_not_installed(ns) import_from(ns, names, env = env) } vec_ptype2_fallback <- function(x, y, ...) { vec_ptype2_params(x, y, ..., df_fallback = DF_FALLBACK_warn) } vec_ptype_common_df_fallback <- function(..., .ptype = NULL) { vec_ptype_common_params( ..., .ptype = .ptype, .df_fallback = DF_FALLBACK_warn ) } shaped_int <- function(...) { array(NA_integer_, c(...)) } set_rownames_recursively <- function(x, i = NULL) { n <- vec_size(x) stopifnot(n <= length(letters)) for (j in seq_along(x)) { if (is.data.frame(x[[j]])) { x[[j]] <- set_rownames_recursively(x[[j]], i = i) } } row.names(x) <- paste0(letters[seq_len(n)], i) x } expect_waldo_equal <- function(type, act, exp, info, ...) { comp <- waldo::compare(act$val, exp$val, ..., x_arg = "actual", y_arg = "expected") expect( length(comp) == 0, sprintf( "`actual` (%s) not %s to `expected` (%s).\n\n%s", act$lab, type, exp$lab, paste0(comp, collapse = "\n\n") ), info = info ) invisible(act$val) } expect_identical <- function(object, expected, info = NULL, label = NULL, expected.label = NULL, ...) { act <- quasi_label(enquo(object), label, arg = "object") exp <- quasi_label(enquo(expected), expected.label, arg = "expected") expect_waldo_equal("identical", act, exp, info, ...) } expect_equal <- function(object, expected, ..., tolerance = .Machine$double.eps ^ 0.5, info = NULL, label = NULL, expected.label = NULL) { act <- quasi_label(enquo(object), label, arg = "object") exp <- quasi_label(enquo(expected), expected.label, arg = "expected") expect_waldo_equal("equal", act, exp, info, ..., tolerance = tolerance) } vctrs/tests/testthat/helper-rational.R0000644000176200001440000000355214027045636017642 0ustar liggesusers # Rational record class from the S3 vector vignette new_rational <- function(n = integer(), d = integer()) { vec_assert(n, ptype = integer()) vec_assert(d, ptype = integer()) new_rcrd(list(n = n, d = d), class = "vctrs_rational") } rational <- function(n, d) { args <- vec_cast_common(n, d, .to = integer()) args <- vec_recycle_common(!!! args) new_rational(args[[1L]], args[[2L]]) } format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_proxy_equal.vctrs_rational <- function(x) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational_methods <- list( vec_ptype_abbr.vctrs_rational = function(x, ...) "rtnl", vec_ptype_full.vctrs_rational = function(x, ...) "rational", vec_ptype2.vctrs_rational = function(x, y, ...) UseMethod("vec_ptype2.vctrs_rational"), vec_ptype2.vctrs_rational.vctrs_rational = function(x, y, ...) new_rational(), vec_ptype2.vctrs_rational.integer = function(x, y, ...) new_rational(), vec_ptype2.integer.vctrs_rational = function(x, y, ...) new_rational(), vec_cast.vctrs_rational = function(x, to, ...) UseMethod("vec_cast.vctrs_rational"), vec_cast.vctrs_rational.vctrs_rational = function(x, to, ...) x, vec_cast.double.vctrs_rational = function(x, to, ...) field(x, "n") / field(x, "d"), vec_cast.vctrs_rational.integer = function(x, to, ...) rational(x, 1), vec_proxy_equal.vctrs_rational = vec_proxy_equal.vctrs_rational, vec_proxy_compare.vctrs_rational = vec_proxy_compare.vctrs_rational ) local_rational_class <- function(frame = caller_env()) { local_methods(.frame = frame, !!!rational_methods) } vctrs/tests/testthat/test-proxy-restore.R0000644000176200001440000000737013723213047020370 0ustar liggesusers test_that("default vec_restore() restores attributes except names", { to <- structure(NA, foo = "foo", bar = "bar") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) to <- structure(NA, names = "a", foo = "foo", bar = "bar") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) to <- structure(NA, foo = "foo", names = "a", bar = "bar") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) to <- structure(NA, foo = "foo", bar = "bar", names = "a") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) }) test_that("default vec_restore() restores objectness", { to <- structure(NA, class = "foo") x <- vec_restore.default(NA, to) expect_true(is.object(x)) expect_s3_class(x, "foo") }) test_that("data frame vec_restore() checks type", { expect_error(vec_restore(NA, mtcars), "Attempt to restore data frame from a logical") }) test_that("data frame restore forces character column names", { df <- new_data_frame(list(1)) expect_named(vec_restore(df, df), "") }) test_that("can use vctrs primitives from vec_restore() without inflooping", { local_methods( vec_restore.vctrs_foobar = function(x, to, ...) { vec_ptype(x) vec_init(x) vec_assert(x) vec_slice(x, 0) "woot" } ) foobar <- new_vctr(1:3, class = "vctrs_foobar") expect_identical(vec_slice(foobar, 2), "woot") }) test_that("vec_restore() passes `n` argument to methods", { local_methods( vec_proxy.vctrs_foobar = identity, vec_restore.vctrs_foobar = function(x, to, ..., n) n ) expect_identical(vec_slice(foobar(1:3), 2), 1L) }) test_that("dimensions are preserved by default restore method", { x <- foobar(1:4) dim(x) <- c(2, 2) dimnames(x) <- list(a = c("foo", "bar"), b = c("quux", "hunoz")) exp <- foobar(c(1L, 3L)) dim(exp) <- c(1, 2) dimnames(exp) <- list(a = "foo", b = c("quux", "hunoz")) expect_identical(vec_slice(x, 1), exp) }) test_that("names attribute isn't set when restoring 1D arrays using 2D+ objects", { x <- foobar(1:2) dim(x) <- c(2) nms <- c("foo", "bar") dimnames(x) <- list(nms) res <- vec_restore(x, matrix(1)) expect_null(attributes(res)$names) expect_equal(attr(res, "names"), nms) expect_equal(names(res), nms) }) test_that("arguments are not inlined in the dispatch call (#300)", { local_methods( vec_restore.vctrs_foobar = function(x, to, ..., n) sys.call(), vec_proxy.vctrs_foobar = unclass ) call <- vec_restore(foobar(list(1)), foobar(list(1))) expect_equal(call, quote(vec_restore.vctrs_foobar(x = x, to = to, n = n))) }) test_that("restoring to non-bare data frames calls `vec_bare_df_restore()` before dispatching", { x <- list(x = numeric()) to <- new_data_frame(x, class = "tbl_foobar") local_methods( vec_restore.tbl_foobar = function(x, to, ..., n) { if (is.data.frame(x)) { abort(class = "error_df_restore_was_called") } } ) expect_error(vec_restore(x, to), class = "error_df_restore_was_called") }) test_that("row names are not restored if target is not a data frame", { proxy <- data.frame(x = 1) out <- vec_restore(proxy, to = foobar("")) exp <- list(names = "x", class = "vctrs_foobar") expect_identical(attributes(out), exp) }) test_that("attributes are properly restored when they contain special attributes", { exp <- list(foo = TRUE, bar = TRUE) x <- structure(list(), foo = TRUE, names = chr(), bar = TRUE) out <- vec_restore_default(list(), x) expect_identical(attributes(out), exp) # Was broken by #943 x <- structure(list(), foo = TRUE, names = chr(), row.names = int(), bar = TRUE) out <- vec_restore_default(list(), x) expect_identical(attributes(out), exp) }) vctrs/tests/testthat/test-list_of-str.txt0000644000176200001440000000031214042546250020376 0ustar liggesuserslist [1:2] $ : num 1 $ : num [1:2] 2 3 @ ptype: num(0) List of 1 $ :List of 2 ..$ : list [1:2] .. ..$ : num 1 .. ..$ : num [1:2] 2 3 .. ..@ ptype: num(0) ..$ y: int [1:2] 2 1 vctrs/tests/testthat/helper-shape.R0000644000176200001440000000017613653027721017127 0ustar liggesusers shape_broadcast_ <- function(x, to, x_arg = "x", to_arg = "to") { shape_broadcast(x, to, x_arg = x_arg, to_arg = to_arg) } vctrs/tests/testthat/test-type-sclr.R0000644000176200001440000000536113723213047017446 0ustar liggesusers test_that("sclr is a named list", { x <- new_sclr(x = 1, y = 2) expect_type(x, "list") expect_s3_class(x, "vctrs_sclr") expect_named(x, c("x", "y")) }) test_that("scalar must have unique names", { expect_error(new_sclr(x = 1, x = 2), "not TRUE") }) test_that("can get and set existing fields", { x <- new_sclr(x = 1, y = 2) x$x <- 3 expect_equal(x$x, 3) x[["y"]] <- 4 expect_equal(x[["y"]], 4) expect_error(x$z, "Invalid index") expect_error(x$z <- 1, "Invalid index") }) test_that("as.list strips attributes apart from names", { x <- new_sclr(x = 1, y = 2) y <- as.list(x) expect_type(y, "list") expect_equal(attributes(y), list(names = names(x))) }) test_that("as.data.frame works", { # #167: Not sure if this is the correct behavior. x <- new_sclr(x = 1, y = 2) expect_equal( as.data.frame(x, nm = "a"), new_data_frame(list(a = list(x))) ) }) test_that("putting in a data frame makes a list-col", { x <- new_sclr(x = 1, y = 2) df <- data.frame(x) expect_s3_class(df, "data.frame") expect_equal(df$x, list(x)) }) test_that("vector operations are unsupported", { x <- new_sclr(x = 1, y = 2) expect_error(x["a"], class = "vctrs_error_unsupported") expect_error(x["a"] <- 1, class = "vctrs_error_unsupported") expect_error(names(x) <- "x", class = "vctrs_error_unsupported") expect_error(dim(x) <- 1, class = "vctrs_error_unsupported") expect_error(dimnames(x) <- 1, class = "vctrs_error_unsupported") expect_error(levels(x) <- 1, class = "vctrs_error_unsupported") expect_error(is.na(x) <- 1, class = "vctrs_error_unsupported") expect_error(c(x), class = "vctrs_error_unsupported") expect_error(abs(x), class = "vctrs_error_unsupported") expect_error(x + 1, class = "vctrs_error_unsupported") expect_error(Arg(x), class = "vctrs_error_unsupported") expect_error(sum(x), class = "vctrs_error_unsupported") expect_error(order(x), class = "vctrs_error_unsupported") expect_error(levels(x), class = "vctrs_error_unsupported") expect_error(t(x), class = "vctrs_error_unsupported") expect_error(unique(x), class = "vctrs_error_unsupported") expect_error(duplicated(x), class = "vctrs_error_unsupported") expect_error(anyDuplicated(x), class = "vctrs_error_unsupported") expect_error(as.logical(x), class = "vctrs_error_unsupported") expect_error(as.integer(x), class = "vctrs_error_unsupported") expect_error(as.double(x), class = "vctrs_error_unsupported") expect_error(as.character(x), class = "vctrs_error_unsupported") expect_error(as.Date(x), class = "vctrs_error_unsupported") expect_error(as.POSIXct(x), class = "vctrs_error_unsupported") }) test_that("summary is unimplemented", { x <- new_sclr(x = 1, y = 2) expect_error(summary(x), class = "vctrs_error_unimplemented") }) vctrs/tests/testthat/test-dim.R0000644000176200001440000000126713723213047016276 0ustar liggesusers # vec_dim ----------------------------------------------------------------- test_that("dim is dimensions", { expect_equal(vec_dim(array(dim = c(1))), c(1)) expect_equal(vec_dim(array(dim = c(1, 1))), c(1, 1)) expect_equal(vec_dim(array(dim = c(1, 1, 1))), c(1, 1, 1)) }) test_that("dim_n is number of dimensions", { expect_equal(vec_dim_n(array(dim = c(1))), 1) expect_equal(vec_dim_n(array(dim = c(1, 1))), 2) expect_equal(vec_dim_n(array(dim = c(1, 1, 1))), 3) }) test_that("vector and 1-d array are equivalent", { x1 <- 1:5 x2 <- array(x1) expect_equal(vec_dim(x1), 5) expect_equal(vec_dim(x2), 5) expect_equal(vec_size(x1), 5) expect_equal(vec_size(x2), 5) }) vctrs/tests/testthat/test-conditions.R0000644000176200001440000001636013723213047017676 0ustar liggesusers test_that("conditions inherit from `vctrs_error`", { expect_error(stop_incompatible(NULL, NULL), class = "vctrs_error") expect_error(stop_incompatible_type(NULL, NULL, x_arg = "x", y_arg = "y"), class = "vctrs_error") expect_error(stop_incompatible_cast(NULL, NULL, x_arg = "x", to_arg = "to"), class = "vctrs_error") expect_error(stop_incompatible_op("", NULL, NULL), class = "vctrs_error") expect_error(stop_incompatible_size(NULL, NULL, 0, 0, x_arg = "x", y_arg = "y"), class = "vctrs_error") expect_error(maybe_lossy_cast(NULL, NULL, NULL, TRUE, x_arg = "x", to_arg = "to"), class = "vctrs_error") expect_error(stop_unsupported("", ""), class = "vctrs_error") expect_error(stop_unimplemented("", ""), class = "vctrs_error") expect_error(stop_scalar_type(NULL), class = "vctrs_error") expect_error(stop_names(), class = "vctrs_error") expect_error(stop_names_cannot_be_empty(""), class = "vctrs_error") expect_error(stop_names_cannot_be_dot_dot("..1"), class = "vctrs_error") expect_error(stop_names_must_be_unique("x"), class = "vctrs_error") }) test_that("incompatible cast throws an incompatible type error", { expect_error( stop_incompatible_cast(1, 1, x_arg = "x", to_arg = "to"), class = "vctrs_error_incompatible_type" ) }) test_that("incompatible type error validates `action`", { verify_errors({ expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = "c")) expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = 1)) }) }) test_that("can override arg in OOB conditions", { verify_errors({ expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), NULL ), class = "vctrs_error_subscript_oob" ) expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), quote(foo) ), class = "vctrs_error_subscript_oob" ) expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), quote(foo(bar)) ), class = "vctrs_error_subscript_oob" ) }) }) test_that("scalar type errors are informative", { verify_errors({ expect_error( vec_slice(foobar(list(1)), 1), class = "vctrs_error_scalar_type" ) expect_error( stop_scalar_type(foobar(list(1)), arg = "foo"), class = "vctrs_error_scalar_type" ) }) }) test_that("empty names errors are informative", { verify_errors({ expect_error( vec_as_names(c("x", "", "y"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" ) expect_error( vec_as_names(c("x", "", "y", ""), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" ) expect_error( vec_as_names(rep("", 10), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" ) }) }) test_that("dot dot names errors are informative", { verify_errors({ expect_error( vec_as_names(c("..1", "..1", "..1", "...", "z"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot" ) expect_error( vec_as_names(c(rep("..1", 20), rep(c("..2", "..3", "..4", "...", "..5"), 2)), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot" ) }) }) test_that("unique names errors are informative", { verify_errors({ expect_error( vec_as_names(c("x", "x", "x", "y", "y", "z"), repair = "check_unique"), class = "vctrs_error_names_must_be_unique" ) expect_error( vec_as_names(c(rep("x", 20), rep(c("a", "b", "c", "d", "e"), 2)), repair = "check_unique"), class = "vctrs_error_names_must_be_unique" ) }) }) test_that("can't supply both `message` and `details`", { expect_error( stop_incompatible_type(1, 2, message = "my message", x_arg = "x", y_arg = "y"), "my message", class = "vctrs_error_incompatible_type" ) expect_error( stop_incompatible_type(1, 2, message = "my message", details = "my details", x_arg = "x", y_arg = "y"), "Can't supply both `message` and `details`." ) }) test_that("lossy cast errors are internal", { # Should not trigger testthat warnings about untested class expect_error(vec_cast(mtcars, mtcars[1:3]), "convert") expect_error(vec_cast(1.5, int()), "convert") }) test_that("lossy cast from character to factor mentions loss of generality", { verify_errors({ expect_error(vec_cast("a", factor("b")), class = "vctrs_error_cast_lossy") }) }) test_that("ordered cast failures mention conversion", { verify_errors({ expect_error( vec_cast(ordered("x"), ordered("y")), class = "vctrs_error_incompatible_type" ) }) }) test_that("incompatible size errors", { verify_errors({ expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "")) expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = "")) expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "bar")) expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = quote(bar))) }) }) test_that("simplified backtraces include whole vctrs context", { skip_on_cran() top <- current_env() trace <- NULL expect_error(withCallingHandlers(vec_slice(1, 2), error = function(...) { trace <<- trace_back(top, sys.frame(-1L)) })) trace_lines <- format(trace, simplify = "branch") expect_true(any(grepl("vec_slice", trace_lines))) }) verify_output(test_path("error", "test-conditions.txt"), { "# incompatible type error validates `action`" stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = "conver") stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = 1) "# can override arg in OOB conditions" with_subscript_data( vec_slice(set_names(letters), "foo"), NULL ) with_subscript_data( vec_slice(set_names(letters), "foo"), "input" ) with_subscript_data( vec_slice(set_names(letters), "foo"), quote(input) ) with_subscript_data( vec_slice(set_names(letters), "foo"), quote(input[i]) ) "# scalar type errors are informative" vec_slice(foobar(list(1)), 1) stop_scalar_type(foobar(list(1)), arg = "foo") "# empty names errors are informative" vec_as_names(c("x", "", "y"), repair = "check_unique") vec_as_names(c("x", "", "y", ""), repair = "check_unique") vec_as_names(rep("", 10), repair = "check_unique") "# dot dot names errors are informative" vec_as_names(c("..1", "..1", "..1", "...", "z"), repair = "check_unique") vec_as_names(c(rep("..1", 20), rep(c("..2", "..3", "..4", "...", "..5"), 2)), repair = "check_unique") "# unique names errors are informative" vec_as_names(c("x", "x", "x", "y", "y", "z"), repair = "check_unique") vec_as_names(c(rep("x", 20), rep(c("a", "b", "c", "d", "e"), 2)), repair = "check_unique") "# lossy cast from character to factor mentions loss of generality" vec_cast("a", factor("b")) "# ordered cast failures mention conversion" vec_cast(ordered("x"), ordered("y")) "# incompatible size errors" stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "") stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = "") stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "bar") stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = quote(bar)) }) vctrs/tests/testthat/test-dictionary.R0000644000176200001440000002555413753021253017676 0ustar liggesusers # counting ---------------------------------------------------------------- test_that("vec_count counts number observations", { x <- vec_count(rep(1:3, 1:3), sort = "key") expect_equal(x, data.frame(key = 1:3, count = 1:3)) }) test_that("vec_count works with matrices", { x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2)) out <- vec_count(x) exp <- data_frame(key = c(NA, NA), count = int(2L, 1L)) exp$key <- vec_slice(x, c(1, 3)) expect_identical(out, exp) }) test_that("vec_count works with arrays", { x <- array(c(rep(1, 3), rep(2, 3)), dim = c(3, 2, 1)) expect <- data.frame(key = NA, count = 3) expect$key <- vec_slice(x, 1L) expect_equal(vec_count(x), expect) }) test_that("vec_count works for zero-length input", { x <- vec_count(integer(), sort = "none") expect_equal(x, data.frame(key = integer(), count = integer())) }) test_that("vec_count works with different encodings", { x <- vec_count(encodings()) expect_equal(x, new_data_frame(list(key = encodings()[1], count = 3L))) }) test_that("vec_count recursively takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect <- data_frame(key = vec_slice(df, c(1, 3)), count = c(2L, 1L)) expect_equal(vec_count(df), expect) }) # duplicates and uniques -------------------------------------------------- test_that("vec_duplicated reports on duplicates regardless of position", { x <- c(1, 1, 2, 3, 4, 4) expect_equal(vec_duplicate_detect(x), c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE)) }) test_that("vec_duplicate_any returns single TRUE/FALSE", { expect_false(vec_duplicate_any(c(1:10))) expect_true(vec_duplicate_any(c(1:10, 1))) }) test_that("vec_duplicate_id gives position of first found", { x <- c(1, 2, 3, 1, 4) expect_equal(vec_duplicate_id(x), c(1, 2, 3, 1, 5)) }) test_that("vec_unique matches unique", { x <- sample(100, 1000, replace = TRUE) expect_equal(vec_unique(x), unique(x)) expect_equal(vec_unique(c("x", "x")), "x") }) test_that("vec_unique matches unique for matrices", { x <- matrix(c(1, 1, 2, 2), ncol = 2) expect_equal(vec_unique(x), unique(x)) }) test_that("vec_unique_count matches length + unique", { x <- sample(100, 1000, replace = TRUE) expect_equal(vec_unique_count(x), length(unique(x))) }) test_that("also works for data frames", { df <- data.frame(x = 1:3, y = letters[3:1], stringsAsFactors = FALSE) idx <- c(1L, 1L, 1L, 2L, 2L, 3L) df2 <- df[idx, , drop = FALSE] rownames(df2) <- NULL expect_equal(vec_duplicate_detect(df2), vec_duplicate_detect(idx)) expect_equal(vec_unique(df2), vec_slice(df, vec_unique(idx))) count <- vec_count(df2, sort = "key") expect_equal(count$key, df) expect_equal(count$count, vec_count(idx)$count) exp <- tibble(x = c(1, 1, 2), y = c(1, 2, 3)) expect_identical(vec_unique(vec_slice(exp, c(1, 1, 2, 3))), exp) }) test_that("vec_unique() handles matrices (#327)", { x <- matrix(c(1, 2, 3, 4), c(2, 2)) y <- matrix(c(1, 2, 3, 5), c(2, 2)) expect_identical(vec_unique(list(x, x)), list(x)) expect_identical(vec_unique(list(x, y)), list(x, y)) x <- matrix(c(1, 2, 1, 1, 2, 1), nrow = 3) expect_identical(vec_unique(x), vec_slice(x, 1:2)) }) test_that("vec_unique() works with 1D arrays", { # 1D arrays are dispatched to `as.data.frame.vector()` which # currently does not strip dimensions. This caused an infinite # recursion. expect_identical(vec_unique(array(1:2)), array(1:2)) x <- new_vctr(c(1, 1, 1, 2, 1, 2), dim = c(3, 2)) expect_identical(vec_unique(x), new_vctr(c(1, 1, 2, 1), dim = c(2, 2))) }) test_that("unique functions take the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_true(vec_in(tuple(2, 100), x)) expect_identical(vec_match(tuple(2, 100), x), 2L) }) test_that("unique functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect_equal(vec_unique(df), vec_slice(df, c(1, 3))) expect_equal(vec_unique_count(df), 2L) expect_equal(vec_unique_loc(df), c(1, 3)) }) test_that("duplicate functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect_equal(vec_duplicate_any(df), TRUE) expect_equal(vec_duplicate_detect(df), c(TRUE, TRUE, FALSE)) expect_equal(vec_duplicate_id(df), c(1, 1, 3)) }) test_that("unique functions treat positive and negative 0 as equivalent (#637)", { expect_equal(vec_unique(c(0, -0)), 0) expect_equal(vec_unique_count(c(0, -0)), 1) expect_equal(vec_unique_loc(c(0, -0)), 1) }) test_that("unique functions work with different encodings", { encs <- encodings() expect_equal(vec_unique(encs), encs[1]) expect_equal(vec_unique_count(encs), 1L) expect_equal(vec_unique_loc(encs), 1L) }) test_that("unique functions can handle scalar types in lists", { x <- list(x = a ~ b, y = a ~ b, z = a ~ c) expect_equal(vec_unique(x), vec_slice(x, c(1, 3))) x <- list(x = call("x"), y = call("y"), z = call("x")) expect_equal(vec_unique(x), vec_slice(x, c(1, 2))) }) test_that("duplicate functions works with different encodings", { encs <- encodings() expect_equal(vec_duplicate_id(encs), rep(1, 3)) expect_equal(vec_duplicate_detect(encs), rep(TRUE, 3)) expect_equal(vec_duplicate_any(encs), TRUE) }) test_that("vec_unique() returns differently encoded strings in the order they appear", { encs <- encodings() x <- c(encs$unknown, encs$utf8) y <- c(encs$utf8, encs$unknown) expect_equal_encoding(vec_unique(x), encs$unknown) expect_equal_encoding(vec_unique(y), encs$utf8) }) test_that("vec_unique() works on lists containing expressions", { x <- list(expression(x), expression(y), expression(x)) expect_equal(vec_unique(x), x[1:2]) }) test_that("vec_unique() works with glm objects (#643)", { # class(model$family$initialize) == "expression" model <- glm(mpg ~ wt, data = mtcars) expect_equal(vec_unique(list(model, model)), list(model)) }) test_that("can take the unique locations of dfs with list-cols", { df <- tibble(x = list(1, 2, 1, 3), y = list(1, 2, 1, 3)) expect_identical(vec_unique_loc(df), c(1L, 2L, 4L)) }) # matching ---------------------------------------------------------------- test_that("vec_match() matches match()", { n <- c(1:3, NA) h <- c(4, 2, 1, NA) expect_equal(vec_match(n, h), match(n, h)) expect_equal(vec_match(1.5, c(2, 1.5, NA)), match(1.5, c(2, 1.5, NA))) expect_equal(vec_match("x", "x"), match("x", "x")) }) test_that("vec_match() and vec_in() check types", { verify_errors({ df1 <- data_frame(x = data_frame(foo = 1)) df2 <- data_frame(x = data_frame(foo = "")) expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type") expect_error(vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type") expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type") expect_error(vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type") }) }) test_that("vec_in() matches %in%", { n <- c(1:3, NA) h <- c(4, 2, 1, NA) expect_equal(vec_in(n, h), n %in% h) }) test_that("can opt out of NA matching", { n <- c(1, NA) h <- c(1:3, NA) expect_equal(vec_in(n, h, na_equal = FALSE), c(TRUE, NA)) }) test_that("vec_match works with empty data frame", { out <- vec_match( new_data_frame(n = 3L), new_data_frame(n = 0L) ) expect_equal(out, vec_init(integer(), 3)) }) test_that("matching functions take the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_identical(vec_unique_loc(x), 1:2) expect_identical(unique(x), tuple(c(1, 2), 1:2)) expect_true(vec_duplicate_any(x)) expect_identical(vec_duplicate_id(x), c(1L, 2L, 1L)) expect_identical(vec_unique_count(x), 2L) expect_identical(vec_duplicate_detect(x), c(TRUE, FALSE, TRUE)) }) test_that("can take the unique loc of 1d arrays (#461)", { x <- array(c(1, 1, 2, 2, 3)) y <- array(c(1, 1, 2, 2, 3), dimnames = list(NULL)) expect_identical(vctrs::vec_unique_loc(x), int(1, 3, 5)) expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5)) z <- array(c(1, 1, 2, 2, 3, 4), c(3, 2)) expect_silent(expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5))) }) test_that("matching functions work with different encodings", { encs <- encodings() expect_equal(vec_match(encs, encs[1]), rep(1, 3)) expect_equal(vec_in(encs, encs[1]), rep(TRUE, 3)) }) test_that("matching functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2), 1:2) df <- data_frame(x = x) y <- tuple(c(2, 3), c(3, 3)) df2 <- data_frame(x = y) expect_equal(vec_match(df, df2), c(NA, 1)) expect_equal(vec_in(df, df2), c(FALSE, TRUE)) }) test_that("can propagate missing values while matching", { exp <- c(NA, 3L, NA, 1L) expect_identical(vec_match(lgl(NA, TRUE, NA, FALSE), lgl(FALSE, NA, TRUE), na_equal = FALSE), exp) expect_identical(vec_match(int(NA, 1L, NA, 2L), int(2L, NA, 1L), na_equal = FALSE), exp) expect_identical(vec_match(dbl(NA, 1, NA, 2), dbl(2, NA, 1), na_equal = FALSE), exp) expect_identical(vec_match(cpl(NA, 1, NA, 2), cpl(2, NA, 1), na_equal = FALSE), exp) expect_identical(vec_match(chr(NA, "1", NA, "2"), chr("2", NA, "1"), na_equal = FALSE), exp) expect_identical(vec_match(list(NULL, 1, NULL, 2), list(2, NULL, 1), na_equal = FALSE), exp) # No missing values for raw vectors expect_identical(vec_match(bytes(0, 1, 0, 2), bytes(2, 0, 1), na_equal = FALSE), c(2L, 3L, 2L, 1L)) }) test_that("can propagate NaN as a missing value (#1252)", { expect_identical(vec_match(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), int(NA, NA)) expect_identical(vec_in(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), lgl(NA, NA)) }) test_that("missing values are propagated across columns", { for (na_value in list(NA, na_int, na_dbl, na_cpl, na_chr, list(NULL))) { df <- data_frame(x = 1, y = data_frame(foo = 2, bar = na_value), z = 3) expect_identical(vec_match(df, df), 1L) expect_identical(vec_match(df, df, na_equal = FALSE), na_int) } }) test_that("can't supply NA as `na_equal`", { expect_error(vec_match(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`") }) test_that("dictionary tools have informative errors", { verify_output(test_path("error", "test-dictionary.txt"), { "# vec_match() and vec_in() check types" df1 <- data_frame(x = data_frame(foo = 1)) df2 <- data_frame(x = data_frame(foo = "")) vec_match(df1, df2) vec_match(df1, df2, needles_arg = "n", haystack_arg = "h") vec_in(df1, df2) vec_in(df1, df2, needles_arg = "n", haystack_arg = "h") }) }) test_that("vec_match() and vec_in() silently fall back to base data frame", { expect_silent(expect_identical( vec_match(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), 1:32 )) expect_silent(expect_identical( vec_in(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(TRUE, 32) )) }) vctrs/tests/testthat/test-fields.R0000644000176200001440000000445413723213047016774 0ustar liggesusers test_that("n_fields captures number of fields", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(n_fields(r), 2) }) # get --------------------------------------------------------------------- test_that("can extract valid field", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(field(r, "x"), 1) expect_equal(field(r, 1L), 1) }) test_that("can extract field even if encoding is different", { x1 <- "fa\u00e7ile" skip_if_not(Encoding(x1) == "UTF-8") x2 <- iconv(x1, from = "UTF-8", to = "latin1") skip_if_not(Encoding(x2) == "latin1") r <- new_rcrd(setNames(list(1), x1)) expect_equal(field(r, x1), 1) expect_equal(field(r, x2), 1) }) test_that("invalid indices throw error", { r <- new_rcrd(list(x = 1, y = 2)) expect_error(field(r, "z"), "Invalid index") expect_error(field(r, NA_character_), "Invalid index") expect_error(field(r, ""), "Invalid index") expect_error(field(r, letters), "Invalid index") expect_error(field(r, 0L), "Invalid index") expect_error(field(r, NA_integer_), "Invalid index") expect_error(field(r, 0), "Invalid index") expect_error(field(r, NA_real_), "Invalid index") expect_error(field(r, Inf), "Invalid index") expect_error(field(r, mean), "Invalid index") }) test_that("corrupt rcrd throws error", { r <- new_rcrd(list(x = 1, y = 2)) expect_error(field(1:10, 1L), "Corrupt rcrd") expect_error(field(list(), 1L), "Corrupt rcrd") expect_error(field(list(1), "x"), "Corrupt x") expect_error(field(setNames(list(1, 1), "y"), "x"), "Corrupt x") }) # set --------------------------------------------------------------------- test_that("field<- modifies a copy", { r1 <- new_rcrd(list(x = 1, y = 2)) r2 <- r1 field(r1, "x") <- 3 expect_equal(field(r1, "x"), 3) expect_equal(field(r2, "x"), 1) }) test_that("field<- checks inputs", { x <- list() expect_error(field(x, "x") <- 1, "Corrupt rcrd") r <- new_rcrd(list(x = 1)) expect_error(field(r, "x") <- 1:3, "Invalid value") expect_error(field(r, "x") <- environment(), "Invalid value") }) test_that("field<- respects size, not length (#450)", { r1 <- new_rcrd(list(df = new_data_frame(n = 2L))) new_df <- data.frame(x = 1:2) field(r1, 'df') <- new_df expect_equal(field(r1, "df"), new_df) expect_error(field(r1, 'df') <- new_data_frame(n = 3L), "Invalid value") }) vctrs/tests/testthat/test-type2-error-messages.txt0000644000176200001440000000026614042546253022143 0ustar liggesusersBare objects: vec_ptype2("foo", 10): Can't combine and . Nested dataframes: vec_ptype2(df1, df2): Can't combine `x$y$z` and `x$y$z` . vctrs/tests/testthat/test-type-rcrd.R0000644000176200001440000001633013723213047017433 0ustar liggesusers # constructor and accessors ----------------------------------------------- test_that("can construct and access components", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(length(r), 1) expect_equal(n_fields(r), 2) expect_equal(names(r), NULL) expect_equal(fields(r), c("x", "y")) expect_error(r$x, class = "vctrs_error_unsupported") expect_equal(field(r, "x"), 1) }) test_that("requires format method", { x <- new_rcrd(list(x = 1)) expect_error(format(x), class = "vctrs_error_unimplemented") }) test_that("vec_proxy() transforms records to data frames", { expect_identical( vec_proxy(new_rcrd(list(a = "1"))), new_data_frame(list(a = "1")) ) }) # coercion ---------------------------------------------------------------- test_that("can't cast list to rcrd", { l <- list( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(b = "4", a = 2)) ) expect_error( vec_cast(l, new_rcrd(list(a = 1L, b = 2L))), class = "vctrs_error_incompatible_type" ) }) test_that("can recast rcrd from list", { r <- new_rcrd(list(x = integer(), y = numeric())) expect_equal( vec_restore(list(x = 1L, y = 1), r), new_rcrd(list(x = 1L, y = 1)) ) }) test_that("can't cast rcrd to list", { r <- new_rcrd(list(x = 1:2, y = 2:3)) expect_error(vec_cast(r, list()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(r, list()), class = "vctrs_error_incompatible_type") }) test_that("default casts are implemented correctly", { r <- new_rcrd(list(x = 1, y = 1)) expect_error(vec_cast(1, r), class = "vctrs_error_incompatible_type") expect_equal(vec_cast(NULL, r), NULL) }) test_that("can't cast incompatible rcrd", { expect_error( vec_cast( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(a = "1")) ), class = "vctrs_error_cast_lossy" ) expect_error( vec_cast( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(a = "1", c = 3L)) ), class = "vctrs_error_cast_lossy" ) expect_error( vec_cast( new_rcrd(list(a = "a", b = 3L)), new_rcrd(list(a = 1, b = 3L)) ), class = "vctrs_error_incompatible_type" ) }) # input validation -------------------------------------------------------- test_that("must be list of equal length vectors", { expect_error(new_rcrd(list()), "list of length 1") expect_error(new_rcrd(list(x = environment())), class = "vctrs_error_scalar_type") expect_error(new_rcrd(list(x = 1:2, y = 1:3)), "same size") }) test_that("names must be unique", { expect_error(new_rcrd(list(1, 2)), class = "vctrs_error_names_cannot_be_empty") expect_error(new_rcrd(list(x = 1, 2)), class = "vctrs_error_names_cannot_be_empty") expect_error(new_rcrd(list(x = 1, x = 2)), class = "vctrs_error_names_must_be_unique") expect_error(new_rcrd(setNames(list(1, 2), "x")), "can't return `NA`") }) test_that("subset assignment throws error", { x <- new_rcrd(list(x = 1)) expect_error( x$y <- 2, class = "vctrs_error_unsupported" ) }) test_that("can supply data frame as fields", { expect_identical( new_rcrd(list(x = 1)), new_rcrd(tibble(x = 1)) ) }) test_that("fields are not recycled", { expect_error( new_rcrd(list(x = 1, y = 1:2)), "must be the same size" ) }) # tuple class ---------------------------------------------------------- # use simple class to test essential features of rcrds test_that("print and str use format", { local_tuple_methods() r <- tuple(1, 1:100) expect_known_output( file = test_path("test-rcrd-format.txt"), { print(r) cat("\n") str(r[1:10]) cat("\n") str(list(list(list(r, 1:100)))) } ) }) test_that("subsetting methods applied to each field", { local_tuple_methods() x <- tuple(1:2, 1) expect_equal(x[1], tuple(1, 1)) expect_equal(x[[1]], tuple(1, 1)) expect_equal(rep(tuple(1, 1), 2), tuple(c(1, 1), 1)) length(x) <- 1 expect_equal(x, tuple(1, 1)) }) test_that("subset assignment modifies each field", { local_tuple_methods() x <- tuple(c(1, 1), c(2, 2)) expect_error(x[[]] <- tuple(), "missing") x[[1]] <- tuple(3, 3) expect_equal(x, tuple(c(3, 1), c(3, 2))) x[1] <- tuple(4, 4) expect_equal(x, tuple(c(4, 1), c(4, 2))) }) test_that("subset assignment recycles", { local_tuple_methods() x <- tuple(c(1, 1), c(2, 2)) x[1:2] <- tuple(1, 1) expect_equal(x, tuple(c(1, 1), c(1, 1))) x[] <- tuple(2, 2) expect_equal(x, tuple(c(2, 2), c(2, 2))) }) test_that("can sort rcrd", { local_tuple_methods() x <- tuple(c(1, 2, 1), c(3, 1, 2)) expect_equal(xtfrm(x), c(2, 3, 1)) expect_equal(order(x), c(3, 1, 2)) expect_equal(sort(x), tuple(c(1, 1, 2), c(2, 3, 1))) }) test_that("can use dictionary methods on a rcrd", { local_tuple_methods() x <- tuple(c(1, 2, 1), c(3, 1, 3)) expect_equal(unique(x), x[1:2]) expect_equal(duplicated(x), c(FALSE, FALSE, TRUE)) expect_equal(anyDuplicated(x), TRUE) }) test_that("cannot round trip through list", { local_tuple_methods() t <- tuple(1:2, 3:4) # Used to be allowed expect_error(vec_cast(t, list()), class = "vctrs_error_incompatible_type") }) test_that("can convert to list using as.list() or vec_chop() (#1113)", { local_tuple_methods() t <- tuple(1:2, 3:4) expect <- list(tuple(1L, 3L), tuple(2L, 4L)) expect_identical(as.list(t), expect) expect_identical(vec_chop(t), expect) }) test_that("dangerous methods marked as unimplemented", { local_tuple_methods() t <- tuple() expect_error(mean(t), class = "vctrs_error_unsupported") expect_error(abs(t), class = "vctrs_error_unsupported") expect_error(is.finite(t), class = "vctrs_error_unsupported") expect_error(is.nan(t), class = "vctrs_error_unsupported") }) # slicing ----------------------------------------------------------------- test_that("dots are forwarded", { expect_error(new_rcrd(list(foo = "foo"))[1, 2], "undefined columns selected") }) test_that("records are restored after slicing the proxy", { expect_identical(new_rcrd(list(x = 1:2))[1], new_rcrd(list(x = 1L))) }) test_that("can slice with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:2))) out <- vec_slice(x, 2) expect_identical( out, new_rcrd(data_frame(x = data_frame(y = 2L))) ) expect_identical( x[2], out ) expect_identical( x[[2]], out ) }) test_that("can rep with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:2))) expect_identical( rep(x, length.out = 4), vec_slice(x, c(1:2, 1:2)) ) }) test_that("can assign with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:3))) y <- new_rcrd(data_frame(x = data_frame(y = FALSE))) exp <- new_rcrd(data_frame(x = data_frame(y = c(1L, 2L, 0L)))) expect_identical(vec_assign(x, 3, y), exp) out <- x out[[3]] <- y expect_identical(out, exp) }) test_that("can resize with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:3))) length(x) <- 2 expect_identical(x, new_rcrd(data_frame(x = data_frame(y = 1:2)))) length(x) <- 4 expect_identical(x, new_rcrd(data_frame(x = data_frame(y = c(1:2, NA, NA))))) }) test_that("`[[` preserves type of record fields (#1205)", { x <- new_rcrd(list(x = 1:3, a = list(1, 2:3, 4:6))) expect_identical(field(x[3], "a"), list(4:6)) expect_identical(field(x[[3]], "a"), list(4:6)) }) vctrs/tests/testthat/test-type-date-time.R0000644000176200001440000005234213723213047020355 0ustar liggesusers test_that("date-times have informative types", { expect_identical(vec_ptype_abbr(Sys.Date()), "date") expect_identical(vec_ptype_full(Sys.Date()), "date") expect_identical(vec_ptype_abbr(Sys.time()), "dttm") expect_identical(vec_ptype_full(Sys.time()), "datetime") expect_identical(vec_ptype_abbr(new_duration(10)), "drtn") expect_identical(vec_ptype_full(new_duration(10)), "duration") }) test_that("vec_ptype() returns a double date for integer dates", { x <- structure(0L, class = "Date") expect_true(is.double(vec_ptype(x))) }) test_that("dates and times are vectors", { expect_true(vec_is(Sys.Date())) expect_true(vec_is(as.POSIXct("2020-01-01"))) expect_true(vec_is(as.POSIXlt("2020-01-01"))) }) test_that("vec_cast() converts POSIXct with int representation to double when converting zones", { x <- structure(integer(), class = c("POSIXct", "POSIXt"), tzone = "UTC") y <- structure(numeric(), class = c("POSIXct", "POSIXt"), tzone = "America/Los_Angeles") expect_true(is.double(vec_cast(x, y))) }) test_that("vec_c() converts POSIXct with int representation to double representation (#540)", { time1 <- seq(as.POSIXct("2015-12-01", tz = "UTC"), length.out = 2, by = "days") time2 <- vec_c(time1) expect_true(is.double(time2)) time3 <- vec_c(time1, time1) expect_true(is.double(time3)) }) test_that("vec_c() and vec_rbind() convert Dates with int representation to double representation (#396)", { x <- structure(0L, class = "Date") df <- data.frame(x = x) expect_true(is.double(vec_c(x))) expect_true(is.double(vec_c(x, x))) expect_true(is.double(vec_rbind(df)$x)) expect_true(is.double(vec_rbind(df, df)$x)) }) test_that("vec_c() and vec_ptype() standardize missing `tzone` attributes (#561)", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_ptype(x), "tzone"), "") expect_identical(attr(vec_c(x, x), "tzone"), "") }) # constructor ------------------------------------------------------------- test_that("can create a date", { expect_identical(new_date(), structure(double(), class = "Date")) expect_identical(new_date(0), structure(0, class = "Date")) }) test_that("retains input names", { expect_named(new_date(c(x = 0)), "x") }) test_that("drops attributes except names", { expect_identical(new_date(structure(1, foo = "bar")), new_date(1)) }) test_that("only allows doubles", { expect_error(new_date(1L), "must be a double vector") expect_error(new_date("x"), "must be a double vector") }) test_that("can create a datetime", { expect_identical(new_datetime(), structure(double(), class = c("POSIXct", "POSIXt"), tzone = "")) expect_identical(new_datetime(0), structure(0, class = c("POSIXct", "POSIXt"), tzone = "")) }) test_that("retains input names", { expect_named(new_datetime(c(x = 0)), "x") }) test_that("drops attributes except names", { expect_identical(new_datetime(structure(1, foo = "bar")), new_datetime(1)) }) test_that("only allows doubles", { expect_error(new_datetime(1L), "must be a double vector") expect_error(new_datetime("x"), "must be a double vector") }) test_that("tzone is allowed to be `NULL`", { expect_identical(new_datetime(tzone = NULL), new_datetime(tzone = "")) }) test_that("tzone must be character or `NULL`", { expect_error(new_datetime(tzone = 1), "character vector or `NULL`") }) # coerce ------------------------------------------------------------------ test_that("datetime coercions are symmetric and unchanging", { types <- list( new_date(), new_datetime(), new_datetime(tzone = "US/Central"), as.POSIXlt(character(), tz = "US/Central"), difftime(Sys.time() + 1000, Sys.time()), difftime(Sys.time() + 1, Sys.time()) ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) expect_known_output( mat, test_path("test-type-date-time.txt"), print = TRUE, width = 200 ) }) test_that("tz comes from first non-empty", { # On the assumption that if you've set the time zone explicitly it # should win x <- as.POSIXct("2020-01-01") y <- as.POSIXct("2020-01-01", tz = "America/New_York") expect_identical(vec_ptype2(x, y), y[0]) expect_identical(vec_ptype2(y, x), y[0]) z <- as.POSIXct("2020-01-01", tz = "Pacific/Auckland") expect_identical(vec_ptype2(y, z), y[0]) expect_identical(vec_ptype2(z, y), z[0]) }) test_that("POSIXlt always steered towards POSIXct", { dtc <- as.POSIXct("2020-01-01", tz = "UTC") dtl <- as.POSIXlt("2020-01-01", tz = "UTC") expect_identical(vec_ptype2(dtc, dtl), dtc[0]) expect_identical(vec_ptype2(dtl, dtc), dtc[0]) expect_identical(vec_ptype2(dtl, dtl), dtc[0]) }) test_that("vec_ptype2() on a POSIXlt with multiple time zones returns the first", { x <- as.POSIXlt(new_datetime(), tz = "Pacific/Auckland") expect_identical(attr(x, "tzone"), c("Pacific/Auckland", "NZST", "NZDT")) expect_identical(attr(vec_ptype2(x, new_date()), "tzone"), "Pacific/Auckland") }) test_that("vec_ptype2(, NA) is symmetric (#687)", { date <- new_date() expect_identical( vec_ptype2(date, NA), vec_ptype2(NA, date) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { time <- Sys.time() expect_identical( vec_ptype2(time, NA), vec_ptype2(NA, time) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { dtime <- Sys.time() - Sys.time() expect_identical( vec_ptype2(dtime, NA), vec_ptype2(NA, dtime) ) }) # cast: dates --------------------------------------------------------------- test_that("safe casts work as expected", { date <- as.Date("2018-01-01") datetime_ct <- as.POSIXct(as.character(date)) datetime_lt <- as.POSIXlt(datetime_ct) expect_identical(vec_cast(NULL, date), NULL) expect_identical(vec_cast(date, date), date) expect_identical(vec_cast(datetime_ct, date), date) expect_identical(vec_cast(datetime_lt, date), date) missing_date <- new_date(NA_real_) expect_identical(vec_cast(missing_date, missing_date), missing_date) expect_identical(vec_cast(as.POSIXct(missing_date), missing_date), missing_date) expect_identical(vec_cast(as.POSIXlt(missing_date), missing_date), missing_date) # These used to be allowed expect_error(vec_cast(17532, date), class = "vctrs_error_incompatible_type") expect_error(vec_cast("2018-01-01", date), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(date), date), class = "vctrs_error_incompatible_type") }) test_that("date - datetime cast can be roundtripped", { date <- as.Date("2018-01-01") datetime <- as.POSIXct("2018-01-01", tz = "America/New_York") expect_identical(vec_cast(vec_cast(date, datetime), date), date) expect_identical(vec_cast(vec_cast(datetime, date), datetime), datetime) }) test_that("lossy casts generate error", { date <- as.Date("2018-01-01") datetime <- as.POSIXct(as.character(date)) + c(0, 3600) expect_lossy(vec_cast(datetime, date), vec_c(date, date), x = datetime, to = date) }) test_that("invalid casts generate error", { date <- as.Date("2018-01-01") expect_error(vec_cast(integer(), date), class = "vctrs_error_incompatible_type") }) test_that("can cast NA and unspecified to Date", { expect_identical(vec_cast(NA, new_date()), new_date(NA_real_)) expect_identical(vec_cast(unspecified(2), new_date()), new_date(dbl(NA, NA))) }) test_that("casting an integer date to another date returns a double date", { x <- structure(0L, class = "Date") expect_true(is.double(vec_cast(x, x))) }) test_that("casting an integer POSIXct to a Date returns a double Date", { x <- .POSIXct(18000L, tz = "America/New_York") expect <- new_date(0) expect_identical(vec_cast(x, new_date()), expect) }) # cast: datetimes ----------------------------------------------------------- test_that("safe casts work as expected", { datetime_c <- as.POSIXct("1970-02-01", tz = "UTC") datetime_l <- as.POSIXlt("1970-02-01", tz = "UTC") expect_identical(vec_cast(NULL, datetime_c), NULL) expect_identical(vec_cast(datetime_c, datetime_c), datetime_c) expect_identical(vec_cast(datetime_l, datetime_c), datetime_c) expect_identical(vec_cast(as.Date(datetime_c), datetime_c), datetime_c) expect_identical(vec_cast(NULL, datetime_l), NULL) expect_identical(vec_cast(datetime_c, datetime_l), datetime_l) expect_identical(vec_cast(datetime_l, datetime_l), datetime_l) expect_identical(vec_cast(as.Date(datetime_l), datetime_l), datetime_l) expect_error(vec_cast(raw(), datetime_l), class = "vctrs_error_incompatible_type") missing_c <- new_datetime(NA_real_, tzone = "UTC") missing_l <- as.POSIXlt(missing_c) expect_identical(vec_cast(missing_c, missing_c), missing_c) expect_identical(vec_cast(missing_l, missing_c), missing_c) expect_identical(vec_cast(as.Date(missing_c), missing_c), missing_c) expect_identical(vec_cast(missing_l, missing_l), missing_l) expect_identical(vec_cast(missing_c, missing_l), missing_l) expect_identical(vec_cast(as.Date(missing_l), missing_l), missing_l) # These used to be allowed expect_error(vec_cast(2678400, datetime_c), class = "vctrs_error_incompatible_type") expect_error(vec_cast("1970-02-01", datetime_c), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(datetime_c), datetime_c), class = "vctrs_error_incompatible_type") expect_error(vec_cast(2678400, datetime_l), class = "vctrs_error_incompatible_type") expect_error(vec_cast("1970-02-01", datetime_l), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(datetime_l), datetime_l), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { datetime <- as.POSIXct("1970-02-01", tz = "UTC") expect_error(vec_cast(integer(), datetime), class = "vctrs_error_incompatible_type") }) test_that("dates become midnight in date-time tzone", { date1 <- as.Date("2010-01-01") datetime_c <- as.POSIXct(character(), tz = "Pacific/Auckland") datetime_l <- as.POSIXlt(character(), tz = "Pacific/Auckland") date2_c <- vec_cast(date1, datetime_c) expect_identical(tzone(date2_c), "Pacific/Auckland") expect_identical(format(date2_c, "%H:%M"), "00:00") date2_l <- vec_cast(date1, datetime_l) expect_identical(tzone(date2_l), "Pacific/Auckland") expect_identical(format(date2_l, "%H:%M"), "00:00") }) test_that("can cast NA and unspecified to POSIXct and POSIXlt", { dtc <- as.POSIXct("2020-01-01") dtl <- as.POSIXlt("2020-01-01") expect_identical(vec_cast(NA, dtc), vec_init(dtc)) expect_identical(vec_cast(NA, dtl), vec_init(dtl)) expect_identical(vec_cast(unspecified(2), dtc), vec_init(dtc, 2)) expect_identical(vec_cast(unspecified(2), dtl), vec_init(dtl, 2)) }) test_that("changing time zones retains the underlying moment in time", { x_ct <- as.POSIXct("2019-01-01", tz = "America/New_York") x_lt <- as.POSIXlt(x_ct) to_ct <- new_datetime(tzone = "America/Los_Angeles") to_lt <- as.POSIXlt(to_ct) expect_ct <- x_ct attr(expect_ct, "tzone") <- "America/Los_Angeles" expect_lt <- as.POSIXlt(expect_ct) expect_identical(vec_cast(x_ct, to_ct), expect_ct) expect_identical(vec_cast(x_ct, to_lt), expect_lt) expect_identical(vec_cast(x_lt, to_ct), expect_ct) expect_identical(vec_cast(x_lt, to_lt), expect_lt) }) test_that("casting to date always retains the zoned year-month-day value", { x <- as.POSIXct("2019-01-01", tz = "Asia/Shanghai") expect_identical(vec_cast(x, new_date()), as.Date("2019-01-01")) }) # cast: durations ------------------------------------------------------------ test_that("safe casts work as expected", { dt1 <- as.difftime(600, units = "secs") dt2 <- as.difftime(10, units = "mins") expect_identical(vec_cast(NULL, dt1), NULL) expect_identical(vec_cast(dt1, dt1), dt1) expect_identical(vec_cast(dt1, dt2), dt2) # These used to be allowed expect_error(vec_cast(600, dt1), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(dt1), dt1), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { dt <- as.difftime(600, units = "secs") expect_error(vec_cast(integer(), dt), class = "vctrs_error_incompatible_type") }) test_that("can cast NA and unspecified to duration", { expect_identical(vec_cast(NA, new_duration()), new_duration(na_dbl)) expect_identical(vec_cast(unspecified(2), new_duration()), new_duration(dbl(NA, NA))) }) # proxy/restore: dates --------------------------------------------------- test_that("restoring an integer to an integer Date converts to double", { x <- structure(0L, class = "Date") expect_true(is.double(vec_restore(x, x))) }) test_that("vec_proxy() returns a double for Dates with int representation", { x <- structure(0L, class = "Date") expect_true(is.double(vec_proxy(x))) }) # proxy/restore: datetimes ------------------------------------------------ test_that("restoring an integer to an integer POSIXct converts to double", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_true(is.double(vec_restore(x, x))) }) test_that("restoring to a POSIXct with no time zone standardizes to an empty string (#561)", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_restore(x, x), "tzone"), "") }) test_that("restoring to a POSIXlt with no time zone standardizes to an empty string", { # Manually create a POSIXlt without a `tzone` attribute. # This is just: # `x <- as.POSIXlt("1970-01-01")` # which usually won't add a `tzone` attribute, but platforms where the local # time is UTC attach a `tzone` attribute automatically. x <- structure( list( sec = 0, min = 0L, hour = 0L, mday = 1L, mon = 0L, year = 70L, wday = 4L, yday = 0L, isdst = 0L, zone = "EST", gmtoff = NA_integer_ ), class = c("POSIXlt", "POSIXt") ) proxy <- vec_proxy(x) expect_identical(attr(vec_restore(proxy, x), "tzone"), "") }) test_that("proxying a POSIXct with no time zone standardizes to an empty string", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_proxy(x), "tzone"), "") }) test_that("vec_proxy() returns a double for POSIXct with int representation", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_true(is.double(vec_proxy(x))) }) test_that("POSIXlt roundtrips through proxy and restore", { x <- as_posixlt("2020-01-03") out <- vec_restore(vec_proxy(x), x) expect_identical(out, x) }) test_that("subclassed Dates / POSIXct / POSIXlt can be restored (#1015)", { x <- subclass(new_date(0)) proxy <- vec_proxy(x) expect_identical(vec_restore(proxy, x), x) y <- subclass(new_datetime(0)) proxy <- vec_proxy(y) expect_identical(vec_restore(proxy, y), y) z <- subclass(as.POSIXlt(new_datetime(0))) proxy <- vec_proxy(z) expect_identical(vec_restore(proxy, z), z) }) # arithmetic -------------------------------------------------------------- test_that("default is error", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-02 12:00") lt <- as.POSIXlt(dt) t <- as.difftime(12, units = "hours") f <- factor("x") expect_error(vec_arith("+", d, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", dt, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", lt, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", t, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", dt, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", lt, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", d, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("!", t, MISSING()), class = "vctrs_error_incompatible_op") }) test_that("date-time vs date-time", { d <- as.Date("2018-01-01") dt <- as.POSIXct(d) lt <- as.POSIXlt(dt) expect_error(vec_arith("+", d, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, d), d - d) expect_error(vec_arith("+", dt, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, dt), dt - dt) expect_error(vec_arith("+", lt, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, lt), lt - lt) expect_error(vec_arith("+", d, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, dt), difftime(d, dt)) expect_error(vec_arith("+", dt, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, d), difftime(dt, d)) expect_error(vec_arith("+", d, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, lt), difftime(d, lt)) expect_error(vec_arith("+", lt, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, d), difftime(lt, d)) expect_error(vec_arith("+", dt, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, lt), difftime(dt, lt)) expect_error(vec_arith("+", lt, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, dt), difftime(lt, dt)) }) test_that("date-time vs numeric", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-01", tz = "America/New_York") lt <- as.POSIXlt(dt) expect_identical(vec_arith("+", d, 1), d + 1) expect_identical(vec_arith("+", 1, d), d + 1) expect_identical(vec_arith("-", d, 1), d - 1) expect_error(vec_arith("-", 1, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("+", dt, 1), dt + 1) expect_identical(vec_arith("+", 1, dt), dt + 1) expect_identical(vec_arith("-", dt, 1), dt - 1) expect_error(vec_arith("-", 1, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("+", lt, 1), lt + 1) expect_identical(vec_arith("+", 1, lt), lt + 1) expect_identical(vec_arith("-", lt, 1), lt - 1) expect_error(vec_arith("-", 1, lt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, d), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", d, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, dt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", dt, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, lt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", lt, 1), class = "vctrs_error_incompatible_op") }) test_that("POSIXlt + numeric = POSIXct", { lt <- as.POSIXlt("2018-01-01", tz = "America/New_York") expect_s3_class(vec_arith("+", lt, 1), "POSIXct") expect_s3_class(vec_arith("+", 1, lt), "POSIXct") }) test_that("vec_arith() standardizes the `tzone` attribute", { dt <- structure(0, class = c("POSIXct", "POSIXt")) x <- vec_arith("+", dt, 1) expect_identical(attr(x, "tzone"), "") }) test_that("date-time vs difftime", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-01", tz = "UTC") lt <- as.POSIXlt(dt) t <- as.difftime(1, units = "days") th <- as.difftime(c(1, 24), units = "hours") expect_identical(vec_arith("+", d, t), d + t) expect_identical(vec_arith("+", t, d), t + d) expect_identical(vec_arith("+", dt, t), dt + t) expect_identical(vec_arith("+", t, dt), t + dt) expect_identical(vec_arith("+", lt, t), lt + t) expect_identical(vec_arith("+", t, lt), t + lt) expect_lossy(vec_arith("+", d, th), d + th, x = t, to = d) expect_lossy(vec_arith("+", th, d), th + d, x = t, to = d) expect_identical(vec_arith("+", dt, th), dt + th) expect_identical(vec_arith("+", th, dt), th + dt) expect_identical(vec_arith("+", lt, th), lt + th) expect_identical(vec_arith("+", th, lt), th + lt) expect_identical(vec_arith("-", d, t), d - t) expect_error(vec_arith("-", t, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, t), dt - t) expect_error(vec_arith("-", t, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, t), lt - t) expect_error(vec_arith("-", t, lt), class = "vctrs_error_incompatible_op") expect_lossy(vec_arith("-", d, th), d - th, x = t, to = d) expect_error(vec_arith("-", th, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, th), dt - th) expect_error(vec_arith("-", th, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, th), lt - th) expect_error(vec_arith("-", th, lt), class = "vctrs_error_incompatible_op") }) test_that("difftime vs difftime/numeric", { t <- as.difftime(12, units = "hours") expect_identical(vec_arith("-", t, MISSING()), -t) expect_identical(vec_arith("+", t, MISSING()), t) expect_identical(vec_arith("-", t, t), t - t) expect_identical(vec_arith("-", t, 1), t - 1) expect_identical(vec_arith("-", 1, t), 1 - t) expect_identical(vec_arith("+", t, t), 2 * t) expect_identical(vec_arith("+", t, 1), t + 1) expect_identical(vec_arith("+", 1, t), t + 1) expect_identical(vec_arith("*", 2, t), 2 * t) expect_identical(vec_arith("*", t, 2), 2 * t) expect_error(vec_arith("*", t, t), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("/", t, 2), t / 2) expect_error(vec_arith("/", 2, t), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("/", t, t), 1) expect_identical(vec_arith("%/%", t, t), 1) expect_identical(vec_arith("%%", t, t), 0) }) # Math -------------------------------------------------------------------- test_that("date and date times don't support math", { expect_error(vec_math("sum", new_date()), class = "vctrs_error_unsupported") expect_error(vec_math("sum", new_datetime()), class = "vctrs_error_unsupported") expect_error(vec_math("sum", as.POSIXlt(new_datetime())), class = "vctrs_error_unsupported") }) vctrs/tests/testthat/test-proxy.R0000644000176200001440000000720213723213047016701 0ustar liggesusers test_that("vec_data() preserves names (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_names(x), vec_names(vec_data(x))) x <- diag(2) rownames(x) <- letters[1:2] colnames(x) <- LETTERS[1:2] expect_identical(vec_names(x), vec_names(vec_data(x))) }) test_that("vec_data() preserves size (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_size(x), vec_size(vec_data(x))) x <- diag(2) expect_identical(vec_size(x), vec_size(vec_data(x))) }) test_that("vec_data() preserves dim and dimnames (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_dim(x), vec_dim(vec_data(x))) x <- diag(2) expect_identical(vec_dim(x), vec_dim(vec_data(x))) x <- diag(2) rownames(x) <- letters[1:2] colnames(x) <- LETTERS[1:2] expect_identical(dimnames(x), dimnames(vec_data(x))) }) test_that("strips vector attributes apart from names, dim and dimnames", { x <- new_vctr(1:10, a = 1, b = 2) expect_null(attributes(vec_data(x))) x <- new_vctr(c(x = 1, y = 2), a = 1, b = 2) expect_equal(names(attributes(vec_data(x))), "names") x <- new_vctr(1, a = 1, dim = c(1L, 1L)) expect_equal(names(attributes(vec_data(x))), "dim") x <- new_vctr(1, a = 1, dim = c(1L, 1L), dimnames = list("foo", "bar")) expect_equal(names(attributes(vec_data(x))), c("dim", "dimnames")) }) test_that("vec_proxy() is a no-op with data vectors", { for (x in vectors) { expect_identical(vec_proxy(!!x), !!x) } x <- structure(1:3, foo = "bar") expect_identical(vec_proxy(!!x), !!x) }) test_that("vec_proxy() transforms records to data frames", { for (x in records) { expect_identical(vec_proxy(x), new_data_frame(unclass(x))) } }) test_that("vec_proxy() is a no-op with non vectors", { x <- foobar(list()) expect_identical(vec_proxy(x), x) }) test_that("can take the proxy of non-vector objects", { local_env_proxy() expect_identical(vec_proxy(new_proxy(1:3)), 1:3) }) test_that("vec_data() asserts vectorness", { expect_error(vec_data(new_sclr()), class = "vctrs_error_scalar_type") expect_error(vec_data(~foo), class = "vctrs_error_scalar_type") }) test_that("vec_data() is proxied", { local_env_proxy() x <- new_proxy(mtcars) expect_identical(vec_data(x), vec_data(mtcars)) }) test_that("vec_proxy_equal() is recursive over data frames (#641)", { x <- new_data_frame(list(x = foobar(1:3), y = 41:43)) default <- vec_proxy_equal(x) expect_s3_class(default$x, "vctrs_foobar") local_methods(vec_proxy_equal.vctrs_foobar = function(...) c(0, 0, 0)) overridden <- vec_proxy_equal(x) expect_identical(overridden$x, c(0, 0, 0)) }) test_that("vec_proxy_equal() returns a POSIXct for POSIXlt objects (#901)", { x <- as.POSIXlt(new_date(0), tz = "UTC") expect_s3_class(vec_proxy_equal(x), "POSIXct") }) test_that("vec_proxy_equal() defaults to vec_proxy() and vec_proxy_compare() defaults to vec_proxy_equal() (#1140)", { foobar_proxy <- function(x, ...) data_frame(x = unclass(x), y = seq_along(x)) local_methods(vec_proxy.vctrs_foobar = foobar_proxy) x <- foobar(3:1) expect_identical(vec_proxy(x), foobar_proxy(x)) expect_identical(vec_proxy_equal(x), foobar_proxy(x)) expect_identical(vec_proxy_compare(x), foobar_proxy(x)) local_methods(vec_proxy_equal.vctrs_foobar = function(x, ...) foobar_proxy(letters[x])) expect_identical(vec_proxy_equal(x), data_frame(x = letters[3:1], y = 1:3)) expect_identical(vec_proxy_compare(x), data_frame(x = letters[3:1], y = 1:3)) }) test_that("vec_data() preserves data frames", { expect_identical( vec_data(tibble(x = 1)), data_frame(x = 1) ) # Rownames are preserved expect_identical( vec_data(mtcars), mtcars ) }) vctrs/tests/testthat/test-type-vec-c-error.txt0000644000176200001440000000042014042546235021237 0ustar liggesusers vec_c(df1, df2): Can't combine `..1$x$y$z` and `..2$x$y$z` . vec_c(df1, df1, df2): Can't combine `..1$x$y$z` and `..3$x$y$z` . vec_c(foo = df1, bar = df2): Can't combine `foo$x$y$z` and `bar$x$y$z` . vctrs/tests/testthat/test-partial-factor-print-both.txt0000644000176200001440000000005414042546241023130 0ustar liggesuserspartial_factor< bf275 {partial} fd1ad > vctrs/tests/testthat/test-type.R0000644000176200001440000002037014027045462016504 0ustar liggesusers test_that("vec_ptype() is a no-op for NULL", { expect_null(vec_ptype(NULL)) }) test_that("vec_ptype() is a no-op for partial types", { expect_identical(vec_ptype(partial_factor("x")), partial_factor("x")) expect_identical(vec_ptype(partial_frame(x = 1)), partial_frame(x = 1)) }) test_that("vec_ptype() errors on scalars", { expect_error(vec_ptype(quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype(quote(fn())), class = "vctrs_error_scalar_type") }) test_that(".ptype argument overrides others", { expect_equal(vec_ptype_common(.ptype = 1:10), numeric()) }) test_that(".ptype required in strict mode", { old <- options(vctrs.no_guessing = TRUE) on.exit(options(old)) expect_error(vec_ptype_common(), "strict mode") }) test_that("can feed ptype into itself", { expect_equal(vec_ptype_common(vec_ptype_common(1:10)), numeric()) }) test_that("finalised prototypes created from under specified inputs", { expect_equal(vec_ptype_common(), NULL) expect_equal(vec_ptype_common(NULL), NULL) expect_equal(vec_ptype_common(NA), logical()) expect_equal(vec_ptype_common(NA, NULL), logical()) expect_equal(vec_ptype_common(NULL, NA), logical()) }) test_that("finalised prototypes created from under specified data frame cols", { df <- data.frame(x = NA) expect_equal(vec_ptype_common(df)$x, logical()) }) test_that("non-missing logical get correct type", { expect_equal(vec_ptype_common(TRUE), logical()) }) test_that("output tests", { expect_known_output(vec_ptype_show(), "out/vec-ptype-0.txt") expect_known_output(vec_ptype_show(integer()), "out/vec-ptype-1.txt") expect_known_output(vec_ptype_show(integer(), double()), "out/vec-ptype-2.txt") expect_known_output(vec_ptype_show(logical(), integer(), double()), "out/vec-ptype-3.txt") }) test_that("vec_ptype_common() handles matrices", { m <- matrix(1:4, nrow = 2) expect_identical(vec_ptype_common(m, m), matrix(int(), ncol = 2)) }) test_that("vec_ptype_common() includes index in argument tag", { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) # Create a column name too large for default buffer nm <- str_dup("foobarfoobar", 10) large_df1 <- set_names(df1, nm) large_df2 <- set_names(df2, nm) expect_known_output_nobang(file = test_path("test-type-vec-type-common-error.txt"), { try2(vec_ptype_common(df1, df2)) try2(vec_ptype_common(df1, df1, df2)) try2(vec_ptype_common(large_df1, large_df2)) # Names try2(vec_ptype_common(foo = TRUE, bar = "foo")) try2(vec_ptype_common(foo = TRUE, baz = FALSE, bar = "foo")) try2(vec_ptype_common(foo = df1, bar = df2)) try2(vec_ptype_common(df1, df1, bar = df2)) # One splice box try2(vec_ptype_common(TRUE, !!!list(1, "foo"))) try2(vec_ptype_common(TRUE, !!!list(1, 2), "foo")) try2(vec_ptype_common(1, !!!list(TRUE, FALSE), "foo")) # One named splice box try2(vec_ptype_common(foo = TRUE, !!!list(FALSE, FALSE), bar = "foo")) try2(vec_ptype_common(foo = TRUE, !!!list(bar = 1, "foo"))) try2(vec_ptype_common(foo = TRUE, !!!list(bar = "foo"))) try2(vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), baz = "chr")) # Two splice boxes in next and current try2(vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), !!!list(baz = "chr"))) }) }) test_that("proxied types are have s3 bare type", { for (x in proxied_empty_types) { expect_identical(vec_typeof_bare(x), "s3") } }) test_that("vec_ptype() preserves attributes of unproxied structures", { expect_identical(vec_ptype(foobar(dbl(1))), foobar(dbl())) }) test_that("vec_ptype() errors on scalar lists", { expect_error(vec_ptype(foobar(list())), class = "vctrs_error_scalar_type") }) test_that("can retrieve type info", { exp <- list(type = "integer", proxy_method = NULL) expect_identical(vec_type_info(1:3), exp) exp <- list(type = "s3", proxy_method = NULL) expect_identical(vec_type_info(~foo), exp) x <- as.POSIXlt(new_datetime(0)) exp <- list(type = "s3", proxy_method = vec_proxy.POSIXlt) expect_identical(vec_type_info(x), exp) }) test_that("can retrieve proxy info", { exp <- list(type = "integer", proxy_method = NULL, proxy = 1:3) expect_identical(vec_proxy_info(1:3), exp) exp <- list(type = "scalar", proxy_method = NULL, proxy = ~foo) expect_identical(vec_proxy_info(~foo), exp) x <- as.POSIXlt(new_datetime(0)) proxy <- new_data_frame(unclass(x)) exp <- list(type = "dataframe", proxy_method = vec_proxy.POSIXlt, proxy = proxy) expect_identical(vec_proxy_info(x), exp) }) test_that("class_type() detects classes", { expect_identical(class_type(list()), "none") expect_identical(class_type(foobar(list())), "unknown") expect_identical(class_type(structure(list(), class = "list")), "list") expect_identical(class_type(subclass(structure(list(), class = "list"))), "list") expect_identical(class_type(data.frame()), "bare_data_frame") expect_identical(class_type(tibble::tibble()), "bare_tibble") expect_identical(class_type(subclass(data.frame())), "data_frame") expect_identical(class_type(new_factor()), "bare_factor") expect_identical(class_type(new_ordered()), "bare_ordered") expect_identical(class_type(subclass(new_factor())), "unknown") expect_identical(class_type(subclass(new_ordered())), "unknown") expect_identical(class_type(new_date()), "bare_date") expect_identical(class_type(new_datetime()), "bare_posixct") expect_identical(class_type(as.POSIXlt(new_date())), "bare_posixlt") expect_identical(class_type(subclass(new_date())), "unknown") expect_identical(class_type(subclass(new_datetime())), "unknown") expect_identical(class_type(NA), "none") expect_identical(class_type(foobar()), "unknown") }) test_that("vec_ptype() handles class-less yet OBJECT gremlins", { gremlin <- stats::model.frame(freeny) expect_error(vec_ptype(gremlin), NA) expect_error(vec_c(gremlin), NA) expect_error(vec_init(gremlin), NA) expect_error(vec_slice(gremlin, 1), NA) }) test_that("explicit list subclasses are vectors", { list_subclass <- function(x) { structure(x, class = c("custom_subclass", "list")) } x <- list_subclass(list()) expect_true(vec_is(x)) df <- data.frame(x = 1:2) df$z <- list_subclass(list(1, 2)) expect_identical(vec_slice(df, 1)$z, list_subclass(list(1))) }) test_that("the type of a classed data frame with an unspecified column retains unspecifiedness", { df1 <- subclass(data_frame(x = 1, y = NA)) df2 <- subclass(data_frame(x = 1, y = unspecified(1))) expect <- subclass(data_frame(x = numeric(), y = unspecified())) expect_identical(vec_ptype(df1), expect) expect_identical(vec_ptype(df2), expect) }) test_that("vec_ptype() methods can be written", { local_methods( vec_ptype.vctrs_foobar = function(x, ...) "dispatch" ) expect_identical(vec_ptype(foobar()), "dispatch") }) test_that("vec_ptype_finalise() works with NULL", { expect_identical(vec_ptype_finalise(NULL), NULL) }) test_that("vec_ptype_finalise() works recursively over bare data frames", { df <- new_data_frame(list(x = numeric(), y = unspecified(), z = partial_factor())) expect <- data_frame(x = numeric(), y = logical(), z = factor()) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() works recursively over classed data frames", { df <- new_data_frame(list(x = numeric(), y = unspecified(), z = partial_factor())) df <- subclass(df) expect <- subclass(data_frame(x = numeric(), y = logical(), z = factor())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() can handle data frame columns", { df <- data_frame(x = numeric(), y = data_frame(z = unspecified())) expect <- data_frame(x = numeric(), y = data_frame(z = logical())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() requires vector types", { expect_error(vec_ptype_finalise(quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype_finalise(foobar()), class = "vctrs_error_scalar_type") }) # This might change in the future if we decide that prototypes don't # have names test_that("vec_ptype() preserves type of names and row names", { expect_identical(vec_ptype(c(foo = 1)), named(dbl())) expect_identical(vec_ptype(mtcars), mtcars[0, ]) expect_identical(vec_ptype(foobar(mtcars)), foobar(mtcars[0, ])) }) vctrs/tests/testthat/test-list_of-print.txt0000644000176200001440000000017414042546250020730 0ustar liggesusers[2]> [[1]] [1] 1 [[2]] [1] 2 3 # A tibble: 2 x 1 x > 1 [1] 2 [2] vctrs/tests/testthat/test-type-group-rle.txt0000644000176200001440000000005414042546237021034 0ustar liggesusers [1] 1x3 2x2 1x1 vctrs/tests/testthat/test-names.R0000644000176200001440000006421114027045462016630 0ustar liggesusers # vec_names() --------------------------------------------------------- test_that("vec_names() retrieves names", { expect_null(vec_names(letters)) expect_identical(vec_names(set_names(letters)), letters) expect_identical(vec_names(mtcars), row.names(mtcars)) expect_null(vec_names(unrownames(mtcars))) expect_identical(vec_names(Titanic), dimnames(Titanic)[[1]]) x <- matrix(1L, dimnames = list("row", "col")) expect_identical(vec_names(x), dimnames(x)[[1]]) }) test_that("vec_names() dispatches", { local_methods( names.vctrs_foobar = function(x) "dispatched!" ) expect_identical(vec_names(foobar()), "dispatched!") }) # vec_names2() ------------------------------------------------------------- test_that("vec_names2() repairs names", { expect_identical(vec_names2(1:2), c("", "")) expect_identical(vec_names2(1:2, repair = "unique"), c("...1", "...2")) expect_identical(vec_names2(set_names(1:2, c("_foo", "_bar")), repair = "universal"), c("._foo", "._bar")) }) test_that("vec_names2() treats data frames and arrays as vectors", { expect_identical(vec_names2(mtcars), row.names(mtcars)) expect_identical(vec_names2(as.matrix(mtcars)), row.names(mtcars)) df <- unrownames(mtcars) exp <- rep_len("", nrow(mtcars)) expect_identical(vec_names2(df), exp) expect_identical(vec_names2(as.matrix(df)), exp) }) test_that("vec_names2() accepts and checks repair function", { expect_identical(vec_names2(1:2, repair = function(nms) rep_along(nms, "foo")), c("foo", "foo")) expect_error(vec_names2(1:2, repair = function(nms) "foo"), "length 1 instead of length 2") }) test_that("vec_names2() repairs names before invoking repair function", { x <- set_names(1:2, c(NA, NA)) expect_identical(vec_names2(x, repair = identity), c("", "")) }) # vec_as_names() ----------------------------------------------------------- test_that("vec_as_names() requires character vector", { expect_error(vec_as_names(NULL), "`names` must be a character vector") }) test_that("vec_as_names() validates `repair`", { expect_error(vec_as_names("x", repair = "foo"), "can't be \"foo\"") expect_error(vec_as_names(1, repair = 1), "string or a function") }) test_that("vec_as_names() repairs names", { expect_identical(vec_as_names(chr(NA, NA)), c("", "")) expect_identical(vec_as_names(chr(NA, NA), repair = "unique"), c("...1", "...2")) expect_identical(vec_as_names(chr("_foo", "_bar"), repair = "universal"), c("._foo", "._bar")) expect_identical(vec_as_names(chr("a", "b"), repair = "check_unique"), c("a", "b")) }) test_that("vec_as_names() checks unique names", { expect_error(vec_as_names(chr(NA), repair = "check_unique")) expect_error(vec_as_names(chr(""), repair = "check_unique")) expect_error(vec_as_names(chr("a", "a"), repair = "check_unique")) expect_error(vec_as_names(chr("..1"), repair = "check_unique")) expect_error(vec_as_names(chr("..."), repair = "check_unique")) }) test_that("vec_as_names() keeps the names of a named vector", { x_unnamed <- c(NA, "", "..1", "...2") x_names <- letters[1:4] x <- set_names(x_unnamed, x_names) expect_identical( set_names(vec_as_names(x_unnamed, repair = "minimal"), x_names), vec_as_names(x, repair = "minimal") ) expect_identical( set_names(vec_as_names(x_unnamed, repair = "unique"), x_names), vec_as_names(x, repair = "unique") ) expect_identical( set_names(vec_as_names(x_unnamed, repair = "universal"), x_names), vec_as_names(x, repair = "universal") ) }) test_that("vec_as_names() accepts and checks repair function", { f <- local({ local_obj <- "foo" ~ rep_along(.x, local_obj) }) expect_identical(vec_as_names(c("", ""), repair = f), c("foo", "foo")) expect_error(vec_as_names(c("", ""), repair = function(nms) "foo"), "length 1 instead of length 2") }) test_that("vec_as_names() repairs names before invoking repair function", { expect_identical(vec_as_names(chr(NA, NA), repair = identity), c("", "")) }) test_that("vec_as_names() is noisy by default", { verify_output(test_path("output", "test-vec-as-names.txt"), { # Noisy name repair vec_as_names(c("x", "x"), repair = "unique") # Quiet name repair vec_as_names(c("x", "x"), repair = "unique", quiet = TRUE) # Hint at repair argument, if known vec_as_names(c("x", "x"), repair = "check_unique", repair_arg = "repair") }) }) test_that("validate_minimal_names() checks names", { expect_error(validate_minimal_names(1), "must return a character vector") expect_error(validate_minimal_names(NULL), "can't return `NULL`") expect_error(validate_minimal_names(chr(NA)), "can't return `NA` values") }) test_that("validate_unique() checks unique names", { expect_error(validate_unique(chr(NA)), "`NA`") expect_error(validate_unique(chr("")), class = "vctrs_error_names_cannot_be_empty") expect_error(validate_unique(chr("a", "a")), class = "vctrs_error_names_must_be_unique") expect_error(validate_unique(chr("..1")), class = "vctrs_error_names_cannot_be_dot_dot") expect_error(validate_unique(chr("...")), class = "vctrs_error_names_cannot_be_dot_dot") }) test_that("vec_as_names_validate() validates repair arguments", { expect_identical( validate_name_repair_arg(c("unique", "check_unique")), "unique" ) expect_identical( validate_name_repair_arg(~ toupper(.))(letters), LETTERS ) }) test_that("vec_as_names() is quiet when function is supplied (#1018)", { expect_silent( vctrs::vec_as_names( c("a", "b"), repair = function(x) paste0(x, "a"), quiet = FALSE ) ) }) # vec_repair_names() ------------------------------------------------------- test_that("vec_repair_names() repairs names", { expect_identical(vec_repair_names(1:2), set_names(1:2, c("", ""))) expect_identical(vec_repair_names(1:2, "unique"), set_names(1:2, c("...1", "...2"))) expect_identical(vec_repair_names(set_names(1:2, c("_foo", "_bar")), "universal"), set_names(1:2, c("._foo", "._bar"))) }) test_that("vec_repair_names() handles data frames and arrays", { df <- data.frame(x = 1:2) expect_identical(vec_repair_names(df), df) expect_identical(row.names(vec_repair_names(as.matrix(df))), c("", "")) expect_identical(row.names(vec_repair_names(as.matrix(df), "unique")), c("...1", "...2")) }) # vec_set_names() ----------------------------------------------------------- test_that("vec_set_names() sets atomic names", { x <- 1:2 names <- c("x1", "x2") exp <- set_names(x, names) expect_equal(vec_set_names(x, names), exp) }) test_that("vec_set_names() sets matrix/array names", { x <- matrix(1:2) names <- c("x1", "x2") exp <- x rownames(exp) <- names expect_equal(vec_set_names(x, names), exp) y <- array(1:4, dim = c(2, 1, 2)) exp <- y rownames(exp) <- names expect_equal(vec_set_names(y, names), exp) }) test_that("vec_set_names() doesn't alter names", { x <- matrix(1, dimnames = list(rows = "a", cols = "x")) vec_set_names(x, "y") expect_equal(vec_names2(x), "a") expect_equal(colnames(x), "x") vec_set_names(x, NULL) expect_equal(vec_names2(x), "a") expect_equal(colnames(x), "x") y <- array(1:4, dim = c(1, 2, 2), dimnames = list(rows = "a", one = 1:2, two = 1:2)) vec_set_names(y, "y") expect_equal(vec_names2(y), "a") vec_set_names(y, NULL) expect_equal(vec_names2(y), "a") }) test_that("vec_set_names() sets row names on data frames", { expect_identical( vec_set_names(data_frame(x = 1), "foo"), new_data_frame(list(x = 1), row.names = "foo") ) expect_identical( vec_set_names(data_frame(x = 1:2), c("foo", "foo")), new_data_frame(list(x = 1:2), row.names = c("foo...1", "foo...2")) ) }) test_that("vec_set_names() correctly sets names on POSIXlt objects", { x <- as.POSIXlt(new_datetime(0)) exp <- set_names(x, "a") expect_equal(vec_set_names(x, "a"), exp) }) test_that("vec_set_names() falls back to `names<-` with proxied objects", { x <- structure(1, class = "foobar") exp <- set_names(x, "a") expect_equal(vec_set_names(x, "a"), exp) local_methods(`names<-.foobar` = function(x, value) "fallback!") expect_equal(vec_set_names(x, "a"), "fallback!") }) test_that("vec_set_names() falls back to `rownames<-` with shaped proxied objects", { x <- structure(1:2, dim = c(2L, 1L), class = "foobar") names <- c("r1", "r2") exp <- x rownames(exp) <- names expect_equal(vec_set_names(x, names), exp) # `rownames<-` is not generic, but eventually calls `dimnames<-` which is local_methods(`dimnames<-.foobar` = function(x, value) "fallback!") expect_equal(vec_set_names(x, names), "fallback!") }) test_that("vec_set_names() can set NULL names", { x <- 1:2 expect_equal(vec_set_names(x, NULL), x) x_named <- set_names(x) expect_equal(vec_set_names(x_named, NULL), x) x_mat <- as.matrix(x) expect_equal(vec_set_names(x_mat, NULL), x_mat) x_mat_named <- x_mat rownames(x_mat_named) <- c("1", "2") exp <- matrix(x_mat, dimnames = list(NULL, NULL)) expect_equal(vec_set_names(x_mat_named, NULL), exp) }) test_that("vec_set_names() errors with bad `names`", { expect_error(vec_set_names(1, 1), "character vector, not a double") expect_error(vec_set_names(1, c("x", "y")), "The size of `names`, 2") }) test_that("vec_names() and vec_set_names() work with 1-dimensional arrays", { x <- array(1:2, dimnames = list(c("a", "b"))) expect_identical(vec_names(x), c("a", "b")) expect_identical(vec_names(vec_set_names(x, c("A", "B"))), c("A", "B")) }) # minimal names ------------------------------------------------------------- test_that("minimal names are made from `n` when `name = NULL`", { expect_identical(minimal_names(1:2), c("", "")) }) test_that("as_minimal_names() checks input", { expect_error(as_minimal_names(1:3), "must be a character vector") }) test_that("minimal names have '' instead of NAs", { expect_identical(as_minimal_names(c("", NA, "", NA)), c("", "", "", "")) }) test_that("repairing minimal names copes with NULL input names", { x <- 1:3 x_named <- vec_repair_names(x) expect_equal(names(x_named), rep("", 3)) }) test_that("as_minimal_names() is idempotent", { x <- c("", "", NA) expect_identical(as_minimal_names(x), as_minimal_names(as_minimal_names(x))) }) test_that("minimal_names() treats data frames and arrays as vectors", { expect_identical(minimal_names(mtcars), row.names(mtcars)) expect_identical(minimal_names(as.matrix(mtcars)), row.names(mtcars)) df <- unrownames(mtcars) exp <- rep_len("", nrow(mtcars)) expect_identical(minimal_names(df), exp) expect_identical(minimal_names(as.matrix(df)), exp) }) test_that("as_minimal_names() copies on write", { nms <- chr(NA, NA) as_minimal_names(nms) expect_identical(nms, chr(NA, NA)) nms <- c("a", "b") out <- as_minimal_names(nms) expect_true(is_reference(nms, out)) }) # unique names ------------------------------------------------------------- test_that("unique_names() handles unnamed vectors", { expect_identical(unique_names(1:3), c("...1", "...2", "...3")) }) test_that("as_unique_names() is a no-op when no repairs are needed", { x <- c("x", "y") out <- as_unique_names(x) expect_true(is_reference(out, x)) expect_identical(out, c("x", "y")) }) test_that("as_unique_names() eliminates emptiness and duplication", { x <- c("", "x", "y", "x") expect_identical(as_unique_names(x), c("...1", "x...2", "y", "x...4")) }) test_that("as_unique_names(): solo empty or NA gets suffix", { expect_identical(as_unique_names(""), "...1") expect_identical(as_unique_names(NA_character_), "...1") }) test_that("as_unique_names() treats ellipsis like empty string", { expect_identical(as_unique_names("..."), as_unique_names("")) }) test_that("two_three_dots() does its job and no more", { x <- c(".", ".1", "...1", "..1a") expect_identical(two_to_three_dots(x), x) expect_identical(two_to_three_dots(c("..1", "..22")), c("...1", "...22")) }) test_that("two dots then number treated like three dots then number", { expect_identical(as_unique_names("..2"), as_unique_names("...5")) }) test_that("as_unique_names() strips positional suffixes, re-applies as needed", { x <- c("...20", "a...1", "b", "", "a...2...34") expect_identical(as_unique_names(x), c("...1", "a...2", "b", "...4", "a...5")) expect_identical(as_unique_names("a...1"), "a") expect_identical(as_unique_names(c("a...2", "a")), c("a...1", "a...2")) expect_identical(as_unique_names(c("a...3", "a", "a")), c("a...1", "a...2", "a...3")) expect_identical(as_unique_names(c("a...2", "a", "a")), c("a...1", "a...2", "a...3")) expect_identical(as_unique_names(c("a...2", "a...2", "a...2")), c("a...1", "a...2", "a...3")) }) test_that("as_unique_names() is idempotent", { x <- c("...20", "a...1", "b", "", "a...2") expect_identical(as_unique_names(!!x), as_unique_names(as_unique_names(!!x))) }) test_that("unique-ification has an 'algebraic'-y property", { ## inspired by, but different from, this guarantee about base::make.unique() ## make.unique(c(A, B)) == make.unique(c(make.unique(A), B)) ## If A is already unique, then make.unique(c(A, B)) preserves A. ## I haven't formulated what we guarantee very well yet, but it's probably ## implicit in this test (?) x <- c("...20", "a...1", "b", "", "a...2", "d") y <- c("", "a...3", "b", "...3", "e") ## fix names on each, catenate, fix the whole z1 <- as_unique_names( c( as_unique_names(x), as_unique_names(y) ) ) ## fix names on x, catenate, fix the whole z2 <- as_unique_names( c( as_unique_names(x), y ) ) ## fix names on y, catenate, fix the whole z3 <- as_unique_names( c( x, as_unique_names(y) ) ) ## catenate, fix the whole z4 <- as_unique_names( c( x, y ) ) expect_identical(z1, z2) expect_identical(z1, z3) expect_identical(z1, z4) }) test_that("unique_names() and as_unique_names() are verbose or silent", { expect_message(unique_names(1:2), "-> ...1", fixed = TRUE) expect_message(as_unique_names(c("", "")), "-> ...1", fixed = TRUE) expect_message(regexp = NA, unique_names(1:2, quiet = TRUE)) expect_message(regexp = NA, as_unique_names(c("", ""), quiet = TRUE)) }) test_that("names with only duplicates are repaired", { expect_identical(unique_names(list(x = NA, x = NA)), c("x...1", "x...2")) }) # Universal names ---------------------------------------------------------- test_that("zero-length input", { expect_equal(as_universal_names(character()), character()) }) test_that("universal names are not changed", { expect_equal(as_universal_names(letters), letters) }) test_that("as_universal_names() is idempotent", { x <- c(NA, "", "x", "x", "a1:", "_x_y}") expect_identical(as_universal_names(x), as_universal_names(as_universal_names(x))) }) test_that("dupes get a suffix", { expect_equal( as_universal_names(c("a", "b", "a", "c", "b")), c("a...1", "b...2", "a...3", "c", "b...5") ) }) test_that("as_universal_names(): solo empty or NA gets suffix", { expect_identical(as_universal_names(""), "...1") expect_identical(as_universal_names(NA_character_), "...1") }) test_that("as_universal_names() treats ellipsis like empty string", { expect_identical(as_universal_names("..."), as_universal_names("")) }) test_that("solo dot is unchanged", { expect_equal(as_universal_names("."), ".") }) test_that("dot, dot gets suffix", { expect_equal(as_universal_names(c(".", ".")), c("....1", "....2")) }) test_that("dot-dot, dot-dot gets suffix", { expect_equal(as_universal_names(c("..", "..")), c(".....1", ".....2")) }) test_that("empty, dot becomes suffix, dot", { expect_equal(as_universal_names(c("", ".")), c("...1", ".")) }) test_that("empty, empty, dot becomes suffix, suffix, dot", { expect_equal(as_universal_names(c("", "", ".")), c("...1", "...2", ".")) }) test_that("dot, dot, empty becomes suffix, suffix, suffix", { expect_equal(as_universal_names(c(".", ".", "")), c("....1", "....2", "...3")) }) test_that("dot, empty, dot becomes suffix, suffix, suffix", { expect_equal(as_universal_names(c(".", "", ".")), c("....1", "...2", "....3")) }) test_that("empty, dot, empty becomes suffix, dot, suffix", { expect_equal(as_universal_names(c("", ".", "")), c("...1", ".", "...3")) }) test_that("'...j' gets stripped then names are modified", { expect_equal(as_universal_names(c("...6", "...1...2")), c("...1", "...2")) expect_equal(as_universal_names("if...2"), ".if") }) test_that("complicated inputs", { expect_equal( as_universal_names(c("", ".", NA, "if...4", "if", "if...8", "for", "if){]1")), c("...1", ".", "...3", ".if...4", ".if...5", ".if...6", ".for", "if...1") ) }) test_that("message", { expect_message( as_universal_names(c("a b", "b c")), "New names:\n* `a b` -> a.b\n* `b c` -> b.c\n", fixed = TRUE ) }) test_that("quiet", { expect_message( as_universal_names("", quiet = TRUE), NA ) }) test_that("unique then universal is universal, with shuffling", { x <- c("", ".2", "..3", "...4", "....5", ".....6", "......7", "...") expect_identical(as_universal_names(as_unique_names(x)), as_universal_names(x)) x2 <- x[c(7L, 4L, 3L, 6L, 5L, 1L, 2L, 8L)] expect_identical(as_universal_names(as_unique_names(x2)), as_universal_names(x2)) x3 <- x[c(3L, 2L, 4L, 6L, 8L, 1L, 5L, 7L)] expect_identical(as_universal_names(as_unique_names(x3)), as_universal_names(x3)) }) test_that("zero-length inputs given character names", { out <- vec_repair_names(character(), "universal") expect_equal(names(out), character()) }) test_that("unnamed input gives uniquely named output", { out <- vec_repair_names(1:3, "universal") expect_equal(names(out), c("...1", "...2", "...3")) }) test_that("messages by default", { expect_message( vec_repair_names(set_names(1, "a:b"), "universal"), "New names:\n* `a:b` -> a.b\n", fixed = TRUE ) expect_message( vec_repair_names(set_names(1, "a:b"), ~ make.names(.)), "New names:\n* `a:b` -> a.b\n", fixed = TRUE ) }) test_that("quiet = TRUE", { expect_message(vec_repair_names(set_names(1, ""), "universal", quiet = TRUE), NA) }) test_that("non-universal names", { out <- vec_repair_names(set_names(1, "a b"), "universal") expect_equal(names(out), "a.b") expect_equal(as_universal_names("a b"), "a.b") }) # make_syntactic() --------------------------------------------------------- test_that("make_syntactic(): empty or NA", { expect_syntactic( c("", NA_character_), c(".", ".") ) }) test_that("make_syntactic(): reserved words", { expect_syntactic( c("if", "TRUE", "Inf", "NA_real_", "normal"), c(".if", ".TRUE", ".Inf", ".NA_real_", "normal") ) }) test_that("make_syntactic(): underscore", { expect_syntactic( c( "_", "_1", "_a}"), c("._", "._1", "._a.") ) }) test_that("make_syntactic(): dots", { expect_syntactic( c(".", "..", "...", "...."), c(".", "..", "....", "....") ) }) test_that("make_syntactic(): number", { expect_syntactic( c( "0", "1", "22", "333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): number then character", { expect_syntactic( c( "0a", "1b", "22c", "333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): number then non-character", { expect_syntactic( c( "0)", "1&", "22*", "333@"), c("..0.", "..1.", "..22.", "..333.") ) }) test_that("make_syntactic(): dot then number", { expect_syntactic( c( ".0", ".1", ".22", ".333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot then number then character", { expect_syntactic( c( ".0a", ".1b", ".22c", ".333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): dot then number then non-character", { expect_syntactic( c( ".0)", ".1&", ".22*", ".333@"), c("..0.", "..1.", "..22.", "..333.") ) }) test_that("make_syntactic(): dot dot then number", { expect_syntactic( c( "..0", "..1", "..22", "..333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot dot dot then number", { expect_syntactic( c("...0", "...1", "...22", "...333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot dot dot dot then number", { expect_syntactic( c("....0", "....1", "....22", "....333"), c("....0", "....1", "....22", "....333") ) }) test_that("make_syntactic(): dot dot dot dot dot then number", { expect_syntactic( c(".....0", ".....1", ".....22", ".....333"), c(".....0", ".....1", ".....22", ".....333") ) }) test_that("make_syntactic(): dot dot then number then character", { expect_syntactic( c("..0a", "..1b", "..22c", "..333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): dot dot then number then non-character", { expect_syntactic( c("..0)", "..1&", "..22*", "..333@"), c("..0.", "..1.", "..22.", "..333.") ) }) # Duplication -------------------------------------------------------------- test_that("Minimal name repair duplicates if needed", { x1 <- NA_character_ x3 <- c(x1, x1) # Called to check absence of side effect vec_as_names(x3, repair = "minimal") expect_identical(x3, c(NA_character_, NA_character_)) }) test_that("Unique name repair duplicates if needed", { x1 <- "fa\u00e7ile" x3 <- c(x1, x1) # Called to check absence of side effect vec_as_names(x3, repair = "unique") expect_identical(x3, c("fa\u00e7ile", "fa\u00e7ile")) }) # Encoding ------------------------------------------------------------- test_that("Name repair works with non-UTF-8 names", { x1 <- "fa\u00e7ile" skip_if_not(Encoding(x1) == "UTF-8") x2 <- iconv(x1, from = "UTF-8", to = "latin1") skip_if_not(Encoding(x2) == "latin1") x3 <- c(x2, x2) expect_equal(vec_as_names(x3, repair = "unique"), paste0(x3, "...", 1:2)) }) # Conditions ----------------------------------------------------------- test_that("names cannot be empty", { expect_error_cnd( stop_names_cannot_be_empty(c("", "")), class = c("vctrs_error_names_cannot_be_empty", "vctrs_error_names", "vctrs_error"), message = "Names can't be empty.", names = c("", "") ) }) test_that("names cannot be dot dot", { expect_error_cnd( stop_names_cannot_be_dot_dot(c("..1", "..2")), class = c("vctrs_error_names_cannot_be_dot_dot", "vctrs_error_names", "vctrs_error"), message = "Names can't be of the form `...` or `..j`.", names = c("..1", "..2") ) }) test_that("names must be unique", { expect_error_cnd( stop_names_must_be_unique(c("x", "y", "y", "x")), class = c("vctrs_error_names_must_be_unique", "vctrs_error_names", "vctrs_error"), message = "Names must be unique.", names = c("x", "y", "y", "x") ) }) # Legacy repair -------------------------------------------------------- test_that("vec_as_names_legacy() works", { expect_identical(vec_as_names_legacy(chr()), chr()) expect_identical(vec_as_names_legacy(c("a", "a", "", "")), c("a", "a1", "V1", "V2")) expect_identical(vec_as_names_legacy(c("a", "a", "", ""), sep = "_"), c("a", "a_1", "V_1", "V_2")) expect_identical(vec_as_names_legacy(c("a", "a", "", ""), prefix = "foo"), c("a", "a1", "foo1", "foo2")) expect_identical(vec_as_names_legacy(c("a", "a", "", ""), prefix = "foo", sep = "_"), c("a", "a_1", "foo_1", "foo_2")) # From tibble expect_identical(vec_as_names_legacy(c("x", "x")), c("x", "x1")) expect_identical(vec_as_names_legacy(c("", "")), c("V1", "V2")) expect_identical(vec_as_names_legacy(c("", "V1")), c("V2", "V1")) expect_identical(vec_as_names_legacy(c("", "V", "V")), c("V2", "V", "V1")) }) # Name specification --------------------------------------------------- test_that("NULL name specs works with scalars", { expect_identical(apply_name_spec(NULL, "foo", NULL, 1L), "foo") expect_named(vec_c(foo = 1), "foo") expect_identical(apply_name_spec(NULL, "foo", chr(), 0L), chr()) expect_named(vec_c(foo = set_names(dbl())), chr()) expect_named(vec_c(foo = set_names(dbl()), bar = set_names(dbl())), chr()) expect_error(apply_name_spec(NULL, "foo", c("a", "b")), "vector of length > 1") expect_error(vec_c(foo = c(a = 1, b = 2)), "vector of length > 1") expect_error(apply_name_spec(NULL, "foo", NULL, 2L), "vector of length > 1") expect_error(vec_c(foo = 1:2), "vector of length > 1") }) test_that("function name spec is applied", { spec <- function(outer, inner) { sep <- if (is_character(inner)) "_" else ":" paste0(outer, sep, inner) } expect_identical(apply_name_spec(spec, "foo", NULL, 1L), "foo") expect_named(vec_c(foo = 1, .name_spec = spec), "foo") expect_identical(apply_name_spec(spec, "foo", c("a", "b")), c("foo_a", "foo_b")) expect_named(vec_c(foo = c(a = 1, b = 2), .name_spec = spec), c("foo_a", "foo_b")) expect_identical(apply_name_spec(spec, "foo", NULL, 2L), c("foo:1", "foo:2")) expect_named(vec_c(foo = 1:2, .name_spec = spec), c("foo:1", "foo:2")) }) test_that("can pass lambda formula as name spec", { expect_named(vec_c(foo = c(a = 1, b = 2), .name_spec = ~ paste(.x, .y, sep = "_")), c("foo_a", "foo_b")) expect_error(vec_c(foo = c(a = 1, b = 2), .name_spec = env()), "Can't convert `.name_spec`", fixed = TRUE) }) test_that("can pass glue string as name spec", { expect_named(vec_c(foo = c(a = 1, b = 2), .name_spec = "{outer}_{inner}"), c("foo_a", "foo_b")) expect_named(vec_c(foo = 1:2, .name_spec = "{outer}_{inner}"), c("foo_1", "foo_2")) expect_error(vec_c(foo = c(a = 1, b = 2), .name_spec = c("a", "b")), "single string") }) test_that("`outer` is recycled before name spec is invoked", { expect_identical(vec_c(outer = 1:2, .name_spec = "{outer}"), c(outer = 1L, outer = 2L)) }) test_that("apply_name_spec() recycles return value not arguments (#1099)", { out <- unstructure(apply_name_spec("foo", "outer", c("a", "b", "c"))) expect_identical(out, c("foo", "foo", "foo")) inner <- NULL outer <- NULL spec <- function(outer, inner) { inner <<- inner outer <<- outer } apply_name_spec(spec, "outer", c("a", "b", "c")) expect_identical(inner, c("a", "b", "c")) expect_identical(outer, "outer") }) test_that("r_chr_paste_prefix() works", { nms <- c("foo", "bar") expect_equal( .Call(vctrs_chr_paste_prefix, nms, "baz", "."), c("baz.foo", "baz.bar") ) # Greater than `VCTRS_PASTE_BUFFER_MAX_SIZE` long_prefix <- strrep("a", 5000) expect_equal( .Call(vctrs_chr_paste_prefix, nms, long_prefix, "."), paste0(long_prefix, ".", nms) ) }) vctrs/tests/testthat/error/0000755000176200001440000000000014042546261015552 5ustar liggesusersvctrs/tests/testthat/error/test-slice.txt0000644000176200001440000000425014042546243020370 0ustar liggesusers Unnamed vector with character subscript ======================================= > vec_slice(1:3, letters[1]) Error: Can't use character names to index an unnamed vector. Negative subscripts are checked =============================== > vec_slice(1:3, -c(1L, NA)) Error: Must subset elements with a valid subscript vector. x Negative locations can't have missing values. i Subscript has a missing value at location 2. > vec_slice(1:3, c(-1L, 1L)) Error: Must subset elements with a valid subscript vector. x Negative and positive locations can't be mixed. i Subscript has a positive value at location 2. oob error messages are properly constructed =========================================== > vec_slice(c(bar = 1), "foo") Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. > # Multiple OOB indices > vec_slice(letters, c(100, 1000)) Error: Can't subset elements that don't exist. x Locations 100 and 1000 don't exist. i There are only 26 elements. > vec_slice(letters, c(1, 100:103, 2, 104:110)) Error: Can't subset elements that don't exist. x Locations 100, 101, 102, 103, 104, etc. don't exist. i There are only 26 elements. > vec_slice(set_names(letters), c("foo", "bar")) Error: Can't subset elements that don't exist. x Elements `foo` and `bar` don't exist. > vec_slice(set_names(letters), toupper(letters)) Error: Can't subset elements that don't exist. x Elements `A`, `B`, `C`, `D`, `E`, etc. don't exist. Can't index beyond the end of a vector ====================================== > vec_slice(1:2, 3L) Error: Can't subset elements that don't exist. x Location 3 doesn't exist. i There are only 2 elements. > vec_slice(1:2, -3L) Error: Can't negate elements that don't exist. x Location 3 doesn't exist. i There are only 2 elements. vec_slice throws error with non-vector subscripts ================================================= > vec_slice(1:3, Sys.Date()) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `date`. i It must be logical, numeric, or character. > vec_slice(1:3, matrix(TRUE, ncol = 1)) Error: Must subset elements with a valid subscript vector. x Subscript must be a simple vector, not a matrix. vctrs/tests/testthat/error/test-dictionary.txt0000644000176200001440000000111714042546237021440 0ustar liggesusers vec_match() and vec_in() check types ==================================== > df1 <- data_frame(x = data_frame(foo = 1)) > df2 <- data_frame(x = data_frame(foo = "")) > vec_match(df1, df2) Error: Can't combine `x$foo` and `x$foo` . > vec_match(df1, df2, needles_arg = "n", haystack_arg = "h") Error: Can't combine `n$x$foo` and `h$x$foo` . > vec_in(df1, df2) Error: Can't combine `x$foo` and `x$foo` . > vec_in(df1, df2, needles_arg = "n", haystack_arg = "h") Error: Can't combine `n$x$foo` and `h$x$foo` . vctrs/tests/testthat/error/test-unchop.txt0000644000176200001440000000606414042546243020572 0ustar liggesusers vec_unchop() errors on unsupported location values ================================================== > vec_unchop(list(1, 2), list(c(1, 2), 0)) Error: Must subset elements with a valid subscript vector. x Subscript can't contain `0` values. i It has a `0` value at location 1. > vec_unchop(list(1), list(-1)) Error: Must subset elements with a valid subscript vector. x Subscript can't contain negative locations. vec_unchop() fails with complex foreign S3 classes ================================================== > x <- structure(foobar(1), attr_foo = "foo") > y <- structure(foobar(2), attr_bar = "bar") > vec_unchop(list(x, y)) Error: Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . vec_unchop() fails with complex foreign S4 classes ================================================== > joe <- .Counts(c(1L, 2L), name = "Joe") > jane <- .Counts(3L, name = "Jane") > vec_unchop(list(joe, jane)) Error: Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . vec_unchop() falls back for S4 classes with a registered c() method =================================================================== > joe1 <- .Counts(c(1L, 2L), name = "Joe") > joe2 <- .Counts(3L, name = "Joe") > vec_unchop(list(joe1, 1, joe2), list(c(1, 2), 3, 4)) Error: Can't combine `..1` and `..2` . vec_unchop() fallback doesn't support `name_spec` or `ptype` ============================================================ > foo <- structure(foobar(1), foo = "foo") > bar <- structure(foobar(2), bar = "bar") > with_c_foobar(vec_unchop(list(foo, bar), name_spec = "{outer}_{inner}")) Error: Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . > with_c_foobar(vec_unchop(list(foobar(1)), ptype = "")) Error: Can't convert to . vec_unchop() does not support non-numeric S3 indices ==================================================== > vec_unchop(list(1), list(factor("x"))) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `character`. i It must be numeric. > vec_unchop(list(1), list(foobar(1L))) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `vctrs_foobar`. i It must be numeric. can ignore names in `vec_unchop()` by providing a `zap()` name-spec (#232) ========================================================================== > vec_unchop(list(a = c(b = letters), b = 3L), name_spec = zap()) Error: Can't combine `a` and `b` . > vec_unchop(list(a = c(foo = 1:2), b = c(bar = "")), indices = list(2:1, 3), + name_spec = zap()) Error: Can't combine `a` and `b` . vctrs/tests/testthat/error/test-type-asis.txt0000644000176200001440000000062314042546245021211 0ustar liggesusers AsIs objects throw ptype2 errors with their underlying types ============================================================ > vec_ptype2(I(1), I("x")) Error: Can't combine and . AsIs objects throw cast errors with their underlying types ========================================================== > vec_cast(I(1), I(factor("x"))) Error: Can't convert to >. vctrs/tests/testthat/error/test-recycle.txt0000644000176200001440000000063714042546241020722 0ustar liggesusers incompatible recycling size has informative error ================================================= > vec_recycle(1:2, 4) Error: Can't recycle input of size 2 to size 4. > vec_recycle(1:2, 4, x_arg = "foo") Error: Can't recycle `foo` (size 2) to size 4. recycling to size 1 has informative error ========================================= > vec_recycle(1:2, 1) Error: Can't recycle input of size 2 to size 1. vctrs/tests/testthat/error/test-rep.txt0000644000176200001440000000200314042546241020047 0ustar liggesusers `vec_rep()` validates `times` ============================= > vec_rep(1, "x") Error: Can't convert `times` to . > vec_rep(1, c(1, 2)) Error: `times` must be a single number. > vec_rep(1, -1) Error: `times` must be a positive number. > vec_rep(1, NA_integer_) Error: `times` can't be missing. `vec_rep_each()` validates `times` ================================== > vec_rep_each(1, "x") Error: Can't convert `times` to . > vec_rep_each(1, -1) Error: `times` must be a vector of positive numbers. Location 1 is negative. > vec_rep_each(c(1, 2), c(1, -1)) Error: `times` must be a vector of positive numbers. Location 2 is negative. > vec_rep_each(1, NA_integer_) Error: `times` can't be missing. Location 1 is missing. > vec_rep_each(c(1, 2), c(1, NA_integer_)) Error: `times` can't be missing. Location 2 is missing. `vec_rep_each()` uses recyclying errors ======================================= > vec_rep_each(1:2, 1:3) Error: Can't recycle `times` (size 3) to size 2. vctrs/tests/testthat/error/test-slice-assign.txt0000644000176200001440000000401114042546242021644 0ustar liggesusers `vec_assign()` requires recyclable value ======================================== > vec_assign(1:3, 1:3, 1:2) Error: Can't recycle input of size 2 to size 3. logical subscripts must match size of indexed vector ==================================================== > vec_assign(1:2, c(TRUE, FALSE, TRUE), 5) Error: Must assign to elements with a valid subscript vector. i Logical subscripts must match the size of the indexed input. x Input has size 2 but subscript has size 3. > vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]) Error: Must assign to elements with a valid subscript vector. i Logical subscripts must match the size of the indexed input. x Input has size 32 but subscript has size 2. must assign existing elements ============================= > vec_assign(1:3, 5, 10) Error: Can't assign to elements that don't exist. x Location 5 doesn't exist. i There are only 3 elements. > vec_assign(1:3, "foo", 10) Error: Can't use character names to index an unnamed vector. > vec_slice(letters, -100) <- "foo" Error: Can't negate elements that don't exist. x Location 100 doesn't exist. i There are only 26 elements. > vec_assign(set_names(letters), "foo", "bar") Error: Can't assign to elements that don't exist. x Element `foo` doesn't exist. must assign with proper negative locations ========================================== > vec_assign(1:3, c(-1, 1), 1:2) Error: Must assign to elements with a valid subscript vector. x Negative and positive locations can't be mixed. i Subscript has a positive value at location 2. > vec_assign(1:3, c(-1, NA), 1:2) Error: Must assign to elements with a valid subscript vector. x Negative locations can't have missing values. i Subscript has a missing value at location 2. `vec_assign()` error args can be overridden =========================================== > vec_assign(1:2, 1L, "x", x_arg = "foo", value_arg = "bar") Error: Can't convert `bar` to match type of `foo` . > vec_assign(1:2, 1L, 1:2, value_arg = "bar") Error: Can't recycle `bar` (size 2) to size 1. vctrs/tests/testthat/error/test-type2.txt0000644000176200001440000000700314042546253020334 0ustar liggesusers can override scalar vector error message for base scalar types ============================================================== > vec_ptype2(NULL, quote(x), y_arg = "foo") Error: `foo` must be a vector, not a symbol. > vec_ptype2(quote(x), NULL, x_arg = "foo") Error: `foo` must be a vector, not a symbol. can override scalar vector error message for S3 types ===================================================== > vec_ptype2(NULL, foobar(), y_arg = "foo") Error: `foo` must be a vector, not a object. > vec_ptype2(foobar(), NULL, x_arg = "foo") Error: `foo` must be a vector, not a object. ptype2 and cast errors when same class fallback is impossible are informative ============================================================================= > vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE)) Error: Can't convert to . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . > vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE)) Error: Can't combine and . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . > # Incompatible attributes bullets are not show when methods are implemented > with_foobar_cast <- (function(expr) { + with_methods(vec_cast.vctrs_foobar = function(...) NULL, + vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) vec_default_cast(x, + to, ...), expr) + }) > with_foobar_ptype2 <- (function(expr) { + with_methods(vec_ptype2.vctrs_foobar = function(...) NULL, + vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) vec_default_ptype2( + x, y, ...), expr) + }) > with_foobar_cast(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE))) Error: Can't convert to . > with_foobar_ptype2(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE))) Error: Can't combine and . common type errors don't mention columns if they are compatible =============================================================== > df <- data.frame(x = 1, y = "") > foo <- structure(df, class = c("vctrs_foo", "data.frame")) > bar <- structure(df, class = c("vctrs_bar", "data.frame")) > vec_cast_no_fallback(foo, bar) Error: Can't convert to . common type warnings for data frames take attributes into account ================================================================= > foobar_bud <- foobar(mtcars, bud = TRUE) > foobar_boo <- foobar(mtcars, boo = TRUE) > vec_ptype2_fallback(foobar_bud, foobar_boo) Warning: Can't combine and ; falling back to . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . [1] mpg cyl disp hp drat wt qsec vs am gear carb <0 rows> (or 0-length row.names) > # For reference, warning for incompatible classes > vec_ptype2_fallback(foobar(mtcars), foobaz(mtcars)) Warning: Can't combine and ; falling back to . [1] mpg cyl disp hp drat wt qsec vs am gear carb <0 rows> (or 0-length row.names) > # For reference, error when fallback is disabled > vec_ptype2_no_fallback(foobar(mtcars), foobaz(mtcars)) Error: Can't combine and . vctrs/tests/testthat/error/test-bind.txt0000644000176200001440000000170314042546234020205 0ustar liggesusers vec_rbind() fails with complex foreign S3 classes ================================================= > x <- structure(foobar(1), attr_foo = "foo") > y <- structure(foobar(2), attr_bar = "bar") > vec_rbind(set_names(x, "x"), set_names(y, "x")) Error: Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . vec_rbind() fails with complex foreign S4 classes ================================================= > joe <- .Counts(1L, name = "Joe") > jane <- .Counts(2L, name = "Jane") > vec_rbind(set_names(joe, "x"), set_names(jane, "x")) Error: Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . vctrs/tests/testthat/error/test-subscript.txt0000644000176200001440000000552314042546245021315 0ustar liggesusers vec_as_subscript() forbids subscript types ========================================== > vec_as_subscript(1L, logical = "error", numeric = "error") Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `integer`. i It must be character. > vec_as_subscript("foo", logical = "error", character = "error") Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `character`. i It must be numeric. > vec_as_subscript(TRUE, logical = "error") Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `logical`. i It must be numeric or character. > vec_as_subscript("foo", character = "error") Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `character`. i It must be logical or numeric. > vec_as_subscript(NULL, numeric = "error") Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `NULL`. i It must be logical or character. > vec_as_subscript(quote(foo), character = "error") Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `symbol`. i It must be logical or numeric. vec_as_subscript2() forbids subscript types =========================================== > vec_as_subscript2(1L, numeric = "error", logical = "error") Error: Must extract element with a single valid subscript. x Subscript has the wrong type `integer`. i It must be character. > vec_as_subscript2("foo", character = "error", logical = "error") Error: Must extract element with a single valid subscript. x Subscript has the wrong type `character`. i It must be numeric. > vec_as_subscript2(TRUE, logical = "error") Error: Must extract element with a single valid subscript. x Subscript has the wrong type `logical`. i It must be numeric or character. can customise subscript errors ============================== > with_tibble_cols(vec_as_subscript(env())) Error: Must rename columns with a valid subscript vector. x Subscript `foo(bar)` has the wrong type `environment`. i It must be logical, numeric, or character. > with_dm_tables(vec_as_subscript(env())) Error: Must extract tables with a valid subscript vector. x Subscript `foo(bar)` has the wrong type `environment`. i It must be logical, numeric, or character. vec_as_subscript() checks dimensionality ======================================== > vec_as_subscript(matrix(TRUE, nrow = 1)) Error: Must subset elements with a valid subscript vector. x Subscript must be a simple vector, not a matrix. > vec_as_subscript(array(TRUE, dim = c(1, 1, 1))) Error: Must subset elements with a valid subscript vector. x Subscript must be a simple vector, not an array. > with_tibble_rows(vec_as_subscript(matrix(TRUE, nrow = 1))) Error: Must remove rows with a valid subscript vector. x Subscript `foo(bar)` must be a simple vector, not a matrix. vctrs/tests/testthat/error/test-subscript-loc.txt0000644000176200001440000004422514042546261022070 0ustar liggesusers vec_as_location() UI ==================== > vec_as_location(1, 1L, missing = "bogus") Error: `missing` must be one of "propagate" or "error". vec_as_location() checks for mix of negative and missing locations ================================================================== > vec_as_location(-c(1L, NA), 30) Error: Must subset elements with a valid subscript vector. x Negative locations can't have missing values. i Subscript has a missing value at location 2. > vec_as_location(-c(1L, rep(NA, 10)), 30) Error: Must subset elements with a valid subscript vector. x Negative locations can't have missing values. i Subscript has 10 missing values at locations 2, 3, 4, 5, 6, etc. vec_as_location() checks for mix of negative and positive locations =================================================================== > vec_as_location(c(-1L, 1L), 30) Error: Must subset elements with a valid subscript vector. x Negative and positive locations can't be mixed. i Subscript has a positive value at location 2. > vec_as_location(c(-1L, rep(1L, 10)), 30) Error: Must subset elements with a valid subscript vector. x Negative and positive locations can't be mixed. i Subscript has 10 positive values at locations 2, 3, 4, 5, 6, etc. num_as_location() UI ==================== > num_as_location(1, 1L, missing = "bogus") Error: `missing` must be one of "propagate" or "error". > num_as_location(1, 1L, negative = "bogus") Error: `negative` must be one of "invert", "error", or "ignore". > num_as_location(1, 1L, oob = "bogus") Error: `oob` must be one of "error" or "extend". > num_as_location(1, 1L, zero = "bogus") Error: `zero` must be one of "remove", "error", or "ignore". num_as_location() optionally forbids negative indices ===================================================== > num_as_location(dbl(1, -1), 2L, negative = "error") Error: Must subset elements with a valid subscript vector. x Subscript can't contain negative locations. num_as_location() optionally forbids zero indices ================================================= > num_as_location(0L, 1L, zero = "error") Error: Must subset elements with a valid subscript vector. x Subscript can't contain `0` values. i It has a `0` value at location 1. > num_as_location(c(0, 0, 0, 0, 0, 0), 1, zero = "error") Error: Must subset elements with a valid subscript vector. x Subscript can't contain `0` values. i It has 6 `0` values at locations 1, 2, 3, 4, 5, etc. logical subscripts must match size of indexed vector ==================================================== > vec_as_location(c(TRUE, FALSE), 3) Error: Must subset elements with a valid subscript vector. i Logical subscripts must match the size of the indexed input. x Input has size 3 but subscript has size 2. character subscripts require named vectors ========================================== > vec_as_location(letters[1], 3) Error: Can't use character names to index an unnamed vector. vec_as_location() requires integer, character, or logical inputs ================================================================ > vec_as_location(mtcars, 10L) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `data.frame< mpg : double cyl : double disp: double hp : double drat: double wt : double qsec: double vs : double am : double gear: double carb: double >`. i It must be logical, numeric, or character. > vec_as_location(env(), 10L) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `environment`. i It must be logical, numeric, or character. > vec_as_location(foobar(), 10L) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `vctrs_foobar`. i It must be logical, numeric, or character. > vec_as_location(2.5, 3L) Error: Must subset elements with a valid subscript vector. x Can't convert from to due to loss of precision. > vec_as_location(list(), 10L) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `list`. i It must be logical, numeric, or character. > vec_as_location(function() NULL, 10L) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `function`. i It must be logical, numeric, or character. > vec_as_location(Sys.Date(), 3L) Error: Must subset elements with a valid subscript vector. x Subscript has the wrong type `date`. i It must be logical, numeric, or character. > # Idem with custom `arg` > vec_as_location(env(), 10L, arg = "foo") Error: Must subset elements with a valid subscript vector. x Subscript `foo` has the wrong type `environment`. i It must be logical, numeric, or character. > vec_as_location(foobar(), 10L, arg = "foo") Error: Must subset elements with a valid subscript vector. x Subscript `foo` has the wrong type `vctrs_foobar`. i It must be logical, numeric, or character. > vec_as_location(2.5, 3L, arg = "foo") Error: Must subset elements with a valid subscript vector. x Can't convert from `foo` to due to loss of precision. vec_as_location2() UI ===================== > vec_as_location2(1, 1L, missing = "bogus") Error: `missing` must be one of "error" or "propagate". vec_as_location2() requires integer or character inputs ======================================================= > vec_as_location2(TRUE, 10L) Error: Must extract element with a single valid subscript. x Subscript has the wrong type `logical`. i It must be numeric or character. > vec_as_location2(mtcars, 10L) Error: Must extract element with a single valid subscript. x Subscript has the wrong type `data.frame< mpg : double cyl : double disp: double hp : double drat: double wt : double qsec: double vs : double am : double gear: double carb: double >`. i It must be numeric or character. > vec_as_location2(env(), 10L) Error: Must extract element with a single valid subscript. x Subscript has the wrong type `environment`. i It must be numeric or character. > vec_as_location2(foobar(), 10L) Error: Must extract element with a single valid subscript. x Subscript has the wrong type `vctrs_foobar`. i It must be numeric or character. > vec_as_location2(2.5, 3L) Error: Must extract element with a single valid subscript. x Can't convert from to due to loss of precision. > vec_as_location2(Inf, 10L) Error: Must extract element with a single valid subscript. x Can't convert from to due to loss of precision. > vec_as_location2(-Inf, 10L) Error: Must extract element with a single valid subscript. x Can't convert from to due to loss of precision. > # Idem with custom `arg` > vec_as_location2(foobar(), 10L, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has the wrong type `vctrs_foobar`. i It must be numeric or character. > vec_as_location2(2.5, 3L, arg = "foo") Error: Must extract element with a single valid subscript. x Can't convert from `foo` to due to loss of precision. > with_tibble_rows(vec_as_location2(TRUE)) Error: Must remove row with a single valid subscript. x Subscript `foo(bar)` has the wrong type `logical`. i It must be numeric or character. vec_as_location2() requires length 1 inputs =========================================== > vec_as_location2(1:2, 2L) Error: Must extract element with a single valid subscript. x Subscript has size 2 but must be size 1. > vec_as_location2(mtcars, 10L) Error: Must extract element with a single valid subscript. x Subscript has the wrong type `data.frame< mpg : double cyl : double disp: double hp : double drat: double wt : double qsec: double vs : double am : double gear: double carb: double >`. i It must be numeric or character. > # Idem with custom `arg` > vec_as_location2(1:2, 2L, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has size 2 but must be size 1. > vec_as_location2(mtcars, 10L, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has the wrong type `data.frame< mpg : double cyl : double disp: double hp : double drat: double wt : double qsec: double vs : double am : double gear: double carb: double >`. i It must be numeric or character. > vec_as_location2(1:2, 2L, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has size 2 but must be size 1. vec_as_location2() requires positive integers ============================================= > vec_as_location2(0, 2L) Error: Must extract element with a single valid subscript. x Subscript has value 0 but must be a positive location. > vec_as_location2(-1, 2L) Error: Must extract element with a single valid subscript. x Subscript has value -1 but must be a positive location. > # Idem with custom `arg` > vec_as_location2(0, 2L, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has value 0 but must be a positive location. > # vec_as_location2() fails with NA > vec_as_location2(na_int, 2L) Error: Must extract element with a single valid subscript. x Subscript can't be `NA`. > vec_as_location2(na_chr, 1L, names = "foo") Error: Must extract element with a single valid subscript. x Subscript can't be `NA`. > # Idem with custom `arg` > vec_as_location2(na_int, 2L, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` can't be `NA`. vec_as_location() and variants check for OOB elements ===================================================== > # Numeric subscripts > vec_as_location(10L, 2L) Error: Can't subset elements that don't exist. x Location 10 doesn't exist. i There are only 2 elements. > vec_as_location(-10L, 2L) Error: Can't negate elements that don't exist. x Location 10 doesn't exist. i There are only 2 elements. > vec_as_location2(10L, 2L) Error: Can't subset elements that don't exist. x Location 10 doesn't exist. i There are only 2 elements. > # Character subscripts > vec_as_location("foo", 1L, names = "bar") Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. > vec_as_location2("foo", 1L, names = "bar") Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. num_as_location() UI ==================== > num_as_location(1, 1L, missing = "bogus") Error: `missing` must be one of "propagate" or "error". > num_as_location(1, 1L, negative = "bogus") Error: `negative` must be one of "invert", "error", or "ignore". can optionally extend beyond the end ==================================== > num_as_location(c(1, 3), 1, oob = "extend") Error: Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript contains non-consecutive location 3. > num_as_location(c(1:5, 7), 3, oob = "extend") Error: Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript contains non-consecutive locations 4 and 7. > num_as_location(c(1:5, 7, 1), 3, oob = "extend") Error: Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript contains non-consecutive locations 4 and 7. > num_as_location(c(1:5, 7, 1, 10), 3, oob = "extend") Error: Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript contains non-consecutive locations 4, 7, and 10. missing values are supported in error formatters ================================================ > num_as_location(c(1, NA, 2, 3), 1) Error: Can't subset elements that don't exist. x Locations 2 and 3 don't exist. i There are only 1 element. > num_as_location(c(1, NA, 3), 1, oob = "extend") Error: Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript contains non-consecutive location 3. can customise subscript type errors =================================== > # With custom `arg` > num_as_location(-1, 2, negative = "error", arg = "foo") Error: Must subset elements with a valid subscript vector. x Subscript `foo` can't contain negative locations. > num_as_location2(-1, 2, negative = "error", arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has value -1 but must be a positive location. > vec_as_location2(0, 2, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has value 0 but must be a positive location. > vec_as_location2(na_dbl, 2, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` can't be `NA`. > vec_as_location2(c(1, 2), 2, arg = "foo") Error: Must extract element with a single valid subscript. x Subscript `foo` has size 2 but must be size 1. > vec_as_location(c(TRUE, FALSE), 3, arg = "foo") Error: Must subset elements with a valid subscript vector. i Logical subscripts must match the size of the indexed input. x Input has size 3 but subscript `foo` has size 2. > vec_as_location(c(-1, NA), 3, arg = "foo") Error: Must subset elements with a valid subscript vector. x Negative locations can't have missing values. i Subscript `foo` has a missing value at location 2. > vec_as_location(c(-1, 1), 3, arg = "foo") Error: Must subset elements with a valid subscript vector. x Negative and positive locations can't be mixed. i Subscript `foo` has a positive value at location 2. > num_as_location(c(1, 4), 2, oob = "extend", arg = "foo") Error: Can't subset elements beyond the end with non-consecutive locations. i Input has size 2. x Subscript `foo` contains non-consecutive location 4. > num_as_location(0, 1, zero = "error", arg = "foo") Error: Must subset elements with a valid subscript vector. x Subscript `foo` can't contain `0` values. i It has a `0` value at location 1. > # With tibble columns > with_tibble_cols(num_as_location(-1, 2, negative = "error")) Error: Must rename columns with a valid subscript vector. x Subscript `foo(bar)` can't contain negative locations. > with_tibble_cols(num_as_location2(-1, 2, negative = "error")) Error: Must rename column with a single valid subscript. x Subscript `foo(bar)` has value -1 but must be a positive location. > with_tibble_cols(vec_as_location2(0, 2)) Error: Must rename column with a single valid subscript. x Subscript `foo(bar)` has value 0 but must be a positive location. > with_tibble_cols(vec_as_location2(na_dbl, 2)) Error: Must rename column with a single valid subscript. x Subscript `foo(bar)` can't be `NA`. > with_tibble_cols(vec_as_location2(c(1, 2), 2)) Error: Must rename column with a single valid subscript. x Subscript `foo(bar)` has size 2 but must be size 1. > with_tibble_cols(vec_as_location(c(TRUE, FALSE), 3)) Error: Must rename columns with a valid subscript vector. i Logical subscripts must match the size of the indexed input. x Input has size 3 but subscript `foo(bar)` has size 2. > with_tibble_cols(vec_as_location(c(-1, NA), 3)) Error: Must rename columns with a valid subscript vector. x Negative locations can't have missing values. i Subscript `foo(bar)` has a missing value at location 2. > with_tibble_cols(vec_as_location(c(-1, 1), 3)) Error: Must rename columns with a valid subscript vector. x Negative and positive locations can't be mixed. i Subscript `foo(bar)` has a positive value at location 2. > with_tibble_cols(num_as_location(c(1, 4), 2, oob = "extend")) Error: Can't rename columns beyond the end with non-consecutive locations. i Input has size 2. x Subscript `foo(bar)` contains non-consecutive location 4. > with_tibble_cols(num_as_location(0, 1, zero = "error")) Error: Must rename columns with a valid subscript vector. x Subscript `foo(bar)` can't contain `0` values. i It has a `0` value at location 1. can customise OOB errors ======================== > vec_slice(set_names(letters), "foo") Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. > # With custom `arg` > vec_as_location(30, length(letters), arg = "foo") Error: Can't subset elements that don't exist. x Location 30 doesn't exist. i There are only 26 elements. > vec_as_location("foo", NULL, letters, arg = "foo") Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. > # With tibble columns > with_tibble_cols(vec_slice(set_names(letters), "foo")) Error: Can't rename columns that don't exist. x Column `foo` doesn't exist. > with_tibble_cols(vec_slice(set_names(letters), 30)) Error: Can't rename columns that don't exist. x Location 30 doesn't exist. i There are only 26 columns. > with_tibble_cols(vec_slice(set_names(letters), -30)) Error: Can't rename columns that don't exist. x Location 30 doesn't exist. i There are only 26 columns. > # With tibble rows > with_tibble_rows(vec_slice(set_names(letters), c("foo", "bar"))) Error: Can't remove rows that don't exist. x Rows `foo` and `bar` don't exist. > with_tibble_rows(vec_slice(set_names(letters), 1:30)) Error: Can't remove rows that don't exist. x Locations 27, 28, 29, and 30 don't exist. i There are only 26 rows. > with_tibble_rows(vec_slice(set_names(letters), -(1:30))) Error: Can't remove rows that don't exist. x Locations 27, 28, 29, and 30 don't exist. i There are only 26 rows. can disallow missing values =========================== > vec_as_location(c(1, NA), 2, missing = "error") Error: Must subset elements with a valid subscript vector. x Subscript can't contain missing values. x It has a missing value at location 2. > vec_as_location(c(1, NA, 2, NA), 2, missing = "error", arg = "foo") Error: Must subset elements with a valid subscript vector. x Subscript can't contain missing values. x It has missing values at locations 2 and 4. > with_tibble_cols(vec_as_location(c(1, NA, 2, NA), 2, missing = "error")) Error: Must rename columns with a valid subscript vector. x Subscript `foo(bar)` can't contain missing values. x It has missing values at locations 2 and 4. vec_as_location() checks dimensionality ======================================= > vec_as_location(matrix(TRUE, nrow = 1), 3L) Error: Must subset elements with a valid subscript vector. x Subscript must be a simple vector, not a matrix. > vec_as_location(array(TRUE, dim = c(1, 1, 1)), 3L) Error: Must subset elements with a valid subscript vector. x Subscript must be a simple vector, not an array. > with_tibble_rows(vec_as_location(matrix(TRUE, nrow = 1), 3L)) Error: Must remove rows with a valid subscript vector. x Subscript `foo(bar)` must be a simple vector, not a matrix. vctrs/tests/testthat/error/test-cast.txt0000644000176200001440000000047614042546236020233 0ustar liggesusers Casting to named argument mentions 'match type ' ===================================================== > vec_cast(1, "", x_arg = "foo", to_arg = "bar") Error: Can't convert `foo` to match type of `bar` . > vec_cast(1, "", x_arg = "foo") Error: Can't convert `foo` to . vctrs/tests/testthat/error/test-conditions.txt0000644000176200001440000000771014042546261021446 0ustar liggesusers incompatible type error validates `action` ========================================== > stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = "conver") Error: `action` must be one of "combine" or "convert". Did you mean "convert"? > stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = 1) Error: `action` must be a character vector. can override arg in OOB conditions ================================== > with_subscript_data(vec_slice(set_names(letters), "foo"), NULL) Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. > with_subscript_data(vec_slice(set_names(letters), "foo"), "input") Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. > with_subscript_data(vec_slice(set_names(letters), "foo"), quote(input)) Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. > with_subscript_data(vec_slice(set_names(letters), "foo"), quote(input[i])) Error: Can't subset elements that don't exist. x Element `foo` doesn't exist. scalar type errors are informative ================================== > vec_slice(foobar(list(1)), 1) Error: Input must be a vector, not a object. > stop_scalar_type(foobar(list(1)), arg = "foo") Error: `foo` must be a vector, not a object. empty names errors are informative ================================== > vec_as_names(c("x", "", "y"), repair = "check_unique") Error: Names can't be empty. x Empty name found at location 2. > vec_as_names(c("x", "", "y", ""), repair = "check_unique") Error: Names can't be empty. x Empty names found at locations 2 and 4. > vec_as_names(rep("", 10), repair = "check_unique") Error: Names can't be empty. x Empty names found at locations 1, 2, 3, 4, 5, etc. dot dot names errors are informative ==================================== > vec_as_names(c("..1", "..1", "..1", "...", "z"), repair = "check_unique") Error: Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at locations 1, 2, and 3. * "..." at location 4. > vec_as_names(c(rep("..1", 20), rep(c("..2", "..3", "..4", "...", "..5"), 2)), + repair = "check_unique") Error: Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at locations 1, 2, 3, 4, 5, etc. * "..2" at locations 21 and 26. * "..3" at locations 22 and 27. * "..4" at locations 23 and 28. * "..." at locations 24 and 29. * ... unique names errors are informative =================================== > vec_as_names(c("x", "x", "x", "y", "y", "z"), repair = "check_unique") Error: Names must be unique. x These names are duplicated: * "x" at locations 1, 2, and 3. * "y" at locations 4 and 5. > vec_as_names(c(rep("x", 20), rep(c("a", "b", "c", "d", "e"), 2)), repair = "check_unique") Error: Names must be unique. x These names are duplicated: * "x" at locations 1, 2, 3, 4, 5, etc. * "a" at locations 21 and 26. * "b" at locations 22 and 27. * "c" at locations 23 and 28. * "d" at locations 24 and 29. * ... lossy cast from character to factor mentions loss of generality =============================================================== > vec_cast("a", factor("b")) Error: Can't convert from to > due to loss of generality. * Locations: 1 ordered cast failures mention conversion ======================================== > vec_cast(ordered("x"), ordered("y")) Error: Can't convert > to >. incompatible size errors ======================== > stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "") Error: Can't recycle input of size 2 to size 3. > stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = "") Error: Can't recycle `foo` (size 2) to size 3. > stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "bar") Error: Can't recycle input of size 2 to match `bar` (size 3). > stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = quote(bar)) Error: Can't recycle `foo` (size 2) to match `bar` (size 3). vctrs/tests/testthat/error/test-c.txt0000644000176200001440000000306314042546235017515 0ustar liggesusers vec_c() fails with complex foreign S3 classes ============================================= > x <- structure(foobar(1), attr_foo = "foo") > y <- structure(foobar(2), attr_bar = "bar") > vec_c(x, y) Error: Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . vec_c() fails with complex foreign S4 classes ============================================= > joe <- .Counts(c(1L, 2L), name = "Joe") > jane <- .Counts(3L, name = "Jane") > vec_c(joe, jane) Error: Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . vec_c() fallback doesn't support `name_spec` or `ptype` ======================================================= > with_c_foobar(vec_c(foobar(1), foobar(2), .name_spec = "{outer}_{inner}")) Error: Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . > with_c_foobar(vec_c(foobar(1), foobar(2), .ptype = "")) Error: Can't convert to . can ignore names by providing a `zap()` name-spec (#232) ======================================================== > vec_c(a = c(b = letters), b = 1, .name_spec = zap()) Error: Can't combine `a` and `b` . vctrs/tests/testthat/error/test-type-data-frame.txt0000644000176200001440000000572414042546246022263 0ustar liggesusers combining data frames with foreign classes uses fallback ======================================================== > foo <- structure(mtcars[1:3], class = c("foo", "data.frame")) > bar <- structure(mtcars[4:6], class = c("bar", "data.frame")) > baz <- structure(mtcars[7:9], class = c("baz", "data.frame")) > vec_ptype_common_df_fallback(foo, bar, baz) Warning: Can't combine and ; falling back to . Warning: Can't combine and ; falling back to . [1] mpg cyl disp hp drat wt qsec vs am <0 rows> (or 0-length row.names) > vec_ptype_common_df_fallback(foo, baz, bar, baz, foo, bar) Warning: Can't combine and ; falling back to . Warning: Can't combine and ; falling back to . [1] mpg cyl disp qsec vs am hp drat wt <0 rows> (or 0-length row.names) > with_fallback_warning(invisible(vec_rbind(foo, data.frame(), foo))) Warning: Can't combine and ; falling back to . > with_fallback_warning(invisible(vec_rbind(foo, baz, bar, baz, foo, bar))) Warning: Can't combine and ; falling back to . Warning: Can't combine and ; falling back to . > with_fallback_warning(invisible(vec_cbind(foo, data.frame(x = 1)))) Warning: Can't combine and ; falling back to . > with_fallback_warning(invisible(vec_cbind(foo, data.frame(x = 1), bar))) Warning: Can't combine and ; falling back to . Warning: Can't combine and ; falling back to . > with_fallback_quiet(invisible(vec_rbind(foo, data.frame(), foo))) > with_fallback_quiet(invisible(vec_rbind(foo, baz, bar, baz, foo, bar))) > with_fallback_quiet(invisible(vec_cbind(foo, data.frame(x = 1)))) > with_fallback_quiet(invisible(vec_cbind(foo, data.frame(x = 1), bar))) falls back to tibble for tibble subclasses (#1025) ================================================== > with_fallback_warning(invisible(vec_rbind(foobar(tibble::as_tibble(mtcars)), + mtcars, foobaz(mtcars)))) Warning: Can't combine and ; falling back to . Warning: Can't combine and ; falling back to . > with_fallback_warning(invisible(vec_rbind(tibble::as_tibble(mtcars), foobar( + tibble::as_tibble(mtcars))))) Warning: Can't combine and ; falling back to . > with_fallback_warning(invisible(vec_rbind(foobar(tibble::as_tibble(mtcars)), + mtcars, foobar(tibble::as_tibble(mtcars))))) Warning: Can't combine and ; falling back to . > with_fallback_quiet(invisible(vec_rbind(foobar(tibble::as_tibble(mtcars)), + mtcars, foobaz(mtcars)))) > with_fallback_quiet(invisible(vec_rbind(tibble::as_tibble(mtcars), foobar( + tibble::as_tibble(mtcars))))) > with_fallback_quiet(invisible(vec_rbind(foobar(tibble::as_tibble(mtcars)), + mtcars, foobar(tibble::as_tibble(mtcars))))) vctrs/tests/testthat/error/test-shape.txt0000644000176200001440000000116714042546242020374 0ustar liggesusers incompatible shapes throw errors ================================ > vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1)) Error: Can't combine and . x Incompatible sizes 0 and 5 along axis 2. > vec_shape2(shaped_int(1, 5, 0), shaped_int(1, 1, 5)) Error: Can't combine and . x Incompatible sizes 0 and 5 along axis 3. can override error args ======================= > vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1), x_arg = "foo", y_arg = "bar") Error: Can't combine `foo` and `bar` . x Incompatible sizes 0 and 5 along axis 2. vctrs/tests/testthat/helper-performance.R0000644000176200001440000000217413717456727020345 0ustar liggesusersskip_if_not_testing_performance <- function(x) { opt <- Sys.getenv("VCTRS_TEST_PERFORMANCE", unset = "false") testing <- identical(opt, "true") if (testing) { return() } skip("Not testing performance") } expect_time_lt <- function(expr, expect) { time <- time_of({{ expr }}) expect_lt(time, expect) } time_of <- function(expr) { expr <- enquo(expr) time <- system.time(eval_tidy(expr)) unclass(time)[["elapsed"]] } # From r-lib/bench with_memory_prof <- function(expr) { f <- tempfile() on.exit(unlink(f)) utils::Rprofmem(f, threshold = 1) on.exit(utils::Rprofmem(NULL), add = TRUE) res <- force(expr) utils::Rprofmem(NULL) bytes <- parse_allocations(f)$bytes bytes <- sum(bytes, na.rm = TRUE) new_vctrs_bytes(bytes) } parse_allocations <- function(filename) { if (!is_installed("profmem")) { testthat::skip("profmem must be installed.") } readRprofmem <- env_get(ns_env("profmem"), "readRprofmem") tryCatch( readRprofmem(filename), error = function(cnd) { testthat::skip(sprintf( "Memory profiling failed: %s", conditionMessage(cnd) )) } ) } vctrs/tests/testthat.R0000644000176200001440000000035413622451540014544 0ustar liggesuserslibrary(testthat) library(vctrs) if (requireNamespace("xml2")) { test_check("vctrs", reporter = MultiReporter$new(reporters = list(JunitReporter$new(file = "test-results.xml"), CheckReporter$new()))) } else { test_check("vctrs") } vctrs/src/0000755000176200001440000000000014042546502012204 5ustar liggesusersvctrs/src/equal.h0000644000176200001440000002316614042540502013466 0ustar liggesusers#ifndef VCTRS_EQUAL_H #define VCTRS_EQUAL_H #include "vctrs.h" // ----------------------------------------------------------------------------- SEXP vec_equal_na(SEXP x); // ----------------------------------------------------------------------------- static inline bool lgl_is_missing(int x) { return x == NA_LOGICAL; } static inline bool int_is_missing(int x) { return x == NA_INTEGER; } static inline bool dbl_is_missing(double x) { return isnan(x); } static inline bool cpl_is_missing(Rcomplex x) { return dbl_is_missing(x.r) || dbl_is_missing(x.i); } static inline bool chr_is_missing(SEXP x) { return x == NA_STRING; } static inline bool raw_is_missing(Rbyte x) { return false; } static inline bool list_is_missing(SEXP x) { return x == R_NilValue; } // ----------------------------------------------------------------------------- #define P_IS_MISSING(CTYPE, IS_MISSING) do { \ return IS_MISSING(((const CTYPE*) p_x)[i]); \ } while (0) static inline bool p_nil_is_missing(const void* p_x, r_ssize i) { stop_internal("p_nil_is_missing", "Can't check NULL for missingness."); } static inline bool p_lgl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(int, lgl_is_missing); } static inline bool p_int_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(int, int_is_missing); } static inline bool p_dbl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(double, dbl_is_missing); } static inline bool p_cpl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(Rcomplex, cpl_is_missing); } static inline bool p_chr_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(SEXP, chr_is_missing); } static inline bool p_raw_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(Rbyte, raw_is_missing); } static inline bool p_list_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(SEXP, list_is_missing); } #undef P_IS_MISSING static inline bool p_is_missing(const void* p_x, r_ssize i, const enum vctrs_type type) { switch (type) { case vctrs_type_logical: return p_lgl_is_missing(p_x, i); case vctrs_type_integer: return p_int_is_missing(p_x, i); case vctrs_type_double: return p_dbl_is_missing(p_x, i); case vctrs_type_complex: return p_cpl_is_missing(p_x, i); case vctrs_type_character: return p_chr_is_missing(p_x, i); case vctrs_type_raw: return p_raw_is_missing(p_x, i); case vctrs_type_list: return p_list_is_missing(p_x, i); default: stop_unimplemented_vctrs_type("p_is_missing", type); } } // ----------------------------------------------------------------------------- static inline int lgl_equal_na_equal(int x, int y) { return x == y; } static inline int int_equal_na_equal(int x, int y) { return x == y; } static inline int dbl_equal_na_equal(double x, double y) { switch (dbl_classify(x)) { case vctrs_dbl_number: break; case vctrs_dbl_missing: return dbl_classify(y) == vctrs_dbl_missing; case vctrs_dbl_nan: return dbl_classify(y) == vctrs_dbl_nan; } return isnan(y) ? false : x == y; } static inline int cpl_equal_na_equal(Rcomplex x, Rcomplex y) { return dbl_equal_na_equal(x.r, y.r) && dbl_equal_na_equal(x.i, y.i); } static inline int chr_equal_na_equal(SEXP x, SEXP y) { return x == y; } static inline int raw_equal_na_equal(Rbyte x, Rbyte y) { return x == y; } static inline int list_equal_na_equal(SEXP x, SEXP y) { return equal_object_normalized(x, y); } // ----------------------------------------------------------------------------- #define P_EQUAL_NA_EQUAL(CTYPE, EQUAL_NA_EQUAL) do { \ return EQUAL_NA_EQUAL(((const CTYPE*) p_x)[i], ((const CTYPE*) p_y)[j]); \ } while (0) static inline int p_nil_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { stop_internal("p_nil_equal_na_equal", "Can't compare NULL for equality."); } static inline int p_lgl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(int, lgl_equal_na_equal); } static inline int p_int_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(int, int_equal_na_equal); } static inline int p_dbl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(double, dbl_equal_na_equal); } static inline int p_cpl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(Rcomplex, cpl_equal_na_equal); } static inline int p_chr_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(SEXP, chr_equal_na_equal); } static inline int p_raw_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(Rbyte, raw_equal_na_equal); } static inline int p_list_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(SEXP, list_equal_na_equal); } #undef P_EQUAL_NA_EQUAL static inline bool p_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case vctrs_type_logical: return p_lgl_equal_na_equal(p_x, i, p_y, j); case vctrs_type_integer: return p_int_equal_na_equal(p_x, i, p_y, j); case vctrs_type_double: return p_dbl_equal_na_equal(p_x, i, p_y, j); case vctrs_type_complex: return p_cpl_equal_na_equal(p_x, i, p_y, j); case vctrs_type_character: return p_chr_equal_na_equal(p_x, i, p_y, j); case vctrs_type_raw: return p_raw_equal_na_equal(p_x, i, p_y, j); case vctrs_type_list: return p_list_equal_na_equal(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_equal_na_equal", type); } } // ----------------------------------------------------------------------------- static inline int lgl_equal_na_propagate(int x, int y) { if (lgl_is_missing(x) || lgl_is_missing(y)) { return NA_LOGICAL; } else { return lgl_equal_na_equal(x, y); } } static inline int int_equal_na_propagate(int x, int y) { if (int_is_missing(x) || int_is_missing(y)) { return NA_LOGICAL; } else { return int_equal_na_equal(x, y); } } static inline int dbl_equal_na_propagate(double x, double y) { if (dbl_is_missing(x) || dbl_is_missing(y)) { return NA_LOGICAL; } else { // Faster than `dbl_equal_na_equal()`, // which has unneeded missing value checks return x == y; } } static inline int cpl_equal_na_propagate(Rcomplex x, Rcomplex y) { int real_equal = dbl_equal_na_propagate(x.r, y.r); int imag_equal = dbl_equal_na_propagate(x.i, y.i); if (real_equal == NA_LOGICAL || imag_equal == NA_LOGICAL) { return NA_LOGICAL; } else { return real_equal && imag_equal; } } static inline int chr_equal_na_propagate(SEXP x, SEXP y) { if (chr_is_missing(x) || chr_is_missing(y)) { return NA_LOGICAL; } else { return chr_equal_na_equal(x, y); } } static inline int raw_equal_na_propagate(Rbyte x, Rbyte y) { return raw_equal_na_equal(x, y); } static inline int list_equal_na_propagate(SEXP x, SEXP y) { if (list_is_missing(x) || list_is_missing(y)) { return NA_LOGICAL; } else { return list_equal_na_equal(x, y); } } // ----------------------------------------------------------------------------- #define P_EQUAL_NA_PROPAGATE(CTYPE, EQUAL_NA_PROPAGATE) do { \ return EQUAL_NA_PROPAGATE(((const CTYPE*) p_x)[i], ((const CTYPE*) p_y)[j]); \ } while (0) static inline int p_nil_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { stop_internal("p_nil_equal_na_propagate", "Can't compare NULL for equality."); } static inline int p_lgl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(int, lgl_equal_na_propagate); } static inline int p_int_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(int, int_equal_na_propagate); } static inline int p_dbl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(double, dbl_equal_na_propagate); } static inline int p_cpl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(Rcomplex, cpl_equal_na_propagate); } static inline int p_chr_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(SEXP, chr_equal_na_propagate); } static inline int p_raw_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(Rbyte, raw_equal_na_propagate); } static inline int p_list_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(SEXP, list_equal_na_propagate); } #undef P_EQUAL_NA_PROPAGATE static inline bool p_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case vctrs_type_logical: return p_lgl_equal_na_propagate(p_x, i, p_y, j); case vctrs_type_integer: return p_int_equal_na_propagate(p_x, i, p_y, j); case vctrs_type_double: return p_dbl_equal_na_propagate(p_x, i, p_y, j); case vctrs_type_complex: return p_cpl_equal_na_propagate(p_x, i, p_y, j); case vctrs_type_character: return p_chr_equal_na_propagate(p_x, i, p_y, j); case vctrs_type_raw: return p_raw_equal_na_propagate(p_x, i, p_y, j); case vctrs_type_list: return p_list_equal_na_propagate(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_equal_na_propagate", type); } } // ----------------------------------------------------------------------------- #endif vctrs/src/order-transform.c0000644000176200001440000000545414042540502015476 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "order-transform.h" #include "utils.h" // ----------------------------------------------------------------------------- static SEXP chr_apply_transform(SEXP x, SEXP chr_transform); static SEXP df_apply_transform(SEXP x, SEXP chr_transform); // [[ include("order-transform.h") ]] SEXP proxy_chr_transform(SEXP proxy, SEXP chr_transform) { if (chr_transform == r_null) { return proxy; } chr_transform = PROTECT(r_as_function(chr_transform, "chr_transform")); SEXP out; switch (vec_proxy_typeof(proxy)) { case vctrs_type_character: out = chr_apply_transform(proxy, chr_transform); break; case vctrs_type_dataframe: out = df_apply_transform(proxy, chr_transform); break; default: out = proxy; } UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- static SEXP chr_apply_transform(SEXP x, SEXP chr_transform) { // Don't use vctrs dispatch utils because we match argument positionally SEXP call = PROTECT(Rf_lang2(syms_chr_transform, syms_x)); SEXP mask = PROTECT(r_new_environment(R_GlobalEnv)); Rf_defineVar(syms_chr_transform, chr_transform, mask); Rf_defineVar(syms_x, x, mask); SEXP out = PROTECT(Rf_eval(call, mask)); if (vec_typeof(out) != vctrs_type_character) { Rf_errorcall( R_NilValue, "`chr_transform` must return a character vector." ); } R_len_t x_size = vec_size(x); R_len_t out_size = vec_size(out); if (x_size != out_size) { Rf_errorcall( R_NilValue, "`chr_transform` must return a vector of the same length (%i, not %i).", x_size, out_size ); } UNPROTECT(3); return out; } // ----------------------------------------------------------------------------- static SEXP df_apply_transform(SEXP x, SEXP chr_transform) { const r_ssize n_cols = r_length(x); const SEXP* v_x = VECTOR_PTR_RO(x); r_ssize i = 0; for (; i < n_cols; ++i) { SEXP col = v_x[i]; if (vec_proxy_typeof(col) == vctrs_type_character) { break; } } if (i == n_cols) { // No character columns return x; } SEXP out = PROTECT(r_clone_referenced(x)); for (; i < n_cols; ++i) { SEXP col = v_x[i]; if (vec_proxy_typeof(col) != vctrs_type_character) { continue; } col = chr_apply_transform(col, chr_transform); SET_VECTOR_ELT(out, i, col); } UNPROTECT(1); return out; } vctrs/src/size-common.c0000644000176200001440000000506714042540502014612 0ustar liggesusers#include "vctrs.h" #include "utils.h" // [[ register(external = TRUE) ]] SEXP vctrs_size_common(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP size = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); if (size != R_NilValue) { R_len_t out = size_validate(size, ".size"); UNPROTECT(1); return r_int(out); } SEXP absent = PROTECT(Rf_eval(CAR(args), env)); if (absent != R_NilValue && (TYPEOF(absent) != INTSXP || Rf_length(absent) != 1)) { Rf_errorcall(R_NilValue, "`.absent` must be a single integer."); } SEXP xs = PROTECT(rlang_env_dots_list(env)); R_len_t common = vec_size_common(xs, -1); SEXP out; if (common < 0) { if (absent == R_NilValue) { Rf_errorcall(R_NilValue, "`...` is empty, and no `.absent` value was supplied."); } out = absent; } else { out = r_int(common); } UNPROTECT(3); return out; } static SEXP vctrs_size2_common(SEXP x, SEXP y, struct counters* counters, void* data); // [[ include("vctrs.h") ]] R_len_t vec_size_common(SEXP xs, R_len_t absent) { SEXP common = PROTECT(reduce(R_NilValue, args_empty, xs, &vctrs_size2_common, NULL)); R_len_t out; if (common == R_NilValue) { out = absent; } else { out = vec_size(common); } UNPROTECT(1); return out; } static SEXP vctrs_size2_common(SEXP x, SEXP y, struct counters* counters, void* data) { if (x == R_NilValue) { counters_shift(counters); return y; } if (y == R_NilValue) { return x; } R_len_t nx = vec_size(x); R_len_t ny = vec_size(y); if (nx == ny) { return x; } if (nx == 1) { counters_shift(counters); return y; } if (ny == 1) { return x; } stop_incompatible_size(x, y, nx, ny, counters->curr_arg, counters->next_arg); } // [[ register(external = TRUE) ]] SEXP vctrs_recycle_common(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP size = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); R_len_t common; SEXP xs = PROTECT(rlang_env_dots_list(env)); if (size != R_NilValue) { common = size_validate(size, ".size"); } else { common = vec_size_common(xs, -1); } SEXP out = PROTECT(vec_recycle_common(xs, common)); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP vec_recycle_common(SEXP xs, R_len_t size) { if (size < 0) { return xs; } xs = PROTECT(r_clone_referenced(xs)); R_len_t n = vec_size(xs); SEXP elt; for (R_len_t i = 0; i < n; ++i) { elt = VECTOR_ELT(xs, i); SET_VECTOR_ELT(xs, i, vec_recycle(elt, size, args_empty)); } UNPROTECT(1); return xs; } vctrs/src/arg-counter.c0000644000176200001440000001250614042540502014574 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "arg.h" #include "arg-counter.h" void init_counters(struct counters* counters, SEXP names, struct vctrs_arg* curr_arg, struct counters* prev_box_counters, struct counters* next_box_counters) { counters->curr = 0; counters->next = 0; counters->names = names; counters->names_curr = 0; counters->names_next = 0; counters->curr_counter_data = new_counter_arg_data(&counters->curr, &counters->names, &counters->names_curr); counters->next_counter_data = new_counter_arg_data(&counters->next, &counters->names, &counters->names_next); counters->curr_counter = new_counter_arg(NULL, (void*) &counters->curr_counter_data); counters->next_counter = new_counter_arg(NULL, (void*) &counters->next_counter_data); counters->curr_arg = curr_arg; counters->next_arg = (struct vctrs_arg*) &counters->next_counter; counters->prev_box_counters = prev_box_counters; counters->next_box_counters = next_box_counters; } void init_next_box_counters(struct counters* counters, SEXP names) { SWAP(struct counters*, counters->prev_box_counters, counters->next_box_counters); struct counters* next = counters->next_box_counters; REPROTECT(names, next->names_pi); init_counters(next, names, counters->curr_arg, NULL, NULL); next->next = counters->next; } // Stack-based protection, should be called after `init_counters()` #define PROTECT_COUNTERS(counters, nprot) do { \ PROTECT_WITH_INDEX((counters)->names, &(counters)->names_pi); \ PROTECT_WITH_INDEX(R_NilValue, &(counters)->prev_box_counters->names_pi); \ PROTECT_WITH_INDEX(R_NilValue, &(counters)->next_box_counters->names_pi); \ *nprot += 3; \ } while(0) void counters_inc(struct counters* counters) { ++(counters->next); ++(counters->names_next); } /** * Swap counters so that the `next` counter (the one being increased * on iteration and representing the new input in the reduction) * becomes the current counter (the one representing the result so * far of the reduction). * * [[ include("arg-counter.h") ]] */ void counters_shift(struct counters* counters) { // Swap the counters data SWAP(void*, counters->curr_counter.data, counters->next_counter.data); SWAP(R_len_t*, counters->curr_counter_data.i, counters->next_counter_data.i); SWAP(R_len_t*, counters->curr_counter_data.names_i, counters->next_counter_data.names_i); // Update the handles to `vctrs_arg` counters->curr_arg = (struct vctrs_arg*) &counters->curr_counter; counters->next_arg = (struct vctrs_arg*) &counters->next_counter; // Update the current index counters->curr = counters->next; } // Reduce `impl` with argument counters SEXP reduce_impl(SEXP current, SEXP rest, struct counters* counters, bool spliced, SEXP (*impl)(SEXP current, SEXP next, struct counters* counters, void* data), void* data); SEXP reduce_splice_box(SEXP current, SEXP rest, struct counters* counters, SEXP (*impl)(SEXP current, SEXP next, struct counters* counters, void* data), void* data); // [[ include("arg-counter.h") ]] SEXP reduce(SEXP current, struct vctrs_arg* current_arg, SEXP rest, SEXP (*impl)(SEXP current, SEXP next, struct counters* counters, void* data), void* data) { // Store the box counters here as they might outlive their frame struct counters next_box_counters; struct counters prev_box_counters; struct counters counters; init_counters(&counters, r_names(rest), current_arg, &prev_box_counters, &next_box_counters); int nprot = 0; PROTECT_COUNTERS(&counters, &nprot); SEXP out = reduce_impl(current, rest, &counters, false, impl, data); UNPROTECT(nprot); return out; } SEXP reduce_impl(SEXP current, SEXP rest, struct counters* counters, bool spliced, SEXP (*impl)(SEXP current, SEXP next, struct counters* counters, void* data), void* data) { R_len_t n = Rf_length(rest); for (R_len_t i = 0; i < n; ++i, counters_inc(counters)) { PROTECT(current); SEXP next = VECTOR_ELT(rest, i); // Don't call `rlang_is_splice_box()` if we're already looking at a // spliced list because it's expensive if (spliced || !rlang_is_splice_box(next)) { current = impl(current, next, counters, data); } else { next = PROTECT(rlang_unbox(next)); current = reduce_splice_box(current, next, counters, impl, data); UNPROTECT(1); } UNPROTECT(1); } return current; } SEXP reduce_splice_box(SEXP current, SEXP rest, struct counters* counters, SEXP (*impl)(SEXP current, SEXP rest, struct counters* counters, void* data), void* data) { init_next_box_counters(counters, r_names(rest)); struct counters* box_counters = counters->next_box_counters; current = reduce_impl(current, rest, box_counters, true, impl, data); counters->curr_arg = box_counters->curr_arg; counters->next = box_counters->next; return current; } vctrs/src/version.c0000644000176200001440000000166114042546227014045 0ustar liggesusers#define R_NO_REMAP #include const char* vctrs_version = "0.3.8"; /** * This file records the expected package version in the shared * library (or DLL) of the package. This is useful to check that users * have properly installed your package. Installation issues where the * package is updated but the DLL isn't are common on Windows in * particular. To automatically check that the native library of the * package was properly installed: * * - Register the function below as a C callable under the name * "vctrs_linked_version". * * - Call `rlang::check_linked_version(pkg_name)` from your * `.onLoad()` hook. If you don't depend on rlang copy the * compat-linked-version.R file from the rlang repository to your R * folder. Find it at * */ // [[ register() ]] SEXP vctrs_linked_version() { return Rf_mkString(vctrs_version); } vctrs/src/subscript.c0000644000176200001440000002317714042540502014372 0ustar liggesusers#include "vctrs.h" #include "ptype2.h" #include "subscript.h" #include "utils.h" #include "dim.h" static SEXP fns_cnd_body_subscript_dim = NULL; static SEXP new_error_subscript_type(SEXP subscript, const struct subscript_opts* opts, SEXP body, SEXP parent); static enum subscript_type_action parse_subscript_arg_type(SEXP x, const char* kind); static SEXP obj_cast_subscript(SEXP subscript, const struct subscript_opts* opts, ERR* err); static SEXP dbl_cast_subscript(SEXP subscript, const struct subscript_opts* opts, ERR* err); SEXP vec_as_subscript_opts(SEXP subscript, const struct subscript_opts* opts, ERR* err) { if (vec_dim_n(subscript) != 1) { *err = new_error_subscript_type(subscript, opts, fns_cnd_body_subscript_dim, R_NilValue); return R_NilValue; } PROTECT_INDEX subscript_pi; PROTECT_WITH_INDEX(subscript, &subscript_pi); SEXP orig_names = PROTECT(r_names(subscript)); switch (TYPEOF(subscript)) { case NILSXP: if (opts->numeric == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = vctrs_shared_empty_int; } break; case SYMSXP: if (opts->character == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = rlang_sym_as_character(subscript); } break; default: break; } REPROTECT(subscript, subscript_pi); if (!vec_is_vector(subscript)) { *err = new_error_subscript_type(subscript, opts, R_NilValue, R_NilValue); UNPROTECT(2); return R_NilValue; } if (OBJECT(subscript)) { subscript = obj_cast_subscript(subscript, opts, err); } else if (TYPEOF(subscript) == REALSXP) { subscript = dbl_cast_subscript(subscript, opts, err); } REPROTECT(subscript, subscript_pi); if (*err) { UNPROTECT(2); return R_NilValue; } // Coerce unspecified vectors to integer only if logical indices are // not allowed if (opts->logical == SUBSCRIPT_TYPE_ACTION_ERROR && vec_is_unspecified(subscript)) { struct vctrs_arg* arg = opts->subscript_arg; if (opts->numeric == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = vec_cast(subscript, vctrs_shared_empty_int, arg, NULL); } else { subscript = vec_cast(subscript, vctrs_shared_empty_chr, arg, NULL); } } REPROTECT(subscript, subscript_pi); enum subscript_type_action action = SUBSCRIPT_TYPE_ACTION_ERROR; switch (TYPEOF(subscript)) { case LGLSXP: action = opts->logical; break; case INTSXP: action = opts->numeric; break; case STRSXP: action = opts->character; break; default: break; } if (action == SUBSCRIPT_TYPE_ACTION_ERROR) { *err = new_error_subscript_type(subscript, opts, R_NilValue, R_NilValue); UNPROTECT(2); return R_NilValue; } if (orig_names != R_NilValue) { // FIXME: Handle names in cast methods subscript = r_clone_referenced(subscript); REPROTECT(subscript, subscript_pi); r_poke_names(subscript, orig_names); } UNPROTECT(2); return subscript; } static SEXP obj_cast_subscript(SEXP subscript, const struct subscript_opts* opts, ERR* err) { int dir = 0; struct ptype2_opts ptype2_opts = { .x = subscript, .y = R_NilValue, .x_arg = opts->subscript_arg }; struct cast_opts cast_opts = { .x = subscript, .to = R_NilValue, .x_arg = opts->subscript_arg }; ptype2_opts.y = cast_opts.to = vctrs_shared_empty_lgl; if (vec_is_coercible(&ptype2_opts, &dir)) { return vec_cast_opts(&cast_opts); } ptype2_opts.y = cast_opts.to = vctrs_shared_empty_int; if (vec_is_coercible(&ptype2_opts, &dir)) { return vec_cast_opts(&cast_opts); } ptype2_opts.y = cast_opts.to = vctrs_shared_empty_chr; if (vec_is_coercible(&ptype2_opts, &dir)) { return vec_cast_opts(&cast_opts); } *err = new_error_subscript_type(subscript, opts, R_NilValue, R_NilValue); return R_NilValue; } static SEXP dbl_cast_subscript_fallback(SEXP subscript, const struct subscript_opts* opts, ERR* err); static SEXP syms_new_dbl_cast_subscript_body = NULL; static SEXP syms_lossy_err = NULL; static SEXP dbl_cast_subscript(SEXP subscript, const struct subscript_opts* opts, ERR* err) { double* p = REAL(subscript); R_len_t n = Rf_length(subscript); SEXP out = PROTECT(Rf_allocVector(INTSXP, n)); int* out_p = INTEGER(out); for (R_len_t i = 0; i < n; ++i) { double elt = p[i]; // Generally `(int) nan` results in the correct `NA_INTEGER` value, // but this is not guaranteed, so we have to explicitly check for it. // https://stackoverflow.com/questions/10366485/problems-casting-nan-floats-to-int if (isnan(elt)) { out_p[i] = NA_INTEGER; continue; } if (!isfinite(elt) || elt <= INT_MIN || elt > INT_MAX) { // Once we throw lazy errors from the cast method, we should // throw the error here as well UNPROTECT(1); return dbl_cast_subscript_fallback(subscript, opts, err); } int elt_int = (int) elt; if (elt != elt_int) { UNPROTECT(1); return dbl_cast_subscript_fallback(subscript, opts, err); } out_p[i] = elt_int; } UNPROTECT(1); return out; } static SEXP dbl_cast_subscript_fallback(SEXP subscript, const struct subscript_opts* opts, ERR* err) { const struct cast_opts cast_opts = { .x = subscript, .to = vctrs_shared_empty_int, opts->subscript_arg }; SEXP out = PROTECT(vec_cast_e(&cast_opts, err)); if (*err) { SEXP err_obj = PROTECT(*err); SEXP body = PROTECT(vctrs_eval_mask1(syms_new_dbl_cast_subscript_body, syms_lossy_err, err_obj)); *err = new_error_subscript_type(subscript, opts, body, err_obj); UNPROTECT(3); return R_NilValue; } UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_as_subscript_result(SEXP subscript, SEXP logical, SEXP numeric, SEXP character, SEXP arg_) { struct vctrs_arg arg = vec_as_arg(arg_); struct subscript_opts opts = { .logical = parse_subscript_arg_type(logical, "logical"), .numeric = parse_subscript_arg_type(numeric, "numeric"), .character = parse_subscript_arg_type(character, "character"), .subscript_arg = &arg }; ERR err = NULL; SEXP out = vec_as_subscript_opts(subscript, &opts, &err); PROTECT2(out, err); out = r_result(out, err); UNPROTECT(2); return out; } // [[ register() ]] SEXP vctrs_as_subscript(SEXP subscript, SEXP logical, SEXP numeric, SEXP character, SEXP arg_) { struct vctrs_arg arg = vec_as_arg(arg_); struct subscript_opts opts = { .logical = parse_subscript_arg_type(logical, "logical"), .numeric = parse_subscript_arg_type(numeric, "numeric"), .character = parse_subscript_arg_type(character, "character"), .subscript_arg = &arg }; ERR err = NULL; SEXP out = vec_as_subscript_opts(subscript, &opts, &err); PROTECT2(out, err); out = r_result_get(out, err); UNPROTECT(2); return out; } // Arguments ------------------------------------------------------------------- static void stop_subscript_arg_type(const char* kind) { Rf_errorcall(R_NilValue, "`%s` must be one of \"cast\" or \"error\".", kind); } static enum subscript_type_action parse_subscript_arg_type(SEXP x, const char* kind) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_subscript_arg_type(kind); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "cast")) return SUBSCRIPT_TYPE_ACTION_CAST; if (!strcmp(str, "error")) return SUBSCRIPT_TYPE_ACTION_ERROR; stop_subscript_arg_type(kind); never_reached("parse_subscript_arg_type"); } // Conditions ------------------------------------------------------------------ static SEXP syms_new_error_subscript_type = NULL; static SEXP new_error_subscript_type(SEXP subscript, const struct subscript_opts* opts, SEXP body, SEXP parent) { SEXP logical = subscript_type_action_chr(opts->logical); SEXP numeric = subscript_type_action_chr(opts->numeric); SEXP character = subscript_type_action_chr(opts->character); subscript = PROTECT(expr_protect(subscript)); SEXP subscript_arg = PROTECT(vctrs_arg(opts->subscript_arg)); SEXP syms[9] = { syms_i, syms_subscript_arg, syms_subscript_action, syms_logical, syms_numeric, syms_character, syms_body, syms_parent, NULL }; SEXP args[9] = { subscript, subscript_arg, get_opts_action(opts), logical, numeric, character, body, parent, NULL }; SEXP call = PROTECT(r_call(syms_new_error_subscript_type, syms, args)); SEXP out = Rf_eval(call, vctrs_ns_env); UNPROTECT(3); return out; } void vctrs_init_subscript(SEXP ns) { syms_new_error_subscript_type = Rf_install("new_error_subscript_type"); syms_new_dbl_cast_subscript_body = Rf_install("new_cnd_bullets_subscript_lossy_cast"); syms_lossy_err = Rf_install("lossy_err"); fns_cnd_body_subscript_dim = Rf_eval(Rf_install("cnd_body_subscript_dim"), ns); } vctrs/src/c.h0000644000176200001440000000111413715253045012577 0ustar liggesusers#ifndef VCTRS_C_H #define VCTRS_C_H #include "utils.h" SEXP vec_c_opts(SEXP xs, SEXP ptype, SEXP name_spec, const struct name_repair_opts* name_repair, const struct fallback_opts* fallback_opts); SEXP vec_c_fallback_invoke(SEXP xs, SEXP name_spec); SEXP vec_c_fallback(SEXP ptype, SEXP xs, SEXP name_spec, const struct name_repair_opts* name_repair); bool needs_vec_c_fallback(SEXP ptype); bool needs_vec_c_homogeneous_fallback(SEXP xs, SEXP ptype); #endif vctrs/src/dictionary.c0000644000176200001440000003562214042540502014517 0ustar liggesusers#include "vctrs.h" #include "dictionary.h" #include "translate.h" #include "equal.h" #include "hash.h" #include "ptype2.h" #include "utils.h" // Initialised at load time struct vctrs_arg args_needles; struct vctrs_arg args_haystack; // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 int32_t ceil2(int32_t x) { x--; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; x++; return x; } // Dictonary object ------------------------------------------------------------ static struct dictionary* new_dictionary_opts(SEXP x, struct dictionary_opts* opts); // Dictionaries must be protected in consistent stack order with // `PROTECT_DICT()` struct dictionary* new_dictionary(SEXP x) { struct dictionary_opts opts = { .partial = false, .na_equal = true }; return new_dictionary_opts(x, &opts); } struct dictionary* new_dictionary_partial(SEXP x) { struct dictionary_opts opts = { .partial = true, .na_equal = true }; return new_dictionary_opts(x, &opts); } static struct dictionary* new_dictionary_params(SEXP x, bool partial, bool na_equal) { struct dictionary_opts opts; opts.partial = partial; opts.na_equal = na_equal; return new_dictionary_opts(x, &opts); } static struct dictionary* new_dictionary_opts(SEXP x, struct dictionary_opts* opts) { int nprot = 0; SEXP out = PROTECT_N(Rf_allocVector(RAWSXP, sizeof(struct dictionary)), &nprot); struct dictionary* d = (struct dictionary*) RAW(out); d->protect = out; enum vctrs_type type = vec_proxy_typeof(x); struct poly_vec* p_poly_vec = new_poly_vec(x, type); PROTECT_POLY_VEC(p_poly_vec, &nprot); d->p_poly_vec = p_poly_vec; d->p_equal_na_equal = new_poly_p_equal_na_equal(type); d->p_is_missing = new_poly_p_is_missing(type); d->used = 0; if (opts->partial) { d->key = NULL; d->size = 0; } else { // assume worst case, that every value is distinct, aiming for a load factor // of at most 77%. We round up to power of 2 to ensure quadratic probing // strategy works. // Rprintf("size: %i\n", size); R_len_t size = ceil2(vec_size(x) / 0.77); size = (size < 16) ? 16 : size; d->key = (R_len_t*) R_alloc(size, sizeof(R_len_t)); memset(d->key, DICT_EMPTY, size * sizeof(R_len_t)); d->size = size; } R_len_t n = vec_size(x); if (n) { d->hash = (uint32_t*) R_alloc(n, sizeof(uint32_t)); if (!(d->hash)) { Rf_errorcall(R_NilValue, "Can't allocate hash lookup table. Please free memory."); } memset(d->hash, 0, n * sizeof(R_len_t)); hash_fill(d->hash, n, x, opts->na_equal); } else { d->hash = NULL; } UNPROTECT(nprot); return d; } // Use hash from `x` but value from `d`. `x` does not need a full // initialisation of the key vector and can be created with // `new_dictionary_partial()`. uint32_t dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { uint32_t hash = x->hash[i]; const void* p_d_vec = d->p_poly_vec->p_vec; const void* p_x_vec = x->p_poly_vec->p_vec; // Quadratic probing: will try every slot if d->size is power of 2 // http://research.cs.vt.edu/AVresearch/hashing/quadratic.php for (uint32_t k = 0; k < d->size; ++k) { uint32_t probe = (hash + k * (k + 1) / 2) & (d->size - 1); // Rprintf("Probe: %i\n", probe); // If we circled back to start, dictionary is full if (k > 1 && probe == hash) { break; } // Check for unused slot R_len_t idx = d->key[probe]; if (idx == DICT_EMPTY) { return probe; } // Check for same value as there might be a collision if (d->p_equal_na_equal(p_d_vec, idx, p_x_vec, i)) { return probe; } // Collision. next iteration will find another spot using // quadratic probing. } stop_internal("dict_hash_with", "Dictionary is full."); } uint32_t dict_hash_scalar(struct dictionary* d, R_len_t i) { return dict_hash_with(d, d, i); } bool dict_is_missing(struct dictionary* d, R_len_t i) { return d->hash[i] == HASH_MISSING && d->p_is_missing(d->p_poly_vec->p_vec, i); } void dict_put(struct dictionary* d, uint32_t hash, R_len_t i) { d->key[hash] = i; d->used++; } // R interface ----------------------------------------------------------------- // TODO: rename to match R function names // TODO: separate out into individual files SEXP vctrs_unique_loc(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); struct growable g = new_growable(INTSXP, 256); PROTECT_GROWABLE(&g, &nprot); for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); growable_push_int(&g, i + 1); } } SEXP out = growable_values(&g); UNPROTECT(nprot); return out; } // [[ include("vctrs.h") ]] SEXP vec_unique(SEXP x) { SEXP index = PROTECT(vctrs_unique_loc(x)); SEXP out = vec_slice(x, index); UNPROTECT(1); return out; } SEXP vctrs_duplicated_any(SEXP x) { bool out = duplicated_any(x); return Rf_ScalarLogical(out); } // [[ include("vctrs.h") ]] bool duplicated_any(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); bool out = false; for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } else { out = true; break; } } UNPROTECT(nprot); return out; } SEXP vctrs_n_distinct(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } } UNPROTECT(nprot); return Rf_ScalarInteger(d->used); } SEXP vctrs_id(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_out = INTEGER(out); for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } p_out[i] = d->key[hash] + 1; } UNPROTECT(nprot); return out; } // [[ register() ]] SEXP vctrs_match(SEXP needles, SEXP haystack, SEXP na_equal, SEXP needles_arg_, SEXP haystack_arg_) { struct vctrs_arg needles_arg = vec_as_arg(needles_arg_); struct vctrs_arg haystack_arg = vec_as_arg(haystack_arg_); return vec_match_params(needles, haystack, r_bool_as_int(na_equal), &needles_arg, &haystack_arg); } static inline void vec_match_loop(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle); static inline void vec_match_loop_propagate(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle); SEXP vec_match_params(SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg) { int nprot = 0; int _; SEXP type = vec_ptype2_params(needles, haystack, needles_arg, haystack_arg, DF_FALLBACK_quiet, &_); PROTECT_N(type, &nprot); needles = vec_cast_params(needles, type, needles_arg, args_empty, DF_FALLBACK_quiet, S3_FALLBACK_false); PROTECT_N(needles, &nprot); haystack = vec_cast_params(haystack, type, haystack_arg, args_empty, DF_FALLBACK_quiet, S3_FALLBACK_false); PROTECT_N(haystack, &nprot); needles = PROTECT_N(vec_proxy_equal(needles), &nprot); needles = PROTECT_N(vec_normalize_encoding(needles), &nprot); haystack = PROTECT_N(vec_proxy_equal(haystack), &nprot); haystack = PROTECT_N(vec_normalize_encoding(haystack), &nprot); R_len_t n_haystack = vec_size(haystack); R_len_t n_needle = vec_size(needles); struct dictionary* d = new_dictionary_params(haystack, false, na_equal); PROTECT_DICT(d, &nprot); // Load dictionary with haystack for (int i = 0; i < n_haystack; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } } struct dictionary* d_needles = new_dictionary_params(needles, true, na_equal); PROTECT_DICT(d_needles, &nprot); // Locate needles SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n_needle), &nprot); int* p_out = INTEGER(out); if (na_equal) { vec_match_loop(p_out, d, d_needles, n_needle); } else { vec_match_loop_propagate(p_out, d, d_needles, n_needle); } UNPROTECT(nprot); return out; } static inline void vec_match_loop(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle) { for (R_len_t i = 0; i < n_needle; ++i) { uint32_t hash = dict_hash_with(d, d_needles, i); if (d->key[hash] == DICT_EMPTY) { // TODO: Return `no_match` instead p_out[i] = NA_INTEGER; } else { p_out[i] = d->key[hash] + 1; } } } static inline void vec_match_loop_propagate(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle) { for (R_len_t i = 0; i < n_needle; ++i) { if (dict_is_missing(d_needles, i)) { p_out[i] = NA_INTEGER; continue; } uint32_t hash = dict_hash_with(d, d_needles, i); if (d->key[hash] == DICT_EMPTY) { // TODO: Return `no_match` instead p_out[i] = NA_INTEGER; } else { p_out[i] = d->key[hash] + 1; } } } // [[ register() ]] SEXP vctrs_in(SEXP needles, SEXP haystack, SEXP na_equal_, SEXP needles_arg_, SEXP haystack_arg_) { int nprot = 0; bool na_equal = r_bool_as_int(na_equal_); int _; struct vctrs_arg needles_arg = vec_as_arg(needles_arg_); struct vctrs_arg haystack_arg = vec_as_arg(haystack_arg_); SEXP type = vec_ptype2_params(needles, haystack, &needles_arg, &haystack_arg, DF_FALLBACK_quiet, &_); PROTECT_N(type, &nprot); needles = vec_cast_params(needles, type, &needles_arg, args_empty, DF_FALLBACK_quiet, S3_FALLBACK_false); PROTECT_N(needles, &nprot); haystack = vec_cast_params(haystack, type, &haystack_arg, args_empty, DF_FALLBACK_quiet, S3_FALLBACK_false); PROTECT_N(haystack, &nprot); needles = PROTECT_N(vec_proxy_equal(needles), &nprot); needles = PROTECT_N(vec_normalize_encoding(needles), &nprot); haystack = PROTECT_N(vec_proxy_equal(haystack), &nprot); haystack = PROTECT_N(vec_normalize_encoding(haystack), &nprot); R_len_t n_haystack = vec_size(haystack); R_len_t n_needle = vec_size(needles); struct dictionary* d = new_dictionary_params(haystack, false, na_equal); PROTECT_DICT(d, &nprot); // Load dictionary with haystack for (int i = 0; i < n_haystack; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } } struct dictionary* d_needles = new_dictionary_params(needles, true, na_equal); PROTECT_DICT(d_needles, &nprot); // Locate needles SEXP out = PROTECT_N(Rf_allocVector(LGLSXP, n_needle), &nprot); int* p_out = LOGICAL(out); bool propagate = !na_equal; for (int i = 0; i < n_needle; ++i) { if (propagate && dict_is_missing(d_needles, i)) { p_out[i] = NA_LOGICAL; } else { uint32_t hash = dict_hash_with(d, d_needles, i); p_out[i] = (d->key[hash] != DICT_EMPTY); } } UNPROTECT(nprot); return out; } SEXP vctrs_count(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP val = PROTECT_N(Rf_allocVector(INTSXP, d->size), &nprot); int* p_val = INTEGER(val); for (int i = 0; i < n; ++i) { int32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); p_val[hash] = 0; } p_val[hash]++; } // Create output SEXP out_key = PROTECT_N(Rf_allocVector(INTSXP, d->used), &nprot); SEXP out_val = PROTECT_N(Rf_allocVector(INTSXP, d->used), &nprot); int* p_out_key = INTEGER(out_key); int* p_out_val = INTEGER(out_val); int i = 0; for (int hash = 0; hash < d->size; ++hash) { if (d->key[hash] == DICT_EMPTY) continue; p_out_key[i] = d->key[hash] + 1; p_out_val[i] = p_val[hash]; i++; } SEXP out = PROTECT_N(Rf_allocVector(VECSXP, 2), &nprot); SET_VECTOR_ELT(out, 0, out_key); SET_VECTOR_ELT(out, 1, out_val); SEXP names = PROTECT_N(Rf_allocVector(STRSXP, 2), &nprot); SET_STRING_ELT(names, 0, Rf_mkChar("key")); SET_STRING_ELT(names, 1, Rf_mkChar("val")); Rf_setAttrib(out, R_NamesSymbol, names); UNPROTECT(nprot); return out; } SEXP vctrs_duplicated(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP val = PROTECT_N(Rf_allocVector(INTSXP, d->size), &nprot); int* p_val = INTEGER(val); for (int i = 0; i < n; ++i) { int32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); p_val[hash] = 0; } p_val[hash]++; } // Create output SEXP out = PROTECT_N(Rf_allocVector(LGLSXP, n), &nprot); int* p_out = LOGICAL(out); for (int i = 0; i < n; ++i) { int32_t hash = dict_hash_scalar(d, i); p_out[i] = p_val[hash] != 1; } UNPROTECT(nprot); return out; } void vctrs_init_dictionary(SEXP ns) { args_needles = new_wrapper_arg(NULL, "needles"); args_haystack = new_wrapper_arg(NULL, "haystack"); } vctrs/src/strides.h0000644000176200001440000001316013650511520014027 0ustar liggesusers#ifndef VCTRS_STRIDES_H #define VCTRS_STRIDES_H #include "vctrs.h" #include "utils.h" #include "dim.h" /* * Array slicing works by treating the array as a 1D structure, and transforming * the `index` passed from R into a series of 1D indices that are used to * extract elements from `x` into the new result. * * Strides represent the offset between elements in the same dimension. For * a (2, 2, 2) array, the strides would be [1, 2, 4]. This means that if you * flattened this 3D array to 1D in a column major order, there is 1 space * between row elements, 2 spaces between column elements and 4 spaces between * elements in the third dimension. In practice, we only need the shape strides * since the first stride is always 1, so `vec_strides()` only returns the shape * strides. Strides are computed as a cumulative product of the `dim`, with an * initial value of `1`, this is what `vec_strides()` does. * * Using the strides, any array index can be converted to a 1D index. * This is what `vec_strided_loc()` does. In a (2, 2, 2) array, to find * the location at the index [1, 0, 1] (C-based index, 2nd row, 1st col, * 2nd elem in 3rd dim) you compute a sum product between the array index * and the strides. So it looks like: * loc = 1 * (1) + 0 * (2) + 1 * (4) = 5 * (loc is a C-based index into `x`) * Since the first stride is always one, we leave it off and just do: * loc = 1 + 0 * (2) + 1 * (4) = 5 * * Example: * x = (3, 3, 2) array * vec_slice(x, 2:3) * * strides = [3, 9] // (shape strides) * * Indices are C-based * * | array index | x index | how? * ------------------------------------------------------- * out[0] | [1, 0, 0] | 1 | 1 + 0 * (3) + 0 * (9) * out[1] | [2, 0, 0] | 2 | * out[2] | [1, 1, 0] | 4 | * ... | ... | ... | * out[9] | [2, 1, 1] | 14 | 2 + 1 * (3) + 1 * (9) * out[10] | [1, 2, 1] | 16 | * out[11] | [2, 2, 1] | 17 | * ^ ^ ^ * | \/ * | |- shape_index * |- size_index */ struct strides_info { SEXP dim; const int* p_dim; SEXP strides; const int* p_strides; SEXP index; const int* p_index; SEXP steps; const int* p_steps; SEXP shape_index; int* p_shape_index; R_len_t dim_n; R_len_t shape_n; R_len_t index_n; R_len_t shape_elem_n; }; #define PROTECT_STRIDES_INFO(info, n) do { \ PROTECT((info)->dim); \ PROTECT((info)->strides); \ PROTECT((info)->index); \ PROTECT((info)->steps); \ PROTECT((info)->shape_index); \ *(n) += 5; \ } while(0) static inline SEXP vec_strides(const int* p_dim, const R_len_t shape_n) { SEXP strides = PROTECT(Rf_allocVector(INTSXP, shape_n)); int* p_strides = INTEGER(strides); int stride = 1; for (int i = 0; i < shape_n; ++i) { stride *= p_dim[i]; p_strides[i] = stride; } UNPROTECT(1); return strides; } static inline SEXP vec_steps(const int* p_index, const R_len_t index_n) { SEXP steps = PROTECT(Rf_allocVector(INTSXP, index_n)); int* p_steps = INTEGER(steps); // Indices come in 1-based int index_previous = 1; for (R_len_t i = 0; i < index_n; ++i) { const int index_current = p_index[i]; if (index_current == NA_INTEGER) { p_steps[i] = NA_INTEGER; continue; } p_steps[i] = index_current - index_previous; index_previous = index_current; } UNPROTECT(1); return steps; } static inline R_len_t vec_strided_loc(const int* p_shape_index, const int* p_strides, const R_len_t shape_n) { R_len_t loc = 0; for (R_len_t i = 0; i < shape_n; ++i) { loc += p_strides[i] * p_shape_index[i]; } return loc; } // Increment the `shape_index` value. This iterates through the array like: // [size, 0, 0] // [size, 1, 0] // [size, 0, 1] // [size, 1, 1] // ... static inline void vec_shape_index_increment(struct strides_info* p_info) { for (int j = 0; j < p_info->shape_n; ++j) { p_info->p_shape_index[j]++; if (p_info->p_shape_index[j] < p_info->p_dim[j + 1]) { break; } p_info->p_shape_index[j] = 0; } } static inline struct strides_info new_strides_info(SEXP x, SEXP index) { SEXP dim = PROTECT(vec_dim(x)); const int* p_dim = INTEGER_RO(dim); R_len_t dim_n = Rf_length(dim); R_len_t shape_n = dim_n - 1; R_len_t index_n = vec_subscript_size(index); SEXP strides = PROTECT(vec_strides(p_dim, shape_n)); const int* p_strides = INTEGER_RO(strides); const int* p_index = INTEGER_RO(index); // If using a compact rep/seq, the `steps` won't be used, but we still // need to put something in the struct SEXP steps; if (is_compact(index)) { steps = vctrs_shared_empty_int; } else { steps = vec_steps(p_index, index_n); } PROTECT(steps); const int* p_steps = INTEGER_RO(steps); // Initialize `shape_index` to the first element SEXP shape_index = PROTECT(Rf_allocVector(INTSXP, shape_n)); int* p_shape_index = INTEGER(shape_index); for (int i = 0; i < shape_n; ++i) { p_shape_index[i] = 0; } R_len_t shape_elem_n = 1; for (int i = 1; i < dim_n; ++i) { shape_elem_n *= p_dim[i]; } struct strides_info info = { .dim = dim, .p_dim = p_dim, .strides = strides, .p_strides = p_strides, .index = index, .p_index = p_index, .steps = steps, .p_steps = p_steps, .shape_index = shape_index, .p_shape_index = p_shape_index, .dim_n = dim_n, .shape_n = shape_n, .index_n = index_n, .shape_elem_n = shape_elem_n }; UNPROTECT(4); return info; } #endif vctrs/src/poly-op.h0000644000176200001440000000131213753021253013750 0ustar liggesusers#ifndef VCTRS_POLY_OP #define VCTRS_POLY_OP #include "vctrs.h" typedef int (*poly_binary_int_fn_ptr)(const void* x, r_ssize i, const void* y, r_ssize j); poly_binary_int_fn_ptr new_poly_p_equal_na_equal(enum vctrs_type type); typedef bool (*poly_unary_bool_fn_ptr)(const void* x, r_ssize i); poly_unary_bool_fn_ptr new_poly_p_is_missing(enum vctrs_type type); struct poly_vec { SEXP vec; const void* p_vec; SEXP self; }; struct poly_vec* new_poly_vec(SEXP proxy, enum vctrs_type type); #define PROTECT_POLY_VEC(p_poly_vec, p_n) do { \ PROTECT((p_poly_vec)->vec); \ PROTECT((p_poly_vec)->self); \ *(p_n) += 2; \ } while(0) #endif vctrs/src/slice.c0000644000176200001440000004346714042540502013457 0ustar liggesusers#include "vctrs.h" #include "altrep.h" #include "slice.h" #include "subscript-loc.h" #include "type-data-frame.h" #include "owned.h" #include "utils.h" #include "dim.h" // Initialised at load time SEXP syms_vec_slice_fallback = NULL; SEXP fns_vec_slice_fallback = NULL; SEXP syms_vec_slice_fallback_integer64 = NULL; SEXP fns_vec_slice_fallback_integer64 = NULL; SEXP syms_vec_slice_dispatch_integer64 = NULL; SEXP fns_vec_slice_dispatch_integer64 = NULL; #define SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ const CTYPE* data = CONST_DEREF(x); \ R_len_t n = Rf_length(subscript); \ int* subscript_data = INTEGER(subscript); \ \ SEXP out = PROTECT(Rf_allocVector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ for (R_len_t i = 0; i < n; ++i, ++subscript_data, ++out_data) { \ int j = *subscript_data; \ *out_data = (j == NA_INTEGER) ? NA_VALUE : data[j - 1]; \ } \ \ UNPROTECT(1); \ return out #define SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ const CTYPE* data = CONST_DEREF(x); \ \ int* subscript_data = INTEGER(subscript); \ R_len_t j = subscript_data[0]; \ R_len_t n = subscript_data[1]; \ \ SEXP out = PROTECT(Rf_allocVector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ CTYPE elt = (j == NA_INTEGER) ? NA_VALUE : data[j - 1]; \ \ for (R_len_t i = 0; i < n; ++i, ++out_data) { \ *out_data = elt; \ } \ \ UNPROTECT(1); \ return out #define SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF) \ int* subscript_data = INTEGER(subscript); \ R_len_t start = subscript_data[0]; \ R_len_t n = subscript_data[1]; \ R_len_t step = subscript_data[2]; \ \ const CTYPE* data = CONST_DEREF(x) + start; \ \ SEXP out = PROTECT(Rf_allocVector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ for (int i = 0; i < n; ++i, ++out_data, data += step) { \ *out_data = *data; \ } \ \ UNPROTECT(1); \ return out #define SLICE(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ if (ALTREP(x)) { \ SEXP alt_subscript = PROTECT(compact_materialize(subscript)); \ SEXP out = ALTVEC_EXTRACT_SUBSET_PROXY(x, alt_subscript, R_NilValue); \ UNPROTECT(1); \ if (out != NULL) { \ return out; \ } \ } \ if (is_compact_rep(subscript)) { \ SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \ } else { \ SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } static SEXP lgl_slice(SEXP x, SEXP subscript) { SLICE(LGLSXP, int, LOGICAL, LOGICAL_RO, NA_LOGICAL); } static SEXP int_slice(SEXP x, SEXP subscript) { SLICE(INTSXP, int, INTEGER, INTEGER_RO, NA_INTEGER); } static SEXP dbl_slice(SEXP x, SEXP subscript) { SLICE(REALSXP, double, REAL, REAL_RO, NA_REAL); } static SEXP cpl_slice(SEXP x, SEXP subscript) { SLICE(CPLXSXP, Rcomplex, COMPLEX, COMPLEX_RO, vctrs_shared_na_cpl); } static SEXP chr_slice(SEXP x, SEXP subscript) { SLICE(STRSXP, SEXP, STRING_PTR, STRING_PTR_RO, NA_STRING); } static SEXP raw_slice(SEXP x, SEXP subscript) { SLICE(RAWSXP, Rbyte, RAW, RAW_RO, 0); } #undef SLICE #undef SLICE_COMPACT_REP #undef SLICE_COMPACT_SEQ #undef SLICE_SUBSCRIPT #define SLICE_BARRIER_SUBSCRIPT(RTYPE, GET, SET, NA_VALUE) \ R_len_t n = Rf_length(subscript); \ int* subscript_data = INTEGER(subscript); \ \ SEXP out = PROTECT(Rf_allocVector(RTYPE, n)); \ \ for (R_len_t i = 0; i < n; ++i, ++subscript_data) { \ int j = *subscript_data; \ SEXP elt = (j == NA_INTEGER) ? NA_VALUE : GET(x, j - 1); \ SET(out, i, elt); \ } \ \ UNPROTECT(1); \ return out #define SLICE_BARRIER_COMPACT_REP(RTYPE, GET, SET, NA_VALUE) \ int* subscript_data = INTEGER(subscript); \ R_len_t j = subscript_data[0]; \ R_len_t n = subscript_data[1]; \ \ SEXP out = PROTECT(Rf_allocVector(RTYPE, n)); \ \ SEXP elt = (j == NA_INTEGER) ? NA_VALUE : GET(x, j - 1); \ \ for (R_len_t i = 0; i < n; ++i) { \ SET(out, i, elt); \ } \ \ UNPROTECT(1); \ return out #define SLICE_BARRIER_COMPACT_SEQ(RTYPE, GET, SET) \ int* subscript_data = INTEGER(subscript); \ R_len_t start = subscript_data[0]; \ R_len_t n = subscript_data[1]; \ R_len_t step = subscript_data[2]; \ \ SEXP out = PROTECT(Rf_allocVector(RTYPE, n)); \ \ for (R_len_t i = 0; i < n; ++i, start += step) { \ SET(out, i, GET(x, start)); \ } \ \ UNPROTECT(1); \ return out #define SLICE_BARRIER(RTYPE, GET, SET, NA_VALUE) \ if (is_compact_rep(subscript)) { \ SLICE_BARRIER_COMPACT_REP(RTYPE, GET, SET, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_BARRIER_COMPACT_SEQ(RTYPE, GET, SET); \ } else { \ SLICE_BARRIER_SUBSCRIPT(RTYPE, GET, SET, NA_VALUE); \ } static SEXP list_slice(SEXP x, SEXP subscript) { SLICE_BARRIER(VECSXP, VECTOR_ELT, SET_VECTOR_ELT, R_NilValue); } #undef SLICE_BARRIER #undef SLICE_BARRIER_COMPACT_REP #undef SLICE_BARRIER_COMPACT_SEQ #undef SLICE_BARRIER_SUBSCRIPT static SEXP df_slice(SEXP x, SEXP subscript) { R_len_t n = Rf_length(x); R_len_t size = df_size(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); // FIXME: Should that be restored? SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, nms); UNPROTECT(1); for (R_len_t i = 0; i < n; ++i) { SEXP elt = VECTOR_ELT(x, i); if (vec_size(elt) != size) { stop_internal("df_slice", "Columns must match the data frame size."); } SEXP sliced = vec_slice_impl(elt, subscript); SET_VECTOR_ELT(out, i, sliced); } SEXP row_nms = PROTECT(df_rownames(x)); if (TYPEOF(row_nms) == STRSXP) { row_nms = PROTECT(slice_rownames(row_nms, subscript)); Rf_setAttrib(out, R_RowNamesSymbol, row_nms); UNPROTECT(1); } UNPROTECT(1); UNPROTECT(1); return out; } SEXP vec_slice_fallback(SEXP x, SEXP subscript) { // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. if (is_integer64(x)) { return vctrs_dispatch2(syms_vec_slice_fallback_integer64, fns_vec_slice_fallback_integer64, syms_x, x, syms_i, subscript); } return vctrs_dispatch2(syms_vec_slice_fallback, fns_vec_slice_fallback, syms_x, x, syms_i, subscript); } static SEXP vec_slice_dispatch(SEXP x, SEXP subscript) { // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. if (is_integer64(x)) { return vctrs_dispatch2(syms_vec_slice_dispatch_integer64, fns_vec_slice_dispatch_integer64, syms_x, x, syms_i, subscript); } return vctrs_dispatch2(syms_bracket, fns_bracket, syms_x, x, syms_i, subscript); } bool vec_requires_fallback(SEXP x, struct vctrs_proxy_info info) { return OBJECT(x) && info.proxy_method == R_NilValue && info.type != vctrs_type_dataframe; } SEXP vec_slice_base(enum vctrs_type type, SEXP x, SEXP subscript) { switch (type) { case vctrs_type_logical: return lgl_slice(x, subscript); case vctrs_type_integer: return int_slice(x, subscript); case vctrs_type_double: return dbl_slice(x, subscript); case vctrs_type_complex: return cpl_slice(x, subscript); case vctrs_type_character: return chr_slice(x, subscript); case vctrs_type_raw: return raw_slice(x, subscript); case vctrs_type_list: return list_slice(x, subscript); default: stop_unimplemented_vctrs_type("vec_slice_base", type); } } // Replace any `NA` name caused by `NA` subscript with the empty // string. It's ok mutate the names vector since it is freshly // created, but we make an additional check for that anyways // (and the empty string is persistently protected anyway). static void repair_na_names(SEXP names, SEXP subscript) { if (!NO_REFERENCES(names)) { stop_internal("repair_na_names", "`names` can't be referenced."); } // No possible way to have `NA_integer_` in a compact seq if (is_compact_seq(subscript)) { return; } R_len_t n = Rf_length(names); if (n == 0) { return; } const int* p_subscript = INTEGER_RO(subscript); // Special handling for a compact_rep object with repeated `NA` if (is_compact_rep(subscript)) { if (p_subscript[0] != NA_INTEGER) { return; } for (R_len_t i = 0; i < n; ++i) { SET_STRING_ELT(names, i, strings_empty); } return; } for (R_len_t i = 0; i < n; ++i) { if (p_subscript[i] == NA_INTEGER) { SET_STRING_ELT(names, i, strings_empty); } } } SEXP slice_names(SEXP names, SEXP subscript) { if (names == R_NilValue) { return names; } names = PROTECT(chr_slice(names, subscript)); repair_na_names(names, subscript); UNPROTECT(1); return names; } SEXP slice_rownames(SEXP names, SEXP subscript) { if (names == R_NilValue) { return names; } names = PROTECT(chr_slice(names, subscript)); // Rownames can't contain `NA` or duplicates names = vec_as_unique_names(names, true); UNPROTECT(1); return names; } SEXP vec_slice_impl(SEXP x, SEXP subscript) { int nprot = 0; SEXP restore_size = PROTECT_N(r_int(vec_subscript_size(subscript)), &nprot); struct vctrs_proxy_info info = vec_proxy_info(x); PROTECT_PROXY_INFO(&info, &nprot); SEXP data = info.proxy; // Fallback to `[` if the class doesn't implement a proxy. This is // to be maximally compatible with existing classes. if (vec_requires_fallback(x, info)) { if (info.type == vctrs_type_scalar) { vec_assert(x, NULL); } if (is_compact(subscript)) { subscript = PROTECT_N(compact_materialize(subscript), &nprot); } SEXP out; if (has_dim(x)) { out = PROTECT_N(vec_slice_fallback(x, subscript), &nprot); } else { out = PROTECT_N(vec_slice_dispatch(x, subscript), &nprot); } // Take over attribute restoration only if there is no `[` method if (!vec_is_restored(out, x)) { out = vec_restore(out, x, restore_size, vec_owned(out)); } UNPROTECT(nprot); return out; } switch (info.type) { case vctrs_type_null: stop_internal("vec_slice_impl", "Unexpected `NULL`."); case vctrs_type_logical: case vctrs_type_integer: case vctrs_type_double: case vctrs_type_complex: case vctrs_type_character: case vctrs_type_raw: case vctrs_type_list: { SEXP out; if (has_dim(x)) { out = PROTECT_N(vec_slice_shaped(info.type, data, subscript), &nprot); SEXP names = PROTECT_N(Rf_getAttrib(x, R_DimNamesSymbol), &nprot); if (names != R_NilValue) { names = PROTECT_N(Rf_shallow_duplicate(names), &nprot); SEXP row_names = VECTOR_ELT(names, 0); row_names = PROTECT_N(slice_names(row_names, subscript), &nprot); SET_VECTOR_ELT(names, 0, row_names); Rf_setAttrib(out, R_DimNamesSymbol, names); } } else { out = PROTECT_N(vec_slice_base(info.type, data, subscript), &nprot); SEXP names = PROTECT_N(Rf_getAttrib(x, R_NamesSymbol), &nprot); names = PROTECT_N(slice_names(names, subscript), &nprot); Rf_setAttrib(out, R_NamesSymbol, names); } out = vec_restore(out, x, restore_size, vec_owned(out)); UNPROTECT(nprot); return out; } case vctrs_type_dataframe: { SEXP out = PROTECT_N(df_slice(data, subscript), &nprot); out = vec_restore(out, x, restore_size, vec_owned(out)); UNPROTECT(nprot); return out; } default: stop_unimplemented_vctrs_type("vec_slice_impl", info.type); } } bool vec_is_restored(SEXP x, SEXP to) { // Don't restore if there is an actual `[` method that ignored // attributes. Some methods like [.ts intentionally strip the class // and attributes. FIXME: This branch is now probably sufficient. if (s3_find_method("[", to, base_method_table) != R_NilValue) { return true; } SEXP attrib = ATTRIB(x); if (attrib == R_NilValue) { return false; } // Class is restored if it contains any other attributes than names. // We might want to add support for data frames later on. SEXP node = attrib; while (node != R_NilValue) { if (TAG(node) == R_NamesSymbol) { node = CDR(node); continue; } return true; } return false; } // [[ include("vctrs.h"); register() ]] SEXP vec_slice(SEXP x, SEXP subscript) { vec_assert(x, args_empty); subscript = PROTECT(vec_as_location(subscript, vec_size(x), PROTECT(vec_names(x)))); SEXP out = vec_slice_impl(x, subscript); UNPROTECT(2); return out; } // [[ include("vctrs.h") ]] SEXP vec_init(SEXP x, R_len_t n) { vec_assert(x, NULL); SEXP i = PROTECT(compact_rep(NA_INTEGER, n)); SEXP out = vec_slice_impl(x, i); UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_init(SEXP x, SEXP n) { R_len_t n_ = r_int_get(n, 0); return vec_init(x, n_); } // Exported for testing // [[ register() ]] SEXP vec_slice_seq(SEXP x, SEXP start, SEXP size, SEXP increasing) { R_len_t start_ = r_int_get(start, 0); R_len_t size_ = r_int_get(size, 0); bool increasing_ = r_lgl_get(increasing, 0); SEXP subscript = PROTECT(compact_seq(start_, size_, increasing_)); SEXP out = vec_slice_impl(x, subscript); UNPROTECT(1); return out; } // Exported for testing // [[ register() ]] SEXP vec_slice_rep(SEXP x, SEXP i, SEXP n) { R_len_t i_ = r_int_get(i, 0); R_len_t n_ = r_int_get(n, 0); SEXP subscript = PROTECT(compact_rep(i_, n_)); SEXP out = vec_slice_impl(x, subscript); UNPROTECT(1); return out; } void vctrs_init_slice(SEXP ns) { syms_vec_slice_fallback = Rf_install("vec_slice_fallback"); syms_vec_slice_fallback_integer64 = Rf_install("vec_slice_fallback_integer64"); syms_vec_slice_dispatch_integer64 = Rf_install("vec_slice_dispatch_integer64"); fns_vec_slice_fallback = Rf_findVar(syms_vec_slice_fallback, ns); fns_vec_slice_fallback_integer64 = Rf_findVar(syms_vec_slice_fallback_integer64, ns); fns_vec_slice_dispatch_integer64 = Rf_findVar(syms_vec_slice_dispatch_integer64, ns); } vctrs/src/fields.c0000644000176200001440000000702014042540502013607 0ustar liggesusers#include "vctrs.h" // SEXP x and y must be CHARSXP // x_utf* is pointer to const char* which is lazily initialised: // This makes this function also suitable for use when repeated // comparing varying y to constant x bool equal_string(SEXP x, const char** x_utf8, SEXP y) { // Try fast pointer comparison if (x == y) return true; if (*x_utf8 == NULL) *x_utf8 = Rf_translateCharUTF8(x); // Try slower conversion to common encoding const char* y_utf = Rf_translateCharUTF8(y); return (strcmp(y_utf, *x_utf8) == 0); } int find_offset(SEXP x, SEXP index) { if (Rf_length(index) != 1) { Rf_errorcall(R_NilValue, "Invalid index: must have length 1"); } int n = Rf_length(x); if (TYPEOF(index) == INTSXP) { int val = INTEGER(index)[0]; if (val == NA_INTEGER) Rf_errorcall(R_NilValue, "Invalid index: NA_integer_"); val--; if (val < 0 || val >= n) Rf_errorcall(R_NilValue, "Invalid index: out of bounds"); return val; } else if (TYPEOF(index) == REALSXP) { double val = REAL(index)[0]; if (R_IsNA(val)) Rf_errorcall(R_NilValue, "Invalid index: NA_real_"); val--; if (val < 0 || val >= n) Rf_errorcall(R_NilValue, "Invalid index: out of bounds"); if (val > R_LEN_T_MAX) { Rf_errorcall(R_NilValue, "Invalid index: too large"); } return (int) val; } else if (TYPEOF(index) == STRSXP) { SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); if (names == R_NilValue) Rf_errorcall(R_NilValue, "Corrupt x: no names"); SEXP val_0 = STRING_ELT(index, 0); if (val_0 == NA_STRING) Rf_errorcall(R_NilValue, "Invalid index: NA_character_"); const char* val_0_chr = Rf_translateCharUTF8(val_0); if (val_0_chr[0] == '\0') Rf_errorcall(R_NilValue, "Invalid index: empty string"); for (int j = 0; j < Rf_length(names); ++j) { SEXP name_j = STRING_ELT(names, j); if (name_j == NA_STRING) Rf_errorcall(R_NilValue, "Corrupt x: element %i is unnamed", j + 1); if (equal_string(val_0, &val_0_chr, name_j)) { UNPROTECT(1); return j; } } Rf_errorcall(R_NilValue, "Invalid index: field name '%s' not found", val_0_chr); } else { Rf_errorcall(R_NilValue, "Invalid index: must be a character or numeric vector"); } } // Lists ------------------------------------------------------------------- SEXP vctrs_list_get(SEXP x, SEXP index) { int idx = find_offset(x, index); return VECTOR_ELT(x, idx); } SEXP vctrs_list_set(SEXP x, SEXP index, SEXP value) { int idx = find_offset(x, index); SEXP out = PROTECT(Rf_shallow_duplicate(x)); SET_VECTOR_ELT(out, idx, value); UNPROTECT(1); return out; } // Records ------------------------------------------------------------------ void check_rcrd(SEXP x) { if (!Rf_isVectorList(x)) Rf_errorcall(R_NilValue, "Corrupt rcrd: not a list"); if (Rf_length(x) == 0) Rf_errorcall(R_NilValue, "Corrupt rcrd: length 0"); } SEXP vctrs_fields(SEXP x) { check_rcrd(x); return Rf_getAttrib(x, R_NamesSymbol); } SEXP vctrs_n_fields(SEXP x) { check_rcrd(x); return Rf_ScalarInteger(Rf_length(x)); } SEXP vctrs_field_get(SEXP x, SEXP index) { check_rcrd(x); return vctrs_list_get(x, index); } SEXP vctrs_field_set(SEXP x, SEXP index, SEXP value) { check_rcrd(x); if (!vec_is_vector(value)) { Rf_errorcall(R_NilValue, "Invalid value: not a vector."); } if (vec_size(value) != vec_size(x)) { Rf_errorcall(R_NilValue, "Invalid value: incorrect length."); } return vctrs_list_set(x, index, value); } vctrs/src/cast.h0000644000176200001440000000456113671672047013330 0ustar liggesusers#ifndef VCTRS_CAST_H #define VCTRS_CAST_H #include "ptype2.h" struct cast_opts { SEXP x; SEXP to; struct vctrs_arg* x_arg; struct vctrs_arg* to_arg; struct fallback_opts fallback; }; SEXP df_cast_opts(const struct cast_opts* opts); // Defined in type-data-frame.c static inline SEXP df_cast(SEXP x, SEXP to, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg) { const struct cast_opts opts = { .x = x, .to = to, .x_arg = x_arg, .to_arg = to_arg }; return df_cast_opts(&opts); } SEXP vec_cast_opts(const struct cast_opts* opts); static inline SEXP vec_cast_params(SEXP x, SEXP to, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg, enum df_fallback df_fallback, enum s3_fallback s3_fallback) { const struct cast_opts opts = { .x = x, .to = to, .x_arg = x_arg, .to_arg = to_arg, .fallback = { .df = df_fallback, .s3 = s3_fallback } }; return vec_cast_opts(&opts); } SEXP vec_cast_common_opts(SEXP xs, SEXP to, const struct fallback_opts* fallback_opts); SEXP vec_cast_common_params(SEXP xs, SEXP to, enum df_fallback df_fallback, enum s3_fallback s3_fallback); struct cast_opts new_cast_opts(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, SEXP opts); SEXP vec_cast_dispatch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy); SEXP vec_cast_e(const struct cast_opts* opts, ERR* err); SEXP vec_cast_default(SEXP x, SEXP y, SEXP x_arg, SEXP to_arg, const struct fallback_opts* opts); // Defined in cast-bare.c SEXP int_as_double(SEXP x, bool* lossy); SEXP lgl_as_double(SEXP x, bool* lossy); SEXP dbl_as_integer(SEXP x, bool* lossy); SEXP lgl_as_integer(SEXP x, bool* lossy); SEXP chr_as_logical(SEXP x, bool* lossy); SEXP dbl_as_logical(SEXP x, bool* lossy); SEXP int_as_logical(SEXP x, bool* lossy); #endif vctrs/src/shape.h0000644000176200001440000000033613650511520013453 0ustar liggesusers#ifndef VCTRS_SHAPE_H #define VCTRS_SHAPE_H #include "vctrs.h" SEXP vec_shaped_ptype(SEXP ptype, SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); #endif vctrs/src/order-truelength.c0000644000176200001440000001426214042540502015641 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "order-truelength.h" #include "utils.h" /* * See the notes in the character ordering section at the top of `order.c` * for more details on how TRUELENGTH is used to detect unique strings. * * The helpers here are somewhat equivalent to the following from R's `order()` * https://github.com/wch/r-source/blob/91b4507bf6040c0167fc5b6037c202c8cbd98afd/src/main/radixsort.c#L66-L123 */ // ----------------------------------------------------------------------------- /* * Construct a new `truelength_info` * * Pair with `PROTECT_TRUELENGTH_INFO()` in the caller */ struct truelength_info* new_truelength_info(r_ssize max_size_alloc) { SEXP self = PROTECT(r_new_raw(sizeof(struct truelength_info))); struct truelength_info* p_truelength_info = (struct truelength_info*) RAW(self); p_truelength_info->self = self; p_truelength_info->strings = vctrs_shared_empty_chr; p_truelength_info->lengths = vctrs_shared_empty_raw; p_truelength_info->uniques = vctrs_shared_empty_chr; p_truelength_info->sizes = vctrs_shared_empty_int; p_truelength_info->sizes_aux = vctrs_shared_empty_int; p_truelength_info->size_alloc = 0; p_truelength_info->max_size_alloc = max_size_alloc; p_truelength_info->size_used = 0; p_truelength_info->max_string_size = 0; UNPROTECT(1); return p_truelength_info; } // ----------------------------------------------------------------------------- /* * Reset the truelengths of all unique strings captured in `strings` using * the original truelengths in `lengths`. * * Will be called after each character data frame column is processed, and * at the end of `chr_order()` for a single character vector. */ void truelength_reset(struct truelength_info* p_truelength_info) { r_ssize size = p_truelength_info->size_used; for (r_ssize i = 0; i < size; ++i) { SEXP string = p_truelength_info->p_strings[i]; r_ssize length = p_truelength_info->p_lengths[i]; SET_TRUELENGTH(string, length); } // Also reset vector specific details p_truelength_info->size_used = 0; p_truelength_info->max_string_size = 0; } // ----------------------------------------------------------------------------- static void truelength_realloc(struct truelength_info* p_truelength_info); /* * Saves a unique CHARSXP `x` along with its original truelength and * its "size" (i.e the number of characters). Will be reset later with * `truelength_reset()`. */ void truelength_save(SEXP x, r_ssize truelength, r_ssize size, struct truelength_info* p_truelength_info) { // Reallocate as needed if (p_truelength_info->size_used == p_truelength_info->size_alloc) { truelength_realloc(p_truelength_info); } p_truelength_info->p_strings[p_truelength_info->size_used] = x; p_truelength_info->p_lengths[p_truelength_info->size_used] = truelength; p_truelength_info->p_uniques[p_truelength_info->size_used] = x; p_truelength_info->p_sizes[p_truelength_info->size_used] = size; ++p_truelength_info->size_used; } // ----------------------------------------------------------------------------- static r_ssize truelength_realloc_size(struct truelength_info* p_truelength_info); static inline SEXP lengths_resize(SEXP x, r_ssize x_size, r_ssize size); /* * Extend the vectors in `truelength_info`. * Reprotects itself. */ static void truelength_realloc(struct truelength_info* p_truelength_info) { r_ssize size = truelength_realloc_size(p_truelength_info); p_truelength_info->strings = chr_resize( p_truelength_info->strings, p_truelength_info->size_used, size ); REPROTECT(p_truelength_info->strings, p_truelength_info->strings_pi); p_truelength_info->p_strings = STRING_PTR(p_truelength_info->strings); p_truelength_info->lengths = lengths_resize( p_truelength_info->lengths, p_truelength_info->size_used, size ); REPROTECT(p_truelength_info->lengths, p_truelength_info->lengths_pi); p_truelength_info->p_lengths = (r_ssize*) RAW(p_truelength_info->lengths); p_truelength_info->uniques = chr_resize( p_truelength_info->uniques, p_truelength_info->size_used, size ); REPROTECT(p_truelength_info->uniques, p_truelength_info->uniques_pi); p_truelength_info->p_uniques = STRING_PTR(p_truelength_info->uniques); p_truelength_info->sizes = int_resize( p_truelength_info->sizes, p_truelength_info->size_used, size ); REPROTECT(p_truelength_info->sizes, p_truelength_info->sizes_pi); p_truelength_info->p_sizes = INTEGER(p_truelength_info->sizes); p_truelength_info->sizes_aux = int_resize( p_truelength_info->sizes_aux, p_truelength_info->size_used, size ); REPROTECT(p_truelength_info->sizes_aux, p_truelength_info->sizes_aux_pi); p_truelength_info->p_sizes_aux = INTEGER(p_truelength_info->sizes_aux); p_truelength_info->size_alloc = size; } static inline SEXP lengths_resize(SEXP x, r_ssize x_size, r_ssize size) { return raw_resize( x, x_size * sizeof(r_ssize), size * sizeof(r_ssize) ); } // ----------------------------------------------------------------------------- static r_ssize truelength_realloc_size(struct truelength_info* p_truelength_info) { r_ssize size_alloc = p_truelength_info->size_alloc; r_ssize max_size_alloc = p_truelength_info->max_size_alloc; // First allocation if (size_alloc == 0) { if (TRUELENGTH_SIZE_ALLOC_DEFAULT < max_size_alloc) { return TRUELENGTH_SIZE_ALLOC_DEFAULT; } else { return max_size_alloc; } } // Avoid potential overflow when doubling size uint64_t new_size_alloc = ((uint64_t) size_alloc) * 2; // Clamp maximum allocation size to the size of the input if (new_size_alloc > max_size_alloc) { return max_size_alloc; } // Can now safely cast back to `r_ssize` return (r_ssize) new_size_alloc; } vctrs/src/growable.c0000644000176200001440000000053414042540502014146 0ustar liggesusers#include "vctrs.h" #include "utils.h" struct growable new_growable(SEXPTYPE type, int capacity) { struct growable g; g.x = Rf_allocVector(type, capacity); g.type = type; g.array = r_vec_unwrap(type, g.x); g.n = 0; g.capacity = capacity; return g; } SEXP growable_values(struct growable* g) { return Rf_lengthgets(g->x, g->n); } vctrs/src/dim.c0000644000176200001440000000041114042540502013107 0ustar liggesusers#include "vctrs.h" #include "dim.h" // [[ register() ]] SEXP vctrs_dim(SEXP x) { return vec_dim(x); } // [[ register() ]] SEXP vctrs_dim_n(SEXP x) { return r_int(vec_dim_n(x)); } // [[ register() ]] SEXP vctrs_has_dim(SEXP x) { return r_lgl(has_dim(x)); } vctrs/src/slice-assign-array.c0000644000176200001440000002635314042540502016050 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "strides.h" #include "slice-assign.h" #include "owned.h" #define ASSIGN_SHAPED_INDEX(CTYPE, DEREF, CONST_DEREF) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ CTYPE* p_out = DEREF(out); \ \ const CTYPE* p_value = CONST_DEREF(value); \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++k) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ continue; \ } \ \ loc += step; \ \ p_out[loc] = p_value[k]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out #define ASSIGN_SHAPED_COMPACT(CTYPE, DEREF, CONST_DEREF) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ CTYPE* p_out = DEREF(out); \ \ const R_len_t start = p_info->p_index[0]; \ const R_len_t n = p_info->p_index[1]; \ const R_len_t step = p_info->p_index[2]; \ \ const CTYPE* p_value = CONST_DEREF(value); \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++k, loc += step) { \ p_out[loc] = p_value[k]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out // ----------------------------------------------------------------------------- #define ASSIGN_SHAPED(CTYPE, DEREF, CONST_DEREF) \ if (is_compact_seq(index)) { \ ASSIGN_SHAPED_COMPACT(CTYPE, DEREF, CONST_DEREF); \ } else { \ ASSIGN_SHAPED_INDEX(CTYPE, DEREF, CONST_DEREF); \ } static inline SEXP lgl_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(int, LOGICAL, LOGICAL_RO); } static inline SEXP int_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(int, INTEGER, INTEGER_RO); } static inline SEXP dbl_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(double, REAL, REAL_RO); } static inline SEXP cpl_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(Rcomplex, COMPLEX, COMPLEX_RO); } static inline SEXP chr_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(SEXP, STRING_PTR, STRING_PTR_RO); } static inline SEXP raw_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(Rbyte, RAW, RAW_RO); } #undef ASSIGN_SHAPED #undef ASSIGN_SHAPED_COMPACT #undef ASSIGN_SHAPED_INDEX // ----------------------------------------------------------------------------- #define ASSIGN_BARRIER_SHAPED_INDEX(GET, SET) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++k) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ continue; \ } \ \ loc += step; \ \ SEXP elt = GET(value, k); \ SET(out, loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out #define ASSIGN_BARRIER_SHAPED_COMPACT(GET, SET) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ \ const R_len_t start = p_info->p_index[0]; \ const R_len_t n = p_info->p_index[1]; \ const R_len_t step = p_info->p_index[2]; \ \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++k, loc += step) { \ SEXP elt = GET(value, k); \ SET(out, loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out // ----------------------------------------------------------------------------- #define ASSIGN_BARRIER_SHAPED(GET, SET) \ if (is_compact_seq(index)) { \ ASSIGN_BARRIER_SHAPED_COMPACT(GET, SET); \ } else { \ ASSIGN_BARRIER_SHAPED_INDEX(GET, SET); \ } static SEXP list_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_BARRIER_SHAPED(VECTOR_ELT, SET_VECTOR_ELT); } #undef ASSIGN_BARRIER_SHAPED #undef ASSIGN_BARRIER_SHAPED_COMPACT #undef ASSIGN_BARRIER_SHAPED_INDEX // ----------------------------------------------------------------------------- static inline SEXP vec_assign_shaped_switch(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { switch (vec_proxy_typeof(proxy)) { case vctrs_type_logical: return lgl_assign_shaped(proxy, index, value, owned, p_info); case vctrs_type_integer: return int_assign_shaped(proxy, index, value, owned, p_info); case vctrs_type_double: return dbl_assign_shaped(proxy, index, value, owned, p_info); case vctrs_type_complex: return cpl_assign_shaped(proxy, index, value, owned, p_info); case vctrs_type_character: return chr_assign_shaped(proxy, index, value, owned, p_info); case vctrs_type_raw: return raw_assign_shaped(proxy, index, value, owned, p_info); case vctrs_type_list: return list_assign_shaped(proxy, index, value, owned, p_info); default: stop_unimplemented_vctrs_type("vec_assign_shaped_switch", vec_proxy_typeof(proxy)); } } // ----------------------------------------------------------------------------- // [[ include("vctrs.h") ]] SEXP vec_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { int n_protect = 0; struct strides_info info = new_strides_info(proxy, index); PROTECT_STRIDES_INFO(&info, &n_protect); SEXP out = vec_assign_shaped_switch(proxy, index, value, owned, &info); UNPROTECT(n_protect); return out; } vctrs/src/ptype2-dispatch.c0000644000176200001440000001434014042540502015364 0ustar liggesusers#include "vctrs.h" #include "ptype2.h" #include "type-data-frame.h" #include "type-factor.h" #include "type-tibble.h" #include "utils.h" // [[ include("ptype2.h") ]] SEXP vec_ptype2_dispatch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left) { SEXP x = opts->x; SEXP y = opts->y; enum vctrs_type2_s3 type2_s3 = vec_typeof2_s3_impl(x, y, x_type, y_type, left); switch (type2_s3) { case vctrs_type2_s3_character_bare_factor: case vctrs_type2_s3_character_bare_ordered: return vctrs_shared_empty_chr; case vctrs_type2_s3_bare_factor_bare_factor: return fct_ptype2(opts); case vctrs_type2_s3_bare_ordered_bare_ordered: return ord_ptype2(opts); case vctrs_type2_s3_bare_date_bare_date: return vctrs_shared_empty_date; case vctrs_type2_s3_bare_date_bare_posixct: case vctrs_type2_s3_bare_date_bare_posixlt: return date_datetime_ptype2(x, y); case vctrs_type2_s3_bare_posixct_bare_posixct: case vctrs_type2_s3_bare_posixct_bare_posixlt: case vctrs_type2_s3_bare_posixlt_bare_posixlt: return datetime_datetime_ptype2(x, y); case vctrs_type2_s3_dataframe_bare_tibble: case vctrs_type2_s3_bare_tibble_bare_tibble: return tib_ptype2(opts); default: return R_NilValue; } } // Initialised at load time static SEXP syms_vec_ptype2_default = NULL; static inline SEXP vec_ptype2_default(SEXP x, SEXP y, SEXP x_arg, SEXP y_arg, const struct fallback_opts* opts) { SEXP df_fallback_obj = PROTECT(r_int(opts->df)); SEXP s3_fallback_obj = PROTECT(r_int(opts->s3)); SEXP out = vctrs_eval_mask7(syms_vec_ptype2_default, syms_x, x, syms_y, y, syms_x_arg, x_arg, syms_y_arg, y_arg, syms_from_dispatch, vctrs_shared_true, syms_df_fallback, df_fallback_obj, syms_s3_fallback, s3_fallback_obj); UNPROTECT(2); return out; } SEXP find_common_class(SEXP x, SEXP y) { SEXP x_class = PROTECT(r_class(x)); SEXP y_class = PROTECT(r_class(y)); R_len_t x_n = Rf_length(x_class); R_len_t y_n = Rf_length(x_class); SEXP const * p_x_classes = STRING_PTR_RO(x_class); SEXP const * p_y_classes = STRING_PTR_RO(y_class); for (R_len_t i = 0; i < x_n; ++i) { for (R_len_t j = 0; j < y_n; ++j) { if (p_x_classes[i] == p_y_classes[j]) { UNPROTECT(2); return p_x_classes[i]; } } } UNPROTECT(2); return R_NilValue; } // [[ include("vctrs.h") ]] SEXP vec_ptype2_dispatch_s3(const struct ptype2_opts* opts) { SEXP x = PROTECT(vec_ptype(opts->x, opts->x_arg)); SEXP y = PROTECT(vec_ptype(opts->y, opts->y_arg)); SEXP r_x_arg = PROTECT(vctrs_arg(opts->x_arg)); SEXP r_y_arg = PROTECT(vctrs_arg(opts->y_arg)); SEXP method_sym = R_NilValue; SEXP method = s3_find_method_xy("vec_ptype2", x, y, vctrs_method_table, &method_sym); // Compatibility with legacy double dispatch mechanism if (method == R_NilValue) { SEXP x_method_sym = R_NilValue; SEXP x_method = PROTECT(s3_find_method2("vec_ptype2", x, vctrs_method_table, &x_method_sym)); if (x_method != R_NilValue) { const char* x_method_str = CHAR(PRINTNAME(x_method_sym)); SEXP x_table = s3_get_table(CLOENV(x_method)); method = s3_find_method2(x_method_str, y, x_table, &method_sym); } UNPROTECT(1); } PROTECT(method); if (method == R_NilValue) { SEXP out = vec_ptype2_default(x, y, r_x_arg, r_y_arg, &(opts->fallback)); UNPROTECT(5); return out; } SEXP out = vec_invoke_coerce_method(method_sym, method, syms_x, x, syms_y, y, syms_x_arg, r_x_arg, syms_y_arg, r_y_arg, &(opts->fallback)); UNPROTECT(5); return out; } SEXP vec_invoke_coerce_method(SEXP method_sym, SEXP method, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP x_arg_sym, SEXP x_arg, SEXP y_arg_sym, SEXP y_arg, const struct fallback_opts* opts) { if (opts->df != DF_FALLBACK_DEFAULT || opts->s3 != S3_FALLBACK_DEFAULT) { SEXP df_fallback_obj = PROTECT(r_int(opts->df)); SEXP s3_fallback_obj = PROTECT(r_int(opts->s3)); SEXP out = vctrs_dispatch6(method_sym, method, x_sym, x, y_sym, y, x_arg_sym, x_arg, y_arg_sym, y_arg, syms_df_fallback, df_fallback_obj, syms_s3_fallback, s3_fallback_obj); UNPROTECT(2); return out; } else { return vctrs_dispatch4(method_sym, method, x_sym, x, y_sym, y, x_arg_sym, x_arg, y_arg_sym, y_arg); } } // [[ register() ]] SEXP vctrs_ptype2_dispatch_native(SEXP x, SEXP y, SEXP fallback_opts, SEXP x_arg, SEXP y_arg) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_y_arg = vec_as_arg(y_arg); const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &c_x_arg, &c_y_arg, fallback_opts); int _left; SEXP out = vec_ptype2_dispatch_native(&c_opts, vec_typeof(x), vec_typeof(y), &_left); if (out == R_NilValue) { return vec_ptype2_default(x, y, x_arg, y_arg, &c_opts.fallback); } else { return out; } } void vctrs_init_ptype2_dispatch(SEXP ns) { syms_vec_ptype2_default = Rf_install("vec_default_ptype2"); } vctrs/src/hash.c0000644000176200001440000003475614042540502013304 0ustar liggesusers#include "vctrs.h" #include "equal.h" #include "hash.h" #include "utils.h" #include "dim.h" // boost::hash_combine from https://stackoverflow.com/questions/35985960 static inline uint32_t hash_combine(uint32_t x, uint32_t y) { return x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); } // 32-bit mixer from murmurhash // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L68 static inline uint32_t hash_int32(uint32_t x) { x ^= x >> 16; x *= 0x85ebca6b; x ^= x >> 13; x *= 0xc2b2ae35; x ^= x >> 16; return x; } // 64-bit mixer from murmurhash // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L81 static inline uint32_t hash_int64(int64_t x) { x ^= x >> 33; x *= UINT64_C(0xff51afd7ed558ccd); x ^= x >> 33; x *= UINT64_C(0xc4ceb9fe1a85ec53); x ^= x >> 33; return x; } // Seems like something designed specificaly for doubles should work better // but I haven't been able to find anything static inline uint32_t hash_double(double x) { // Treat positive/negative 0 as equivalent if (x == 0.0) { x = 0.0; } union { double d; uint64_t i; } value; value.d = x; return hash_int64(value.i); } static inline uint32_t hash_char(SEXP x) { return hash_int64((uintptr_t) x); } // Hashing scalars ----------------------------------------------------- static inline uint32_t lgl_hash_scalar(const int* x); static inline uint32_t int_hash_scalar(const int* x); static inline uint32_t dbl_hash_scalar(const double* x); static inline uint32_t cpl_hash_scalar(const Rcomplex* x); static inline uint32_t chr_hash_scalar(const SEXP* x); static inline uint32_t raw_hash_scalar(const Rbyte* x); static inline uint32_t lgl_hash_scalar(const int* x) { return hash_int32(*x); } static inline uint32_t int_hash_scalar(const int* x) { return hash_int32(*x); } static inline uint32_t dbl_hash_scalar(const double* x) { double val = *x; // Hash all NAs and NaNs to same value (i.e. ignoring significand) switch (dbl_classify(val)) { case vctrs_dbl_number: break; case vctrs_dbl_missing: val = NA_REAL; break; case vctrs_dbl_nan: val = R_NaN; break; } return hash_double(val); } static inline uint32_t cpl_hash_scalar(const Rcomplex* x) { uint32_t hash = 0; hash = hash_combine(hash, dbl_hash_scalar(&x->r)); hash = hash_combine(hash, dbl_hash_scalar(&x->i)); return hash; } static inline uint32_t chr_hash_scalar(const SEXP* x) { return hash_char(*x); } static inline uint32_t raw_hash_scalar(const Rbyte* x) { return hash_int32(*x); } static inline uint32_t list_hash_scalar_na_equal(SEXP x, R_len_t i) { return hash_object(VECTOR_ELT(x, i)); } static inline uint32_t list_hash_scalar_na_propagate(SEXP x, R_len_t i) { SEXP elt = VECTOR_ELT(x, i); if (elt == R_NilValue) { return HASH_MISSING; } else { return hash_object(elt); } } // Hashing objects ----------------------------------------------------- static uint32_t lgl_hash(SEXP x); static uint32_t int_hash(SEXP x); static uint32_t dbl_hash(SEXP x); static uint32_t chr_hash(SEXP x); static uint32_t list_hash(SEXP x); static uint32_t node_hash(SEXP x); static uint32_t fn_hash(SEXP x); static uint32_t sexp_hash(SEXP x); uint32_t hash_object(SEXP x) { uint32_t hash = sexp_hash(x); SEXP attrib = ATTRIB(x); if (attrib != R_NilValue) { hash = hash_combine(hash, hash_object(attrib)); } return hash; } // [[ register() ]] SEXP vctrs_hash_object(SEXP x) { SEXP out = PROTECT(Rf_allocVector(RAWSXP, sizeof(uint32_t))); uint32_t hash = 0; hash = hash_combine(hash, hash_object(x)); memcpy(RAW(out), &hash, sizeof(uint32_t)); UNPROTECT(1); return out; } static uint32_t sexp_hash(SEXP x) { switch (TYPEOF(x)) { case NILSXP: return 0; case LGLSXP: return lgl_hash(x); case INTSXP: return int_hash(x); case REALSXP: return dbl_hash(x); case STRSXP: return chr_hash(x); case EXPRSXP: case VECSXP: return list_hash(x); case DOTSXP: case LANGSXP: case LISTSXP: case BCODESXP: return node_hash(x); case CLOSXP: return fn_hash(x); case SYMSXP: case SPECIALSXP: case BUILTINSXP: case ENVSXP: case EXTPTRSXP: return hash_int64((uintptr_t) x); default: Rf_errorcall(R_NilValue, "Unsupported type %s", Rf_type2char(TYPEOF(x))); } } #define HASH(CTYPE, CONST_DEREF, HASHER) \ uint32_t hash = 0; \ R_len_t n = Rf_length(x); \ const CTYPE* p = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < n; ++i, ++p) { \ hash = hash_combine(hash, HASHER(p)); \ } \ \ return hash static uint32_t lgl_hash(SEXP x) { HASH(int, LOGICAL_RO, lgl_hash_scalar); } static uint32_t int_hash(SEXP x) { HASH(int, INTEGER_RO, int_hash_scalar); } static uint32_t dbl_hash(SEXP x) { HASH(double, REAL_RO, dbl_hash_scalar); } static uint32_t chr_hash(SEXP x) { HASH(SEXP, STRING_PTR_RO, chr_hash_scalar); } #undef HASH #define HASH_BARRIER(GET, HASHER) \ uint32_t hash = 0; \ R_len_t n = Rf_length(x); \ \ for (R_len_t i = 0; i < n; ++i) { \ hash = hash_combine(hash, HASHER(GET(x, i))); \ } \ \ return hash static uint32_t list_hash(SEXP x) { HASH_BARRIER(VECTOR_ELT, hash_object); } #undef HASH_BARRIER static uint32_t node_hash(SEXP x) { uint32_t hash = 0; hash = hash_combine(hash, hash_object(CAR(x))); hash = hash_combine(hash, hash_object(CDR(x))); return hash; } static uint32_t fn_hash(SEXP x) { uint32_t hash = 0; hash = hash_combine(hash, hash_object(BODY(x))); hash = hash_combine(hash, hash_object(CLOENV(x))); hash = hash_combine(hash, hash_object(FORMALS(x))); return hash; } // Fill hash array ----------------------------------------------------- static inline void lgl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void lgl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void int_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void int_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void dbl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void dbl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void cpl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void cpl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void chr_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void chr_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void raw_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void raw_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void list_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void list_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void df_hash_fill(uint32_t* p, R_len_t size, SEXP x, bool na_equal); // Not compatible with hash_scalar. When `@na_equal` is false, missing // values are propagated and encoded as `1`. // // [[ include("vctrs.h") ]] void hash_fill(uint32_t* p, R_len_t size, SEXP x, bool na_equal) { if (has_dim(x)) { // The conversion to data frame is only a stopgap, in the long // term, we'll hash arrays natively x = PROTECT(r_as_data_frame(x)); hash_fill(p, size, x, na_equal); UNPROTECT(1); return; } if (na_equal) { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: lgl_hash_fill_na_equal(p, size, x); return; case vctrs_type_integer: int_hash_fill_na_equal(p, size, x); return; case vctrs_type_double: dbl_hash_fill_na_equal(p, size, x); return; case vctrs_type_complex: cpl_hash_fill_na_equal(p, size, x); return; case vctrs_type_character: chr_hash_fill_na_equal(p, size, x); return; case vctrs_type_raw: raw_hash_fill_na_equal(p, size, x); return; case vctrs_type_list: list_hash_fill_na_equal(p, size, x); return; case vctrs_type_dataframe: df_hash_fill(p, size, x, true); return; default: break; } } else { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: lgl_hash_fill_na_propagate(p, size, x); return; case vctrs_type_integer: int_hash_fill_na_propagate(p, size, x); return; case vctrs_type_double: dbl_hash_fill_na_propagate(p, size, x); return; case vctrs_type_complex: cpl_hash_fill_na_propagate(p, size, x); return; case vctrs_type_character: chr_hash_fill_na_propagate(p, size, x); return; case vctrs_type_raw: raw_hash_fill_na_propagate(p, size, x); return; case vctrs_type_list: list_hash_fill_na_propagate(p, size, x); return; case vctrs_type_dataframe: df_hash_fill(p, size, x, false); return; default: break; } } stop_unimplemented_vctrs_type("hash_fill", vec_proxy_typeof(x)); } #define HASH_FILL(CTYPE, CONST_DEREF, HASHER) \ const CTYPE* xp = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i, ++xp) { \ p[i] = hash_combine(p[i], HASHER(xp)); \ } #define HASH_FILL_NA_PROPAGATE(CTYPE, CONST_DEREF, HASHER, NA_VALUE) \ const CTYPE* xp = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i, ++xp) { \ uint32_t h = p[i]; \ if (h == HASH_MISSING) { \ continue; \ } \ if (*xp == NA_VALUE) { \ p[i] = HASH_MISSING; \ } else { \ p[i] = hash_combine(h, HASHER(xp)); \ } \ } #define HASH_FILL_NA_PROPAGATE_CMP(CTYPE, CONST_DEREF, HASHER, NA_CMP) \ const CTYPE* xp = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i, ++xp) { \ uint32_t h = p[i]; \ if (h == HASH_MISSING) { \ continue; \ } \ if (NA_CMP(*xp)) { \ p[i] = HASH_MISSING; \ } else { \ p[i] = hash_combine(h, HASHER(xp)); \ } \ } static inline void lgl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(int, LOGICAL_RO, lgl_hash_scalar); } static inline void lgl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE(int, LOGICAL_RO, lgl_hash_scalar, NA_LOGICAL); } static inline void int_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(int, INTEGER_RO, int_hash_scalar); } static inline void int_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE(int, INTEGER_RO, int_hash_scalar, NA_INTEGER); } static inline void dbl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(double, REAL_RO, dbl_hash_scalar); } static inline void dbl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE_CMP(double, REAL_RO, dbl_hash_scalar, dbl_is_missing); } static inline void cpl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(Rcomplex, COMPLEX_RO, cpl_hash_scalar); } static inline void cpl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE_CMP(Rcomplex, COMPLEX_RO, cpl_hash_scalar, cpl_is_missing); } static inline void chr_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(SEXP, STRING_PTR_RO, chr_hash_scalar); } static inline void chr_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE(SEXP, STRING_PTR_RO, chr_hash_scalar, NA_STRING); } static inline void raw_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(Rbyte, RAW_RO, raw_hash_scalar); } static inline void raw_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(Rbyte, RAW_RO, raw_hash_scalar); } #undef HASH_FILL_NA_PROPAGATE_CMP #undef HASH_FILL_NA_PROPAGATE #undef HASH_FILL #define HASH_FILL_BARRIER(HASHER) \ for (R_len_t i = 0; i < size; ++i) { \ p[i] = hash_combine(p[i], HASHER(x, i)); \ } #define HASH_FILL_BARRIER_NA_PROPAGATE(HASHER) \ for (R_len_t i = 0; i < size; ++i) { \ uint32_t h = p[i]; \ if (h == HASH_MISSING) { \ continue; \ } \ uint32_t elt_hash = HASHER(x, i); \ if (elt_hash == HASH_MISSING) { \ p[i] = HASH_MISSING; \ } else { \ p[i] = hash_combine(p[i], elt_hash); \ } \ } static void list_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_BARRIER(list_hash_scalar_na_equal); } static void list_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_BARRIER_NA_PROPAGATE(list_hash_scalar_na_propagate); } #undef HASH_FILL_BARRIER_NA_PROPAGATE #undef HASH_FILL_BARRIER static void df_hash_fill(uint32_t* p, R_len_t size, SEXP x, bool na_equal) { R_len_t ncol = Rf_length(x); for (R_len_t i = 0; i < ncol; ++i) { SEXP col = VECTOR_ELT(x, i); hash_fill(p, size, col, na_equal); } } // [[ register() ]] SEXP vctrs_hash(SEXP x) { x = PROTECT(vec_proxy_equal(x)); R_len_t n = vec_size(x); SEXP out = PROTECT(Rf_allocVector(RAWSXP, n * sizeof(uint32_t))); uint32_t* p = (uint32_t*) RAW(out); memset(p, 0, n * sizeof(uint32_t)); hash_fill(p, n, x, true); UNPROTECT(2); return out; } vctrs/src/arg-counter.h0000644000176200001440000000336513657251762014626 0ustar liggesusers#ifndef VCTRS_ARG_COUNTER_H #define VCTRS_ARG_COUNTER_H struct counters { /* public: */ // Argument tags for the current value of the reduction (the result // so far) and the next value. These handles typically point to the // local counter args, but might also point to external arg objects // like the initial current arg, or a splice box counter arg. struct vctrs_arg* curr_arg; struct vctrs_arg* next_arg; /* private: */ // Global counters R_len_t curr; R_len_t next; SEXP names; R_len_t names_curr; R_len_t names_next; // `names` might be from a splice box whose reduction has already // finished. We protect those from up high. PROTECT_INDEX names_pi; // Local counters for splice boxes. Since the tags are generated // lazily, we need two counter states to handle the // `vec_c(!!!list(foo = 1), !!!list(bar = 2))` case. struct counters* next_box_counters; struct counters* prev_box_counters; // Actual counter args are stored here struct arg_data_counter curr_counter_data; struct arg_data_counter next_counter_data; struct vctrs_arg curr_counter; struct vctrs_arg next_counter; }; /** * Swap the argument tags of the reduction * * There are two counters used for generating argument tags when an * error occur during a reduction. The first represent the result so * far, and the second the next input. Call `counters_shift()` to set * the counter of the next input as current counter, and start * iterating with a new counter for the next input. */ void counters_shift(struct counters* counters); SEXP reduce(SEXP current, struct vctrs_arg* current_arg, SEXP rest, SEXP (*impl)(SEXP current, SEXP next, struct counters* counters, void* data), void* data); #endif vctrs/src/type-date-time.c0000644000176200001440000003234214042540502015176 0ustar liggesusers#include "vctrs.h" #include "owned.h" #include "utils.h" static SEXP new_date(SEXP x); static SEXP new_datetime(SEXP x, SEXP tzone); static SEXP new_empty_datetime(SEXP tzone); static SEXP date_validate(SEXP x); static SEXP datetime_validate(SEXP x); static SEXP datetime_validate_tzone(SEXP x); static SEXP datetime_validate_type(SEXP x); static SEXP datetime_rezone(SEXP x, SEXP tzone); static SEXP tzone_get(SEXP x); static SEXP tzone_union(SEXP x_tzone, SEXP y_tzone); static bool tzone_equal(SEXP x_tzone, SEXP y_tzone); static SEXP r_as_date(SEXP x); static SEXP r_as_posixct(SEXP x, SEXP tzone); static SEXP r_as_posixlt(SEXP x, SEXP tzone); static SEXP r_date_as_character(SEXP x); static SEXP r_chr_date_as_posixct(SEXP x, SEXP tzone); static SEXP r_chr_date_as_posixlt(SEXP x, SEXP tzone); static SEXP posixlt_as_posixct_impl(SEXP x, SEXP tzone); static SEXP posixct_as_posixlt_impl(SEXP x, SEXP tzone); // ----------------------------------------------------------------------------- // ptype2 // [[ include("vctrs.h") ]] SEXP date_datetime_ptype2(SEXP x, SEXP y) { SEXP x_class = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP x_first_class = STRING_ELT(x_class, 0); SEXP tzone = (x_first_class == strings_date) ? tzone_get(y) : tzone_get(x); PROTECT(tzone); SEXP out = new_empty_datetime(tzone); UNPROTECT(2); return out; } // [[ include("vctrs.h") ]] SEXP datetime_datetime_ptype2(SEXP x, SEXP y) { SEXP x_tzone = PROTECT(tzone_get(x)); SEXP y_tzone = PROTECT(tzone_get(y)); // Never allocates SEXP tzone = tzone_union(x_tzone, y_tzone); SEXP out = new_empty_datetime(tzone); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- // cast // [[ include("vctrs.h") ]] SEXP date_as_date(SEXP x) { return date_validate(x); } // [[ include("vctrs.h") ]] SEXP date_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); // Date -> character -> POSIXct // This is the only way to retain the same clock time SEXP out = PROTECT(r_date_as_character(x)); out = PROTECT(r_chr_date_as_posixct(out, tzone)); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP date_as_posixlt(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); // Date -> character -> POSIXlt // This is the only way to retain the same clock time SEXP out = PROTECT(r_date_as_character(x)); out = PROTECT(r_chr_date_as_posixlt(out, tzone)); UNPROTECT(3); return out; } static SEXP posixt_as_date(SEXP ct, SEXP lt, bool* lossy); // [[ include("vctrs.h") ]] SEXP posixct_as_date(SEXP x, bool* lossy) { SEXP ct = PROTECT(datetime_validate(x)); SEXP tzone = PROTECT(tzone_get(ct)); SEXP lt = PROTECT(posixct_as_posixlt_impl(ct, tzone)); SEXP out = posixt_as_date(ct, lt, lossy); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP posixlt_as_date(SEXP x, bool* lossy) { SEXP lt = x; SEXP tzone = PROTECT(tzone_get(lt)); SEXP ct = PROTECT(posixlt_as_posixct_impl(lt, tzone)); SEXP out = posixt_as_date(ct, lt, lossy); UNPROTECT(2); return out; } // POSIXct is required for lossy checking. // POSIXlt is required for converting to Date. // `as.Date.POSIXct()` must go through `as.POSIXlt()`, so the POSIXct // time alone is not enough. static SEXP posixt_as_date(SEXP ct, SEXP lt, bool* lossy) { ct = PROTECT(datetime_validate(ct)); const double* p_ct = REAL(ct); SEXP out = PROTECT(r_as_date(lt)); SEXP roundtrip = PROTECT(date_as_posixct(out, ct)); const double* p_roundtrip = REAL(roundtrip); const R_len_t size = Rf_length(out); for (R_len_t i = 0; i < size; ++i) { const double ct_elt = p_ct[i]; // `NaN` and `NA` always convert without issue if (isnan(ct_elt)) { continue; } const double roundtrip_elt = p_roundtrip[i]; if (ct_elt != roundtrip_elt) { *lossy = true; UNPROTECT(3); return R_NilValue; } } UNPROTECT(3); return out; } static SEXP posixct_as_posixct_impl(SEXP x, SEXP tzone); // [[ include("vctrs.h") ]] SEXP posixct_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixct_as_posixct_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixct_as_posixct_impl(SEXP x, SEXP tzone) { x = PROTECT(datetime_validate(x)); SEXP out = datetime_rezone(x, tzone); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP posixlt_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixlt_as_posixct_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixlt_as_posixct_impl(SEXP x, SEXP tzone) { SEXP x_tzone = PROTECT(tzone_get(x)); x = PROTECT(r_as_posixct(x, x_tzone)); SEXP out = posixct_as_posixct_impl(x, tzone); UNPROTECT(2); return out; } // [[ include("vctrs.h") ]] SEXP posixct_as_posixlt(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixct_as_posixlt_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixct_as_posixlt_impl(SEXP x, SEXP tzone) { return r_as_posixlt(x, tzone); } // [[ include("vctrs.h") ]] SEXP posixlt_as_posixlt(SEXP x, SEXP to) { SEXP x_tzone = PROTECT(tzone_get(x)); SEXP to_tzone = PROTECT(tzone_get(to)); if (tzone_equal(x_tzone, to_tzone)) { UNPROTECT(2); return x; } SEXP out = x; // `as.POSIXlt.default()` doesn't respect `tz` so we have to do: // POSIXlt -> POSIXct -> POSIXct -> POSIXlt out = PROTECT(posixlt_as_posixct_impl(out, x_tzone)); out = PROTECT(posixct_as_posixct_impl(out, to_tzone)); out = PROTECT(posixct_as_posixlt_impl(out, to_tzone)); UNPROTECT(5); return out; } // ----------------------------------------------------------------------------- // restore // [[ include("vctrs.h") ]] SEXP vec_date_restore(SEXP x, SEXP to, const enum vctrs_owned owned) { SEXP out = PROTECT(vec_restore_default(x, to, owned)); out = date_validate(out); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP vec_posixct_restore(SEXP x, SEXP to, const enum vctrs_owned owned) { SEXP out = PROTECT(vec_restore_default(x, to, owned)); out = datetime_validate(out); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP vec_posixlt_restore(SEXP x, SEXP to, const enum vctrs_owned owned) { SEXP out = PROTECT(vec_restore_default(x, to, owned)); out = datetime_validate_tzone(out); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_new_date(SEXP x) { return new_date(x); } static SEXP new_date(SEXP x) { if (TYPEOF(x) != REALSXP) { Rf_errorcall(R_NilValue, "`x` must be a double vector."); } SEXP names = PROTECT(r_names(x)); SEXP out = PROTECT(r_clone_referenced(x)); SET_ATTRIB(out, R_NilValue); r_poke_names(out, names); r_poke_class(out, classes_date); UNPROTECT(2); return out; } // [[ register() ]] SEXP vctrs_new_datetime(SEXP x, SEXP tzone) { return new_datetime(x, tzone); } static SEXP new_datetime(SEXP x, SEXP tzone) { if (TYPEOF(x) != REALSXP) { Rf_errorcall(R_NilValue, "`x` must be a double vector."); } // Convenience special case where we allow a // null `tzone` to represent local time if (tzone == R_NilValue) { tzone = chrs_empty; } if (TYPEOF(tzone) != STRSXP) { Rf_errorcall(R_NilValue, "`tzone` must be a character vector or `NULL`."); } SEXP names = PROTECT(r_names(x)); SEXP out = PROTECT(r_clone_referenced(x)); SET_ATTRIB(out, R_NilValue); r_poke_names(out, names); r_poke_class(out, classes_posixct); Rf_setAttrib(out, syms_tzone, tzone); UNPROTECT(2); return out; } static SEXP new_empty_datetime(SEXP tzone) { return new_datetime(vctrs_shared_empty_dbl, tzone); } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_date_validate(SEXP x) { return date_validate(x); } // Ensure that a `Date` is internally stored as a double vector static SEXP date_validate(SEXP x) { switch (TYPEOF(x)) { case REALSXP: return x; case INTSXP: // Keeps attributes return Rf_coerceVector(x, REALSXP); default: stop_internal("date_validate", "Corrupt `Date` with unknown type %s.", Rf_type2char(TYPEOF(x))); } } // [[ register() ]] SEXP vctrs_datetime_validate(SEXP x) { return datetime_validate(x); } // Ensure that a `POSIXct` is internally stored as a double vector. // Also checks that the `tzone` attribute is non-NULL. static SEXP datetime_validate(SEXP x) { x = PROTECT(datetime_validate_tzone(x)); x = PROTECT(datetime_validate_type(x)); UNPROTECT(2); return x; } static SEXP datetime_validate_tzone(SEXP x) { SEXP tzone = Rf_getAttrib(x, syms_tzone); if (tzone != R_NilValue) { return x; } x = PROTECT(r_clone_referenced(x)); Rf_setAttrib(x, syms_tzone, chrs_empty); UNPROTECT(1); return x; } static SEXP datetime_validate_type(SEXP x) { switch (TYPEOF(x)) { case REALSXP: return x; case INTSXP: // Keeps attributes return Rf_coerceVector(x, REALSXP); default: stop_internal("datetime_validate_type", "Corrupt `POSIXct` with unknown type %s.", Rf_type2char(TYPEOF(x))); } never_reached("datetime_validate_type"); } // ----------------------------------------------------------------------------- // Same underlying numeric representation, different `tzone` static SEXP datetime_rezone(SEXP x, SEXP tzone) { SEXP x_tzone = PROTECT(tzone_get(x)); if (tzone_equal(x_tzone, tzone)) { UNPROTECT(1); return x; } SEXP out = PROTECT(r_clone_referenced(x)); Rf_setAttrib(out, syms_tzone, tzone); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- // Time zone utilities static SEXP tzone_get(SEXP x) { SEXP tzone = PROTECT(Rf_getAttrib(x, syms_tzone)); if (tzone == R_NilValue) { UNPROTECT(1); return chrs_empty; } R_len_t size = Rf_length(tzone); if (size == 1) { UNPROTECT(1); return tzone; } if (size == 0) { Rf_errorcall(R_NilValue, "Corrupt datetime with 0-length `tzone` attribute"); } // If there are multiple, only take the first SEXP out = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(out, 0, STRING_ELT(tzone, 0)); UNPROTECT(2); return out; } // `tzone_get()` is guaranteed to return 1 element static inline bool tzone_is_local(SEXP tzone) { return STRING_ELT(tzone, 0) == strings_empty; } static SEXP tzone_union(SEXP x_tzone, SEXP y_tzone) { if (tzone_is_local(x_tzone)) { return y_tzone; } else { return x_tzone; } } // `tzone_get()` is guaranteed to return 1 element static bool tzone_equal(SEXP x_tzone, SEXP y_tzone) { // Equal objects? if (x_tzone == y_tzone) { return true; } // Equal CHARSXPs? SEXP x_string = STRING_ELT(x_tzone, 0); SEXP y_string = STRING_ELT(y_tzone, 0); if (x_string == y_string) { return true; } // Equal C char? const char* x_tzone_char = CHAR(x_string); const char* y_tzone_char = CHAR(y_string); return !strcmp(x_tzone_char, y_tzone_char); } // ----------------------------------------------------------------------------- static SEXP syms_tz = NULL; static SEXP syms_as_date = NULL; static SEXP fns_as_date = NULL; static SEXP r_as_date(SEXP x) { return vctrs_dispatch1(syms_as_date, fns_as_date, syms_x, x); } static SEXP syms_as_posixct = NULL; static SEXP fns_as_posixct = NULL; static SEXP r_as_posixct(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_as_posixct, fns_as_posixct, syms_x, x, syms_tz, tzone); } static SEXP syms_as_posixlt = NULL; static SEXP fns_as_posixlt = NULL; static SEXP r_as_posixlt(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_as_posixlt, fns_as_posixlt, syms_x, x, syms_tz, tzone); } static SEXP syms_date_as_character = NULL; static SEXP fns_date_as_character = NULL; static SEXP r_date_as_character(SEXP x) { return vctrs_dispatch1(syms_date_as_character, fns_date_as_character, syms_x, x); } static SEXP syms_chr_date_as_posixct = NULL; static SEXP fns_chr_date_as_posixct = NULL; static SEXP r_chr_date_as_posixct(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_chr_date_as_posixct, fns_chr_date_as_posixct, syms_x, x, syms_tzone, tzone); } static SEXP syms_chr_date_as_posixlt = NULL; static SEXP fns_chr_date_as_posixlt = NULL; static SEXP r_chr_date_as_posixlt(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_chr_date_as_posixlt, fns_chr_date_as_posixlt, syms_x, x, syms_tzone, tzone); } // ----------------------------------------------------------------------------- void vctrs_init_type_date_time(SEXP ns) { syms_tz = Rf_install("tz"); syms_as_date = Rf_install("as.Date"); syms_as_posixct = Rf_install("as.POSIXct"); syms_as_posixlt = Rf_install("as.POSIXlt"); syms_date_as_character = Rf_install("date_as_character"); syms_chr_date_as_posixct = Rf_install("chr_date_as_posixct"); syms_chr_date_as_posixlt = Rf_install("chr_date_as_posixlt"); fns_as_date = r_env_get(R_BaseEnv, syms_as_date); fns_as_posixct = r_env_get(R_BaseEnv, syms_as_posixct); fns_as_posixlt = r_env_get(R_BaseEnv, syms_as_posixlt); fns_date_as_character = r_env_get(ns, syms_date_as_character); fns_chr_date_as_posixct = r_env_get(ns, syms_chr_date_as_posixct); fns_chr_date_as_posixlt = r_env_get(ns, syms_chr_date_as_posixlt); } vctrs/src/typeof2-s3.c0000644000176200001440000007577114042540502014276 0ustar liggesusers#include "vctrs.h" #include "utils.h" static enum vctrs_type2_s3 vec_typeof2_s3_impl2(SEXP x, SEXP y, enum vctrs_type type_y, int* left); // [[ include("ptype2.h") ]] enum vctrs_type2_s3 vec_typeof2_s3_impl(SEXP x, SEXP y, enum vctrs_type type_x, enum vctrs_type type_y, int* left) { switch (type_x) { case vctrs_type_null: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_null_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_null_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_null_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_null_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_null_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_null_bare_tibble; default: *left = 0; return vctrs_type2_s3_null_unknown; } } case vctrs_type_unspecified: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_unspecified_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_unspecified_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_unspecified_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_unspecified_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_unspecified_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_unspecified_bare_tibble; default: *left = 0; return vctrs_type2_s3_unspecified_unknown; } } case vctrs_type_logical: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_logical_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_logical_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_logical_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_logical_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_logical_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_logical_bare_tibble; default: *left = 0; return vctrs_type2_s3_logical_unknown; } } case vctrs_type_integer: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_integer_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_integer_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_integer_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_integer_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_integer_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_integer_bare_tibble; default: *left = 0; return vctrs_type2_s3_integer_unknown; } } case vctrs_type_double: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_double_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_double_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_double_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_double_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_double_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_double_bare_tibble; default: *left = 0; return vctrs_type2_s3_double_unknown; } } case vctrs_type_complex: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_complex_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_complex_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_complex_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_complex_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_complex_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_complex_bare_tibble; default: *left = 0; return vctrs_type2_s3_complex_unknown; } } case vctrs_type_character: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_character_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_character_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_character_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_character_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_character_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_character_bare_tibble; default: *left = 0; return vctrs_type2_s3_character_unknown; } } case vctrs_type_raw: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_raw_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_raw_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_raw_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_raw_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_raw_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_raw_bare_tibble; default: *left = 0; return vctrs_type2_s3_raw_unknown; } } case vctrs_type_list: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_list_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_list_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_list_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_list_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_list_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_list_bare_tibble; default: *left = 0; return vctrs_type2_s3_list_unknown; } } case vctrs_type_dataframe: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_dataframe_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_dataframe_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_dataframe_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_dataframe_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_dataframe_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_dataframe_bare_tibble; default: *left = 0; return vctrs_type2_s3_dataframe_unknown; } } case vctrs_type_scalar: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 0; return vctrs_type2_s3_scalar_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_scalar_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_scalar_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_scalar_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_scalar_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_scalar_bare_tibble; default: *left = 0; return vctrs_type2_s3_scalar_unknown; } } case vctrs_type_s3: { return vec_typeof2_s3_impl2(x, y, type_y, left); }} never_reached("vec_typeof2_s3_impl()"); } static enum vctrs_type2_s3 vec_typeof2_s3_impl2(SEXP x, SEXP y, enum vctrs_type type_y, int* left) { switch (class_type(x)) { case vctrs_class_bare_factor: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_s3_null_bare_factor; case vctrs_type_unspecified: *left = 1; return vctrs_type2_s3_unspecified_bare_factor; case vctrs_type_logical: *left = 1; return vctrs_type2_s3_logical_bare_factor; case vctrs_type_integer: *left = 1; return vctrs_type2_s3_integer_bare_factor; case vctrs_type_double: *left = 1; return vctrs_type2_s3_double_bare_factor; case vctrs_type_complex: *left = 1; return vctrs_type2_s3_complex_bare_factor; case vctrs_type_character: *left = 1; return vctrs_type2_s3_character_bare_factor; case vctrs_type_raw: *left = 1; return vctrs_type2_s3_raw_bare_factor; case vctrs_type_list: *left = 1; return vctrs_type2_s3_list_bare_factor; case vctrs_type_dataframe: *left = 1; return vctrs_type2_s3_dataframe_bare_factor; case vctrs_type_scalar: *left = 1; return vctrs_type2_s3_scalar_bare_factor; case vctrs_type_s3: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = -1; return vctrs_type2_s3_bare_factor_bare_factor; case vctrs_class_bare_ordered: *left = 0; return vctrs_type2_s3_bare_factor_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_bare_factor_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_bare_factor_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_bare_factor_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_bare_factor_bare_tibble; default: *left = 0; return vctrs_type2_s3_bare_factor_unknown; } }} } case vctrs_class_bare_ordered: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_s3_null_bare_ordered; case vctrs_type_unspecified: *left = 1; return vctrs_type2_s3_unspecified_bare_ordered; case vctrs_type_logical: *left = 1; return vctrs_type2_s3_logical_bare_ordered; case vctrs_type_integer: *left = 1; return vctrs_type2_s3_integer_bare_ordered; case vctrs_type_double: *left = 1; return vctrs_type2_s3_double_bare_ordered; case vctrs_type_complex: *left = 1; return vctrs_type2_s3_complex_bare_ordered; case vctrs_type_character: *left = 1; return vctrs_type2_s3_character_bare_ordered; case vctrs_type_raw: *left = 1; return vctrs_type2_s3_raw_bare_ordered; case vctrs_type_list: *left = 1; return vctrs_type2_s3_list_bare_ordered; case vctrs_type_dataframe: *left = 1; return vctrs_type2_s3_dataframe_bare_ordered; case vctrs_type_scalar: *left = 1; return vctrs_type2_s3_scalar_bare_ordered; case vctrs_type_s3: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 1; return vctrs_type2_s3_bare_factor_bare_ordered; case vctrs_class_bare_ordered: *left = -1; return vctrs_type2_s3_bare_ordered_bare_ordered; case vctrs_class_bare_date: *left = 0; return vctrs_type2_s3_bare_ordered_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_bare_ordered_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_bare_ordered_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_bare_ordered_bare_tibble; default: *left = 0; return vctrs_type2_s3_bare_ordered_unknown; } }} } case vctrs_class_bare_date: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_s3_null_bare_date; case vctrs_type_unspecified: *left = 1; return vctrs_type2_s3_unspecified_bare_date; case vctrs_type_logical: *left = 1; return vctrs_type2_s3_logical_bare_date; case vctrs_type_integer: *left = 1; return vctrs_type2_s3_integer_bare_date; case vctrs_type_double: *left = 1; return vctrs_type2_s3_double_bare_date; case vctrs_type_complex: *left = 1; return vctrs_type2_s3_complex_bare_date; case vctrs_type_character: *left = 1; return vctrs_type2_s3_character_bare_date; case vctrs_type_raw: *left = 1; return vctrs_type2_s3_raw_bare_date; case vctrs_type_list: *left = 1; return vctrs_type2_s3_list_bare_date; case vctrs_type_dataframe: *left = 1; return vctrs_type2_s3_dataframe_bare_date; case vctrs_type_scalar: *left = 1; return vctrs_type2_s3_scalar_bare_date; case vctrs_type_s3: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 1; return vctrs_type2_s3_bare_factor_bare_date; case vctrs_class_bare_ordered: *left = 1; return vctrs_type2_s3_bare_ordered_bare_date; case vctrs_class_bare_date: *left = -1; return vctrs_type2_s3_bare_date_bare_date; case vctrs_class_bare_posixct: *left = 0; return vctrs_type2_s3_bare_date_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_bare_date_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_bare_date_bare_tibble; default: *left = 0; return vctrs_type2_s3_bare_date_unknown; } }} } case vctrs_class_bare_posixct: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_s3_null_bare_posixct; case vctrs_type_unspecified: *left = 1; return vctrs_type2_s3_unspecified_bare_posixct; case vctrs_type_logical: *left = 1; return vctrs_type2_s3_logical_bare_posixct; case vctrs_type_integer: *left = 1; return vctrs_type2_s3_integer_bare_posixct; case vctrs_type_double: *left = 1; return vctrs_type2_s3_double_bare_posixct; case vctrs_type_complex: *left = 1; return vctrs_type2_s3_complex_bare_posixct; case vctrs_type_character: *left = 1; return vctrs_type2_s3_character_bare_posixct; case vctrs_type_raw: *left = 1; return vctrs_type2_s3_raw_bare_posixct; case vctrs_type_list: *left = 1; return vctrs_type2_s3_list_bare_posixct; case vctrs_type_dataframe: *left = 1; return vctrs_type2_s3_dataframe_bare_posixct; case vctrs_type_scalar: *left = 1; return vctrs_type2_s3_scalar_bare_posixct; case vctrs_type_s3: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 1; return vctrs_type2_s3_bare_factor_bare_posixct; case vctrs_class_bare_ordered: *left = 1; return vctrs_type2_s3_bare_ordered_bare_posixct; case vctrs_class_bare_date: *left = 1; return vctrs_type2_s3_bare_date_bare_posixct; case vctrs_class_bare_posixct: *left = -1; return vctrs_type2_s3_bare_posixct_bare_posixct; case vctrs_class_bare_posixlt: *left = 0; return vctrs_type2_s3_bare_posixct_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_bare_posixct_bare_tibble; default: *left = 0; return vctrs_type2_s3_bare_posixct_unknown; } }} } case vctrs_class_bare_posixlt: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_s3_null_bare_posixlt; case vctrs_type_unspecified: *left = 1; return vctrs_type2_s3_unspecified_bare_posixlt; case vctrs_type_logical: *left = 1; return vctrs_type2_s3_logical_bare_posixlt; case vctrs_type_integer: *left = 1; return vctrs_type2_s3_integer_bare_posixlt; case vctrs_type_double: *left = 1; return vctrs_type2_s3_double_bare_posixlt; case vctrs_type_complex: *left = 1; return vctrs_type2_s3_complex_bare_posixlt; case vctrs_type_character: *left = 1; return vctrs_type2_s3_character_bare_posixlt; case vctrs_type_raw: *left = 1; return vctrs_type2_s3_raw_bare_posixlt; case vctrs_type_list: *left = 1; return vctrs_type2_s3_list_bare_posixlt; case vctrs_type_dataframe: *left = 1; return vctrs_type2_s3_dataframe_bare_posixlt; case vctrs_type_scalar: *left = 1; return vctrs_type2_s3_scalar_bare_posixlt; case vctrs_type_s3: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 1; return vctrs_type2_s3_bare_factor_bare_posixlt; case vctrs_class_bare_ordered: *left = 1; return vctrs_type2_s3_bare_ordered_bare_posixlt; case vctrs_class_bare_date: *left = 1; return vctrs_type2_s3_bare_date_bare_posixlt; case vctrs_class_bare_posixct: *left = 1; return vctrs_type2_s3_bare_posixct_bare_posixlt; case vctrs_class_bare_posixlt: *left = -1; return vctrs_type2_s3_bare_posixlt_bare_posixlt; case vctrs_class_bare_tibble: *left = 0; return vctrs_type2_s3_bare_posixlt_bare_tibble; default: *left = 0; return vctrs_type2_s3_bare_posixlt_unknown; } }} } case vctrs_class_bare_tibble: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_s3_null_bare_tibble; case vctrs_type_unspecified: *left = 1; return vctrs_type2_s3_unspecified_bare_tibble; case vctrs_type_logical: *left = 1; return vctrs_type2_s3_logical_bare_tibble; case vctrs_type_integer: *left = 1; return vctrs_type2_s3_integer_bare_tibble; case vctrs_type_double: *left = 1; return vctrs_type2_s3_double_bare_tibble; case vctrs_type_complex: *left = 1; return vctrs_type2_s3_complex_bare_tibble; case vctrs_type_character: *left = 1; return vctrs_type2_s3_character_bare_tibble; case vctrs_type_raw: *left = 1; return vctrs_type2_s3_raw_bare_tibble; case vctrs_type_list: *left = 1; return vctrs_type2_s3_list_bare_tibble; case vctrs_type_dataframe: *left = 1; return vctrs_type2_s3_dataframe_bare_tibble; case vctrs_type_scalar: *left = 1; return vctrs_type2_s3_scalar_bare_tibble; case vctrs_type_s3: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 1; return vctrs_type2_s3_bare_factor_bare_tibble; case vctrs_class_bare_ordered: *left = 1; return vctrs_type2_s3_bare_ordered_bare_tibble; case vctrs_class_bare_date: *left = 1; return vctrs_type2_s3_bare_date_bare_tibble; case vctrs_class_bare_posixct: *left = 1; return vctrs_type2_s3_bare_posixct_bare_tibble; case vctrs_class_bare_posixlt: *left = 1; return vctrs_type2_s3_bare_posixlt_bare_tibble; case vctrs_class_bare_tibble: *left = -1; return vctrs_type2_s3_bare_tibble_bare_tibble; default: *left = 0; return vctrs_type2_s3_bare_tibble_unknown; } }} } default: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_s3_null_unknown; case vctrs_type_unspecified: *left = 1; return vctrs_type2_s3_unspecified_unknown; case vctrs_type_logical: *left = 1; return vctrs_type2_s3_logical_unknown; case vctrs_type_integer: *left = 1; return vctrs_type2_s3_integer_unknown; case vctrs_type_double: *left = 1; return vctrs_type2_s3_double_unknown; case vctrs_type_complex: *left = 1; return vctrs_type2_s3_complex_unknown; case vctrs_type_character: *left = 1; return vctrs_type2_s3_character_unknown; case vctrs_type_raw: *left = 1; return vctrs_type2_s3_raw_unknown; case vctrs_type_list: *left = 1; return vctrs_type2_s3_list_unknown; case vctrs_type_dataframe: *left = 1; return vctrs_type2_s3_dataframe_unknown; case vctrs_type_scalar: *left = 1; return vctrs_type2_s3_scalar_unknown; case vctrs_type_s3: { switch (class_type(y)) { case vctrs_class_bare_factor: *left = 1; return vctrs_type2_s3_bare_factor_unknown; case vctrs_class_bare_ordered: *left = 1; return vctrs_type2_s3_bare_ordered_unknown; case vctrs_class_bare_date: *left = 1; return vctrs_type2_s3_bare_date_unknown; case vctrs_class_bare_posixct: *left = 1; return vctrs_type2_s3_bare_posixct_unknown; case vctrs_class_bare_posixlt: *left = 1; return vctrs_type2_s3_bare_posixlt_unknown; case vctrs_class_bare_tibble: *left = 1; return vctrs_type2_s3_bare_tibble_unknown; default: *left = -1; return vctrs_type2_s3_unknown_unknown; } }} }} never_reached("vec_typeof2_s3_impl2()"); } enum vctrs_type2_s3 vec_typeof2_s3(SEXP x, SEXP y) { int _; return vec_typeof2_s3_impl(x, y, vec_typeof(x), vec_typeof(y), &_); } const char* vctrs_type2_s3_as_str(enum vctrs_type2_s3 type) { switch (type) { case vctrs_type2_s3_null_bare_factor: return "vctrs_type2_s3_null_bare_factor"; case vctrs_type2_s3_null_bare_ordered: return "vctrs_type2_s3_null_bare_ordered"; case vctrs_type2_s3_null_bare_date: return "vctrs_type2_s3_null_bare_date"; case vctrs_type2_s3_null_bare_posixct: return "vctrs_type2_s3_null_bare_posixct"; case vctrs_type2_s3_null_bare_posixlt: return "vctrs_type2_s3_null_bare_posixlt"; case vctrs_type2_s3_null_bare_tibble: return "vctrs_type2_s3_null_bare_tibble"; case vctrs_type2_s3_null_unknown: return "vctrs_type2_s3_null_unknown"; case vctrs_type2_s3_unspecified_bare_factor: return "vctrs_type2_s3_unspecified_bare_factor"; case vctrs_type2_s3_unspecified_bare_ordered: return "vctrs_type2_s3_unspecified_bare_ordered"; case vctrs_type2_s3_unspecified_bare_date: return "vctrs_type2_s3_unspecified_bare_date"; case vctrs_type2_s3_unspecified_bare_posixct: return "vctrs_type2_s3_unspecified_bare_posixct"; case vctrs_type2_s3_unspecified_bare_posixlt: return "vctrs_type2_s3_unspecified_bare_posixlt"; case vctrs_type2_s3_unspecified_bare_tibble: return "vctrs_type2_s3_unspecified_bare_tibble"; case vctrs_type2_s3_unspecified_unknown: return "vctrs_type2_s3_unspecified_unknown"; case vctrs_type2_s3_logical_bare_factor: return "vctrs_type2_s3_logical_bare_factor"; case vctrs_type2_s3_logical_bare_ordered: return "vctrs_type2_s3_logical_bare_ordered"; case vctrs_type2_s3_logical_bare_date: return "vctrs_type2_s3_logical_bare_date"; case vctrs_type2_s3_logical_bare_posixct: return "vctrs_type2_s3_logical_bare_posixct"; case vctrs_type2_s3_logical_bare_posixlt: return "vctrs_type2_s3_logical_bare_posixlt"; case vctrs_type2_s3_logical_bare_tibble: return "vctrs_type2_s3_logical_bare_tibble"; case vctrs_type2_s3_logical_unknown: return "vctrs_type2_s3_logical_unknown"; case vctrs_type2_s3_integer_bare_factor: return "vctrs_type2_s3_integer_bare_factor"; case vctrs_type2_s3_integer_bare_ordered: return "vctrs_type2_s3_integer_bare_ordered"; case vctrs_type2_s3_integer_bare_date: return "vctrs_type2_s3_integer_bare_date"; case vctrs_type2_s3_integer_bare_posixct: return "vctrs_type2_s3_integer_bare_posixct"; case vctrs_type2_s3_integer_bare_posixlt: return "vctrs_type2_s3_integer_bare_posixlt"; case vctrs_type2_s3_integer_bare_tibble: return "vctrs_type2_s3_integer_bare_tibble"; case vctrs_type2_s3_integer_unknown: return "vctrs_type2_s3_integer_unknown"; case vctrs_type2_s3_double_bare_factor: return "vctrs_type2_s3_double_bare_factor"; case vctrs_type2_s3_double_bare_ordered: return "vctrs_type2_s3_double_bare_ordered"; case vctrs_type2_s3_double_bare_date: return "vctrs_type2_s3_double_bare_date"; case vctrs_type2_s3_double_bare_posixct: return "vctrs_type2_s3_double_bare_posixct"; case vctrs_type2_s3_double_bare_posixlt: return "vctrs_type2_s3_double_bare_posixlt"; case vctrs_type2_s3_double_bare_tibble: return "vctrs_type2_s3_double_bare_tibble"; case vctrs_type2_s3_double_unknown: return "vctrs_type2_s3_double_unknown"; case vctrs_type2_s3_complex_bare_factor: return "vctrs_type2_s3_complex_bare_factor"; case vctrs_type2_s3_complex_bare_ordered: return "vctrs_type2_s3_complex_bare_ordered"; case vctrs_type2_s3_complex_bare_date: return "vctrs_type2_s3_complex_bare_date"; case vctrs_type2_s3_complex_bare_posixct: return "vctrs_type2_s3_complex_bare_posixct"; case vctrs_type2_s3_complex_bare_posixlt: return "vctrs_type2_s3_complex_bare_posixlt"; case vctrs_type2_s3_complex_bare_tibble: return "vctrs_type2_s3_complex_bare_tibble"; case vctrs_type2_s3_complex_unknown: return "vctrs_type2_s3_complex_unknown"; case vctrs_type2_s3_character_bare_factor: return "vctrs_type2_s3_character_bare_factor"; case vctrs_type2_s3_character_bare_ordered: return "vctrs_type2_s3_character_bare_ordered"; case vctrs_type2_s3_character_bare_date: return "vctrs_type2_s3_character_bare_date"; case vctrs_type2_s3_character_bare_posixct: return "vctrs_type2_s3_character_bare_posixct"; case vctrs_type2_s3_character_bare_posixlt: return "vctrs_type2_s3_character_bare_posixlt"; case vctrs_type2_s3_character_bare_tibble: return "vctrs_type2_s3_character_bare_tibble"; case vctrs_type2_s3_character_unknown: return "vctrs_type2_s3_character_unknown"; case vctrs_type2_s3_raw_bare_factor: return "vctrs_type2_s3_raw_bare_factor"; case vctrs_type2_s3_raw_bare_ordered: return "vctrs_type2_s3_raw_bare_ordered"; case vctrs_type2_s3_raw_bare_date: return "vctrs_type2_s3_raw_bare_date"; case vctrs_type2_s3_raw_bare_posixct: return "vctrs_type2_s3_raw_bare_posixct"; case vctrs_type2_s3_raw_bare_posixlt: return "vctrs_type2_s3_raw_bare_posixlt"; case vctrs_type2_s3_raw_bare_tibble: return "vctrs_type2_s3_raw_bare_tibble"; case vctrs_type2_s3_raw_unknown: return "vctrs_type2_s3_raw_unknown"; case vctrs_type2_s3_list_bare_factor: return "vctrs_type2_s3_list_bare_factor"; case vctrs_type2_s3_list_bare_ordered: return "vctrs_type2_s3_list_bare_ordered"; case vctrs_type2_s3_list_bare_date: return "vctrs_type2_s3_list_bare_date"; case vctrs_type2_s3_list_bare_posixct: return "vctrs_type2_s3_list_bare_posixct"; case vctrs_type2_s3_list_bare_posixlt: return "vctrs_type2_s3_list_bare_posixlt"; case vctrs_type2_s3_list_bare_tibble: return "vctrs_type2_s3_list_bare_tibble"; case vctrs_type2_s3_list_unknown: return "vctrs_type2_s3_list_unknown"; case vctrs_type2_s3_dataframe_bare_factor: return "vctrs_type2_s3_dataframe_bare_factor"; case vctrs_type2_s3_dataframe_bare_ordered: return "vctrs_type2_s3_dataframe_bare_ordered"; case vctrs_type2_s3_dataframe_bare_date: return "vctrs_type2_s3_dataframe_bare_date"; case vctrs_type2_s3_dataframe_bare_posixct: return "vctrs_type2_s3_dataframe_bare_posixct"; case vctrs_type2_s3_dataframe_bare_posixlt: return "vctrs_type2_s3_dataframe_bare_posixlt"; case vctrs_type2_s3_dataframe_bare_tibble: return "vctrs_type2_s3_dataframe_bare_tibble"; case vctrs_type2_s3_dataframe_unknown: return "vctrs_type2_s3_dataframe_unknown"; case vctrs_type2_s3_scalar_bare_factor: return "vctrs_type2_s3_scalar_bare_factor"; case vctrs_type2_s3_scalar_bare_ordered: return "vctrs_type2_s3_scalar_bare_ordered"; case vctrs_type2_s3_scalar_bare_date: return "vctrs_type2_s3_scalar_bare_date"; case vctrs_type2_s3_scalar_bare_posixct: return "vctrs_type2_s3_scalar_bare_posixct"; case vctrs_type2_s3_scalar_bare_posixlt: return "vctrs_type2_s3_scalar_bare_posixlt"; case vctrs_type2_s3_scalar_bare_tibble: return "vctrs_type2_s3_scalar_bare_tibble"; case vctrs_type2_s3_scalar_unknown: return "vctrs_type2_s3_scalar_unknown"; case vctrs_type2_s3_bare_factor_bare_factor: return "vctrs_type2_s3_bare_factor_bare_factor"; case vctrs_type2_s3_bare_factor_bare_ordered: return "vctrs_type2_s3_bare_factor_bare_ordered"; case vctrs_type2_s3_bare_factor_bare_date: return "vctrs_type2_s3_bare_factor_bare_date"; case vctrs_type2_s3_bare_factor_bare_posixct: return "vctrs_type2_s3_bare_factor_bare_posixct"; case vctrs_type2_s3_bare_factor_bare_posixlt: return "vctrs_type2_s3_bare_factor_bare_posixlt"; case vctrs_type2_s3_bare_factor_bare_tibble: return "vctrs_type2_s3_bare_factor_bare_tibble"; case vctrs_type2_s3_bare_factor_unknown: return "vctrs_type2_s3_bare_factor_unknown"; case vctrs_type2_s3_bare_ordered_bare_ordered: return "vctrs_type2_s3_bare_ordered_bare_ordered"; case vctrs_type2_s3_bare_ordered_bare_date: return "vctrs_type2_s3_bare_ordered_bare_date"; case vctrs_type2_s3_bare_ordered_bare_posixct: return "vctrs_type2_s3_bare_ordered_bare_posixct"; case vctrs_type2_s3_bare_ordered_bare_posixlt: return "vctrs_type2_s3_bare_ordered_bare_posixlt"; case vctrs_type2_s3_bare_ordered_bare_tibble: return "vctrs_type2_s3_bare_ordered_bare_tibble"; case vctrs_type2_s3_bare_ordered_unknown: return "vctrs_type2_s3_bare_ordered_unknown"; case vctrs_type2_s3_bare_date_bare_date: return "vctrs_type2_s3_bare_date_bare_date"; case vctrs_type2_s3_bare_date_bare_posixct: return "vctrs_type2_s3_bare_date_bare_posixct"; case vctrs_type2_s3_bare_date_bare_posixlt: return "vctrs_type2_s3_bare_date_bare_posixlt"; case vctrs_type2_s3_bare_date_bare_tibble: return "vctrs_type2_s3_bare_date_bare_tibble"; case vctrs_type2_s3_bare_date_unknown: return "vctrs_type2_s3_bare_date_unknown"; case vctrs_type2_s3_bare_posixct_bare_posixct: return "vctrs_type2_s3_bare_posixct_bare_posixct"; case vctrs_type2_s3_bare_posixct_bare_posixlt: return "vctrs_type2_s3_bare_posixct_bare_posixlt"; case vctrs_type2_s3_bare_posixct_bare_tibble: return "vctrs_type2_s3_bare_posixct_bare_tibble"; case vctrs_type2_s3_bare_posixct_unknown: return "vctrs_type2_s3_bare_posixct_unknown"; case vctrs_type2_s3_bare_posixlt_bare_posixlt: return "vctrs_type2_s3_bare_posixlt_bare_posixlt"; case vctrs_type2_s3_bare_posixlt_bare_tibble: return "vctrs_type2_s3_bare_posixlt_bare_tibble"; case vctrs_type2_s3_bare_posixlt_unknown: return "vctrs_type2_s3_bare_posixlt_unknown"; case vctrs_type2_s3_bare_tibble_bare_tibble: return "vctrs_type2_s3_bare_tibble_bare_tibble"; case vctrs_type2_s3_bare_tibble_unknown: return "vctrs_type2_s3_bare_tibble_unknown"; case vctrs_type2_s3_unknown_unknown: return "vctrs_type2_s3_unknown_unknown"; } never_reached("vctrs_type2_s3_as_str"); } // [[ register() ]] SEXP vctrs_typeof2_s3(SEXP x, SEXP y) { enum vctrs_type2_s3 type = vec_typeof2_s3(x, y); return Rf_mkString(vctrs_type2_s3_as_str(type)); } vctrs/src/hash.h0000644000176200001440000000011213650511520013266 0ustar liggesusers#ifndef VCTRS_HASH_H #define VCTRS_HASH_H #define HASH_MISSING 1 #endif vctrs/src/cast-dispatch.c0000644000176200001440000000541414042540502015075 0ustar liggesusers#include "vctrs.h" #include "cast.h" #include "type-factor.h" #include "type-tibble.h" #include "utils.h" // [[ include("cast.h") ]] SEXP vec_cast_dispatch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy) { SEXP x = opts->x; SEXP to = opts->to; struct vctrs_arg* x_arg = opts->x_arg; struct vctrs_arg* to_arg = opts->to_arg; int dir = 0; enum vctrs_type2_s3 type2_s3 = vec_typeof2_s3_impl(x, to, x_type, to_type, &dir); switch (type2_s3) { case vctrs_type2_s3_character_bare_factor: if (dir == 0) { return chr_as_factor(x, to, lossy, to_arg); } else { return fct_as_character(x, x_arg); } case vctrs_type2_s3_character_bare_ordered: if (dir == 0) { return chr_as_ordered(x, to, lossy, to_arg); } else { return ord_as_character(x, x_arg); } case vctrs_type2_s3_bare_factor_bare_factor: return fct_as_factor(x, to, lossy, x_arg, to_arg); case vctrs_type2_s3_bare_ordered_bare_ordered: return ord_as_ordered(opts); case vctrs_type2_s3_bare_date_bare_posixct: if (dir == 0) { return date_as_posixct(x, to); } else { return posixct_as_date(x, lossy); } case vctrs_type2_s3_bare_date_bare_posixlt: if (dir == 0) { return date_as_posixlt(x, to); } else { return posixlt_as_date(x, lossy); } case vctrs_type2_s3_bare_posixct_bare_posixlt: if (dir == 0) { return posixct_as_posixlt(x, to); } else { return posixlt_as_posixct(x, to); } case vctrs_type2_s3_bare_date_bare_date: return date_as_date(x); case vctrs_type2_s3_bare_posixct_bare_posixct: return posixct_as_posixct(x, to); case vctrs_type2_s3_bare_posixlt_bare_posixlt: return posixlt_as_posixlt(x, to); case vctrs_type2_s3_dataframe_bare_tibble: if (dir == 0) { return tib_cast(opts); } else { return df_cast_opts(opts); } case vctrs_type2_s3_bare_tibble_bare_tibble: return tib_cast(opts); default: return R_NilValue; } } // [[ register() ]] SEXP vctrs_cast_dispatch_native(SEXP x, SEXP to, SEXP fallback_opts, SEXP x_arg, SEXP to_arg) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_to_arg = vec_as_arg(to_arg); const struct cast_opts c_opts = new_cast_opts(x, to, &c_x_arg, &c_to_arg, fallback_opts); bool lossy = false; SEXP out = vec_cast_dispatch_native(&c_opts, vec_typeof(x), vec_typeof(to), &lossy); if (lossy || out == R_NilValue) { return vec_cast_default(x, to, x_arg, to_arg, &c_opts.fallback); } else { return out; } } vctrs/src/slice-array.c0000644000176200001440000004273514042540502014570 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "strides.h" #define SLICE_SHAPED_INDEX(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ const CTYPE* x_data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_data) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ *out_data = NA_VALUE; \ continue; \ } \ \ loc += step; \ *out_data = x_data[loc]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ \ int size_index = p_info->p_index[0]; \ if (size_index == NA_INTEGER) { \ R_len_t out_n = p_info->shape_elem_n * p_info->index_n; \ for (R_len_t i = 0; i < out_n; ++i, ++out_data) { \ *out_data = NA_VALUE; \ } \ UNPROTECT(2); \ return(out); \ } \ \ const CTYPE* x_data = CONST_DEREF(x); \ \ /* Convert to C index */ \ size_index = size_index - 1; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += size_index; \ const CTYPE elt_x_data = x_data[loc]; \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_data) { \ *out_data = elt_x_data; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ \ R_len_t start = p_info->p_index[0]; \ R_len_t n = p_info->p_index[1]; \ R_len_t step = p_info->p_index[2]; \ \ const CTYPE* x_data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++out_data, loc += step) { \ *out_data = x_data[loc]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ if (is_compact_rep(index)) { \ SLICE_SHAPED_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } else if (is_compact_seq(index)) { \ SLICE_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \ } else { \ SLICE_SHAPED_INDEX(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } static SEXP lgl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(LGLSXP, int, LOGICAL, LOGICAL_RO, NA_LOGICAL); } static SEXP int_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(INTSXP, int, INTEGER, INTEGER_RO, NA_INTEGER); } static SEXP dbl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(REALSXP, double, REAL, REAL_RO, NA_REAL); } static SEXP cpl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(CPLXSXP, Rcomplex, COMPLEX, COMPLEX_RO, vctrs_shared_na_cpl); } static SEXP chr_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(STRSXP, SEXP, STRING_PTR, STRING_PTR_RO, NA_STRING); } static SEXP raw_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(RAWSXP, Rbyte, RAW, RAW_RO, 0); } #undef SLICE_SHAPED #undef SLICE_SHAPED_COMPACT_REP #undef SLICE_SHAPED_COMPACT_SEQ #undef SLICE_SHAPED_INDEX #define SLICE_BARRIER_SHAPED_INDEX(RTYPE, GET, SET, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ R_len_t out_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_loc) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ SET(out, out_loc, NA_VALUE); \ continue; \ } \ \ loc += step; \ SEXP elt = GET(x, loc); \ SET(out, out_loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED_COMPACT_REP(RTYPE, GET, SET, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ int size_index = p_info->p_index[0]; \ if (size_index == NA_INTEGER) { \ R_len_t out_n = p_info->shape_elem_n * p_info->index_n; \ for (R_len_t i = 0; i < out_n; ++i) { \ SET(out, i, NA_VALUE); \ } \ UNPROTECT(2); \ return(out); \ } \ \ R_len_t out_loc = 0; \ \ /* Convert to C index */ \ size_index = size_index - 1; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += size_index; \ const SEXP elt_x_data = GET(x, loc); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_loc) { \ SET(out, out_loc, elt_x_data); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED_COMPACT_SEQ(RTYPE, GET, SET) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ R_len_t start = p_info->p_index[0]; \ R_len_t n = p_info->p_index[1]; \ R_len_t step = p_info->p_index[2]; \ \ R_len_t out_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++out_loc, loc += step) { \ SEXP elt = GET(x, loc); \ SET(out, out_loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED(RTYPE, GET, SET, NA_VALUE) \ if (is_compact_rep(index)) { \ SLICE_BARRIER_SHAPED_COMPACT_REP(RTYPE, GET, SET, NA_VALUE); \ } else if (is_compact_seq(index)) { \ SLICE_BARRIER_SHAPED_COMPACT_SEQ(RTYPE, GET, SET); \ } else { \ SLICE_BARRIER_SHAPED_INDEX(RTYPE, GET, SET, NA_VALUE); \ } static SEXP list_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_BARRIER_SHAPED(VECSXP, VECTOR_ELT, SET_VECTOR_ELT, R_NilValue); } #undef SLICE_BARRIER_SHAPED #undef SLICE_BARRIER_SHAPED_COMPACT_REP #undef SLICE_BARRIER_SHAPED_COMPACT_SEQ #undef SLICE_BARRIER_SHAPED_INDEX SEXP vec_slice_shaped_base(enum vctrs_type type, SEXP x, SEXP index, struct strides_info* p_info) { switch (type) { case vctrs_type_logical: return lgl_slice_shaped(x, index, p_info); case vctrs_type_integer: return int_slice_shaped(x, index, p_info); case vctrs_type_double: return dbl_slice_shaped(x, index, p_info); case vctrs_type_complex: return cpl_slice_shaped(x, index, p_info); case vctrs_type_character: return chr_slice_shaped(x, index, p_info); case vctrs_type_raw: return raw_slice_shaped(x, index, p_info); case vctrs_type_list: return list_slice_shaped(x, index, p_info); default: stop_unimplemented_vctrs_type("vec_slice_shaped_base", type); } } SEXP vec_slice_shaped(enum vctrs_type type, SEXP x, SEXP index) { int n_protect = 0; struct strides_info info = new_strides_info(x, index); PROTECT_STRIDES_INFO(&info, &n_protect); SEXP out = vec_slice_shaped_base(type, x, index, &info); UNPROTECT(n_protect); return out; } vctrs/src/c.c0000644000176200001440000002272514042540502012574 0ustar liggesusers#include "vctrs.h" #include "c.h" #include "ptype-common.h" #include "slice-assign.h" #include "owned.h" #include "utils.h" // [[ register(external = TRUE) ]] SEXP vctrs_c(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP xs = PROTECT(rlang_env_dots_list(env)); SEXP ptype = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP name_spec = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP name_repair = PROTECT(Rf_eval(CAR(args), env)); struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false); PROTECT_NAME_REPAIR_OPTS(&name_repair_opts); SEXP out = vec_c(xs, ptype, name_spec, &name_repair_opts); UNPROTECT(5); return out; } // [[ include("vctrs.h") ]] SEXP vec_c(SEXP xs, SEXP ptype, SEXP name_spec, const struct name_repair_opts* name_repair) { struct fallback_opts opts = { .df = DF_FALLBACK_DEFAULT, .s3 = S3_FALLBACK_true }; return vec_c_opts(xs, ptype, name_spec, name_repair, &opts); } SEXP vec_c_opts(SEXP xs, SEXP ptype, SEXP name_spec, const struct name_repair_opts* name_repair, const struct fallback_opts* fallback_opts) { SEXP orig_ptype = ptype; ptype = PROTECT(vec_ptype_common_opts(xs, orig_ptype, fallback_opts)); if (ptype == R_NilValue) { UNPROTECT(1); return R_NilValue; } if (needs_vec_c_fallback(ptype)) { SEXP out = vec_c_fallback(ptype, xs, name_spec, name_repair); UNPROTECT(1); return out; } // FIXME: Needed for dplyr::summarise() which passes a non-fallback ptype if (needs_vec_c_homogeneous_fallback(xs, ptype)) { SEXP out = vec_c_fallback_invoke(xs, name_spec); UNPROTECT(1); return out; } // FIXME: If data frame, recompute ptype without common class // fallback. Should refactor this to allow common class fallback // with data frame columns. // // FIXME: If `ptype` is a `vctrs_vctr` class without a // `vec_ptype2()` method, the common type is a common class // fallback. To avoid infinit recursion through `c.vctrs_vctr()`, we // bail out from `needs_vec_c_fallback()`. In this case recurse with // fallback disabled as well. if ((is_data_frame(ptype) && fallback_opts->s3 == S3_FALLBACK_true) || vec_is_common_class_fallback(ptype)) { struct fallback_opts d_fallback_opts = *fallback_opts; d_fallback_opts.s3 = S3_FALLBACK_false; ptype = PROTECT(vec_ptype_common_opts(xs, orig_ptype, &d_fallback_opts)); } else { ptype = PROTECT(vec_ptype_common_opts(xs, ptype, fallback_opts)); } // Find individual input sizes and total size of output R_len_t n = Rf_length(xs); R_len_t out_size = 0; // Caching the sizes causes an extra allocation but it improves performance SEXP sizes = PROTECT(Rf_allocVector(INTSXP, n)); int* p_sizes = INTEGER(sizes); for (R_len_t i = 0; i < n; ++i) { SEXP x = VECTOR_ELT(xs, i); R_len_t size = (x == R_NilValue) ? 0 : vec_size(x); out_size += size; p_sizes[i] = size; } SEXP out = vec_init(ptype, out_size); PROTECT_INDEX out_pi; PROTECT_WITH_INDEX(out, &out_pi); out = vec_proxy(out); REPROTECT(out, out_pi); SEXP loc = PROTECT(compact_seq(0, 0, true)); int* p_loc = INTEGER(loc); bool assign_names = !Rf_inherits(name_spec, "rlang_zap"); SEXP xs_names = PROTECT(r_names(xs)); bool xs_is_named = xs_names != R_NilValue && !is_data_frame(ptype); SEXP out_names = R_NilValue; PROTECT_INDEX out_names_pi; PROTECT_WITH_INDEX(R_NilValue, &out_names_pi); // Compact sequences use 0-based counters R_len_t counter = 0; const struct vec_assign_opts c_assign_opts = { .assign_names = assign_names, .ignore_outer_names = true }; for (R_len_t i = 0; i < n; ++i) { SEXP x = VECTOR_ELT(xs, i); R_len_t size = p_sizes[i]; init_compact_seq(p_loc, counter, size, true); if (assign_names) { SEXP outer = xs_is_named ? STRING_ELT(xs_names, i) : R_NilValue; SEXP inner = PROTECT(vec_names(x)); SEXP x_nms = PROTECT(apply_name_spec(name_spec, outer, inner, size)); if (x_nms != R_NilValue) { R_LAZY_ALLOC(out_names, out_names_pi, STRSXP, out_size); // If there is no name to assign, skip the assignment since // `out_names` already contains empty strings if (x_nms != chrs_empty) { out_names = chr_assign(out_names, loc, x_nms, VCTRS_OWNED_true); REPROTECT(out_names, out_names_pi); } } UNPROTECT(2); } if (!size) { continue; } struct cast_opts opts = (struct cast_opts) { .x = x, .to = ptype, .fallback = *fallback_opts }; x = PROTECT(vec_cast_opts(&opts)); // Total ownership of `out` because it was freshly created with `vec_init()` out = vec_proxy_assign_opts(out, loc, x, VCTRS_OWNED_true, &c_assign_opts); REPROTECT(out, out_pi); counter += size; UNPROTECT(1); } out = PROTECT(vec_restore(out, ptype, R_NilValue, VCTRS_OWNED_true)); if (out_names != R_NilValue) { out_names = PROTECT(vec_as_names(out_names, name_repair)); out = vec_set_names(out, out_names); UNPROTECT(1); } else if (!assign_names) { // FIXME: `vec_ptype2()` doesn't consistently zaps names, so `out` // might have been initialised with names. This branch can be // removed once #1020 is resolved. out = vec_set_names(out, R_NilValue); } UNPROTECT(8); return out; } static inline bool vec_implements_base_c(SEXP x); // [[ include("c.h") ]] bool needs_vec_c_fallback(SEXP ptype) { if (!vec_is_common_class_fallback(ptype)) { return false; } // Suboptimal: Prevent infinite recursion through `vctrs_vctr` method SEXP class = PROTECT(Rf_getAttrib(ptype, syms_fallback_class)); class = r_chr_get(class, r_length(class) - 1); if (class == strings_vctrs_vctr) { UNPROTECT(1); return false; } UNPROTECT(1); return true; } // [[ include("c.h") ]] bool needs_vec_c_homogeneous_fallback(SEXP xs, SEXP ptype) { if (!Rf_length(xs)) { return false; } SEXP x = list_first_non_null(xs, NULL); if (!vec_is_vector(x)) { return false; } // Never fall back for `vctrs_vctr` classes to avoid infinite // recursion through `c.vctrs_vctr()` if (Rf_inherits(x, "vctrs_vctr")) { return false; } if (ptype != R_NilValue) { SEXP x_class = PROTECT(r_class(x)); SEXP ptype_class = PROTECT(r_class(ptype)); bool equal = equal_object(x_class, ptype_class); UNPROTECT(2); if (!equal) { return false; } } return !vec_implements_ptype2(x) && list_is_homogeneously_classed(xs) && vec_implements_base_c(x); } static inline bool vec_implements_base_c(SEXP x) { if (!OBJECT(x)) { return false; } if (IS_S4_OBJECT(x)) { return s4_find_method(x, s4_c_method_table) != R_NilValue; } else { return s3_find_method("c", x, base_method_table) != R_NilValue; } } static inline bool class_implements_base_c(SEXP cls) { if (s3_class_find_method("c", cls, base_method_table) != R_NilValue) { return true; } if (s4_class_find_method(cls, s4_c_method_table) != R_NilValue) { return true; } return false; } static inline int vec_c_fallback_validate_args(SEXP x, SEXP name_spec); static inline void stop_vec_c_fallback(SEXP xs, int err_type); // [[ include("c.h") ]] SEXP vec_c_fallback(SEXP ptype, SEXP xs, SEXP name_spec, const struct name_repair_opts* name_repair) { SEXP class = PROTECT(Rf_getAttrib(ptype, syms_fallback_class)); bool implements_c = class_implements_base_c(class); UNPROTECT(1); if (implements_c) { return vec_c_fallback_invoke(xs, name_spec); } else { struct fallback_opts fallback_opts = { .df = DF_FALLBACK_none, .s3 = S3_FALLBACK_false }; // Should cause a common type error, unless another fallback // kicks in (for instance, homogeneous class with homogeneous // attributes) vec_ptype_common_opts(xs, R_NilValue, &fallback_opts); // Suboptimal: Call `vec_c()` again to combine vector with // homogeneous class fallback return vec_c_opts(xs, R_NilValue, name_spec, name_repair, &fallback_opts); } } // [[ include("c.h") ]] SEXP vec_c_fallback_invoke(SEXP xs, SEXP name_spec) { SEXP x = list_first_non_null(xs, NULL); if (vctrs_debug_verbose) { Rprintf("Falling back to `base::c()` for class `%s`.\n", r_chr_get_c_string(r_class(x), 0)); } int err_type = vec_c_fallback_validate_args(x, name_spec); if (err_type) { stop_vec_c_fallback(xs, err_type); } SEXP call = PROTECT(Rf_lang2(Rf_install("base_c_invoke"), xs)); SEXP out = Rf_eval(call, vctrs_ns_env); UNPROTECT(1); return out; } static inline int vec_c_fallback_validate_args(SEXP x, SEXP name_spec) { if (name_spec != R_NilValue) { return 2; } return 0; } static void stop_vec_c_fallback(SEXP xs, int err_type) { SEXP common_class = PROTECT(r_class(list_first_non_null(xs, NULL))); const char* class_str = r_chr_get_c_string(common_class, 0); const char* msg = NULL; switch (err_type) { case 2: msg = "Can't use a name specification with non-vctrs types."; break; case 3: msg = "Can't find vctrs or base methods for concatenation."; break; default: msg = "Internal error: Unexpected error type."; break; } Rf_errorcall(R_NilValue, "%s\n" "vctrs methods must be implemented for class `%s`.\n" "See .", msg, class_str); } vctrs/src/type-info.c0000644000176200001440000001264014042540502014257 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "arg-counter.h" // Initialised at load time static SEXP syms_vec_is_vector_dispatch = NULL; static SEXP fns_vec_is_vector_dispatch = NULL; // From proxy.c SEXP vec_proxy_method(SEXP x); SEXP vec_proxy_invoke(SEXP x, SEXP method); static enum vctrs_type vec_base_typeof(SEXP x, bool proxied); // [[ include("vctrs.h") ]] struct vctrs_type_info vec_type_info(SEXP x) { struct vctrs_type_info info; info.type = vec_typeof(x); switch (info.type) { case vctrs_type_s3: info.proxy_method = vec_proxy_method(x); break; default: info.proxy_method = R_NilValue; } return info; } // [[ include("vctrs.h") ]] struct vctrs_proxy_info vec_proxy_info(SEXP x) { struct vctrs_proxy_info info; info.proxy_method = OBJECT(x) ? vec_proxy_method(x) : R_NilValue; PROTECT(info.proxy_method); if (info.proxy_method == R_NilValue) { info.type = vec_base_typeof(x, false); info.proxy = x; } else { SEXP proxy = PROTECT(vec_proxy_invoke(x, info.proxy_method)); info.type = vec_base_typeof(proxy, true); info.proxy = proxy; UNPROTECT(1); } UNPROTECT(1); return info; } // [[ register() ]] SEXP vctrs_type_info(SEXP x) { struct vctrs_type_info info = vec_type_info(x); SEXP out = PROTECT(Rf_mkNamed(VECSXP, (const char*[]) { "type", "proxy_method", "" })); SET_VECTOR_ELT(out, 0, Rf_mkString(vec_type_as_str(info.type))); SET_VECTOR_ELT(out, 1, info.proxy_method); UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_proxy_info(SEXP x) { struct vctrs_proxy_info info = vec_proxy_info(x); SEXP out = PROTECT(Rf_mkNamed(VECSXP, (const char*[]) { "type", "proxy_method", "proxy", "" })); SET_VECTOR_ELT(out, 0, Rf_mkString(vec_type_as_str(info.type))); SET_VECTOR_ELT(out, 1, info.proxy_method); SET_VECTOR_ELT(out, 2, info.proxy); UNPROTECT(1); return out; } static enum vctrs_type vec_base_typeof(SEXP x, bool proxied) { switch (TYPEOF(x)) { // Atomic types are always vectors case NILSXP: return vctrs_type_null; case LGLSXP: return vctrs_type_logical; case INTSXP: return vctrs_type_integer; case REALSXP: return vctrs_type_double; case CPLXSXP: return vctrs_type_complex; case STRSXP: return vctrs_type_character; case RAWSXP: return vctrs_type_raw; case VECSXP: // Bare lists and data frames are vectors if (!OBJECT(x)) return vctrs_type_list; if (is_data_frame(x)) return vctrs_type_dataframe; // S3 lists are only vectors if they are proxied if (proxied || Rf_inherits(x, "list")) return vctrs_type_list; // fallthrough default: return vctrs_type_scalar; } } // [[ include("vctrs.h") ]] enum vctrs_type vec_proxy_typeof(SEXP x) { return vec_base_typeof(x, true); } // [[ register() ]] SEXP vctrs_is_list(SEXP x) { return Rf_ScalarLogical(vec_is_list(x)); } // [[ include("vctrs.h") ]] bool vec_is_list(SEXP x) { // Require `x` to be a list internally if (TYPEOF(x) != VECSXP) { return false; } // Unclassed VECSXP are lists if (!OBJECT(x)) { return true; } // Classed VECSXP are only lists if the last class is explicitly `"list"` return class_type(x) == vctrs_class_list; } // [[ include("vctrs.h") ]] bool vec_is_vector(SEXP x) { if (x == R_NilValue) { return false; } struct vctrs_proxy_info info = vec_proxy_info(x); return info.type != vctrs_type_scalar; } // [[ register() ]] SEXP vctrs_is_vector(SEXP x) { return Rf_ScalarLogical(vec_is_vector(x)); } static bool class_is_null(SEXP x) { return Rf_getAttrib(x, R_ClassSymbol) == R_NilValue; } // [[ include("vctrs.h") ]] enum vctrs_type vec_typeof(SEXP x) { // Check for unspecified vectors before `vec_base_typeof()` which // allows vectors of `NA` to pass through as `vctrs_type_logical` if (vec_is_unspecified(x)) { return vctrs_type_unspecified; } if (!OBJECT(x) || class_is_null(x)) { return vec_base_typeof(x, false); } // Bare data frames are treated as a base atomic type. Subclasses of // data frames are treated as S3 to give them a chance to be proxied // or implement their own methods for cast, type2, etc. if (is_bare_data_frame(x)) { return vctrs_type_dataframe; } return vctrs_type_s3; } // [[ register() ]] SEXP vctrs_typeof(SEXP x, SEXP dispatch) { enum vctrs_type type; if (LOGICAL(dispatch)[0]) { type = vec_proxy_info(x).type; } else { type = vec_typeof(x); } return Rf_mkString(vec_type_as_str(type)); } __attribute__((noreturn)) void stop_unimplemented_vctrs_type(const char* fn, enum vctrs_type type) { stop_internal(fn, "Unsupported vctrs type `%s`.", vec_type_as_str(type)); } const char* vec_type_as_str(enum vctrs_type type) { switch (type) { case vctrs_type_null: return "null"; case vctrs_type_unspecified: return "unspecified"; case vctrs_type_logical: return "logical"; case vctrs_type_integer: return "integer"; case vctrs_type_double: return "double"; case vctrs_type_complex: return "complex"; case vctrs_type_character: return "character"; case vctrs_type_raw: return "raw"; case vctrs_type_list: return "list"; case vctrs_type_dataframe: return "dataframe"; case vctrs_type_s3: return "s3"; case vctrs_type_scalar: return "scalar"; } never_reached("vec_type_as_str"); } void vctrs_init_type_info(SEXP ns) { syms_vec_is_vector_dispatch = Rf_install("vec_is_vector"); fns_vec_is_vector_dispatch = Rf_findVar(syms_vec_is_vector_dispatch, ns); } vctrs/src/rep.c0000644000176200001440000002137514042540502013140 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "type-data-frame.h" // Initialised at load time static struct vctrs_arg args_times_; static struct vctrs_arg* const args_times = &args_times_; static inline void stop_rep_times_size(); static inline void check_rep_times(int times); static inline void check_rep_each_times(int times, R_len_t i); static inline bool multiply_would_overflow(R_len_t x, R_len_t y); static inline bool plus_would_overflow(R_len_t x, R_len_t y); static inline void stop_rep_size_oob(); // ----------------------------------------------------------------------------- static SEXP vec_rep(SEXP x, int times); // [[ register() ]] SEXP vctrs_rep(SEXP x, SEXP times) { times = PROTECT(vec_cast(times, vctrs_shared_empty_int, args_times, args_empty)); if (vec_size(times) != 1) { stop_rep_times_size(); } const int times_ = r_int_get(times, 0); SEXP out = vec_rep(x, times_); UNPROTECT(1); return out; } static SEXP vec_rep(SEXP x, int times) { check_rep_times(times); const R_len_t times_ = (R_len_t) times; const R_len_t x_size = vec_size(x); if (multiply_would_overflow(x_size, times_)) { stop_rep_size_oob(); }; const R_len_t size = x_size * times_; SEXP subscript = PROTECT(Rf_allocVector(INTSXP, size)); int* p_subscript = INTEGER(subscript); R_len_t k = 0; for (R_len_t i = 0; i < times_; ++i) { for (R_len_t j = 1; j <= x_size; ++j, ++k) { p_subscript[k] = j; } } SEXP out = vec_slice_impl(x, subscript); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- static SEXP vec_rep_each(SEXP x, SEXP times); // [[ register() ]] SEXP vctrs_rep_each(SEXP x, SEXP times) { return vec_rep_each(x, times); } static SEXP vec_rep_each_uniform(SEXP x, int times); static SEXP vec_rep_each_impl(SEXP x, SEXP times, const R_len_t times_size); static SEXP vec_rep_each(SEXP x, SEXP times) { times = PROTECT(vec_cast(times, vctrs_shared_empty_int, args_times, args_empty)); const R_len_t times_size = vec_size(times); SEXP out; if (times_size == 1) { const int times_ = r_int_get(times, 0); out = vec_rep_each_uniform(x, times_); } else { out = vec_rep_each_impl(x, times, times_size); } UNPROTECT(1); return out; } static SEXP vec_rep_each_uniform(SEXP x, int times) { check_rep_each_times(times, 1); const R_len_t times_ = (R_len_t) times; const R_len_t x_size = vec_size(x); if (multiply_would_overflow(x_size, times_)) { stop_rep_size_oob(); }; const R_len_t size = x_size * times_; SEXP subscript = PROTECT(Rf_allocVector(INTSXP, size)); int* p_subscript = INTEGER(subscript); R_len_t k = 0; for (R_len_t i = 1; i <= x_size; ++i) { for (R_len_t j = 0; j < times_; ++j, ++k) { p_subscript[k] = i; } } SEXP out = vec_slice_impl(x, subscript); UNPROTECT(1); return out; } static SEXP vec_rep_each_impl(SEXP x, SEXP times, const R_len_t times_size) { const R_len_t x_size = vec_size(x); if (x_size != times_size) { stop_recycle_incompatible_size(times_size, x_size, args_times); } const int* p_times = INTEGER_RO(times); R_len_t size = 0; for (R_len_t i = 0; i < times_size; ++i) { const int elt_times = p_times[i]; check_rep_each_times(elt_times, i + 1); const R_len_t elt_times_ = (R_len_t) elt_times; if (plus_would_overflow(size, elt_times_)) { stop_rep_size_oob(); } size += elt_times_; } SEXP subscript = PROTECT(Rf_allocVector(INTSXP, size)); int* p_subscript = INTEGER(subscript); R_len_t k = 0; for (R_len_t i = 1; i <= x_size; ++i) { const R_len_t elt_times = (R_len_t) p_times[i - 1]; for (R_len_t j = 0; j < elt_times; ++j, ++k) { p_subscript[k] = i; } } SEXP out = vec_slice_impl(x, subscript); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // TODO: Modify for long vectors with `R_XLEN_T_MAX` and `R_xlen_t`. static inline bool times_is_oob(int times) { return times > R_LEN_T_MAX; } // Only useful for positive or zero inputs static inline bool multiply_would_overflow(R_len_t x, R_len_t y) { return (double) x * y > R_LEN_T_MAX; } // Only useful for positive or zero inputs static inline bool plus_would_overflow(R_len_t x, R_len_t y) { return x > R_LEN_T_MAX - y; } // ----------------------------------------------------------------------------- static inline void stop_rep_times_negative(); static inline void stop_rep_times_missing(); static inline void stop_rep_times_oob(int times); static inline void check_rep_times(int times) { if (times < 0) { if (times == NA_INTEGER) { stop_rep_times_missing(); } else { stop_rep_times_negative(); } } else if (times_is_oob(times)) { stop_rep_times_oob(times); } } static inline void stop_rep_times_negative() { Rf_errorcall(R_NilValue, "`times` must be a positive number."); } static inline void stop_rep_times_missing() { Rf_errorcall(R_NilValue, "`times` can't be missing."); } // Not currently thrown since `R_len_t == int`, but might be once // long vectors are supported static inline void stop_rep_times_oob(int times) { Rf_errorcall( R_NilValue, "`times` must be less than %i, not %i.", R_LEN_T_MAX, times ); } // ----------------------------------------------------------------------------- static inline void stop_rep_each_times_negative(R_len_t i); static inline void stop_rep_each_times_missing(R_len_t i); static inline void stop_rep_each_times_oob(int times, R_len_t i); static inline void check_rep_each_times(int times, R_len_t i) { if (times < 0) { if (times == NA_INTEGER) { stop_rep_each_times_missing(i); } else { stop_rep_each_times_negative(i); } } else if (times_is_oob(times)) { stop_rep_each_times_oob(times, i); } } static inline void stop_rep_each_times_negative(R_len_t i) { Rf_errorcall(R_NilValue, "`times` must be a vector of positive numbers. Location %i is negative.", i); } static inline void stop_rep_each_times_missing(R_len_t i) { Rf_errorcall(R_NilValue, "`times` can't be missing. Location %i is missing.", i); } // Not currently thrown since `R_len_t == int`, but might be once // long vectors are supported static inline void stop_rep_each_times_oob(int times, R_len_t i) { Rf_errorcall( R_NilValue, "`times` must be less than %i, not %i. ", "Location %i is too large.", R_LEN_T_MAX, times, i ); } // ----------------------------------------------------------------------------- static inline void stop_rep_size_oob() { Rf_errorcall( R_NilValue, "Long vectors are not yet supported. " "Requested output size must be less than %i.", R_LEN_T_MAX ); } static inline void stop_rep_times_size() { Rf_errorcall(R_NilValue, "`times` must be a single number."); } // ----------------------------------------------------------------------------- static SEXP vec_unrep(SEXP x); // [[register()]] SEXP vctrs_unrep(SEXP x) { return vec_unrep(x); } static SEXP new_unrep_data_frame(SEXP key, SEXP times, r_ssize size); static SEXP vec_unrep(SEXP x) { SEXP id = PROTECT(vec_identify_runs(x)); const int* p_id = INTEGER_RO(id); r_ssize x_size = r_length(id); if (x_size == 0) { SEXP out = new_unrep_data_frame(x, vctrs_shared_empty_int, 0); UNPROTECT(1); return out; } r_ssize out_size = (r_ssize) r_int_get(r_attrib_get(id, syms_n), 0); // Size of each run SEXP times = PROTECT(r_new_integer(out_size)); int* p_times = INTEGER(times); // Location of the start of each run. For slicing `x`. SEXP loc = PROTECT(r_new_integer(out_size)); int* p_loc = INTEGER(loc); r_ssize idx = 0; r_ssize previous = 0; int reference = p_id[0]; // Handle first case p_loc[idx] = 1; ++idx; for (r_ssize i = 1; i < x_size; ++i) { const int elt = p_id[i]; if (elt == reference) { continue; } reference = elt; // Size of current run p_times[idx - 1] = i - previous; previous = i; // 1-based location of the start of the new run p_loc[idx] = i + 1; ++idx; } // Handle last case p_times[idx - 1] = x_size - previous; SEXP key = PROTECT(vec_slice(x, loc)); SEXP out = new_unrep_data_frame(key, times, out_size); UNPROTECT(4); return out; } static SEXP new_unrep_data_frame(SEXP key, SEXP times, r_ssize size) { SEXP out = PROTECT(r_new_list(2)); r_list_poke(out, 0, key); r_list_poke(out, 1, times); SEXP names = PROTECT(r_new_character(2)); r_poke_names(out, names); r_chr_poke(names, 0, strings_key); r_chr_poke(names, 1, strings_times); init_data_frame(out, size); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- void vctrs_init_rep(SEXP ns) { args_times_ = new_wrapper_arg(NULL, "times"); } vctrs/src/callables.c0000644000176200001440000000202014042540502014256 0ustar liggesusers#include "vctrs.h" #include "utils.h" // ----------------------------------------------------------------------------- // Maturing R_len_t short_vec_size(SEXP x) { return vec_size(x); } SEXP short_vec_recycle(SEXP x, R_len_t size) { return vec_recycle(x, size, args_empty); } // ----------------------------------------------------------------------------- // Experimental SEXP exp_vec_cast(SEXP x, SEXP to) { return vec_cast(x, to, args_empty, args_empty); } SEXP exp_vec_chop(SEXP x, SEXP indices) { return vec_chop(x, indices); } SEXP exp_vec_slice_impl(SEXP x, SEXP subscript) { return vec_slice_impl(x, subscript); } SEXP exp_vec_names(SEXP x) { return vec_names(x); } SEXP exp_vec_set_names(SEXP x, SEXP names) { return vec_set_names(x, names); } SEXP exp_short_compact_seq(R_len_t start, R_len_t size, bool increasing) { return compact_seq(start, size, increasing); } void exp_short_init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing) { init_compact_seq(p, start, size, increasing); } vctrs/src/type2.c0000644000176200001440000001731114042540502013410 0ustar liggesusers#include "vctrs.h" #include "ptype2.h" #include "type-data-frame.h" #include "utils.h" #include "shape.h" static SEXP vec_ptype2_switch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left); // [[ register() ]] SEXP vctrs_ptype2_opts(SEXP x, SEXP y, SEXP opts, SEXP x_arg, SEXP y_arg) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_y_arg = vec_as_arg(y_arg); const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &c_x_arg, &c_y_arg, opts); int _left; return vec_ptype2_opts(&c_opts, &_left); } SEXP vec_ptype2_opts_impl(const struct ptype2_opts* opts, int* left, bool first_pass) { SEXP x = opts->x; SEXP y = opts->y; struct vctrs_arg* x_arg = opts->x_arg; struct vctrs_arg* y_arg = opts->y_arg; enum vctrs_type x_type = vec_typeof(x); enum vctrs_type y_type = vec_typeof(y); if (x_type == vctrs_type_null) { *left = y == R_NilValue; return vec_ptype2_from_unspecified(opts, x_type, y, y_arg); } if (y_type == vctrs_type_null) { *left = x == R_NilValue; return vec_ptype2_from_unspecified(opts, x_type, x, x_arg); } if (x_type == vctrs_type_unspecified) { return vec_ptype2_from_unspecified(opts, y_type, y, y_arg); } if (y_type == vctrs_type_unspecified) { return vec_ptype2_from_unspecified(opts, x_type, x, x_arg); } if (x_type == vctrs_type_scalar) { stop_scalar_type(x, x_arg); } if (y_type == vctrs_type_scalar) { stop_scalar_type(y, y_arg); } if (x_type != vctrs_type_s3 && y_type != vctrs_type_s3) { return vec_ptype2_switch_native(opts, x_type, y_type, left); } if (x_type == vctrs_type_s3 || y_type == vctrs_type_s3) { SEXP out = vec_ptype2_dispatch_native(opts, x_type, y_type, left); if (out != R_NilValue) { return out; } } // Try native dispatch again with prototypes, in case the prototype // is another type. FIXME: Use R-level callback instead. if (first_pass) { struct ptype2_opts mut_opts = *opts; mut_opts.x = PROTECT(vec_ptype(x, x_arg)); mut_opts.y = PROTECT(vec_ptype(y, y_arg)); SEXP out = vec_ptype2_opts_impl(&mut_opts, left, false); UNPROTECT(2); return out; } return vec_ptype2_dispatch_s3(opts); } // [[ include("ptype2.h") ]] SEXP vec_ptype2_opts(const struct ptype2_opts* opts, int* left) { return vec_ptype2_opts_impl(opts, left, true); } static SEXP vec_ptype2_switch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left) { SEXP x = opts->x; SEXP y = opts->y; struct vctrs_arg* x_arg = opts->x_arg; struct vctrs_arg* y_arg = opts->y_arg; enum vctrs_type2 type2 = vec_typeof2_impl(x_type, y_type, left); switch (type2) { case vctrs_type2_null_null: return R_NilValue; case vctrs_type2_logical_logical: return vec_shaped_ptype(vctrs_shared_empty_lgl, x, y, x_arg, y_arg); case vctrs_type2_logical_integer: case vctrs_type2_integer_integer: return vec_shaped_ptype(vctrs_shared_empty_int, x, y, x_arg, y_arg); case vctrs_type2_logical_double: case vctrs_type2_integer_double: case vctrs_type2_double_double: return vec_shaped_ptype(vctrs_shared_empty_dbl, x, y, x_arg, y_arg); case vctrs_type2_integer_complex: case vctrs_type2_double_complex: case vctrs_type2_complex_complex: return vec_shaped_ptype(vctrs_shared_empty_cpl, x, y, x_arg, y_arg); case vctrs_type2_character_character: return vec_shaped_ptype(vctrs_shared_empty_chr, x, y, x_arg, y_arg); case vctrs_type2_raw_raw: return vec_shaped_ptype(vctrs_shared_empty_raw, x, y, x_arg, y_arg); case vctrs_type2_list_list: return vec_shaped_ptype(vctrs_shared_empty_list, x, y, x_arg, y_arg); case vctrs_type2_dataframe_dataframe: return df_ptype2(opts); default: return vec_ptype2_dispatch_s3(opts); } } /** * Return non-unspecified type. * * This is normally the `vec_ptype()` of the other input, but if the * common class fallback is enabled we return the `vec_ptype2()` of * this input with itself. This way we may return a fallback sentinel which can be * treated specially, for instance in `vec_c(NA, x, NA)`. */ SEXP vec_ptype2_from_unspecified(const struct ptype2_opts* opts, enum vctrs_type other_type, SEXP other, struct vctrs_arg* other_arg) { if (other_type == vctrs_type_unspecified || other_type == vctrs_type_null) { return vec_ptype(other, other_arg); } if (opts->fallback.s3) { const struct ptype2_opts self_self_opts = (const struct ptype2_opts) { .x = other, .y = other, .x_arg = other_arg, .y_arg = other_arg, .fallback = opts->fallback }; int _left = 0; return vec_ptype2_opts(&self_self_opts, &_left); } return vec_ptype(other, other_arg); } struct is_coercible_data { const struct ptype2_opts* opts; int* dir; }; static void vec_is_coercible_cb(void* data_) { struct is_coercible_data* data = (struct is_coercible_data*) data_; vec_ptype2_opts(data->opts, data->dir); } static void vec_is_coercible_e(const struct ptype2_opts* opts, int* dir, ERR* err) { struct is_coercible_data data = { .opts = opts, .dir = dir, }; *err = r_try_catch(&vec_is_coercible_cb, &data, syms_vctrs_error_incompatible_type, NULL, NULL); } // [[ include("ptype2.h") ]] bool vec_is_coercible(const struct ptype2_opts* opts, int* dir) { ERR err = NULL; vec_is_coercible_e(opts, dir, &err); return !err; } // [[ register() ]] SEXP vctrs_is_coercible(SEXP x, SEXP y, SEXP opts, SEXP x_arg, SEXP y_arg) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_y_arg = vec_as_arg(y_arg); const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &c_x_arg, &c_y_arg, opts); int dir = 0; return r_lgl(vec_is_coercible(&c_opts, &dir)); } // [[ register() ]] SEXP vctrs_ptype2(SEXP x, SEXP y, SEXP x_arg, SEXP y_arg) { struct vctrs_arg x_arg_ = vec_as_arg(x_arg); struct vctrs_arg y_arg_ = vec_as_arg(y_arg); int _left; return vec_ptype2(x, y, &x_arg_, &y_arg_, &_left); } // [[ include("ptype2.h") ]] struct fallback_opts new_fallback_opts(SEXP opts) { return (struct fallback_opts) { .df = r_int_get(r_list_get(opts, 0), 0), .s3 = r_int_get(r_list_get(opts, 1), 0) }; } // [[ include("ptype2.h") ]] struct ptype2_opts new_ptype2_opts(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, SEXP opts) { return (struct ptype2_opts) { .x = x, .y = y, .x_arg = x_arg, .y_arg = y_arg, .fallback = new_fallback_opts(opts) }; } static SEXP r_fallback_opts_template = NULL; // [[ include("ptype2.h") ]] SEXP new_fallback_r_opts(const struct ptype2_opts* opts) { SEXP r_opts = PROTECT(r_copy(r_fallback_opts_template)); r_int_poke(r_list_get(r_opts, 0), 0, opts->fallback.df); r_int_poke(r_list_get(r_opts, 1), 0, opts->fallback.s3); UNPROTECT(1); return r_opts; } void vctrs_init_ptype2(SEXP ns) { r_fallback_opts_template = r_parse_eval("fallback_opts()", ns); R_PreserveObject(r_fallback_opts_template); } vctrs/src/init.c0000644000176200001440000004554514042540502013322 0ustar liggesusers#include #include #include // for NULL #include #include #include "altrep-rle.h" #include "vctrs.h" // Compile with `-fvisibility=hidden -DHAVE_VISIBILITY_ATTRIBUTE` if you link to this library #include #define export attribute_visible extern extern SEXP vctrs_list_get(SEXP, SEXP); extern SEXP vctrs_list_set(SEXP, SEXP, SEXP); extern SEXP vctrs_field_get(SEXP, SEXP); extern SEXP vctrs_field_set(SEXP, SEXP, SEXP); extern SEXP vctrs_fields(SEXP); extern SEXP vctrs_n_fields(SEXP); extern SEXP vctrs_hash(SEXP); extern SEXP vctrs_hash_object(SEXP); extern SEXP vctrs_equal_object(SEXP, SEXP); extern SEXP vctrs_duplicated(SEXP); extern SEXP vctrs_unique_loc(SEXP); extern SEXP vctrs_count(SEXP); extern SEXP vctrs_id(SEXP); extern SEXP vctrs_n_distinct(SEXP); extern SEXP vec_split(SEXP, SEXP); extern SEXP vctrs_group_id(SEXP); extern SEXP vctrs_group_rle(SEXP); extern SEXP vec_group_loc(SEXP); extern SEXP vctrs_equal(SEXP, SEXP, SEXP); extern SEXP vctrs_equal_na(SEXP); extern SEXP vctrs_compare(SEXP, SEXP, SEXP); extern SEXP vctrs_match(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_in(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_duplicated_any(SEXP); extern SEXP vctrs_size(SEXP); extern SEXP vctrs_list_sizes(SEXP); extern SEXP vctrs_dim(SEXP); extern SEXP vctrs_dim_n(SEXP); extern SEXP vctrs_is_unspecified(SEXP); extern SEXP vctrs_typeof(SEXP, SEXP); extern SEXP vctrs_is_vector(SEXP); extern SEXP vctrs_ptype2(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_typeof2(SEXP, SEXP); extern SEXP vctrs_typeof2_s3(SEXP, SEXP); extern SEXP vctrs_cast(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_as_location(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vec_slice(SEXP, SEXP); extern SEXP vctrs_init(SEXP, SEXP); extern SEXP vctrs_chop(SEXP, SEXP); extern SEXP vctrs_unchop(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_chop_seq(SEXP, SEXP, SEXP, SEXP); extern SEXP vec_slice_seq(SEXP, SEXP, SEXP, SEXP); extern SEXP vec_slice_rep(SEXP, SEXP, SEXP); extern SEXP vctrs_restore(SEXP, SEXP, SEXP); extern SEXP vctrs_restore_default(SEXP, SEXP); extern SEXP vec_proxy(SEXP); extern SEXP vec_proxy_equal(SEXP); extern SEXP vec_proxy_compare(SEXP); extern SEXP vec_proxy_order(SEXP); extern SEXP vec_proxy_complete(SEXP); extern SEXP vctrs_df_proxy(SEXP, SEXP); extern SEXP vctrs_unspecified(SEXP); extern SEXP vctrs_ptype(SEXP, SEXP); extern SEXP vec_ptype_finalise(SEXP); extern SEXP vctrs_minimal_names(SEXP); extern SEXP vctrs_unique_names(SEXP, SEXP); extern SEXP vctrs_as_minimal_names(SEXP); extern SEXP vec_names(SEXP); extern SEXP vctrs_is_unique_names(SEXP); extern SEXP vctrs_as_unique_names(SEXP, SEXP); extern SEXP vec_set_names(SEXP, SEXP); extern SEXP vctrs_df_cast_opts(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_df_ptype2_opts(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_type_info(SEXP); extern SEXP vctrs_proxy_info(SEXP); extern SEXP vctrs_class_type(SEXP); extern SEXP vctrs_bare_df_restore(SEXP, SEXP, SEXP); extern SEXP vctrs_recycle(SEXP, SEXP, SEXP); extern SEXP vctrs_assign(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_assign_seq(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_set_attributes(SEXP, SEXP); extern SEXP vctrs_as_df_row(SEXP, SEXP); extern SEXP vctrs_outer_names(SEXP, SEXP, SEXP); extern SEXP vctrs_df_size(SEXP); extern SEXP vctrs_as_df_col(SEXP, SEXP); extern SEXP vctrs_apply_name_spec(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_unset_s4(SEXP); extern SEXP vctrs_validate_name_repair_arg(SEXP); extern SEXP vctrs_validate_minimal_names(SEXP, SEXP); extern SEXP vctrs_as_names(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_is_partial(SEXP); extern SEXP vctrs_is_list(SEXP); extern SEXP vctrs_try_catch_callback(SEXP, SEXP); extern SEXP vctrs_is_coercible(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_as_subscript(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_as_subscript_result(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_df_flatten_info(SEXP); extern SEXP df_flatten(SEXP); extern SEXP vctrs_linked_version(); extern SEXP vctrs_tib_ptype2(SEXP x, SEXP y, SEXP x_arg_, SEXP y_arg_); extern SEXP vctrs_tib_cast(SEXP x, SEXP y, SEXP x_arg_, SEXP y_arg_); extern SEXP vctrs_assign_params(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_has_dim(SEXP); extern SEXP vctrs_rep(SEXP, SEXP); extern SEXP vctrs_rep_each(SEXP, SEXP); extern SEXP vctrs_maybe_shared_col(SEXP, SEXP); extern SEXP vctrs_new_df_unshared_col(); extern SEXP vctrs_shaped_ptype(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_shape2(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_new_date(SEXP); extern SEXP vctrs_date_validate(SEXP); extern SEXP vctrs_new_datetime(SEXP, SEXP); extern SEXP vctrs_datetime_validate(SEXP); extern SEXP vctrs_ptype2_opts(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_s3_find_method(SEXP, SEXP, SEXP); extern SEXP vctrs_implements_ptype2(SEXP); extern SEXP vctrs_ptype2_dispatch_native(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_cast_dispatch_native(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_fast_c(SEXP, SEXP); extern SEXP vctrs_data_frame(SEXP, SEXP, SEXP); extern SEXP vctrs_df_list(SEXP, SEXP, SEXP); extern SEXP vctrs_identify_runs(SEXP); extern SEXP vctrs_locate_runs(SEXP, SEXP); extern SEXP vctrs_detect_runs(SEXP, SEXP); extern SEXP vctrs_slice_complete(SEXP); extern SEXP vctrs_locate_complete(SEXP); extern SEXP vctrs_detect_complete(SEXP); extern SEXP vctrs_normalize_encoding(SEXP); extern SEXP vctrs_order(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_order_locs(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_unrep(SEXP); extern SEXP vctrs_fill_missing(SEXP, SEXP, SEXP); extern SEXP vctrs_chr_paste_prefix(SEXP, SEXP, SEXP); // Maturing // In the public header extern bool vec_is_vector(SEXP); extern R_len_t short_vec_size(SEXP); extern SEXP short_vec_recycle(SEXP, R_len_t); // Experimental // Exported but not available in the public header extern SEXP exp_vec_cast(SEXP, SEXP); extern SEXP exp_vec_chop(SEXP, SEXP); extern SEXP exp_vec_slice_impl(SEXP, SEXP); extern SEXP exp_vec_names(SEXP); extern SEXP exp_vec_set_names(SEXP, SEXP); extern SEXP exp_short_compact_seq(R_len_t, R_len_t, bool); extern SEXP exp_short_init_compact_seq(int*, R_len_t, R_len_t, bool); // Defined below SEXP vctrs_init_library(SEXP); // Defined in altrep-rle.h extern SEXP altrep_rle_Make(SEXP); void vctrs_init_altrep_rle(DllInfo* dll); static const R_CallMethodDef CallEntries[] = { {"vctrs_list_get", (DL_FUNC) &vctrs_list_get, 2}, {"vctrs_list_set", (DL_FUNC) &vctrs_list_set, 3}, {"vctrs_field_get", (DL_FUNC) &vctrs_field_get, 2}, {"vctrs_field_set", (DL_FUNC) &vctrs_field_set, 3}, {"vctrs_fields", (DL_FUNC) &vctrs_fields, 1}, {"vctrs_n_fields", (DL_FUNC) &vctrs_n_fields, 1}, {"vctrs_hash", (DL_FUNC) &vctrs_hash, 1}, {"vctrs_hash_object", (DL_FUNC) &vctrs_hash_object, 1}, {"vctrs_equal_object", (DL_FUNC) &vctrs_equal_object, 2}, {"vctrs_unique_loc", (DL_FUNC) &vctrs_unique_loc, 1}, {"vctrs_duplicated", (DL_FUNC) &vctrs_duplicated, 1}, {"vctrs_duplicated_any", (DL_FUNC) &vctrs_duplicated_any, 1}, {"vctrs_count", (DL_FUNC) &vctrs_count, 1}, {"vctrs_id", (DL_FUNC) &vctrs_id, 1}, {"vctrs_n_distinct", (DL_FUNC) &vctrs_n_distinct, 1}, {"vctrs_split", (DL_FUNC) &vec_split, 2}, {"vctrs_group_id", (DL_FUNC) &vctrs_group_id, 1}, {"vctrs_group_rle", (DL_FUNC) &vctrs_group_rle, 1}, {"vctrs_group_loc", (DL_FUNC) &vec_group_loc, 1}, {"vctrs_size", (DL_FUNC) &vctrs_size, 1}, {"vctrs_list_sizes", (DL_FUNC) &vctrs_list_sizes, 1}, {"vctrs_dim", (DL_FUNC) &vctrs_dim, 1}, {"vctrs_dim_n", (DL_FUNC) &vctrs_dim_n, 1}, {"vctrs_is_unspecified", (DL_FUNC) &vctrs_is_unspecified, 1}, {"vctrs_equal", (DL_FUNC) &vctrs_equal, 3}, {"vctrs_equal_na", (DL_FUNC) &vctrs_equal_na, 1}, {"vctrs_compare", (DL_FUNC) &vctrs_compare, 3}, {"vctrs_match", (DL_FUNC) &vctrs_match, 5}, {"vctrs_in", (DL_FUNC) &vctrs_in, 5}, {"vctrs_typeof", (DL_FUNC) &vctrs_typeof, 2}, {"vctrs_init_library", (DL_FUNC) &vctrs_init_library, 1}, {"vctrs_is_vector", (DL_FUNC) &vctrs_is_vector, 1}, {"vctrs_ptype2", (DL_FUNC) &vctrs_ptype2, 4}, {"vctrs_typeof2", (DL_FUNC) &vctrs_typeof2, 2}, {"vctrs_typeof2_s3", (DL_FUNC) &vctrs_typeof2_s3, 2}, {"vctrs_cast", (DL_FUNC) &vctrs_cast, 4}, {"vctrs_as_location", (DL_FUNC) &vctrs_as_location, 8}, {"vctrs_slice", (DL_FUNC) &vec_slice, 2}, {"vctrs_init", (DL_FUNC) &vctrs_init, 2}, {"vctrs_chop", (DL_FUNC) &vctrs_chop, 2}, {"vctrs_unchop", (DL_FUNC) &vctrs_unchop, 5}, {"vctrs_chop_seq", (DL_FUNC) &vctrs_chop_seq, 4}, {"vctrs_slice_seq", (DL_FUNC) &vec_slice_seq, 4}, {"vctrs_slice_rep", (DL_FUNC) &vec_slice_rep, 3}, {"vctrs_restore", (DL_FUNC) &vctrs_restore, 3}, {"vctrs_restore_default", (DL_FUNC) &vctrs_restore_default, 2}, {"vctrs_proxy", (DL_FUNC) &vec_proxy, 1}, {"vctrs_proxy_equal", (DL_FUNC) &vec_proxy_equal, 1}, {"vctrs_proxy_compare", (DL_FUNC) &vec_proxy_compare, 1}, {"vctrs_proxy_order", (DL_FUNC) &vec_proxy_order, 1}, {"vctrs_proxy_complete", (DL_FUNC) &vec_proxy_complete, 1}, {"vctrs_df_proxy", (DL_FUNC) &vctrs_df_proxy, 2}, {"vctrs_unspecified", (DL_FUNC) &vctrs_unspecified, 1}, {"vctrs_ptype", (DL_FUNC) &vctrs_ptype, 2}, {"vctrs_ptype_finalise", (DL_FUNC) &vec_ptype_finalise, 1}, {"vctrs_minimal_names", (DL_FUNC) &vctrs_minimal_names, 1}, {"vctrs_unique_names", (DL_FUNC) &vctrs_unique_names, 2}, {"vctrs_as_minimal_names", (DL_FUNC) &vctrs_as_minimal_names, 1}, {"vctrs_names", (DL_FUNC) &vec_names, 1}, {"vctrs_is_unique_names", (DL_FUNC) &vctrs_is_unique_names, 1}, {"vctrs_as_unique_names", (DL_FUNC) &vctrs_as_unique_names, 2}, {"vctrs_set_names", (DL_FUNC) &vec_set_names, 2}, {"vctrs_df_cast_opts", (DL_FUNC) &vctrs_df_cast_opts, 5}, {"vctrs_df_ptype2_opts", (DL_FUNC) &vctrs_df_ptype2_opts, 5}, {"vctrs_type_info", (DL_FUNC) &vctrs_type_info, 1}, {"vctrs_proxy_info", (DL_FUNC) &vctrs_proxy_info, 1}, {"vctrs_class_type", (DL_FUNC) &vctrs_class_type, 1}, {"vctrs_bare_df_restore", (DL_FUNC) &vctrs_bare_df_restore, 3}, {"vctrs_recycle", (DL_FUNC) &vctrs_recycle, 3}, {"vctrs_assign", (DL_FUNC) &vctrs_assign, 5}, {"vctrs_assign_seq", (DL_FUNC) &vctrs_assign_seq, 5}, {"vctrs_set_attributes", (DL_FUNC) &vctrs_set_attributes, 2}, {"vctrs_as_df_row", (DL_FUNC) &vctrs_as_df_row, 2}, {"vctrs_outer_names", (DL_FUNC) &vctrs_outer_names, 3}, {"vctrs_df_size", (DL_FUNC) &vctrs_df_size, 1}, {"vctrs_as_df_col", (DL_FUNC) &vctrs_as_df_col, 2}, {"vctrs_apply_name_spec", (DL_FUNC) &vctrs_apply_name_spec, 4}, {"vctrs_unset_s4", (DL_FUNC) &vctrs_unset_s4, 1}, {"vctrs_altrep_rle_Make", (DL_FUNC) &altrep_rle_Make, 1}, {"vctrs_validate_name_repair_arg", (DL_FUNC) &vctrs_validate_name_repair_arg, 1}, {"vctrs_validate_minimal_names", (DL_FUNC) &vctrs_validate_minimal_names, 2}, {"vctrs_as_names", (DL_FUNC) &vctrs_as_names, 4}, {"vctrs_is_partial", (DL_FUNC) &vctrs_is_partial, 1}, {"vctrs_is_list", (DL_FUNC) &vctrs_is_list, 1}, {"vctrs_try_catch_callback", (DL_FUNC) &vctrs_try_catch_callback, 2}, {"vctrs_is_coercible", (DL_FUNC) &vctrs_is_coercible, 5}, {"vctrs_as_subscript", (DL_FUNC) &vctrs_as_subscript, 5}, {"vctrs_as_subscript_result", (DL_FUNC) &vctrs_as_subscript_result, 5}, {"vctrs_df_flatten_info", (DL_FUNC) &vctrs_df_flatten_info, 1}, {"vctrs_df_flatten", (DL_FUNC) &df_flatten, 1}, {"vctrs_linked_version", (DL_FUNC) &vctrs_linked_version, 0}, {"vctrs_tib_ptype2", (DL_FUNC) &vctrs_tib_ptype2, 4}, {"vctrs_tib_cast", (DL_FUNC) &vctrs_tib_cast, 4}, {"vctrs_assign_params", (DL_FUNC) &vctrs_assign_params, 4}, {"vctrs_has_dim", (DL_FUNC) &vctrs_has_dim, 1}, {"vctrs_rep", (DL_FUNC) &vctrs_rep, 2}, {"vctrs_rep_each", (DL_FUNC) &vctrs_rep_each, 2}, {"vctrs_maybe_shared_col", (DL_FUNC) &vctrs_maybe_shared_col, 2}, {"vctrs_new_df_unshared_col", (DL_FUNC) &vctrs_new_df_unshared_col, 0}, {"vctrs_shaped_ptype", (DL_FUNC) &vctrs_shaped_ptype, 5}, {"vctrs_shape2", (DL_FUNC) &vctrs_shape2, 4}, {"vctrs_new_date", (DL_FUNC) &vctrs_new_date, 1}, {"vctrs_date_validate", (DL_FUNC) &vctrs_date_validate, 1}, {"vctrs_new_datetime", (DL_FUNC) &vctrs_new_datetime, 2}, {"vctrs_datetime_validate", (DL_FUNC) &vctrs_datetime_validate, 1}, {"vctrs_ptype2_opts", (DL_FUNC) &vctrs_ptype2_opts, 5}, {"vctrs_s3_find_method", (DL_FUNC) &vctrs_s3_find_method, 3}, {"vctrs_implements_ptype2", (DL_FUNC) &vctrs_implements_ptype2, 1}, {"vctrs_ptype2_dispatch_native", (DL_FUNC) &vctrs_ptype2_dispatch_native, 5}, {"vctrs_cast_dispatch_native", (DL_FUNC) &vctrs_cast_dispatch_native, 5}, {"vctrs_fast_c", (DL_FUNC) &vctrs_fast_c, 2}, {"vctrs_data_frame", (DL_FUNC) &vctrs_data_frame, 3}, {"vctrs_df_list", (DL_FUNC) &vctrs_df_list, 3}, {"vctrs_identify_runs", (DL_FUNC) &vctrs_identify_runs, 1}, {"vctrs_locate_runs", (DL_FUNC) &vctrs_locate_runs, 2}, {"vctrs_detect_runs", (DL_FUNC) &vctrs_detect_runs, 2}, {"vctrs_slice_complete", (DL_FUNC) &vctrs_slice_complete, 1}, {"vctrs_locate_complete", (DL_FUNC) &vctrs_locate_complete, 1}, {"vctrs_detect_complete", (DL_FUNC) &vctrs_detect_complete, 1}, {"vctrs_normalize_encoding", (DL_FUNC) &vctrs_normalize_encoding, 1}, {"vctrs_order", (DL_FUNC) &vctrs_order, 4}, {"vctrs_order_locs", (DL_FUNC) &vctrs_order_locs, 4}, {"vctrs_unrep", (DL_FUNC) &vctrs_unrep, 1}, {"vctrs_fill_missing", (DL_FUNC) &vctrs_fill_missing, 3}, {"vctrs_chr_paste_prefix", (DL_FUNC) &vctrs_chr_paste_prefix, 3}, {NULL, NULL, 0} }; extern SEXP vctrs_type_common(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_ptype_common_opts(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_size_common(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_recycle_common(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_cast_common(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_cast_common_opts(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_rbind(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_cbind(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_c(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_new_data_frame(SEXP); static const R_ExternalMethodDef ExtEntries[] = { {"vctrs_type_common", (DL_FUNC) &vctrs_type_common, 1}, {"vctrs_ptype_common_opts", (DL_FUNC) &vctrs_ptype_common_opts, 2}, {"vctrs_size_common", (DL_FUNC) &vctrs_size_common, 2}, {"vctrs_recycle_common", (DL_FUNC) &vctrs_recycle_common, 1}, {"vctrs_cast_common", (DL_FUNC) &vctrs_cast_common, 1}, {"vctrs_cast_common_opts", (DL_FUNC) &vctrs_cast_common_opts, 2}, {"vctrs_rbind", (DL_FUNC) &vctrs_rbind, 4}, {"vctrs_cbind", (DL_FUNC) &vctrs_cbind, 3}, {"vctrs_c", (DL_FUNC) &vctrs_c, 3}, {"vctrs_new_data_frame", (DL_FUNC) &vctrs_new_data_frame, -1}, {NULL, NULL, 0} }; export void R_init_vctrs(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, ExtEntries); R_useDynamicSymbols(dll, FALSE); // Maturing // In the public header R_RegisterCCallable("vctrs", "vec_is_vector", (DL_FUNC) &vec_is_vector); R_RegisterCCallable("vctrs", "short_vec_size", (DL_FUNC) &short_vec_size); R_RegisterCCallable("vctrs", "short_vec_recycle", (DL_FUNC) &short_vec_recycle); // Experimental // Exported but not available in the public header R_RegisterCCallable("vctrs", "exp_vec_cast", (DL_FUNC) &exp_vec_cast); R_RegisterCCallable("vctrs", "exp_vec_chop", (DL_FUNC) &exp_vec_chop); R_RegisterCCallable("vctrs", "exp_vec_slice_impl", (DL_FUNC) &exp_vec_slice_impl); R_RegisterCCallable("vctrs", "exp_vec_names", (DL_FUNC) &exp_vec_names); R_RegisterCCallable("vctrs", "exp_vec_set_names", (DL_FUNC) &exp_vec_set_names); R_RegisterCCallable("vctrs", "exp_short_compact_seq", (DL_FUNC) &exp_short_compact_seq); R_RegisterCCallable("vctrs", "exp_short_init_compact_seq", (DL_FUNC) &exp_short_init_compact_seq); // Altrep classes vctrs_init_altrep_rle(dll); } void vctrs_init_bind(SEXP ns); void vctrs_init_cast(SEXP ns); void vctrs_init_data(SEXP ns); void vctrs_init_dictionary(SEXP ns); void vctrs_init_names(SEXP ns); void vctrs_init_proxy_restore(SEXP ns); void vctrs_init_slice(SEXP ns); void vctrs_init_slice_assign(SEXP ns); void vctrs_init_subscript(SEXP ns); void vctrs_init_subscript_loc(SEXP ns); void vctrs_init_ptype2(SEXP ns); void vctrs_init_ptype2_dispatch(SEXP ns); void vctrs_init_rep(SEXP ns); void vctrs_init_type(SEXP ns); void vctrs_init_type_data_frame(SEXP ns); void vctrs_init_type_date_time(SEXP ns); void vctrs_init_type_info(SEXP ns); void vctrs_init_unspecified(SEXP ns); void vctrs_init_utils(SEXP ns); SEXP vctrs_init_library(SEXP ns) { vctrs_init_bind(ns); vctrs_init_cast(ns); vctrs_init_data(ns); vctrs_init_dictionary(ns); vctrs_init_names(ns); vctrs_init_proxy_restore(ns); vctrs_init_slice(ns); vctrs_init_slice_assign(ns); vctrs_init_subscript(ns); vctrs_init_subscript_loc(ns); vctrs_init_ptype2(ns); vctrs_init_ptype2_dispatch(ns); vctrs_init_rep(ns); vctrs_init_type(ns); vctrs_init_type_data_frame(ns); vctrs_init_type_date_time(ns); vctrs_init_type_info(ns); vctrs_init_unspecified(ns); vctrs_init_utils(ns); return R_NilValue; } vctrs/src/type.c0000644000176200001440000001751114042540502013330 0ustar liggesusers#include "vctrs.h" #include "arg-counter.h" #include "ptype-common.h" #include "ptype2.h" #include "type-data-frame.h" #include "utils.h" #include "decl/ptype-decl.h" // Initialised at load time static SEXP syms_vec_ptype = NULL; static SEXP syms_vec_ptype_finalise_dispatch = NULL; static SEXP fns_vec_ptype_finalise_dispatch = NULL; static inline SEXP vec_ptype_slice(SEXP x, SEXP empty); static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg); static SEXP df_ptype(SEXP x, bool bare); // [[ register() ]] SEXP vctrs_ptype(SEXP x, SEXP x_arg) { struct vctrs_arg x_arg_ = vec_as_arg(x_arg); return vec_ptype(x, &x_arg_); } static SEXP col_ptype(SEXP x); // [[ include("vctrs.h") ]] SEXP vec_ptype(SEXP x, struct vctrs_arg* x_arg) { switch (vec_typeof(x)) { case vctrs_type_null: return R_NilValue; case vctrs_type_unspecified: return vctrs_shared_empty_uns; case vctrs_type_logical: return vec_ptype_slice(x, vctrs_shared_empty_lgl); case vctrs_type_integer: return vec_ptype_slice(x, vctrs_shared_empty_int); case vctrs_type_double: return vec_ptype_slice(x, vctrs_shared_empty_dbl); case vctrs_type_complex: return vec_ptype_slice(x, vctrs_shared_empty_cpl); case vctrs_type_character: return vec_ptype_slice(x, vctrs_shared_empty_chr); case vctrs_type_raw: return vec_ptype_slice(x, vctrs_shared_empty_raw); case vctrs_type_list: return vec_ptype_slice(x, vctrs_shared_empty_list); case vctrs_type_dataframe: return df_ptype(x, true); case vctrs_type_s3: return s3_type(x, x_arg); case vctrs_type_scalar: stop_scalar_type(x, x_arg); } never_reached("vec_ptype"); } static SEXP col_ptype(SEXP x) { return vec_ptype(x, args_empty); } static inline SEXP vec_ptype_slice(SEXP x, SEXP empty) { if (ATTRIB(x) == R_NilValue) { return empty; } else { // Slicing preserves attributes return vec_slice(x, R_NilValue); } } static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { switch (class_type(x)) { case vctrs_class_bare_tibble: return df_ptype(x, true); case vctrs_class_data_frame: return df_ptype(x, false); case vctrs_class_bare_data_frame: stop_internal("s3_type", "Bare data frames should be handled by `vec_ptype()`."); case vctrs_class_none: stop_internal("s3_type", "Non-S3 classes should be handled by `vec_ptype()`."); default: break; } if (vec_is_partial(x)) { return x; } SEXP method = PROTECT(vec_ptype_method(x)); SEXP out; if (method == r_null) { vec_assert(x, x_arg); out = vec_slice(x, r_null); } else { out = vec_ptype_invoke(x, method); } UNPROTECT(1); return out; } static inline SEXP vec_ptype_method(SEXP x) { SEXP cls = PROTECT(s3_get_class(x)); SEXP method = s3_class_find_method("vec_ptype", cls, vctrs_method_table); UNPROTECT(1); return method; } static inline SEXP vec_ptype_invoke(SEXP x, SEXP method) { return vctrs_dispatch1(syms_vec_ptype, method, syms_x, x); } SEXP df_ptype(SEXP x, bool bare) { SEXP row_nms = PROTECT(df_rownames(x)); SEXP ptype = R_NilValue; if (bare) { ptype = PROTECT(bare_df_map(x, &col_ptype)); } else { ptype = PROTECT(df_map(x, &col_ptype)); } if (TYPEOF(row_nms) == STRSXP) { Rf_setAttrib(ptype, R_RowNamesSymbol, vctrs_shared_empty_chr); } UNPROTECT(2); return ptype; } static SEXP vec_ptype_finalise_unspecified(SEXP x); static SEXP vec_ptype_finalise_dispatch(SEXP x); // [[ include("vctrs.h"); register() ]] SEXP vec_ptype_finalise(SEXP x) { if (x == R_NilValue) { return x; } if (!OBJECT(x)) { vec_assert(x, args_empty); return x; } if (vec_is_unspecified(x)) { return vec_ptype_finalise_unspecified(x); } if (vec_is_partial(x)) { return vec_ptype_finalise_dispatch(x); } vec_assert(x, args_empty); switch (class_type(x)) { case vctrs_class_bare_tibble: case vctrs_class_bare_data_frame: return bare_df_map(x, &vec_ptype_finalise); case vctrs_class_data_frame: x = PROTECT(df_map(x, &vec_ptype_finalise)); if (Rf_inherits(x, "vctrs:::df_fallback")) { SEXP seen_tibble_attr = PROTECT(Rf_getAttrib(x, Rf_install("seen_tibble"))); bool seen_tibble = r_is_true(seen_tibble_attr); UNPROTECT(1); if (seen_tibble) { r_poke_class(x, classes_tibble); } else { r_poke_class(x, classes_data_frame); } Rf_setAttrib(x, Rf_install("known_classes"), R_NilValue); Rf_setAttrib(x, Rf_install("seen_tibble"), R_NilValue); } UNPROTECT(1); return x; case vctrs_class_none: stop_internal("vec_ptype_finalise", "Non-S3 classes should have returned by now."); default: return vec_ptype_finalise_dispatch(x); } } static SEXP vec_ptype_finalise_unspecified(SEXP x) { R_len_t size = Rf_length(x); if (size == 0) { return vctrs_shared_empty_lgl; } SEXP out = PROTECT(Rf_allocVector(LGLSXP, size)); r_lgl_fill(out, NA_LOGICAL, size); UNPROTECT(1); return out; } static SEXP vec_ptype_finalise_dispatch(SEXP x) { return vctrs_dispatch1( syms_vec_ptype_finalise_dispatch, fns_vec_ptype_finalise_dispatch, syms_x, x ); } static SEXP vctrs_type2_common(SEXP current, SEXP next, struct counters* counters, void* data); // [[ register(external = TRUE) ]] SEXP vctrs_type_common(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP types = PROTECT(rlang_env_dots_values(env)); SEXP ptype = PROTECT(Rf_eval(CAR(args), env)); SEXP out = vec_ptype_common_params(types, ptype, DF_FALLBACK_DEFAULT, S3_FALLBACK_false); UNPROTECT(2); return out; } // [[ register(external = TRUE) ]] SEXP vctrs_ptype_common_opts(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP types = PROTECT(rlang_env_dots_values(env)); SEXP ptype = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP opts = PROTECT(Rf_eval(CAR(args), env)); const struct fallback_opts c_opts = new_fallback_opts(opts); SEXP out = vec_ptype_common_opts(types, ptype, &c_opts); UNPROTECT(3); return out; } // [[ include("ptype-common.h") ]] SEXP vec_ptype_common_params(SEXP dots, SEXP ptype, enum df_fallback df_fallback, enum s3_fallback s3_fallback) { struct fallback_opts opts = { .df = df_fallback, .s3 = s3_fallback }; return vec_ptype_common_opts(dots, ptype, &opts); } // [[ include("ptype-common.h") ]] SEXP vec_ptype_common_opts(SEXP dots, SEXP ptype, const struct fallback_opts* opts) { if (!vec_is_partial(ptype)) { return vec_ptype(ptype, args_dot_ptype); } if (r_is_true(r_peek_option("vctrs.no_guessing"))) { Rf_errorcall(R_NilValue, "strict mode is activated; you must supply complete `.ptype`."); } // Remove constness struct fallback_opts mut_opts = *opts; // Start reduction with the `.ptype` argument SEXP type = PROTECT(reduce(ptype, args_dot_ptype, dots, &vctrs_type2_common, &mut_opts)); type = vec_ptype_finalise(type); UNPROTECT(1); return type; } static SEXP vctrs_type2_common(SEXP current, SEXP next, struct counters* counters, void* data) { int left = -1; const struct ptype2_opts opts = { .x = current, .y = next, .x_arg = counters->curr_arg, .y_arg = counters->next_arg, .fallback = *((struct fallback_opts*) data) }; current = vec_ptype2_opts(&opts, &left); // Update current if RHS is the common type. Otherwise the previous // counter stays in effect. if (!left) { counters_shift(counters); } return current; } void vctrs_init_type(SEXP ns) { syms_vec_ptype = Rf_install("vec_ptype"); syms_vec_ptype_finalise_dispatch = Rf_install("vec_ptype_finalise_dispatch"); fns_vec_ptype_finalise_dispatch = Rf_findVar(syms_vec_ptype_finalise_dispatch, ns); } vctrs/src/slice-chop.c0000644000176200001440000002735414042540502014403 0ustar liggesusers#include "vctrs.h" #include "dim.h" #include "slice.h" #include "subscript-loc.h" #include "type-data-frame.h" #include "owned.h" #include "utils.h" /* * @member proxy_info The result of `vec_proxy_info(x)`. * @member restore_size The restore size used in each call to `vec_restore()`. * Will always be 1 for `indices = NULL`. * @member p_restore_size A pointer to update the restore size. * @member index The current index value. If `indices` are provided, this is * the i-th element of indices. For the default of `indices = NULL`, this * starts at 0 and is incremented by 1 repeatedly through `p_index`. * @member p_index A pointer to increment the `index` value for the default * case. * @member has_indices Whether indices were provided. * @member out_size The size of `out`. Will be `vec_size(x)` in the default * case, otherwise will be `vec_size(indices)`. * @member out The list container for the result. */ struct vctrs_chop_info { struct vctrs_proxy_info proxy_info; SEXP restore_size; int* p_restore_size; SEXP index; int* p_index; bool has_indices; R_len_t out_size; SEXP out; }; #define PROTECT_CHOP_INFO(info, n) do { \ PROTECT_PROXY_INFO(&(info)->proxy_info, n); \ PROTECT((info)->restore_size); \ PROTECT((info)->index); \ PROTECT((info)->out); \ *n += 3; \ } while (0) \ static struct vctrs_chop_info init_chop_info(SEXP x, SEXP indices) { int nprot = 0; struct vctrs_chop_info info; info.proxy_info = vec_proxy_info(x); PROTECT_PROXY_INFO(&info.proxy_info, &nprot); info.restore_size = PROTECT_N(r_int(1), &nprot); info.p_restore_size = INTEGER(info.restore_size); info.index = PROTECT_N(r_int(0), &nprot); info.p_index = INTEGER(info.index); if (indices == R_NilValue) { info.out_size = vec_size(x); info.has_indices = false; } else { info.out_size = vec_size(indices); info.has_indices = true; } info.out = PROTECT_N(Rf_allocVector(VECSXP, info.out_size), &nprot); UNPROTECT(nprot); return info; } // ----------------------------------------------------------------------------- static SEXP chop(SEXP x, SEXP indices, struct vctrs_chop_info info); static SEXP chop_shaped(SEXP x, SEXP indices, struct vctrs_chop_info info); static SEXP chop_df(SEXP x, SEXP indices, struct vctrs_chop_info info); static SEXP chop_fallback(SEXP x, SEXP indices, struct vctrs_chop_info info); static SEXP chop_fallback_shaped(SEXP x, SEXP indices, struct vctrs_chop_info info); static SEXP vec_chop_base(SEXP x, SEXP indices, struct vctrs_chop_info info); SEXP vec_as_indices(SEXP indices, R_len_t n, SEXP names); // [[ register() ]] SEXP vctrs_chop_seq(SEXP x, SEXP starts, SEXP sizes, SEXP increasings) { int* p_starts = INTEGER(starts); int* p_sizes = INTEGER(sizes); int* p_increasings = LOGICAL(increasings); int n = Rf_length(starts); SEXP indices = PROTECT(Rf_allocVector(VECSXP, n)); for (int i = 0; i < n; ++i) { SEXP index = compact_seq(p_starts[i], p_sizes[i], p_increasings[i]); SET_VECTOR_ELT(indices, i, index); } SEXP out = PROTECT(vec_chop(x, indices)); UNPROTECT(2); return out; } // [[ register() ]] SEXP vctrs_chop(SEXP x, SEXP indices) { R_len_t n = vec_size(x); SEXP names = PROTECT(vec_names(x)); indices = PROTECT(vec_as_indices(indices, n, names)); SEXP out = PROTECT(vec_chop(x, indices)); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP vec_chop(SEXP x, SEXP indices) { int nprot = 0; struct vctrs_chop_info info = init_chop_info(x, indices); PROTECT_CHOP_INFO(&info, &nprot); SEXP out = PROTECT_N(vec_chop_base(x, indices, info), &nprot); UNPROTECT(nprot); return out; } static SEXP vec_chop_base(SEXP x, SEXP indices, struct vctrs_chop_info info) { struct vctrs_proxy_info proxy_info = info.proxy_info; // Fallback to `[` if the class doesn't implement a proxy. This is // to be maximally compatible with existing classes. if (vec_requires_fallback(x, proxy_info)) { if (proxy_info.type == vctrs_type_scalar) { Rf_errorcall(R_NilValue, "Can't slice a scalar"); } if (info.has_indices) { for (int i = 0; i < info.out_size; ++i) { SEXP index = VECTOR_ELT(indices, i); if (is_compact(index)) { SET_VECTOR_ELT(indices, i, compact_materialize(index)); } } } if (has_dim(x)) { return chop_fallback_shaped(x, indices, info); } return chop_fallback(x, indices, info); } switch (proxy_info.type) { case vctrs_type_logical: case vctrs_type_integer: case vctrs_type_double: case vctrs_type_complex: case vctrs_type_character: case vctrs_type_raw: case vctrs_type_list: { if (has_dim(x)) { return chop_shaped(x, indices, info); } return chop(x, indices, info); } case vctrs_type_dataframe: { return chop_df(x, indices, info); } default: vec_assert(x, args_empty); stop_unimplemented_vctrs_type("vec_chop_base", proxy_info.type); } } static SEXP chop(SEXP x, SEXP indices, struct vctrs_chop_info info) { SEXP proxy = info.proxy_info.proxy; SEXP names = PROTECT(Rf_getAttrib(proxy, R_NamesSymbol)); for (R_len_t i = 0; i < info.out_size; ++i) { if (info.has_indices) { info.index = VECTOR_ELT(indices, i); *info.p_restore_size = vec_subscript_size(info.index); } else { ++(*info.p_index); } SEXP elt = PROTECT(vec_slice_base(info.proxy_info.type, proxy, info.index)); if (names != R_NilValue) { SEXP elt_names = PROTECT(slice_names(names, info.index)); r_poke_names(elt, elt_names); UNPROTECT(1); } elt = vec_restore(elt, x, info.restore_size, vec_owned(elt)); SET_VECTOR_ELT(info.out, i, elt); UNPROTECT(1); } UNPROTECT(1); return info.out; } static SEXP chop_df(SEXP x, SEXP indices, struct vctrs_chop_info info) { SEXP proxy = info.proxy_info.proxy; int n_cols = Rf_length(proxy); SEXP col_names = PROTECT(Rf_getAttrib(proxy, R_NamesSymbol)); SEXP row_names = PROTECT(df_rownames(proxy)); bool has_row_names = TYPEOF(row_names) == STRSXP; // Pre-load the `out` container with lists that will become data frames for (R_len_t i = 0; i < info.out_size; ++i) { SEXP elt = PROTECT(Rf_allocVector(VECSXP, n_cols)); Rf_setAttrib(elt, R_NamesSymbol, col_names); if (has_row_names) { if (info.has_indices) { info.index = VECTOR_ELT(indices, i); } else { ++(*info.p_index); } Rf_setAttrib(elt, R_RowNamesSymbol, slice_rownames(row_names, info.index)); } SET_VECTOR_ELT(info.out, i, elt); UNPROTECT(1); } // Split each column according to the indices, and then assign the results // into the appropriate data frame column in the `out` list for (int i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(proxy, i); SEXP split = PROTECT(vec_chop(col, indices)); for (int j = 0; j < info.out_size; ++j) { SEXP elt = VECTOR_ELT(info.out, j); SET_VECTOR_ELT(elt, i, VECTOR_ELT(split, j)); } UNPROTECT(1); } // Restore each data frame for (int i = 0; i < info.out_size; ++i) { if (info.has_indices) { *info.p_restore_size = vec_subscript_size(VECTOR_ELT(indices, i)); } SEXP elt = VECTOR_ELT(info.out, i); elt = vec_restore(elt, x, info.restore_size, vec_owned(elt)); SET_VECTOR_ELT(info.out, i, elt); } UNPROTECT(2); return info.out; } static SEXP chop_shaped(SEXP x, SEXP indices, struct vctrs_chop_info info) { SEXP proxy = info.proxy_info.proxy; SEXP dim_names = PROTECT(Rf_getAttrib(proxy, R_DimNamesSymbol)); SEXP row_names = R_NilValue; if (dim_names != R_NilValue) { row_names = VECTOR_ELT(dim_names, 0); } for (R_len_t i = 0; i < info.out_size; ++i) { if (info.has_indices) { info.index = VECTOR_ELT(indices, i); *info.p_restore_size = vec_subscript_size(info.index); } else { ++(*info.p_index); } SEXP elt = PROTECT(vec_slice_shaped(info.proxy_info.type, proxy, info.index)); if (dim_names != R_NilValue) { if (row_names != R_NilValue) { SEXP new_dim_names = PROTECT(Rf_shallow_duplicate(dim_names)); SEXP new_row_names = PROTECT(slice_names(row_names, info.index)); SET_VECTOR_ELT(new_dim_names, 0, new_row_names); Rf_setAttrib(elt, R_DimNamesSymbol, new_dim_names); UNPROTECT(2); } else { Rf_setAttrib(elt, R_DimNamesSymbol, dim_names); } } elt = vec_restore(elt, x, info.restore_size, vec_owned(elt)); SET_VECTOR_ELT(info.out, i, elt); UNPROTECT(1); } UNPROTECT(1); return info.out; } static SEXP chop_fallback(SEXP x, SEXP indices, struct vctrs_chop_info info) { // Evaluate in a child of the global environment to allow dispatch // to custom functions. We define `[` to point to its base // definition to ensure consistent look-up. This is the same logic // as in `vctrs_dispatch_n()`, reimplemented here to allow repeated // evaluations in a loop. SEXP env = PROTECT(r_new_environment(R_GlobalEnv)); Rf_defineVar(syms_x, x, env); Rf_defineVar(syms_i, info.index, env); // Construct call with symbols, not values, for performance. // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. SEXP call; if (is_integer64(x)) { call = PROTECT(Rf_lang3(syms_vec_slice_dispatch_integer64, syms_x, syms_i)); Rf_defineVar(syms_vec_slice_dispatch_integer64, fns_vec_slice_dispatch_integer64, env); } else { call = PROTECT(Rf_lang3(syms_bracket, syms_x, syms_i)); Rf_defineVar(syms_bracket, fns_bracket, env); } for (R_len_t i = 0; i < info.out_size; ++i) { if (info.has_indices) { info.index = VECTOR_ELT(indices, i); *info.p_restore_size = vec_size(info.index); // Update `i` binding with the new index value Rf_defineVar(syms_i, info.index, env); } else { ++(*info.p_index); } SEXP elt = PROTECT(Rf_eval(call, env)); if (!vec_is_restored(elt, x)) { elt = vec_restore(elt, x, info.restore_size, vec_owned(elt)); } SET_VECTOR_ELT(info.out, i, elt); UNPROTECT(1); } UNPROTECT(2); return info.out; } static SEXP chop_fallback_shaped(SEXP x, SEXP indices, struct vctrs_chop_info info) { for (R_len_t i = 0; i < info.out_size; ++i) { if (info.has_indices) { info.index = VECTOR_ELT(indices, i); } else { ++(*info.p_index); } // `vec_slice_fallback()` will also `vec_restore()` for us SEXP elt = PROTECT(vec_slice_fallback(x, info.index)); SET_VECTOR_ELT(info.out, i, elt); UNPROTECT(1); } return info.out; } // ----------------------------------------------------------------------------- SEXP vec_as_indices(SEXP indices, R_len_t n, SEXP names) { if (indices == R_NilValue) { return indices; } if (TYPEOF(indices) != VECSXP) { Rf_errorcall(R_NilValue, "`indices` must be a list of index values, or `NULL`."); } indices = PROTECT(r_clone_referenced(indices)); R_len_t size = vec_size(indices); const struct subscript_opts subscript_opts = { .action = SUBSCRIPT_ACTION_DEFAULT, .logical = SUBSCRIPT_TYPE_ACTION_ERROR, .numeric = SUBSCRIPT_TYPE_ACTION_CAST, .character = SUBSCRIPT_TYPE_ACTION_ERROR, .subscript_arg = NULL }; // Restrict index values to positive integer locations const struct location_opts opts = { .subscript_opts = &subscript_opts, .missing = SUBSCRIPT_MISSING_PROPAGATE, .loc_negative = LOC_NEGATIVE_ERROR, .loc_oob = LOC_OOB_ERROR, .loc_zero = LOC_ZERO_ERROR }; for (int i = 0; i < size; ++i) { SEXP index = VECTOR_ELT(indices, i); index = vec_as_location_opts(index, n, names, &opts); SET_VECTOR_ELT(indices, i, index); } UNPROTECT(1); return indices; } vctrs/src/slice-assign.c0000644000176200001440000004565714042540502014744 0ustar liggesusers#include "vctrs.h" #include "dim.h" #include "names.h" #include "owned.h" #include "slice-assign.h" #include "subscript-loc.h" #include "utils.h" // Initialised at load time SEXP syms_vec_assign_fallback = NULL; SEXP fns_vec_assign_fallback = NULL; const struct vec_assign_opts vec_assign_default_opts = { .assign_names = false }; static SEXP vec_assign_fallback(SEXP x, SEXP index, SEXP value); static SEXP vec_proxy_assign_names(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned); static SEXP lgl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned); static SEXP int_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned); static SEXP dbl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned); static SEXP cpl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned); SEXP chr_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned); static SEXP raw_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned); SEXP list_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned); // [[ include("slice-assign.h") ]] SEXP vec_assign_opts(SEXP x, SEXP index, SEXP value, const struct vec_assign_opts* opts) { if (x == R_NilValue) { return R_NilValue; } vec_assert(x, opts->x_arg); vec_assert(value, opts->value_arg); index = PROTECT(vec_as_location_opts(index, vec_size(x), PROTECT(vec_names(x)), location_default_assign_opts)); // Cast and recycle `value` value = PROTECT(vec_cast(value, x, opts->value_arg, opts->x_arg)); value = PROTECT(vec_recycle(value, vec_size(index), opts->value_arg)); SEXP proxy = PROTECT(vec_proxy(x)); const enum vctrs_owned owned = vec_owned(proxy); proxy = PROTECT(vec_proxy_assign_opts(proxy, index, value, owned, opts)); SEXP out = vec_restore(proxy, x, R_NilValue, owned); UNPROTECT(6); return out; } // [[ register() ]] SEXP vctrs_assign(SEXP x, SEXP index, SEXP value, SEXP x_arg_, SEXP value_arg_) { struct vctrs_arg x_arg = vec_as_arg(x_arg_); struct vctrs_arg value_arg = vec_as_arg(value_arg_); const struct vec_assign_opts opts = { .assign_names = false, .x_arg = &x_arg, .value_arg = &value_arg }; return vec_assign_opts(x, index, value, &opts); } // [[ register() ]] SEXP vctrs_assign_params(SEXP x, SEXP index, SEXP value, SEXP assign_names) { const struct vec_assign_opts opts = { .assign_names = r_bool_as_int(assign_names) }; return vec_assign_opts(x, index, value, &opts); } static SEXP vec_assign_switch(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { switch (vec_proxy_typeof(proxy)) { case vctrs_type_logical: return lgl_assign(proxy, index, value, owned); case vctrs_type_integer: return int_assign(proxy, index, value, owned); case vctrs_type_double: return dbl_assign(proxy, index, value, owned); case vctrs_type_complex: return cpl_assign(proxy, index, value, owned); case vctrs_type_character: return chr_assign(proxy, index, value, owned); case vctrs_type_raw: return raw_assign(proxy, index, value, owned); case vctrs_type_list: return list_assign(proxy, index, value, owned); case vctrs_type_dataframe: return df_assign(proxy, index, value, owned, opts); case vctrs_type_scalar: stop_scalar_type(proxy, args_empty); default: stop_unimplemented_vctrs_type("vec_assign_switch", vec_typeof(proxy)); } never_reached("vec_assign_switch"); } // `vec_proxy_assign_opts()` conditionally duplicates the `proxy` depending // on a number of factors. // // - If a fallback is required, the `proxy` is duplicated at the R level. // - If `owned` is `VCTRS_OWNED_true`, the `proxy` is typically not duplicated. // However, if it is an ALTREP object, it is duplicated because we need to be // able to assign into the object it represents, not the ALTREP SEXP itself. // - If `owned` is `VCTRS_OWNED_false`, the `proxy` is only // duplicated if it is referenced, i.e. `MAYBE_REFERENCED()` returns `true`. // // In `vec_proxy_assign()`, which is part of the experimental public API, // ownership is determined with a call to `NO_REFERENCES()`. If there are no // references, then `VCTRS_OWNED_true` is used, else // `VCTRS_OWNED_false` is used. // // Ownership of the `proxy` must be recursive. For data frames, the `owned` // argument is passed along to each column. // // Practically, we only set `VCTRS_OWNED_true` when we create a fresh data // structure at the C level and then assign into it to fill it. This happens // in `vec_c()` and `vec_rbind()`. For data frames, this `owned` parameter // is particularly important for R 4.0.0 where references are tracked more // precisely. In R 4.0.0, a freshly created data frame's columns all have a // refcount of 1 because of the `SET_VECTOR_ELT()` call that set them in the // data frame. This makes them referenced, but not shared. If // `VCTRS_OWNED_false` was set and `df_assign()` was used in a loop // (as it is in `vec_rbind()`), then a copy of each column would be made at // each iteration of the loop (any time a new set of rows is assigned // into the output object). // // Even though it can directly assign, the safe // way to call `vec_proxy_assign()` and `vec_proxy_assign_opts()` is to catch // and protect their output rather than relying on them to assign directly. /* * @param proxy The proxy of the output container * @param index The locations to assign `value` to * @param value The value to assign into the proxy. Must already be * cast to the type of the true output container, and have been * recycled to the correct size. Should not be proxied, in case * we have to fallback. */ SEXP vec_proxy_assign(SEXP proxy, SEXP index, SEXP value) { return vec_proxy_assign_opts(proxy, index, value, vec_owned(proxy), &vec_assign_default_opts); } SEXP vec_proxy_assign_opts(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { int n_protect = 0; struct vec_assign_opts mut_opts = *opts; bool ignore_outer_names = mut_opts.ignore_outer_names; mut_opts.ignore_outer_names = false; struct vctrs_proxy_info value_info = vec_proxy_info(value); PROTECT_PROXY_INFO(&value_info, &n_protect); if (TYPEOF(proxy) != TYPEOF(value_info.proxy)) { stop_internal("vec_proxy_assign_opts", "`proxy` of type `%s` incompatible with `value` proxy of type `%s`.", Rf_type2char(TYPEOF(proxy)), Rf_type2char(TYPEOF(value_info.proxy))); } // If a fallback is required, the `proxy` is identical to the output container // because no proxy method was called SEXP out = R_NilValue; if (vec_requires_fallback(value, value_info)) { index = PROTECT(compact_materialize(index)); out = PROTECT(vec_assign_fallback(proxy, index, value)); ++n_protect; } else if (has_dim(proxy)) { out = PROTECT(vec_assign_shaped(proxy, index, value_info.proxy, owned, &mut_opts)); } else { out = PROTECT(vec_assign_switch(proxy, index, value_info.proxy, owned, &mut_opts)); } ++n_protect; if (!ignore_outer_names && opts->assign_names) { out = vec_proxy_assign_names(out, index, value_info.proxy, owned); } UNPROTECT(n_protect); return out; } #define ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF) \ R_len_t n = Rf_length(index); \ int* index_data = INTEGER(index); \ \ if (n != Rf_length(value)) { \ stop_internal("vec_assign", \ "`value` should have been recycled to fit `x`."); \ } \ \ const CTYPE* value_data = CONST_DEREF(value); \ \ SEXP out = PROTECT(vec_clone_referenced(x, owned)); \ CTYPE* out_data = DEREF(out); \ \ for (R_len_t i = 0; i < n; ++i) { \ int j = index_data[i]; \ if (j != NA_INTEGER) { \ out_data[j - 1] = value_data[i]; \ } \ } \ \ UNPROTECT(1); \ return out #define ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF) \ int* index_data = INTEGER(index); \ R_len_t start = index_data[0]; \ R_len_t n = index_data[1]; \ R_len_t step = index_data[2]; \ \ if (n != Rf_length(value)) { \ stop_internal("vec_assign", \ "`value` should have been recycled to fit `x`."); \ } \ \ const CTYPE* value_data = CONST_DEREF(value); \ \ SEXP out = PROTECT(vec_clone_referenced(x, owned)); \ CTYPE* out_data = DEREF(out) + start; \ \ for (int i = 0; i < n; ++i, out_data += step, ++value_data) { \ *out_data = *value_data; \ } \ \ UNPROTECT(1); \ return out #define ASSIGN(CTYPE, DEREF, CONST_DEREF) \ if (is_compact_seq(index)) { \ ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF); \ } else { \ ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF); \ } static SEXP lgl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) { ASSIGN(int, LOGICAL, LOGICAL_RO); } static SEXP int_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) { ASSIGN(int, INTEGER, INTEGER_RO); } static SEXP dbl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) { ASSIGN(double, REAL, REAL_RO); } static SEXP cpl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) { ASSIGN(Rcomplex, COMPLEX, COMPLEX_RO); } SEXP chr_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) { ASSIGN(SEXP, STRING_PTR, STRING_PTR_RO); } static SEXP raw_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) { ASSIGN(Rbyte, RAW, RAW_RO); } #undef ASSIGN #undef ASSIGN_INDEX #undef ASSIGN_COMPACT #define ASSIGN_BARRIER_INDEX(GET, SET) \ R_len_t n = Rf_length(index); \ int* index_data = INTEGER(index); \ \ if (n != Rf_length(value)) { \ stop_internal("vec_assign", \ "`value` should have been recycled to fit `x`."); \ } \ \ SEXP out = PROTECT(vec_clone_referenced(x, owned)); \ \ for (R_len_t i = 0; i < n; ++i) { \ int j = index_data[i]; \ if (j != NA_INTEGER) { \ SET(out, j - 1, GET(value, i)); \ } \ } \ \ UNPROTECT(1); \ return out #define ASSIGN_BARRIER_COMPACT(GET, SET) \ int* index_data = INTEGER(index); \ R_len_t start = index_data[0]; \ R_len_t n = index_data[1]; \ R_len_t step = index_data[2]; \ \ if (n != Rf_length(value)) { \ stop_internal("vec_assign", \ "`value` should have been recycled to fit `x`."); \ } \ \ SEXP out = PROTECT(vec_clone_referenced(x, owned)); \ \ for (R_len_t i = 0; i < n; ++i, start += step) { \ SET(out, start, GET(value, i)); \ } \ \ UNPROTECT(1); \ return out #define ASSIGN_BARRIER(GET, SET) \ if (is_compact_seq(index)) { \ ASSIGN_BARRIER_COMPACT(GET, SET); \ } else { \ ASSIGN_BARRIER_INDEX(GET, SET); \ } SEXP list_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) { ASSIGN_BARRIER(VECTOR_ELT, SET_VECTOR_ELT); } #undef ASSIGN_BARRIER #undef ASSIGN_BARRIER_INDEX #undef ASSIGN_BARRIER_COMPACT /** * - `out` and `value` must be rectangular lists. * - `value` must have the same size as `index`. * * Performance and safety notes: * If `x` is a fresh data frame (which would be the case in `vec_c()` and * `vec_rbind()`) then `r_clone_referenced()` will return it untouched. Each * column will also be fresh, so if `vec_proxy()` just returns its input then * `vec_proxy_assign_opts()` will directly assign to that column in `x`. This * makes it extremely fast to assign to a data frame. * * If `x` is referenced already, then `r_clone_referenced()` will call * `Rf_shallow_duplicate()`. For lists, this loops over the list and marks * each list element with max namedness. This is helpful for us, because * it is possible to have a data frame that is itself referenced, with columns * that are not (mtcars is an example). If each list element wasn't marked, then * `vec_proxy_assign_opts()` would see an unreferenced column and modify it * directly, resulting in improper mutable semantics. See #986 for full details. * * [[ include("vctrs.h") ]] */ SEXP df_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { SEXP out = PROTECT(vec_clone_referenced(x, owned)); R_len_t n = Rf_length(out); if (Rf_length(value) != n) { stop_internal("df_assign", "Can't assign %d columns to df of length %d.", Rf_length(value), n); } for (R_len_t i = 0; i < n; ++i) { SEXP out_elt = VECTOR_ELT(out, i); SEXP value_elt = VECTOR_ELT(value, i); // No need to cast or recycle because those operations are // recursive and have already been performed. However, proxy and // restore are not recursive so need to be done for each element // we recurse into. `vec_proxy_assign()` will proxy the `value_elt`. SEXP proxy_elt = PROTECT(vec_proxy(out_elt)); SEXP assigned = PROTECT(vec_proxy_assign_opts(proxy_elt, index, value_elt, owned, opts)); assigned = vec_restore(assigned, out_elt, R_NilValue, owned); SET_VECTOR_ELT(out, i, assigned); UNPROTECT(2); } UNPROTECT(1); return out; } static SEXP vec_assign_fallback(SEXP x, SEXP index, SEXP value) { return vctrs_dispatch3(syms_vec_assign_fallback, fns_vec_assign_fallback, syms_x, x, syms_i, index, syms_value, value); } static SEXP vec_proxy_assign_names(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned) { SEXP value_nms = PROTECT(vec_names(value)); if (value_nms == R_NilValue) { UNPROTECT(1); return proxy; } SEXP proxy_nms = PROTECT(vec_proxy_names(proxy)); if (proxy_nms == R_NilValue) { proxy_nms = PROTECT(Rf_allocVector(STRSXP, vec_size(proxy))); } else { proxy_nms = PROTECT(vec_clone_referenced(proxy_nms, owned)); } proxy_nms = PROTECT(chr_assign(proxy_nms, index, value_nms, owned)); proxy = PROTECT(vec_clone_referenced(proxy, owned)); proxy = vec_proxy_set_names(proxy, proxy_nms, owned); UNPROTECT(5); return proxy; } // Exported for testing // [[ register() ]] SEXP vctrs_assign_seq(SEXP x, SEXP value, SEXP start, SEXP size, SEXP increasing) { R_len_t start_ = r_int_get(start, 0); R_len_t size_ = r_int_get(size, 0); bool increasing_ = r_lgl_get(increasing, 0); SEXP index = PROTECT(compact_seq(start_, size_, increasing_)); const struct vec_assign_opts* opts = &vec_assign_default_opts; // Cast and recycle `value` value = PROTECT(vec_cast(value, x, opts->value_arg, opts->x_arg)); value = PROTECT(vec_recycle(value, vec_subscript_size(index), opts->value_arg)); SEXP proxy = PROTECT(vec_proxy(x)); const enum vctrs_owned owned = vec_owned(proxy); proxy = PROTECT(vec_proxy_assign_opts(proxy, index, value, owned, opts)); SEXP out = vec_restore(proxy, x, R_NilValue, owned); UNPROTECT(5); return out; } void vctrs_init_slice_assign(SEXP ns) { syms_vec_assign_fallback = Rf_install("vec_assign_fallback"); fns_vec_assign_fallback = Rf_findVar(syms_vec_assign_fallback, ns); } vctrs/src/utils.c0000644000176200001440000016270014042540502013510 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "type-data-frame.h" #include "owned.h" #include // Initialised at load time bool (*rlang_is_splice_box)(SEXP) = NULL; SEXP (*rlang_unbox)(SEXP) = NULL; SEXP (*rlang_env_dots_values)(SEXP) = NULL; SEXP (*rlang_env_dots_list)(SEXP) = NULL; SEXP vctrs_method_table = NULL; SEXP base_method_table = NULL; SEXP s4_c_method_table = NULL; SEXP strings_tbl = NULL; SEXP strings_tbl_df = NULL; SEXP strings_data_frame = NULL; SEXP strings_date = NULL; SEXP strings_posixct = NULL; SEXP strings_posixlt = NULL; SEXP strings_posixt = NULL; SEXP strings_factor = NULL; SEXP strings_ordered = NULL; SEXP strings_list = NULL; SEXP classes_data_frame = NULL; SEXP classes_factor = NULL; SEXP classes_ordered = NULL; SEXP classes_date = NULL; SEXP classes_posixct = NULL; SEXP classes_tibble = NULL; SEXP classes_vctrs_group_rle = NULL; static SEXP syms_as_data_frame2 = NULL; static SEXP fns_as_data_frame2 = NULL; static SEXP vctrs_eval_mask_n_impl(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args, SEXP mask); /** * Evaluate with masked arguments * * This takes two arrays of argument (`args`) and argument names * `syms`). The names should correspond to formal arguments of `fn`. * Elements of `args` are assigned to their corresponding name in * `syms` directly in the current environment, i.e. the environment of * the closure wrapping the `.Call()` invokation. Since masked * evaluation causes side effects and variable assignments in that * frame environment, the native code invokation must be tailing: no * further R code (including `on.exit()` expressions) should be * evaluated in that closure wrapper. * * A call to `fn` is constructed with the * CARs and TAGs assigned symmetrically to the elements of * `syms`. This way the arguments are masked by symbols corresponding * to the formal parameters. * * @param fn The function to call. * @param syms A null-terminated array of symbols. The arguments * `args` are assigned to these symbols. The assignment occurs in a * child of `env` and the dispatch call refers to these symbols. * @param args A null-terminated array of arguments passed to the method. * @param env The environment in which to evaluate. */ SEXP vctrs_eval_mask_n(SEXP fn, SEXP* syms, SEXP* args) { SEXP mask = PROTECT(r_peek_frame()); SEXP out = vctrs_eval_mask_n_impl(R_NilValue, fn, syms, args, mask); UNPROTECT(1); return out; } SEXP vctrs_eval_mask1(SEXP fn, SEXP x_sym, SEXP x) { SEXP syms[2] = { x_sym, NULL }; SEXP args[2] = { x, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask2(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y) { SEXP syms[3] = { x_sym, y_sym, NULL }; SEXP args[3] = { x, y, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask3(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[4] = { x_sym, y_sym, z_sym, NULL }; SEXP args[4] = { x, y, z, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask4(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4) { SEXP syms[5] = { x1_sym, x2_sym, x3_sym, x4_sym, NULL }; SEXP args[5] = { x1, x2, x3, x4, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask5(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5) { SEXP syms[6] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, NULL }; SEXP args[6] = { x1, x2, x3, x4, x5, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask6(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6) { SEXP syms[7] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, NULL }; SEXP args[7] = { x1, x2, x3, x4, x5, x6, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask7(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6, SEXP x7_sym, SEXP x7) { SEXP syms[8] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, x7_sym, NULL }; SEXP args[8] = { x1, x2, x3, x4, x5, x6, x7, NULL }; return vctrs_eval_mask_n(fn, syms, args); } /** * Dispatch in the current environment * * Like `vctrs_eval_mask_n()`, the arguments `args` are are assigned * to the symbols `syms`. In addition, the function `fn` is assigned * to `fn_sym`. The mask is the current environment which has hygiene * implications regarding the closure wrapping `.Call()`, as * documented in `vctrs_eval_mask_n()`. * * @param fn_sym A symbol to which `fn` is assigned. * @inheritParams vctrs_eval_mask_n */ SEXP vctrs_dispatch_n(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args) { SEXP mask = PROTECT(r_peek_frame()); SEXP out = vctrs_eval_mask_n_impl(fn_sym, fn, syms, args, mask); UNPROTECT(1); return out; } SEXP vctrs_dispatch1(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x) { SEXP syms[2] = { x_sym, NULL }; SEXP args[2] = { x, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch2(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y) { SEXP syms[3] = { x_sym, y_sym, NULL }; SEXP args[3] = { x, y, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch3(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[4] = { x_sym, y_sym, z_sym, NULL }; SEXP args[4] = { x, y, z, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch4(SEXP fn_sym, SEXP fn, SEXP w_sym, SEXP w, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[5] = { w_sym, x_sym, y_sym, z_sym, NULL }; SEXP args[5] = { w, x, y, z, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch6(SEXP fn_sym, SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6) { SEXP syms[7] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, NULL }; SEXP args[7] = { x1, x2, x3, x4, x5, x6, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } static SEXP vctrs_eval_mask_n_impl(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args, SEXP env) { SEXP mask = PROTECT(r_new_environment(env)); if (fn_sym != R_NilValue) { Rf_defineVar(fn_sym, fn, mask); fn = fn_sym; } SEXP body = PROTECT(r_call(fn, syms, syms)); SEXP call_fn = PROTECT(r_new_function(R_NilValue, body, mask)); SEXP call = PROTECT(Rf_lang1(call_fn)); while (*syms) { Rf_defineVar(*syms, *args, mask); ++syms; ++args; } SEXP out = Rf_eval(call, env); UNPROTECT(4); return out; } // [[ register() ]] SEXP vctrs_maybe_shared_col(SEXP x, SEXP i) { int i_ = r_int_get(i, 0) - 1; SEXP col = VECTOR_ELT(x, i_); bool out = MAYBE_SHARED(col); return Rf_ScalarLogical(out); } // [[ register() ]] SEXP vctrs_new_df_unshared_col() { SEXP col = PROTECT(Rf_allocVector(INTSXP, 1)); INTEGER(col)[0] = 1; SEXP out = PROTECT(Rf_allocVector(VECSXP, 1)); // In R 4.0.0, `SET_VECTOR_ELT()` bumps the REFCNT of // `col`. Because of this, `col` is now referenced (refcnt > 0), // but it isn't shared (refcnt > 1). SET_VECTOR_ELT(out, 0, col); SEXP names = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(names, 0, Rf_mkChar("x")); Rf_setAttrib(out, R_NamesSymbol, names); init_data_frame(out, 1); UNPROTECT(3); return out; } // An alternative to `attributes(x) <- attrib`, which makes // two copies on R < 3.6.0 // [[ register() ]] SEXP vctrs_set_attributes(SEXP x, SEXP attrib) { R_len_t n_attrib = Rf_length(attrib); int n_protect = 0; x = PROTECT(r_clone_referenced(x)); ++n_protect; // Remove existing attributes, and unset the object bit SET_ATTRIB(x, R_NilValue); SET_OBJECT(x, 0); // Possible early exit after removing attributes if (n_attrib == 0) { UNPROTECT(n_protect); return x; } SEXP names = Rf_getAttrib(attrib, R_NamesSymbol); if (Rf_isNull(names)) { Rf_errorcall(R_NilValue, "Attributes must be named."); } // Check that each element of `names` is named. for (R_len_t i = 0; i < n_attrib; ++i) { SEXP name = STRING_ELT(names, i); if (name == NA_STRING || name == R_BlankString) { const char* msg = "All attributes must have names. Attribute %i does not."; Rf_errorcall(R_NilValue, msg, i + 1); } } // Always set `dim` first, if it exists. This way it is set before `dimnames`. int dim_pos = -1; for (R_len_t i = 0; i < n_attrib; ++i) { if (!strcmp(CHAR(STRING_ELT(names, i)), "dim")) { dim_pos = i; break; } } if (dim_pos != -1) { Rf_setAttrib(x, R_DimSymbol, VECTOR_ELT(attrib, dim_pos)); } for (R_len_t i = 0; i < n_attrib; ++i) { if (i == dim_pos) { continue; } Rf_setAttrib(x, Rf_installChar(STRING_ELT(names, i)), VECTOR_ELT(attrib, i)); } UNPROTECT(n_protect); return x; } // [[ include("utils.h") ]] SEXP map(SEXP x, SEXP (*fn)(SEXP)) { R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); for (R_len_t i = 0; i < n; ++i) { SET_VECTOR_ELT(out, i, fn(VECTOR_ELT(x, i))); } SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, nms); UNPROTECT(2); return out; } // [[ include("utils.h") ]] SEXP map_with_data(SEXP x, SEXP (*fn)(SEXP, void*), void* data) { R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); for (R_len_t i = 0; i < n; ++i) { SET_VECTOR_ELT(out, i, fn(VECTOR_ELT(x, i), data)); } SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, nms); UNPROTECT(2); return out; } // [[ include("utils.h") ]] SEXP bare_df_map(SEXP df, SEXP (*fn)(SEXP)) { SEXP out = PROTECT(map(df, fn)); // Total ownership because `map()` generates a fresh list out = vec_bare_df_restore(out, df, vctrs_shared_zero_int, VCTRS_OWNED_true); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP df_map(SEXP df, SEXP (*fn)(SEXP)) { SEXP out = PROTECT(map(df, fn)); // Total ownership because `map()` generates a fresh list out = vec_df_restore(out, df, vctrs_shared_zero_int, VCTRS_OWNED_true); UNPROTECT(1); return out; } #define RESIZE(CONST_DEREF, DEREF, CTYPE, SEXPTYPE) do { \ if (x_size == size) { \ return x; \ } \ \ const CTYPE* p_x = CONST_DEREF(x); \ \ SEXP out = PROTECT(Rf_allocVector(SEXPTYPE, size)); \ CTYPE* p_out = DEREF(out); \ \ r_ssize copy_size = (size > x_size) ? x_size : size; \ \ memcpy(p_out, p_x, copy_size * sizeof(CTYPE)); \ \ UNPROTECT(1); \ return out; \ } while (0) #define RESIZE_BARRIER(CONST_DEREF, SEXPTYPE, SET) do { \ if (x_size == size) { \ return x; \ } \ \ const SEXP* p_x = CONST_DEREF(x); \ \ SEXP out = PROTECT(Rf_allocVector(SEXPTYPE, size)); \ \ r_ssize copy_size = (size > x_size) ? x_size : size; \ \ for (r_ssize i = 0; i < copy_size; ++i) { \ SET(out, i, p_x[i]); \ } \ \ UNPROTECT(1); \ return out; \ } while (0) // Faster than `Rf_xlengthgets()` because that fills the new extended // locations with `NA`, which we don't need. // [[ include("utils.h") ]] SEXP int_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE(INTEGER_RO, INTEGER, int, INTSXP); } // [[ include("utils.h") ]] SEXP raw_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE(RAW_RO, RAW, Rbyte, RAWSXP); } // [[ include("utils.h") ]] SEXP chr_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE_BARRIER(STRING_PTR_RO, STRSXP, SET_STRING_ELT); } #undef RESIZE #undef RESIZE_BARRIER inline void never_reached(const char* fn) { Rf_error("Internal error in `%s()`: Reached the unreachable.", fn); } static char s3_buf[200]; SEXP s3_paste_method_sym(const char* generic, const char* class) { int gen_len = strlen(generic); int class_len = strlen(class); int dot_len = 1; if (gen_len + class_len + dot_len >= sizeof(s3_buf)) { stop_internal("s3_paste_method_sym", "Generic or class name is too long."); } char* buf = s3_buf; memcpy(buf, generic, gen_len); buf += gen_len; *buf = '.'; ++buf; memcpy(buf, class, class_len); buf += class_len; *buf = '\0'; return Rf_install(s3_buf); } // First check in global env, then in method table SEXP s3_get_method(const char* generic, const char* class, SEXP table) { SEXP sym = s3_paste_method_sym(generic, class); return s3_sym_get_method(sym, table); } SEXP s3_sym_get_method(SEXP sym, SEXP table) { SEXP method = r_env_get(R_GlobalEnv, sym); if (r_is_function(method)) { return method; } method = r_env_get(table, sym); if (r_is_function(method)) { return method; } return R_NilValue; } // [[ register() ]] SEXP vctrs_s3_find_method(SEXP generic, SEXP x, SEXP table) { return s3_find_method(r_chr_get_c_string(generic, 0), x, table); } // [[ include("utils.h") ]] SEXP s3_find_method(const char* generic, SEXP x, SEXP table) { if (!OBJECT(x)) { return R_NilValue; } SEXP class = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP method = s3_class_find_method(generic, class, table); UNPROTECT(1); return method; } // [[ include("utils.h") ]] SEXP s3_class_find_method(const char* generic, SEXP class, SEXP table) { // Avoid corrupt objects where `x` is an OBJECT(), but the class is NULL if (class == R_NilValue) { return R_NilValue; } SEXP const* p_class = STRING_PTR_RO(class); int n_class = Rf_length(class); for (int i = 0; i < n_class; ++i) { SEXP method = s3_get_method(generic, CHAR(p_class[i]), table); if (method != R_NilValue) { return method; } } return R_NilValue; } // [[ include("utils.h") ]] SEXP s3_get_class(SEXP x) { SEXP class = R_NilValue; if (OBJECT(x)) { class = Rf_getAttrib(x, R_ClassSymbol); } // This handles unclassed objects as well as gremlins objects where // `x` is an OBJECT(), but the class is NULL if (class == R_NilValue) { class = s3_bare_class(x); } if (!Rf_length(class)) { stop_internal("s3_get_class", "Class must have length."); } return class; } SEXP s3_get_class0(SEXP x) { SEXP class = PROTECT(s3_get_class(x)); SEXP out = STRING_ELT(class, 0); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP s3_find_method_xy(const char* generic, SEXP x, SEXP y, SEXP table, SEXP* method_sym_out) { SEXP x_class = PROTECT(s3_get_class0(x)); SEXP y_class = PROTECT(s3_get_class0(y)); SEXP method_sym = R_NilValue; method_sym = s3_paste_method_sym(generic, CHAR(x_class)); method_sym = s3_paste_method_sym(CHAR(PRINTNAME(method_sym)), CHAR(y_class)); SEXP method = s3_sym_get_method(method_sym, table); if (method == R_NilValue) { *method_sym_out = R_NilValue; } else { *method_sym_out = method_sym; } UNPROTECT(2); return method; } // [[ include("utils.h") ]] SEXP s3_find_method2(const char* generic, SEXP x, SEXP table, SEXP* method_sym_out) { SEXP class = PROTECT(s3_get_class0(x)); SEXP method_sym = s3_paste_method_sym(generic, CHAR(class)); SEXP method = s3_sym_get_method(method_sym, table); if (method == R_NilValue) { *method_sym_out = R_NilValue; } else { *method_sym_out = method_sym; } UNPROTECT(1); return method; } // [[ include("utils.h") ]] SEXP s3_bare_class(SEXP x) { switch (TYPEOF(x)) { case NILSXP: return chrs_null; case LGLSXP: return chrs_logical; case INTSXP: return chrs_integer; case REALSXP: return chrs_double; case CPLXSXP: return chrs_complex; case STRSXP: return chrs_character; case RAWSXP: return chrs_raw; case VECSXP: return chrs_list; case EXPRSXP: return chrs_expression; case CLOSXP: case SPECIALSXP: case BUILTINSXP: return chrs_function; default: stop_unimplemented_vctrs_type("base_dispatch_class_str", vec_typeof(x)); } } static SEXP s4_get_method(const char* class, SEXP table) { SEXP sym = Rf_install(class); SEXP method = r_env_get(table, sym); if (r_is_function(method)) { return method; } return R_NilValue; } // For S4 objects, the `table` is specific to the generic SEXP s4_find_method(SEXP x, SEXP table) { if (!IS_S4_OBJECT(x)) { return R_NilValue; } SEXP class = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP out = s4_class_find_method(class, table); UNPROTECT(1); return out; } SEXP s4_class_find_method(SEXP class, SEXP table) { // Avoid corrupt objects where `x` is an OBJECT(), but the class is NULL if (class == R_NilValue) { return R_NilValue; } SEXP const* p_class = STRING_PTR_RO(class); int n_class = Rf_length(class); for (int i = 0; i < n_class; ++i) { SEXP method = s4_get_method(CHAR(p_class[i]), table); if (method != R_NilValue) { return method; } } return R_NilValue; } // [[ include("utils.h") ]] bool vec_implements_ptype2(SEXP x) { switch (vec_typeof(x)) { case vctrs_type_scalar: return false; case vctrs_type_s3: { SEXP method_sym = R_NilValue; SEXP method = s3_find_method_xy("vec_ptype2", x, x, vctrs_method_table, &method_sym); if (method != R_NilValue) { return true; } method = s3_find_method2("vec_ptype2", x, vctrs_method_table, &method_sym); return method != R_NilValue; } default: return true; } } // [[ register() ]] SEXP vctrs_implements_ptype2(SEXP x) { return r_lgl(vec_implements_ptype2(x)); } // [[ include("utils.h") ]] SEXP list_first_non_null(SEXP xs, R_len_t* non_null_i) { SEXP x = R_NilValue; R_len_t n = Rf_length(xs); R_len_t i = 0; for (; i < n; ++i) { x = VECTOR_ELT(xs, i); if (x != R_NilValue) { break; } } if (non_null_i) { *non_null_i = i; } return x; } // [[ include("utils.h") ]] bool list_is_homogeneously_classed(SEXP xs) { R_len_t n = Rf_length(xs); if (n == 0 || n == 1) { return true; } R_len_t i = -1; SEXP first = list_first_non_null(xs, &i); SEXP first_class = PROTECT(r_class(first)); for (; i < n; ++i) { SEXP this = VECTOR_ELT(xs, i); if (this == R_NilValue) { continue; } SEXP this_class = PROTECT(r_class(this)); if (!equal_object(first_class, this_class)) { UNPROTECT(2); return false; } UNPROTECT(1); } UNPROTECT(1); return true; } // [[ include("utils.h") ]] SEXP node_compact_d(SEXP node) { SEXP handle = PROTECT(Rf_cons(R_NilValue, node)); SEXP prev = handle; while (node != R_NilValue) { if (CAR(node) == R_NilValue) { SETCDR(prev, CDR(node)); } else { prev = node; } node = CDR(node); } UNPROTECT(1); return CDR(handle); } // [[ include("utils.h") ]] SEXP new_empty_factor(SEXP levels) { if (TYPEOF(levels) != STRSXP) { stop_internal("new_empty_factor", "`level` must be a character vector."); } SEXP out = PROTECT(Rf_allocVector(INTSXP, 0)); Rf_setAttrib(out, R_LevelsSymbol, levels); Rf_setAttrib(out, R_ClassSymbol, classes_factor); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP new_empty_ordered(SEXP levels) { SEXP out = PROTECT(Rf_allocVector(INTSXP, 0)); Rf_setAttrib(out, R_LevelsSymbol, levels); Rf_setAttrib(out, R_ClassSymbol, classes_ordered); UNPROTECT(1); return out; } // [[ include("utils.h") ]] bool list_has_inner_vec_names(SEXP x, R_len_t size) { for (R_len_t i = 0; i < size; ++i) { SEXP elt = VECTOR_ELT(x, i); if (vec_names(elt) != R_NilValue) { return true; } } return false; } /** * Pluck elements `i` from a list of lists. * @return A list of the same length as `xs`. */ // [[ include("utils.h") ]] SEXP list_pluck(SEXP xs, R_len_t i) { R_len_t n = Rf_length(xs); SEXP out = PROTECT(r_new_list(n)); for (R_len_t j = 0; j < n; ++j) { SEXP x = r_list_get(xs, j); r_list_poke(out, j, r_list_get(x, i)); } UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] enum vctrs_dbl_class dbl_classify(double x) { if (!isnan(x)) { return vctrs_dbl_number; } union vctrs_dbl_indicator indicator; indicator.value = x; if (indicator.key[vctrs_indicator_pos] == 1954) { return vctrs_dbl_missing; } else { return vctrs_dbl_nan; } } // Initialised at load time SEXP compact_seq_attrib = NULL; // p[0] = Start value // p[1] = Sequence size. Always >= 1. // p[2] = Step size to increment/decrement `start` with void init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing) { int step = increasing ? 1 : -1; p[0] = start; p[1] = size; p[2] = step; } // Returns a compact sequence that `vec_slice()` understands // The sequence is generally generated as `[start, start +/- size)` // If `size == 0` a 0-length sequence is generated // `start` is 0-based SEXP compact_seq(R_len_t start, R_len_t size, bool increasing) { if (start < 0) { stop_internal("compact_seq", "`start` must not be negative."); } if (size < 0) { stop_internal("compact_seq", "`size` must not be negative."); } if (!increasing && size > start + 1) { stop_internal("compact_seq", "`size` must not be larger than `start` for decreasing sequences."); } SEXP info = PROTECT(Rf_allocVector(INTSXP, 3)); int* p = INTEGER(info); init_compact_seq(p, start, size, increasing); SET_ATTRIB(info, compact_seq_attrib); UNPROTECT(1); return info; } bool is_compact_seq(SEXP x) { return ATTRIB(x) == compact_seq_attrib; } // Materialize a 1-based sequence SEXP compact_seq_materialize(SEXP x) { int* p = INTEGER(x); R_len_t start = p[0] + 1; R_len_t size = p[1]; R_len_t step = p[2]; SEXP out = PROTECT(Rf_allocVector(INTSXP, size)); int* out_data = INTEGER(out); for (R_len_t i = 0; i < size; ++i, ++out_data, start += step) { *out_data = start; } UNPROTECT(1); return out; } // Initialised at load time SEXP compact_rep_attrib = NULL; void init_compact_rep(int* p, R_len_t i, R_len_t n) { p[0] = i; p[1] = n; } // Returns a compact repetition that `vec_slice()` understands // `i` should be an R-based index SEXP compact_rep(R_len_t i, R_len_t n) { if (n < 0) { stop_internal("compact_rep", "Negative `n` in `compact_rep()`."); } SEXP rep = PROTECT(Rf_allocVector(INTSXP, 2)); int* p = INTEGER(rep); init_compact_rep(p, i, n); SET_ATTRIB(rep, compact_rep_attrib); UNPROTECT(1); return rep; } bool is_compact_rep(SEXP x) { return ATTRIB(x) == compact_rep_attrib; } SEXP compact_rep_materialize(SEXP x) { int i = r_int_get(x, 0); int n = r_int_get(x, 1); SEXP out = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill(out, i, n); UNPROTECT(1); return out; } bool is_compact(SEXP x) { return is_compact_rep(x) || is_compact_seq(x); } SEXP compact_materialize(SEXP x) { if (is_compact_rep(x)) { return compact_rep_materialize(x); } else if (is_compact_seq(x)) { return compact_seq_materialize(x); } else { return x; } } R_len_t vec_subscript_size(SEXP x) { if (is_compact_rep(x)) { return r_int_get(x, 1); } else if (is_compact_seq(x)) { return r_int_get(x, 1); } else { return vec_size(x); } } static SEXP syms_colnames = NULL; static SEXP fns_colnames = NULL; // [[ include("utils.h") ]] SEXP colnames(SEXP x) { return vctrs_dispatch1(syms_colnames, fns_colnames, syms_x, x); } // [[ include("utils.h") ]] bool is_integer64(SEXP x) { return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); } // [[ include("utils.h") ]] bool lgl_any_na(SEXP x) { R_xlen_t size = Rf_xlength(x); const int* p_x = LOGICAL_RO(x); for (R_xlen_t i = 0; i < size; ++i) { if (p_x[i] == NA_LOGICAL) { return true; } } return false; } void* r_vec_deref(SEXP x) { switch (TYPEOF(x)) { case LGLSXP: return LOGICAL(x); case INTSXP: return INTEGER(x); case REALSXP: return REAL(x); case CPLXSXP: return COMPLEX(x); case RAWSXP: return RAW(x); default: stop_unimplemented_type("r_vec_deref", TYPEOF(x)); } } const void* r_vec_deref_const(SEXP x) { switch (TYPEOF(x)) { case LGLSXP: return LOGICAL_RO(x); case INTSXP: return INTEGER_RO(x); case REALSXP: return REAL_RO(x); case CPLXSXP: return COMPLEX_RO(x); case STRSXP: return STRING_PTR_RO(x); case RAWSXP: return RAW_RO(x); case VECSXP: return VECTOR_PTR_RO(x); default: stop_unimplemented_type("r_vec_deref_const", TYPEOF(x)); } } void* r_vec_deref_barrier(SEXP x) { switch (TYPEOF(x)) { case STRSXP: case VECSXP: return (void*) x; default: return r_vec_deref(x); } } const void* r_vec_deref_barrier_const(SEXP x) { switch (TYPEOF(x)) { case STRSXP: case VECSXP: return (const void*) x; default: return r_vec_deref_const(x); } } #define FILL(CTYPE, DEST, DEST_I, SRC, SRC_I, N) \ do { \ CTYPE* p_dest = (CTYPE*) DEST; \ p_dest += DEST_I; \ CTYPE* end = p_dest + N; \ CTYPE value = ((const CTYPE*) SRC)[SRC_I]; \ \ while (p_dest != end) { \ *p_dest++ = value; \ } \ } while (false) #define FILL_BARRIER(GET, SET, DEST, DEST_I, SRC, SRC_I, N) \ do { \ SEXP out = (SEXP) DEST; \ SEXP value = GET((SEXP) SRC, SRC_I); \ \ for (r_ssize i = 0; i < N; ++i) { \ SET(out, DEST_I + i, value); \ } \ } while (false) void r_vec_fill(SEXPTYPE type, void* dest, r_ssize dest_i, const void* src, r_ssize src_i, r_ssize n) { switch (type) { case INTSXP: FILL(int, dest, dest_i, src, src_i, n); return; case STRSXP: FILL_BARRIER(STRING_ELT, SET_STRING_ELT, dest, dest_i, src, src_i, n); return; default: stop_unimplemented_type("r_vec_fill", type); } } #undef FILL_BARRIER #undef FILL r_ssize r_lgl_sum(SEXP x, bool na_true) { if (TYPEOF(x) != LGLSXP) { stop_internal("r_lgl_sum", "Expected logical vector."); } r_ssize n = r_length(x); const int* p_x = LOGICAL(x); // This can't overflow since `sum` is necessarily smaller or equal // to the vector length expressed in `r_ssize`. r_ssize sum = 0; if (na_true) { for (r_ssize i = 0; i < n; ++i) { const int elt = p_x[i]; if (elt) { ++sum; } } } else { for (r_ssize i = 0; i < n; ++i) { const int elt = p_x[i]; if (elt == 1) { ++sum; } } } return sum; } SEXP r_lgl_which(SEXP x, bool na_propagate) { if (TYPEOF(x) != LGLSXP) { stop_internal("r_lgl_which", "Expected logical vector."); } r_ssize n = r_length(x); const int* p_x = LOGICAL(x); r_ssize out_n = r_lgl_sum(x, na_propagate); SEXP out = PROTECT(Rf_allocVector(INTSXP, out_n)); int* p_out = INTEGER(out); r_ssize loc = 0; if (na_propagate) { for (r_ssize i = 0; i < n; ++i) { const int elt = p_x[i]; if (elt) { p_out[loc] = (elt == NA_LOGICAL) ? NA_INTEGER : i + 1; ++loc; } } } else { for (r_ssize i = 0; i < n; ++i) { const int elt = p_x[i]; if (elt) { p_out[loc] = i + 1; ++loc; } } } UNPROTECT(1); return out; } #define FILL() { \ for (R_len_t i = 0; i < n; ++i) { \ p_x[i] = value; \ } \ } void r_p_lgl_fill(int* p_x, int value, R_len_t n) { FILL(); } void r_p_int_fill(int* p_x, int value, R_len_t n) { FILL(); } void r_p_chr_fill(SEXP* p_x, SEXP value, R_len_t n) { FILL(); } #undef FILL void r_lgl_fill(SEXP x, int value, R_len_t n) { r_p_lgl_fill(LOGICAL(x), value, n); } void r_int_fill(SEXP x, int value, R_len_t n) { r_p_int_fill(INTEGER(x), value, n); } void r_chr_fill(SEXP x, SEXP value, R_len_t n) { r_p_chr_fill(STRING_PTR(x), value, n); } void r_int_fill_seq(SEXP x, int start, R_len_t n) { int* data = INTEGER(x); for (R_len_t i = 0; i < n; ++i, ++data, ++start) { *data = start; } } SEXP r_seq(R_len_t from, R_len_t to) { R_len_t n = to - from; if (n < 0) { stop_internal("r_seq", "Negative length."); } SEXP seq = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill_seq(seq, from, n); UNPROTECT(1); return seq; } #define FIND(CTYPE, CONST_DEREF) \ R_len_t n = Rf_length(x); \ const CTYPE* data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < n; ++i) { \ if (data[i] == value) { \ return i; \ } \ } \ return -1 R_len_t r_chr_find(SEXP x, SEXP value) { FIND(SEXP, STRING_PTR_RO); } #undef FIND bool r_int_any_na(SEXP x) { int* data = INTEGER(x); R_len_t n = Rf_length(x); for (R_len_t i = 0; i < n; ++i, ++data) { if (*data == NA_INTEGER) { return true; } } return false; } int r_chr_max_len(SEXP x) { R_len_t n = Rf_length(x); SEXP const* p = STRING_PTR_RO(x); int max = 0; for (R_len_t i = 0; i < n; ++i, ++p) { int len = strlen(CHAR(*p)); max = len > max ? len : max; } return max; } /** * Create a character vector of sequential integers * * @param n The sequence is from 1 to `n`. * @param buf,len A memory buffer of size `len`. * @param prefix A null-terminated string that is prefixed to the * sequence. */ SEXP r_chr_iota(R_len_t n, char* buf, int len, const char* prefix) { int prefix_len = strlen(prefix); if (len - 1 < prefix_len) { stop_internal("r_chr_iota", "Prefix is larger than iota buffer."); } memcpy(buf, prefix, prefix_len); len -= prefix_len; char* beg = buf + prefix_len; SEXP out = PROTECT(Rf_allocVector(STRSXP, n)); for (R_len_t i = 0; i < n; ++i) { int written = snprintf(beg, len, "%d", i + 1); if (written >= len) { UNPROTECT(1); return R_NilValue; } SET_STRING_ELT(out, i, Rf_mkChar(buf)); } UNPROTECT(1); return out; } #include static void abort_parse(SEXP code, const char* why) { if (Rf_GetOption1(Rf_install("rlang__verbose_errors")) != R_NilValue) { Rf_PrintValue(code); } stop_internal("r_parse", why); } SEXP r_parse(const char* str) { SEXP str_ = PROTECT(Rf_mkString(str)); ParseStatus status; SEXP out = PROTECT(R_ParseVector(str_, -1, &status, R_NilValue)); if (status != PARSE_OK) { abort_parse(str_, "Parsing failed."); } if (Rf_length(out) != 1) { abort_parse(str_, "Expected a single expression."); } out = VECTOR_ELT(out, 0); UNPROTECT(2); return out; } SEXP r_parse_eval(const char* str, SEXP env) { SEXP out = Rf_eval(PROTECT(r_parse(str)), env); UNPROTECT(1); return out; } static SEXP new_env_call = NULL; static SEXP new_env__parent_node = NULL; static SEXP new_env__size_node = NULL; #if 0 SEXP r_new_environment(SEXP parent, R_len_t size) { parent = parent ? parent : R_EmptyEnv; SETCAR(new_env__parent_node, parent); size = size ? size : 29; SETCAR(new_env__size_node, Rf_ScalarInteger(size)); SEXP env = Rf_eval(new_env_call, R_BaseEnv); // Free for gc SETCAR(new_env__parent_node, R_NilValue); return env; } #endif static SEXP new_function_call = NULL; static SEXP new_function__formals_node = NULL; static SEXP new_function__body_node = NULL; #if 0 SEXP r_new_function(SEXP formals, SEXP body, SEXP env) { SETCAR(new_function__formals_node, formals); SETCAR(new_function__body_node, body); SEXP fn = Rf_eval(new_function_call, env); // Free for gc SETCAR(new_function__formals_node, R_NilValue); SETCAR(new_function__body_node, R_NilValue); return fn; } #endif // [[ include("utils.h") ]] SEXP r_protect(SEXP x) { return Rf_lang2(fns_quote, x); } // [[ include("utils.h") ]] bool r_is_bool(SEXP x) { return TYPEOF(x) == LGLSXP && Rf_length(x) == 1 && LOGICAL(x)[0] != NA_LOGICAL; } bool r_is_true(SEXP x) { return r_is_bool(x) && LOGICAL(x)[0] == 1; } // [[ include("utils.h") ]] int r_bool_as_int(SEXP x) { if (!r_is_bool(x)) { Rf_errorcall(R_NilValue, "Input must be a single `TRUE` or `FALSE`."); } return LOGICAL(x)[0]; } bool r_is_string(SEXP x) { return TYPEOF(x) == STRSXP && Rf_length(x) == 1 && STRING_ELT(x, 0) != NA_STRING; } bool r_is_number(SEXP x) { return TYPEOF(x) == INTSXP && Rf_length(x) == 1 && INTEGER(x)[0] != NA_INTEGER; } bool r_is_positive_number(SEXP x) { return r_is_number(x) && INTEGER(x)[0] > 0; } SEXP r_peek_option(const char* option) { return Rf_GetOption1(Rf_install(option)); } static SEXP peek_frame_call = NULL; // Calling `sys.frame()` has a cost of 1.5us compared to 300ns for // `R_GetCurrentEnv()`. However the latter is currently buggy, see // https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17839. SEXP r_peek_frame() { return Rf_eval(peek_frame_call, R_EmptyEnv); } /** * Create a call or pairlist * * @param tags Optional. If not `NULL`, a null-terminated array of symbols. * @param cars Mandatory. A null-terminated array of CAR values. * @param fn The first CAR value of the language list. * * [[ include("utils.h") ]] */ SEXP r_pairlist(SEXP* tags, SEXP* cars) { if (!cars) { stop_internal("r_pairlist", "NULL `cars`."); } SEXP list = PROTECT(Rf_cons(R_NilValue, R_NilValue)); SEXP node = list; while (*cars) { SEXP next_node = Rf_cons(*cars, R_NilValue); SETCDR(node, next_node); node = next_node; if (tags) { SET_TAG(next_node, *tags); ++tags; } ++cars; } UNPROTECT(1); return CDR(list); } SEXP r_call(SEXP fn, SEXP* tags, SEXP* cars) { return Rf_lcons(fn, r_pairlist(tags, cars)); } bool r_has_name_at(SEXP names, R_len_t i) { if (TYPEOF(names) != STRSXP) { return false; } R_len_t n = Rf_length(names); if (n <= i) { stop_internal("r_has_name_at", "Names shorter than expected: (%d/%d).", i + 1, n); } SEXP elt = STRING_ELT(names, i); return elt != NA_STRING && elt != strings_empty; } bool r_is_minimal_names(SEXP x) { if (TYPEOF(x) != STRSXP) { return false; } R_len_t n = Rf_length(x); const SEXP* p = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP elt = *p; if (elt == NA_STRING || elt == strings_empty) { return false; } } return true; } bool r_is_empty_names(SEXP x) { if (TYPEOF(x) != STRSXP) { if (x == R_NilValue) { return true; } else { return false; } } R_len_t n = Rf_length(x); const SEXP* p = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP elt = *p; if (elt != NA_STRING && elt != strings_empty) { return false; } } return true; } SEXP r_env_get(SEXP env, SEXP sym) { SEXP obj = PROTECT(Rf_findVarInFrame3(env, sym, FALSE)); // Force lazy loaded bindings if (TYPEOF(obj) == PROMSXP) { obj = Rf_eval(obj, R_BaseEnv); } UNPROTECT(1); return obj; } bool r_is_function(SEXP x) { switch (TYPEOF(x)) { case CLOSXP: case BUILTINSXP: case SPECIALSXP: return true; default: return false; } } SEXP r_clone_referenced(SEXP x) { if (MAYBE_REFERENCED(x)) { return Rf_shallow_duplicate(x); } else { return x; } } SEXP r_clone_shared(SEXP x) { if (MAYBE_SHARED(x)) { return Rf_shallow_duplicate(x); } else { return x; } } bool r_is_names(SEXP names) { if (names == R_NilValue) { return false; } R_len_t n = Rf_length(names); const SEXP* p = STRING_PTR_RO(names); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP nm = *p; if (nm == strings_empty || nm == NA_STRING) { return false; } } return true; } bool r_chr_has_string(SEXP x, SEXP str) { R_len_t n = Rf_length(x); const SEXP* xp = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++xp) { if (*xp == str) { return true; } } return false; } SEXP r_as_data_frame(SEXP x) { if (is_bare_data_frame(x)) { return x; } else { return vctrs_dispatch1(syms_as_data_frame2, fns_as_data_frame2, syms_x, x); } } SEXP rlang_formula_formals = NULL; SEXP r_as_function(SEXP x, const char* arg) { switch (TYPEOF(x)) { case CLOSXP: case BUILTINSXP: case SPECIALSXP: return x; case LANGSXP: if (CAR(x) == syms_tilde && CDDR(x) == R_NilValue) { SEXP env = PROTECT(Rf_getAttrib(x, syms_dot_environment)); if (env == R_NilValue) { Rf_errorcall(R_NilValue, "Can't transform formula to function because it doesn't have an environment."); } SEXP fn = r_new_function(rlang_formula_formals, CADR(x), env); UNPROTECT(1); return fn; } // else fallthrough; default: Rf_errorcall(R_NilValue, "Can't convert `%s` to a function", arg); } } static SEXP syms_try_catch_hnd = NULL; static inline SEXP try_catch_hnd(SEXP ptr) { SEXP call = PROTECT(Rf_lang2(syms_try_catch_hnd, ptr)); SEXP out = Rf_eval(call, vctrs_ns_env); UNPROTECT(1); return out; } struct r_try_catch_data { void (*fn)(void*); void* fn_data; SEXP cnd_sym; void (*hnd)(void*); void* hnd_data; ERR err; }; // [[ register() ]] SEXP vctrs_try_catch_callback(SEXP ptr, SEXP cnd) { struct r_try_catch_data* data = (struct r_try_catch_data*) R_ExternalPtrAddr(ptr); if (cnd == R_NilValue) { if (data->fn) { data->fn(data->fn_data); } } else { data->err = cnd; if (data->hnd) { data->hnd(data->hnd_data); } } return R_NilValue; } static SEXP syms_try_catch_impl = NULL; // [[ include("utils.h") ]] ERR r_try_catch(void (*fn)(void*), void* fn_data, SEXP cnd_sym, void (*hnd)(void*), void* hnd_data) { struct r_try_catch_data data = { .fn = fn, .fn_data = fn_data, .cnd_sym = cnd_sym, .hnd = hnd, .hnd_data = hnd_data, .err = NULL }; SEXP xptr = PROTECT(R_MakeExternalPtr(&data, R_NilValue, R_NilValue)); SEXP hnd_fn = PROTECT(try_catch_hnd(xptr)); SEXP syms[3] = { syms_data, cnd_sym, NULL }; SEXP args[3] = { xptr, hnd_fn, NULL }; SEXP call = PROTECT(r_call(syms_try_catch_impl, syms, args)); Rf_eval(call, vctrs_ns_env); UNPROTECT(3); return data.err; } SEXP (*rlang_sym_as_character)(SEXP x); // [[ include("utils.h") ]] SEXP chr_c(SEXP x, SEXP y) { r_ssize x_n = r_length(x); r_ssize y_n = r_length(y); if (x_n == 0) { return y; } if (y_n == 0) { return x; } r_ssize out_n = r_ssize_add(x_n, y_n); SEXP out = PROTECT(r_new_vector(STRSXP, out_n)); const SEXP* p_x = STRING_PTR_RO(x); const SEXP* p_y = STRING_PTR_RO(y); for (r_ssize i = 0; i < x_n; ++i) { SET_STRING_ELT(out, i, p_x[i]); } for (r_ssize i = 0, j = x_n; i < y_n; ++i, ++j) { SET_STRING_ELT(out, j, p_y[i]); } UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_fast_c(SEXP x, SEXP y) { SEXPTYPE x_type = TYPEOF(x); if (x_type != TYPEOF(y)) { Rf_error("`x` and `y` must have the same types."); } switch (x_type) { case STRSXP: return chr_c(x, y); default: stop_unimplemented_type("vctrs_fast_c", x_type); } } #define FMT_BUFSIZE 4096 #define FMT_INTERP(BUF, FMT, DOTS) \ { \ va_list dots; \ va_start(dots, FMT); \ vsnprintf(BUF, FMT_BUFSIZE, FMT, dots); \ va_end(dots); \ \ BUF[FMT_BUFSIZE - 1] = '\0'; \ } __attribute__((noreturn)) void r_abort(const char* fmt, ...) { R_CheckStack2(FMT_BUFSIZE); char msg[FMT_BUFSIZE]; FMT_INTERP(msg, fmt, ...); SEXP r_msg = PROTECT(r_chr(msg)); vctrs_eval_mask1(syms_abort, syms_message, r_msg); never_reached("r_abort"); } __attribute__((noreturn)) void stop_internal(const char* fn, const char* fmt, ...) { R_CheckStack2(FMT_BUFSIZE); char msg[FMT_BUFSIZE]; FMT_INTERP(msg, fmt, ...); r_abort("Internal error in `%s()`: %s", fn, msg); } #undef FMT_INTERP #undef FMT_BUFSIZE bool vctrs_debug_verbose = false; SEXP vctrs_ns_env = NULL; SEXP vctrs_shared_empty_str = NULL; SEXP vctrs_shared_empty_lgl = NULL; SEXP vctrs_shared_empty_int = NULL; SEXP vctrs_shared_empty_dbl = NULL; SEXP vctrs_shared_empty_cpl = NULL; SEXP vctrs_shared_empty_chr = NULL; SEXP vctrs_shared_empty_raw = NULL; SEXP vctrs_shared_empty_list = NULL; SEXP vctrs_shared_empty_date = NULL; SEXP vctrs_shared_true = NULL; SEXP vctrs_shared_false = NULL; Rcomplex vctrs_shared_na_cpl; SEXP vctrs_shared_na_lgl = NULL; SEXP vctrs_shared_na_list = NULL; SEXP vctrs_shared_zero_int = NULL; SEXP strings = NULL; SEXP strings_empty = NULL; SEXP strings_dots = NULL; SEXP strings_none = NULL; SEXP strings_minimal = NULL; SEXP strings_unique = NULL; SEXP strings_universal = NULL; SEXP strings_check_unique = NULL; SEXP strings_key = NULL; SEXP strings_loc = NULL; SEXP strings_val = NULL; SEXP strings_group = NULL; SEXP strings_length = NULL; SEXP strings_vctrs_vctr = NULL; SEXP strings_times = NULL; SEXP chrs_subset = NULL; SEXP chrs_extract = NULL; SEXP chrs_assign = NULL; SEXP chrs_rename = NULL; SEXP chrs_remove = NULL; SEXP chrs_negate = NULL; SEXP chrs_null = NULL; SEXP chrs_logical = NULL; SEXP chrs_integer = NULL; SEXP chrs_double = NULL; SEXP chrs_complex = NULL; SEXP chrs_character = NULL; SEXP chrs_raw = NULL; SEXP chrs_list = NULL; SEXP chrs_expression = NULL; SEXP chrs_numeric = NULL; SEXP chrs_function = NULL; SEXP chrs_empty = NULL; SEXP chrs_cast = NULL; SEXP chrs_error = NULL; SEXP chrs_combine = NULL; SEXP chrs_convert = NULL; SEXP syms_i = NULL; SEXP syms_n = NULL; SEXP syms_x = NULL; SEXP syms_y = NULL; SEXP syms_x_size = NULL; SEXP syms_y_size = NULL; SEXP syms_to = NULL; SEXP syms_dots = NULL; SEXP syms_bracket = NULL; SEXP syms_arg = NULL; SEXP syms_x_arg = NULL; SEXP syms_y_arg = NULL; SEXP syms_to_arg = NULL; SEXP syms_times_arg = NULL; SEXP syms_subscript_arg = NULL; SEXP syms_out = NULL; SEXP syms_value = NULL; SEXP syms_quiet = NULL; SEXP syms_dot_name_spec = NULL; SEXP syms_outer = NULL; SEXP syms_inner = NULL; SEXP syms_tilde = NULL; SEXP syms_dot_environment = NULL; SEXP syms_ptype = NULL; SEXP syms_missing = NULL; SEXP syms_size = NULL; SEXP syms_subscript_action = NULL; SEXP syms_subscript_type = NULL; SEXP syms_repair = NULL; SEXP syms_tzone = NULL; SEXP syms_data = NULL; SEXP syms_vctrs_error_incompatible_type = NULL; SEXP syms_vctrs_error_cast_lossy = NULL; SEXP syms_cnd_signal = NULL; SEXP syms_logical = NULL; SEXP syms_numeric = NULL; SEXP syms_character = NULL; SEXP syms_body = NULL; SEXP syms_parent = NULL; SEXP syms_s3_methods_table = NULL; SEXP syms_from_dispatch = NULL; SEXP syms_df_fallback = NULL; SEXP syms_s3_fallback = NULL; SEXP syms_stop_incompatible_type = NULL; SEXP syms_stop_incompatible_size = NULL; SEXP syms_action = NULL; SEXP syms_vctrs_common_class_fallback = NULL; SEXP syms_fallback_class = NULL; SEXP syms_abort = NULL; SEXP syms_message = NULL; SEXP syms_chr_transform = NULL; SEXP fns_bracket = NULL; SEXP fns_quote = NULL; SEXP fns_names = NULL; SEXP result_attrib = NULL; struct vctrs_arg args_empty_; struct vctrs_arg args_dot_ptype_; struct vctrs_arg args_max_fill_; SEXP r_new_shared_vector(SEXPTYPE type, R_len_t n) { SEXP out = Rf_allocVector(type, n); R_PreserveObject(out); MARK_NOT_MUTABLE(out); return out; } SEXP r_new_shared_character(const char* name) { SEXP out = Rf_mkString(name); R_PreserveObject(out); MARK_NOT_MUTABLE(out); return out; } void c_print_backtrace() { #if defined(RLIB_DEBUG) #include void *buffer[500]; int nptrs = backtrace(buffer, 100); char **strings = backtrace_symbols(buffer, nptrs); for (int j = 0; j < nptrs; ++j) { Rprintf("%s\n", strings[j]); } free(strings); #else Rprintf("vctrs must be compliled with -DRLIB_DEBUG."); #endif } void r_browse(SEXP x) { r_env_poke(R_GlobalEnv, Rf_install(".debug"), x); Rprintf("Object saved in `.debug`:\n"); Rf_PrintValue(x); // `browser()` can't be trailing due to ESS limitations SEXP call = PROTECT(r_parse("{ base::browser(); NULL }")); Rf_eval(call, R_GlobalEnv); UNPROTECT(1); } void vctrs_init_utils(SEXP ns) { vctrs_ns_env = ns; vctrs_debug_verbose = r_is_true(Rf_GetOption1(Rf_install("vctrs:::debug"))); vctrs_method_table = r_env_get(ns, Rf_install(".__S3MethodsTable__.")); base_method_table = r_env_get(R_BaseNamespace, Rf_install(".__S3MethodsTable__.")); s4_c_method_table = r_parse_eval("environment(methods::getGeneric('c'))$.MTable", R_GlobalEnv); R_PreserveObject(s4_c_method_table); vctrs_shared_empty_str = Rf_mkString(""); R_PreserveObject(vctrs_shared_empty_str); // Holds the CHARSXP objects because unlike symbols they can be // garbage collected strings = r_new_shared_vector(STRSXP, 21); strings_dots = Rf_mkChar("..."); SET_STRING_ELT(strings, 0, strings_dots); strings_empty = Rf_mkChar(""); SET_STRING_ELT(strings, 1, strings_empty); strings_date = Rf_mkChar("Date"); SET_STRING_ELT(strings, 2, strings_date); strings_posixct = Rf_mkChar("POSIXct"); SET_STRING_ELT(strings, 3, strings_posixct); strings_posixlt = Rf_mkChar("POSIXlt"); SET_STRING_ELT(strings, 4, strings_posixlt); strings_posixt = Rf_mkChar("POSIXt"); SET_STRING_ELT(strings, 5, strings_posixlt); strings_none = Rf_mkChar("none"); SET_STRING_ELT(strings, 6, strings_none); strings_minimal = Rf_mkChar("minimal"); SET_STRING_ELT(strings, 7, strings_minimal); strings_unique = Rf_mkChar("unique"); SET_STRING_ELT(strings, 8, strings_unique); strings_universal = Rf_mkChar("universal"); SET_STRING_ELT(strings, 9, strings_universal); strings_check_unique = Rf_mkChar("check_unique"); SET_STRING_ELT(strings, 10, strings_check_unique); strings_key = Rf_mkChar("key"); SET_STRING_ELT(strings, 11, strings_key); strings_loc = Rf_mkChar("loc"); SET_STRING_ELT(strings, 12, strings_loc); strings_val = Rf_mkChar("val"); SET_STRING_ELT(strings, 13, strings_val); strings_group = Rf_mkChar("group"); SET_STRING_ELT(strings, 14, strings_group); strings_length = Rf_mkChar("length"); SET_STRING_ELT(strings, 15, strings_length); strings_factor = Rf_mkChar("factor"); SET_STRING_ELT(strings, 16, strings_factor); strings_ordered = Rf_mkChar("ordered"); SET_STRING_ELT(strings, 17, strings_ordered); strings_list = Rf_mkChar("list"); SET_STRING_ELT(strings, 18, strings_list); strings_vctrs_vctr = Rf_mkChar("vctrs_vctr"); SET_STRING_ELT(strings, 19, strings_vctrs_vctr); strings_times = Rf_mkChar("times"); SET_STRING_ELT(strings, 20, strings_times); classes_data_frame = r_new_shared_vector(STRSXP, 1); strings_data_frame = Rf_mkChar("data.frame"); SET_STRING_ELT(classes_data_frame, 0, strings_data_frame); classes_factor = r_new_shared_vector(STRSXP, 1); SET_STRING_ELT(classes_factor, 0, strings_factor); classes_ordered = r_new_shared_vector(STRSXP, 2); SET_STRING_ELT(classes_ordered, 0, strings_ordered); SET_STRING_ELT(classes_ordered, 1, strings_factor); classes_date = r_new_shared_vector(STRSXP, 1); SET_STRING_ELT(classes_date, 0, strings_date); classes_posixct = r_new_shared_vector(STRSXP, 2); SET_STRING_ELT(classes_posixct, 0, strings_posixct); SET_STRING_ELT(classes_posixct, 1, strings_posixt); chrs_subset = r_new_shared_character("subset"); chrs_extract = r_new_shared_character("extract"); chrs_assign = r_new_shared_character("assign"); chrs_rename = r_new_shared_character("rename"); chrs_remove = r_new_shared_character("remove"); chrs_negate = r_new_shared_character("negate"); chrs_null = r_new_shared_character("NULL"); chrs_logical = r_new_shared_character("logical"); chrs_integer = r_new_shared_character("integer"); chrs_double = r_new_shared_character("double"); chrs_complex = r_new_shared_character("complex"); chrs_character = r_new_shared_character("character"); chrs_raw = r_new_shared_character("raw"); chrs_list = r_new_shared_character("list"); chrs_expression = r_new_shared_character("expression"); chrs_numeric = r_new_shared_character("numeric"); chrs_function = r_new_shared_character("function"); chrs_empty = r_new_shared_character(""); chrs_cast = r_new_shared_character("cast"); chrs_error = r_new_shared_character("error"); chrs_combine = r_new_shared_character("combine"); chrs_convert = r_new_shared_character("convert"); classes_tibble = r_new_shared_vector(STRSXP, 3); strings_tbl_df = Rf_mkChar("tbl_df"); SET_STRING_ELT(classes_tibble, 0, strings_tbl_df); strings_tbl = Rf_mkChar("tbl"); SET_STRING_ELT(classes_tibble, 1, strings_tbl); SET_STRING_ELT(classes_tibble, 2, strings_data_frame); classes_vctrs_group_rle = r_new_shared_vector(STRSXP, 3); SET_STRING_ELT(classes_vctrs_group_rle, 0, Rf_mkChar("vctrs_group_rle")); SET_STRING_ELT(classes_vctrs_group_rle, 1, Rf_mkChar("vctrs_rcrd")); SET_STRING_ELT(classes_vctrs_group_rle, 2, Rf_mkChar("vctrs_vctr")); vctrs_shared_empty_lgl = r_new_shared_vector(LGLSXP, 0); vctrs_shared_empty_int = r_new_shared_vector(INTSXP, 0); vctrs_shared_empty_dbl = r_new_shared_vector(REALSXP, 0); vctrs_shared_empty_cpl = r_new_shared_vector(CPLXSXP, 0); vctrs_shared_empty_chr = r_new_shared_vector(STRSXP, 0); vctrs_shared_empty_raw = r_new_shared_vector(RAWSXP, 0); vctrs_shared_empty_list = r_new_shared_vector(VECSXP, 0); vctrs_shared_empty_date = r_new_shared_vector(REALSXP, 0); Rf_setAttrib(vctrs_shared_empty_date, R_ClassSymbol, classes_date); vctrs_shared_true = r_new_shared_vector(LGLSXP, 1); LOGICAL(vctrs_shared_true)[0] = 1; vctrs_shared_false = r_new_shared_vector(LGLSXP, 1); LOGICAL(vctrs_shared_false)[0] = 0; vctrs_shared_na_cpl.i = NA_REAL; vctrs_shared_na_cpl.r = NA_REAL; vctrs_shared_na_lgl = r_new_shared_vector(LGLSXP, 1); LOGICAL(vctrs_shared_na_lgl)[0] = NA_LOGICAL; vctrs_shared_na_list = r_new_shared_vector(VECSXP, 1); SET_VECTOR_ELT(vctrs_shared_na_list, 0, R_NilValue); vctrs_shared_zero_int = r_new_shared_vector(INTSXP, 1); INTEGER(vctrs_shared_zero_int)[0] = 0; syms_i = Rf_install("i"); syms_n = Rf_install("n"); syms_x = Rf_install("x"); syms_y = Rf_install("y"); syms_x_size = Rf_install("x_size"); syms_y_size = Rf_install("y_size"); syms_to = Rf_install("to"); syms_dots = Rf_install("..."); syms_bracket = Rf_install("["); syms_arg = Rf_install("arg"); syms_x_arg = Rf_install("x_arg"); syms_y_arg = Rf_install("y_arg"); syms_to_arg = Rf_install("to_arg"); syms_times_arg = Rf_install("times_arg"); syms_subscript_arg = Rf_install("subscript_arg"); syms_out = Rf_install("out"); syms_value = Rf_install("value"); syms_quiet = Rf_install("quiet"); syms_dot_name_spec = Rf_install(".name_spec"); syms_outer = Rf_install("outer"); syms_inner = Rf_install("inner"); syms_tilde = Rf_install("~"); syms_dot_environment = Rf_install(".Environment"); syms_ptype = Rf_install("ptype"); syms_missing = R_MissingArg; syms_size = Rf_install("size"); syms_subscript_action = Rf_install("subscript_action"); syms_subscript_type = Rf_install("subscript_type"); syms_repair = Rf_install("repair"); syms_tzone = Rf_install("tzone"); syms_data = Rf_install("data"); syms_try_catch_impl = Rf_install("try_catch_impl"); syms_try_catch_hnd = Rf_install("try_catch_hnd"); syms_vctrs_error_incompatible_type = Rf_install("vctrs_error_incompatible_type"); syms_vctrs_error_cast_lossy = Rf_install("vctrs_error_cast_lossy"); syms_cnd_signal = Rf_install("cnd_signal"); syms_logical = Rf_install("logical"); syms_numeric = Rf_install("numeric"); syms_character = Rf_install("character"); syms_body = Rf_install("body"); syms_parent = Rf_install("parent"); syms_s3_methods_table = Rf_install(".__S3MethodsTable__."); syms_from_dispatch = Rf_install("vctrs:::from_dispatch"); syms_df_fallback = Rf_install("vctrs:::df_fallback"); syms_s3_fallback = Rf_install("vctrs:::s3_fallback"); syms_stop_incompatible_type = Rf_install("stop_incompatible_type"); syms_stop_incompatible_size = Rf_install("stop_incompatible_size"); syms_action = Rf_install("action"); syms_vctrs_common_class_fallback = Rf_install(c_strs_vctrs_common_class_fallback); syms_fallback_class = Rf_install("fallback_class"); syms_abort = Rf_install("abort"); syms_message = Rf_install("message"); syms_chr_transform = Rf_install("chr_transform"); fns_bracket = Rf_findVar(syms_bracket, R_BaseEnv); fns_quote = Rf_findVar(Rf_install("quote"), R_BaseEnv); fns_names = Rf_findVar(Rf_install("names"), R_BaseEnv); new_env_call = r_parse_eval("as.call(list(new.env, TRUE, NULL, NULL))", R_BaseEnv); R_PreserveObject(new_env_call); new_env__parent_node = CDDR(new_env_call); new_env__size_node = CDR(new_env__parent_node); new_function_call = r_parse_eval("as.call(list(`function`, NULL, NULL))", R_BaseEnv); R_PreserveObject(new_function_call); new_function__formals_node = CDR(new_function_call); new_function__body_node = CDR(new_function__formals_node); const char* formals_code = "pairlist2(... = , .x = quote(..1), .y = quote(..2), . = quote(..1))"; rlang_formula_formals = r_parse_eval(formals_code, ns); R_PreserveObject(rlang_formula_formals); args_empty_ = new_wrapper_arg(NULL, ""); args_dot_ptype_ = new_wrapper_arg(NULL, ".ptype"); args_max_fill_ = new_wrapper_arg(NULL, "max_fill"); rlang_is_splice_box = (bool (*)(SEXP)) R_GetCCallable("rlang", "rlang_is_splice_box"); rlang_unbox = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_unbox"); rlang_env_dots_values = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_env_dots_values"); rlang_env_dots_list = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_env_dots_list"); rlang_sym_as_character = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_sym_as_character"); syms_as_data_frame2 = Rf_install("as.data.frame2"); syms_colnames = Rf_install("colnames"); fns_as_data_frame2 = r_env_get(ns, syms_as_data_frame2); fns_colnames = r_env_get(R_BaseEnv, syms_colnames); compact_seq_attrib = Rf_cons(R_NilValue, R_NilValue); R_PreserveObject(compact_seq_attrib); SET_TAG(compact_seq_attrib, Rf_install("vctrs_compact_seq")); compact_rep_attrib = Rf_cons(R_NilValue, R_NilValue); R_PreserveObject(compact_rep_attrib); SET_TAG(compact_rep_attrib, Rf_install("vctrs_compact_rep")); { SEXP result_names = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(result_names, 0, Rf_mkChar("ok")); SET_STRING_ELT(result_names, 1, Rf_mkChar("err")); result_attrib = PROTECT(Rf_cons(result_names, R_NilValue)); SET_TAG(result_attrib, R_NamesSymbol); SEXP result_class = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(result_class, 0, Rf_mkChar("rlang_result")); result_attrib = PROTECT(Rf_cons(result_class, result_attrib)); SET_TAG(result_attrib, R_ClassSymbol); R_PreserveObject(result_attrib); MARK_NOT_MUTABLE(result_attrib); UNPROTECT(4); } // We assume the following in `union vctrs_dbl_indicator` VCTRS_ASSERT(sizeof(double) == sizeof(int64_t)); VCTRS_ASSERT(sizeof(double) == 2 * sizeof(int)); // We assume the following in `vec_order()` VCTRS_ASSERT(sizeof(int) == sizeof(int32_t)); VCTRS_ASSERT(sizeof(double) == sizeof(int64_t)); SEXP current_frame_body = PROTECT(r_parse_eval("as.call(list(sys.frame, -1))", R_BaseEnv)); SEXP current_frame_fn = PROTECT(r_new_function(R_NilValue, current_frame_body, R_EmptyEnv)); peek_frame_call = Rf_lcons(current_frame_fn, R_NilValue); R_PreserveObject(peek_frame_call); UNPROTECT(2); } vctrs/src/dim.h0000644000176200001440000000134613650511520013126 0ustar liggesusers#ifndef VCTRS_DIM_H #define VCTRS_DIM_H #include "vctrs.h" #include "utils.h" // These versions return NULL and 0 for bare vectors. // This is useful to distinguish them from 1D arrays. static inline SEXP vec_bare_dim(SEXP x) { return r_dim(x); } static inline R_len_t vec_bare_dim_n(SEXP x) { return Rf_length(vec_bare_dim(x)); } static inline SEXP vec_dim(SEXP x) { SEXP dim = vec_bare_dim(x); if (dim == R_NilValue) { dim = r_int(Rf_length(x)); } return dim; } static inline R_len_t vec_dim_n(SEXP x) { SEXP dim = vec_bare_dim(x); if (dim == R_NilValue) { return 1; } return Rf_length(dim); } static inline bool has_dim(SEXP x) { return ATTRIB(x) != R_NilValue && r_dim(x) != R_NilValue; } #endif vctrs/src/order-groups.c0000644000176200001440000001333014042540502014772 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "order-groups.h" #include "utils.h" // ----------------------------------------------------------------------------- // Pair with `PROTECT_GROUP_INFO()` in the caller struct group_info* new_group_info() { SEXP self = PROTECT(r_new_raw(sizeof(struct group_info))); struct group_info* p_group_info = (struct group_info*) RAW(self); p_group_info->self = self; p_group_info->data_size = 0; p_group_info->data = vctrs_shared_empty_int; p_group_info->n_groups = 0; p_group_info->max_group_size = 0; UNPROTECT(1); return p_group_info; } // ----------------------------------------------------------------------------- struct group_infos* new_group_infos(struct group_info* p_group_info0, struct group_info* p_group_info1, r_ssize max_data_size, bool force_groups, bool ignore_groups) { SEXP self = PROTECT(r_new_raw(sizeof(struct group_infos))); struct group_infos* p_group_infos = (struct group_infos*) RAW(self); SEXP p_p_group_info_data = PROTECT(r_new_raw(2 * sizeof(struct group_info*))); struct group_info** p_p_group_info = (struct group_info**) RAW(p_p_group_info_data); p_p_group_info[0] = p_group_info0; p_p_group_info[1] = p_group_info1; p_group_infos->self = self; p_group_infos->p_p_group_info_data = p_p_group_info_data; p_group_infos->p_p_group_info = p_p_group_info; p_group_infos->max_data_size = max_data_size; p_group_infos->current = 0; p_group_infos->force_groups = force_groups; p_group_infos->ignore_groups = ignore_groups; UNPROTECT(2); return p_group_infos; } // ----------------------------------------------------------------------------- static void group_realloc(r_ssize size, struct group_info* p_group_info); static r_ssize groups_realloc_size(r_ssize data_size, r_ssize max_data_size); /* * Push a group size onto the current `group_info*` * - Reallocates as needed * - Updates number of groups / max group size as well * * Should only be called through `groups_size_maybe_push()` to ensure * that we only push groups if we are tracking them. */ void groups_size_push(r_ssize size, struct group_infos* p_group_infos) { if (size == 0) { Rf_errorcall(R_NilValue, "Internal error: Group `size` to push should never be zero."); } struct group_info* p_group_info = groups_current(p_group_infos); // Extend `data` as required - reprotects itself if (p_group_info->data_size == p_group_info->n_groups) { r_ssize new_data_size = groups_realloc_size( p_group_info->data_size, p_group_infos->max_data_size ); group_realloc(new_data_size, p_group_info); } // Push group size p_group_info->p_data[p_group_info->n_groups] = size; // Bump number of groups ++p_group_info->n_groups; // Update max group size if (p_group_info->max_group_size < size) { p_group_info->max_group_size = size; } } // ----------------------------------------------------------------------------- /* * Reallocate `data` to be as long as `size`. */ static void group_realloc(r_ssize size, struct group_info* p_group_info) { // First allocation if (size == 0) { size = GROUP_DATA_SIZE_DEFAULT; } // Reallocate p_group_info->data = int_resize( p_group_info->data, p_group_info->data_size, size ); // Reprotect REPROTECT(p_group_info->data, p_group_info->data_pi); // Update pointer p_group_info->p_data = INTEGER(p_group_info->data); // Update size p_group_info->data_size = size; } // ----------------------------------------------------------------------------- static r_ssize groups_realloc_size(r_ssize data_size, r_ssize max_data_size) { // Avoid potential overflow when doubling size uint64_t new_data_size = ((uint64_t) data_size) * 2; // Clamp maximum allocation size to the size of the input if (new_data_size > max_data_size) { return max_data_size; } // Can now safely cast back to `r_ssize` return (r_ssize) new_data_size; } // ----------------------------------------------------------------------------- /* * `groups_swap()` is called after each data frame column is processed. * It handles switching the `current` group info that we are working on, * and ensures that the information that might have been there before has * been zeroed out. It also ensures that the new current group info has at * least as much space as the previous one, which is especially important for * the first column swap where the 2nd group info array starts as a size 0 * integer vector (because we don't know if it will get used or not). */ void groups_swap(struct group_infos* p_group_infos) { if (p_group_infos->ignore_groups) { return; } struct group_info* p_group_info_pre = groups_current(p_group_infos); // Make the swap p_group_infos->current = 1 - p_group_infos->current; struct group_info* p_group_info_post = groups_current(p_group_infos); // Clear the info from last time the swap was made p_group_info_post->max_group_size = 0; p_group_info_post->n_groups = 0; // Ensure the new group info is at least as big as the old group info if (p_group_info_post->data_size < p_group_info_pre->data_size) { r_ssize new_data_size = p_group_info_pre->data_size; group_realloc(new_data_size, p_group_info_post); } } vctrs/src/bind.c0000644000176200001440000004177114042540502013270 0ustar liggesusers#include "vctrs.h" #include "c.h" #include "dim.h" #include "ptype-common.h" #include "slice-assign.h" #include "type-data-frame.h" #include "owned.h" #include "utils.h" static SEXP vec_rbind(SEXP xs, SEXP ptype, SEXP id, struct name_repair_opts* name_repair, SEXP name_spec); static SEXP as_df_row(SEXP x, struct name_repair_opts* name_repair); static SEXP as_df_row_impl(SEXP x, struct name_repair_opts* name_repair); struct name_repair_opts validate_bind_name_repair(SEXP name_repair, bool allow_minimal); static SEXP vec_cbind(SEXP xs, SEXP ptype, SEXP size, struct name_repair_opts* name_repair); static SEXP cbind_names_to(bool has_names, SEXP names_to, SEXP ptype); // [[ register(external = TRUE) ]] SEXP vctrs_rbind(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP xs = PROTECT(rlang_env_dots_list(env)); SEXP ptype = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP names_to = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP name_repair = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP name_spec = PROTECT(Rf_eval(CAR(args), env)); if (names_to != R_NilValue) { if (Rf_inherits(names_to, "rlang_zap")) { r_poke_names(xs, R_NilValue); names_to = R_NilValue; } else if (r_is_string(names_to)) { names_to = r_chr_get(names_to, 0); } else { Rf_errorcall(R_NilValue, "`.names_to` must be `NULL`, a string, or an `rlang::zap()` object."); } } struct name_repair_opts name_repair_opts = validate_bind_name_repair(name_repair, false); PROTECT_NAME_REPAIR_OPTS(&name_repair_opts); SEXP out = vec_rbind(xs, ptype, names_to, &name_repair_opts, name_spec); UNPROTECT(6); return out; } static SEXP vec_rbind(SEXP xs, SEXP ptype, SEXP names_to, struct name_repair_opts* name_repair, SEXP name_spec) { int n_prot = 0; R_len_t n_inputs = Rf_length(xs); for (R_len_t i = 0; i < n_inputs; ++i) { SET_VECTOR_ELT(xs, i, as_df_row(VECTOR_ELT(xs, i), name_repair)); } // The common type holds information about common column names, // types, etc. Each element of `xs` needs to be cast to that type // before assignment. ptype = vec_ptype_common_params(xs, ptype, DF_FALLBACK_DEFAULT, S3_FALLBACK_true); PROTECT_N(ptype, &n_prot); R_len_t n_cols = Rf_length(ptype); if (ptype == R_NilValue) { UNPROTECT(n_prot); return new_data_frame(vctrs_shared_empty_list, 0); } if (TYPEOF(ptype) == LGLSXP && !n_cols) { ptype = as_df_row_impl(vctrs_shared_na_lgl, name_repair); PROTECT_N(ptype, &n_prot); } if (!is_data_frame(ptype)) { Rf_errorcall(R_NilValue, "Can't bind objects that are not coercible to a data frame."); } bool assign_names = !Rf_inherits(name_spec, "rlang_zap"); bool has_names_to = names_to != R_NilValue; R_len_t names_to_loc = 0; if (has_names_to) { if (!assign_names) { r_abort("Can't zap outer names when `.names_to` is supplied."); } SEXP ptype_nms = PROTECT(r_names(ptype)); names_to_loc = r_chr_find(ptype_nms, names_to); UNPROTECT(1); if (names_to_loc < 0) { ptype = PROTECT_N(cbind_names_to(r_names(xs) != R_NilValue, names_to, ptype), &n_prot); names_to_loc = 0; } } // Must happen after the `names_to` column has been added to `ptype` xs = vec_cast_common_params(xs, ptype, DF_FALLBACK_DEFAULT, S3_FALLBACK_true); PROTECT_N(xs, &n_prot); // Find individual input sizes and total size of output R_len_t n_rows = 0; SEXP ns_placeholder = PROTECT_N(Rf_allocVector(INTSXP, n_inputs), &n_prot); int* ns = INTEGER(ns_placeholder); for (R_len_t i = 0; i < n_inputs; ++i) { SEXP elt = VECTOR_ELT(xs, i); R_len_t size = (elt == R_NilValue) ? 0 : vec_size(elt); n_rows += size; ns[i] = size; } SEXP proxy = PROTECT_N(vec_proxy(ptype), &n_prot); if (!is_data_frame(proxy)) { Rf_errorcall(R_NilValue, "Can't fill a data frame that doesn't have a data frame proxy."); } PROTECT_INDEX out_pi; SEXP out = vec_init(proxy, n_rows); PROTECT_WITH_INDEX(out, &out_pi); ++n_prot; SEXP loc = PROTECT_N(compact_seq(0, 0, true), &n_prot); int* p_loc = INTEGER(loc); SEXP rownames = R_NilValue; PROTECT_INDEX rownames_pi; PROTECT_WITH_INDEX(rownames, &rownames_pi); ++n_prot; SEXP names_to_col = R_NilValue; SEXPTYPE names_to_type = 99; void* p_names_to_col = NULL; const void* p_index = NULL; SEXP xs_names = PROTECT_N(r_names(xs), &n_prot); bool xs_is_named = xs_names != R_NilValue; if (has_names_to) { SEXP index = R_NilValue; if (xs_is_named) { index = xs_names; } else { index = PROTECT_N(Rf_allocVector(INTSXP, n_inputs), &n_prot); r_int_fill_seq(index, 1, n_inputs); } names_to_type = TYPEOF(index); names_to_col = PROTECT_N(Rf_allocVector(names_to_type, n_rows), &n_prot); p_index = r_vec_deref_barrier_const(index); p_names_to_col = r_vec_deref_barrier(names_to_col); xs_names = R_NilValue; xs_is_named = false; } const SEXP* p_xs_names = NULL; if (xs_is_named) { p_xs_names = STRING_PTR_RO(xs_names); } // Compact sequences use 0-based counters R_len_t counter = 0; const struct vec_assign_opts bind_assign_opts = { .assign_names = assign_names, // Unlike in `vec_c()` we don't need to ignore outer names because // `df_assign()` doesn't deal with those .ignore_outer_names = false }; for (R_len_t i = 0; i < n_inputs; ++i) { R_len_t size = ns[i]; if (!size) { continue; } SEXP x = VECTOR_ELT(xs, i); init_compact_seq(p_loc, counter, size, true); // Total ownership of `out` because it was freshly created with `vec_init()` out = df_assign(out, loc, x, VCTRS_OWNED_true, &bind_assign_opts); REPROTECT(out, out_pi); if (assign_names) { SEXP outer = xs_is_named ? p_xs_names[i] : R_NilValue; SEXP inner = PROTECT(vec_names(x)); SEXP x_nms = PROTECT(apply_name_spec(name_spec, outer, inner, size)); if (x_nms != R_NilValue) { R_LAZY_ALLOC(rownames, rownames_pi, STRSXP, n_rows); // If there is no name to assign, skip the assignment since // `out_names` already contains empty strings if (inner != chrs_empty) { rownames = chr_assign(rownames, loc, x_nms, VCTRS_OWNED_true); REPROTECT(rownames, rownames_pi); } } UNPROTECT(2); } // Assign current name to group vector, if supplied if (has_names_to) { r_vec_fill(names_to_type, p_names_to_col, counter, p_index, i, size); } counter += size; } if (rownames != R_NilValue) { Rf_setAttrib(out, R_RowNamesSymbol, rownames); } if (has_names_to) { out = df_poke(out, names_to_loc, names_to_col); REPROTECT(out, out_pi); } // Not optimal. Happens after the fallback columns have been // assigned already, ideally they should be ignored. Also this is // currently not recursive. Should we deal with this during // restoration? for (R_len_t i = 0; i < n_cols; ++i) { SEXP col = r_list_get(ptype, i); if (vec_is_common_class_fallback(col)) { SEXP col_xs = PROTECT(list_pluck(xs, i)); SEXP col_out = vec_c_fallback(col, col_xs, name_spec, name_repair); r_list_poke(out, i, col_out); UNPROTECT(1); } } SEXP r_n_rows = PROTECT_N(r_int(n_rows), &n_prot); out = vec_restore(out, ptype, r_n_rows, VCTRS_OWNED_true); UNPROTECT(n_prot); return out; } static SEXP as_df_row(SEXP x, struct name_repair_opts* name_repair) { if (vec_is_unspecified(x) && r_names(x) == R_NilValue) { return x; } else { return as_df_row_impl(x, name_repair); } } static SEXP as_df_row_impl(SEXP x, struct name_repair_opts* name_repair) { if (x == R_NilValue) { return x; } if (is_data_frame(x)) { return df_repair_names(x, name_repair); } int nprot = 0; SEXP dim = vec_bare_dim(x); R_len_t ndim = (dim == R_NilValue) ? 1 : Rf_length(dim); if (ndim > 2) { Rf_errorcall(R_NilValue, "Can't bind arrays."); } if (ndim == 2) { SEXP names = PROTECT_N(vec_unique_colnames(x, name_repair->quiet), &nprot); SEXP out = PROTECT_N(r_as_data_frame(x), &nprot); r_poke_names(out, names); UNPROTECT(nprot); return out; } SEXP nms = PROTECT_N(vec_names(x), &nprot); if (dim != R_NilValue) { x = PROTECT_N(r_clone_referenced(x), &nprot); r_attrib_poke(x, R_DimSymbol, R_NilValue); r_attrib_poke(x, R_DimNamesSymbol, R_NilValue); } // Remove names as they are promoted to data frame column names if (nms != R_NilValue) { x = PROTECT_N(vec_set_names(x, R_NilValue), &nprot); } if (nms == R_NilValue) { nms = PROTECT_N(vec_unique_names(x, name_repair->quiet), &nprot); } else { nms = PROTECT_N(vec_as_names(nms, name_repair), &nprot); } x = PROTECT_N(vec_chop(x, R_NilValue), &nprot); r_poke_names(x, nms); x = new_data_frame(x, 1); UNPROTECT(nprot); return x; } // [[ register() ]] SEXP vctrs_as_df_row(SEXP x, SEXP quiet) { struct name_repair_opts name_repair_opts = { .type = name_repair_unique, .fn = R_NilValue, .quiet = LOGICAL(quiet)[0] }; return as_df_row(x, &name_repair_opts); } static SEXP cbind_names_to(bool has_names, SEXP names_to, SEXP ptype) { SEXP index_ptype = has_names ? vctrs_shared_empty_chr : vctrs_shared_empty_int; SEXP tmp = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(tmp, 0, index_ptype); SET_VECTOR_ELT(tmp, 1, ptype); SEXP tmp_nms = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(tmp_nms, 0, names_to); SET_STRING_ELT(tmp_nms, 1, strings_empty); r_poke_names(tmp, tmp_nms); SEXP out = vec_cbind(tmp, R_NilValue, R_NilValue, NULL); UNPROTECT(2); return out; } static SEXP as_df_col(SEXP x, SEXP outer, bool* allow_pack); static SEXP cbind_container_type(SEXP x, void* data); // [[ register(external = TRUE) ]] SEXP vctrs_cbind(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP xs = PROTECT(rlang_env_dots_list(env)); SEXP ptype = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP size = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP name_repair = PROTECT(Rf_eval(CAR(args), env)); struct name_repair_opts name_repair_opts = validate_bind_name_repair(name_repair, true); PROTECT_NAME_REPAIR_OPTS(&name_repair_opts); SEXP out = vec_cbind(xs, ptype, size, &name_repair_opts); UNPROTECT(5); return out; } static SEXP vec_cbind(SEXP xs, SEXP ptype, SEXP size, struct name_repair_opts* name_repair) { R_len_t n = Rf_length(xs); // Find the common container type of inputs SEXP rownames = R_NilValue; SEXP containers = PROTECT(map_with_data(xs, &cbind_container_type, &rownames)); ptype = PROTECT(cbind_container_type(ptype, &rownames)); SEXP type = PROTECT(vec_ptype_common_params(containers, ptype, DF_FALLBACK_DEFAULT, S3_FALLBACK_false)); if (type == R_NilValue) { type = new_data_frame(vctrs_shared_empty_list, 0); } else if (!is_data_frame(type)) { type = r_as_data_frame(type); } UNPROTECT(1); PROTECT(type); R_len_t nrow; if (size == R_NilValue) { nrow = vec_size_common(xs, 0); } else { nrow = size_validate(size, ".size"); } if (rownames != R_NilValue && Rf_length(rownames) != nrow) { rownames = PROTECT(vec_recycle(rownames, nrow, args_empty)); rownames = vec_as_unique_names(rownames, false); UNPROTECT(1); } PROTECT(rownames); // Convert inputs to data frames, validate, and collect total number of columns SEXP xs_names = PROTECT(r_names(xs)); bool has_names = xs_names != R_NilValue; SEXP const* xs_names_p = has_names ? STRING_PTR_RO(xs_names) : NULL; R_len_t ncol = 0; for (R_len_t i = 0; i < n; ++i) { SEXP x = VECTOR_ELT(xs, i); if (x == R_NilValue) { continue; } x = PROTECT(vec_recycle(x, nrow, args_empty)); SEXP outer_name = has_names ? xs_names_p[i] : strings_empty; bool allow_packing; x = PROTECT(as_df_col(x, outer_name, &allow_packing)); // Remove outer name of column vectors because they shouldn't be repacked if (has_names && !allow_packing) { SET_STRING_ELT(xs_names, i, strings_empty); } SET_VECTOR_ELT(xs, i, x); UNPROTECT(2); // Named inputs are packed in a single column R_len_t x_ncol = outer_name == strings_empty ? Rf_length(x) : 1; ncol += x_ncol; } // Fill in columns PROTECT_INDEX out_pi; SEXP out = Rf_allocVector(VECSXP, ncol); PROTECT_WITH_INDEX(out, &out_pi); init_data_frame(out, nrow); PROTECT_INDEX names_pi; SEXP names = Rf_allocVector(STRSXP, ncol); PROTECT_WITH_INDEX(names, &names_pi); SEXP idx = PROTECT(compact_seq(0, 0, true)); int* idx_ptr = INTEGER(idx); R_len_t counter = 0; for (R_len_t i = 0; i < n; ++i) { SEXP x = VECTOR_ELT(xs, i); if (x == R_NilValue) { continue; } SEXP outer_name = has_names ? xs_names_p[i] : strings_empty; if (outer_name != strings_empty) { SET_VECTOR_ELT(out, counter, x); SET_STRING_ELT(names, counter, outer_name); ++counter; continue; } R_len_t xn = Rf_length(x); init_compact_seq(idx_ptr, counter, xn, true); // Total ownership of `out` because it was freshly created with `Rf_allocVector()` out = list_assign(out, idx, x, VCTRS_OWNED_true); REPROTECT(out, out_pi); SEXP xnms = PROTECT(r_names(x)); if (xnms != R_NilValue) { names = chr_assign(names, idx, xnms, VCTRS_OWNED_true); REPROTECT(names, names_pi); } UNPROTECT(1); counter += xn; } names = PROTECT(vec_as_names(names, name_repair)); Rf_setAttrib(out, R_NamesSymbol, names); if (rownames != R_NilValue) { Rf_setAttrib(out, R_RowNamesSymbol, rownames); } out = vec_restore(out, type, R_NilValue, VCTRS_OWNED_true); UNPROTECT(9); return out; } SEXP syms_vec_cbind_frame_ptype = NULL; SEXP fns_vec_cbind_frame_ptype = NULL; SEXP vec_cbind_frame_ptype(SEXP x) { return vctrs_dispatch1(syms_vec_cbind_frame_ptype, fns_vec_cbind_frame_ptype, syms_x, x); } static SEXP cbind_container_type(SEXP x, void* data) { if (is_data_frame(x)) { SEXP rn = df_rownames(x); if (rownames_type(rn) == ROWNAMES_IDENTIFIERS) { SEXP* learned_rn_p = (SEXP*) data; SEXP learned_rn = *learned_rn_p; if (learned_rn == R_NilValue) { *learned_rn_p = rn; } } return vec_cbind_frame_ptype(x); } else { return R_NilValue; } } static SEXP shaped_as_df_col(SEXP x, SEXP outer); static SEXP vec_as_df_col(SEXP x, SEXP outer); // [[ register() ]] SEXP vctrs_as_df_col(SEXP x, SEXP outer) { bool allow_pack; return as_df_col(x, r_chr_get(outer, 0), &allow_pack); } static SEXP as_df_col(SEXP x, SEXP outer, bool* allow_pack) { if (is_data_frame(x)) { *allow_pack = true; return Rf_shallow_duplicate(x); } R_len_t ndim = vec_bare_dim_n(x); if (ndim > 2) { Rf_errorcall(R_NilValue, "Can't bind arrays."); } if (ndim > 0) { *allow_pack = true; return shaped_as_df_col(x, outer); } *allow_pack = false; return vec_as_df_col(x, outer); } static SEXP shaped_as_df_col(SEXP x, SEXP outer) { // If packed, store array as a column if (outer != strings_empty) { return x; } // If unpacked, transform to data frame first. We repair names // after unpacking and concatenation. SEXP out = PROTECT(r_as_data_frame(x)); // Remove names if they were repaired by `as.data.frame()` if (colnames(x) == R_NilValue) { r_poke_names(out, R_NilValue); } UNPROTECT(1); return out; } static SEXP vec_as_df_col(SEXP x, SEXP outer) { SEXP out = PROTECT(Rf_allocVector(VECSXP, 1)); SET_VECTOR_ELT(out, 0, x); if (outer != strings_empty) { SEXP names = PROTECT(r_str_as_character(outer)); Rf_setAttrib(out, R_NamesSymbol, names); UNPROTECT(1); } init_data_frame(out, Rf_length(x)); UNPROTECT(1); return out; } struct name_repair_opts validate_bind_name_repair(SEXP name_repair, bool allow_minimal) { struct name_repair_opts opts = new_name_repair_opts(name_repair, args_empty, false); switch (opts.type) { case name_repair_custom: case name_repair_unique: case name_repair_universal: case name_repair_check_unique: break; case name_repair_minimal: if (allow_minimal) break; // else fallthrough default: if (allow_minimal) { Rf_errorcall(R_NilValue, "`.name_repair` can't be `\"%s\"`.\n" "It must be one of `\"unique\"`, `\"universal\"`, `\"check_unique\"`, or `\"minimal\"`.", name_repair_arg_as_c_string(opts.type)); } else { Rf_errorcall(R_NilValue, "`.name_repair` can't be `\"%s\"`.\n" "It must be one of `\"unique\"`, `\"universal\"`, or `\"check_unique\"`.", name_repair_arg_as_c_string(opts.type)); } } return opts; } void vctrs_init_bind(SEXP ns) { syms_vec_cbind_frame_ptype = Rf_install("vec_cbind_frame_ptype"); fns_vec_cbind_frame_ptype = r_env_get(ns, syms_vec_cbind_frame_ptype); } vctrs/src/type-data-frame.h0000644000176200001440000000245614027045462015346 0ustar liggesusers#ifndef VCTRS_TYPE_DATA_FRAME_H #define VCTRS_TYPE_DATA_FRAME_H #include "arg.h" #include "names.h" #include "ptype2.h" SEXP new_data_frame(SEXP x, R_len_t n); void init_data_frame(SEXP x, R_len_t n); void init_tibble(SEXP x, R_len_t n); void init_compact_rownames(SEXP x, R_len_t n); static inline SEXP df_rownames(SEXP x) { return r_attrib_get(x, R_RowNamesSymbol); } bool is_native_df(SEXP x); SEXP df_poke(SEXP x, R_len_t i, SEXP value); SEXP df_poke_at(SEXP x, SEXP name, SEXP value); SEXP df_flatten(SEXP x); SEXP df_repair_names(SEXP x, struct name_repair_opts* name_repair); static inline SEXP df_cast(SEXP x, SEXP to, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg); enum rownames_type { ROWNAMES_AUTOMATIC, ROWNAMES_AUTOMATIC_COMPACT, ROWNAMES_IDENTIFIERS }; enum rownames_type rownames_type(SEXP rn); R_len_t rownames_size(SEXP rn); SEXP df_ptype2(const struct ptype2_opts* opts); static inline SEXP df_ptype2_params(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, enum df_fallback df_fallback) { const struct ptype2_opts opts = { .x = x, .y = y, .x_arg = x_arg, .y_arg = y_arg, .fallback = { .df = df_fallback } }; return df_ptype2(&opts); } #endif vctrs/src/compare.c0000644000176200001440000002516714042540502014003 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "translate.h" #include static void stop_not_comparable(SEXP x, SEXP y, const char* message) { Rf_errorcall(R_NilValue, "`x` and `y` are not comparable: %s", message); } // https://stackoverflow.com/questions/10996418 static inline int icmp(int x, int y) { return (x > y) - (x < y); } int qsort_icmp(const void* x, const void* y) { return icmp(*((int*) x), *((int*) y)); } static int dcmp(double x, double y) { return (x > y) - (x < y); } // Assume translation handled by `vec_normalize_encoding()` static inline int scmp(SEXP x, SEXP y) { if (x == y) { return 0; } int cmp = strcmp(CHAR(x), CHAR(y)); return cmp / abs(cmp); } // ----------------------------------------------------------------------------- static inline int lgl_compare_scalar(const int* x, const int* y, bool na_equal) { int xi = *x; int yj = *y; if (na_equal) { return icmp(xi, yj); } else { return (xi == NA_LOGICAL || yj == NA_LOGICAL) ? NA_INTEGER : icmp(xi, yj); } } static inline int int_compare_scalar(const int* x, const int* y, bool na_equal) { int xi = *x; int yj = *y; if (na_equal) { return icmp(xi, yj); } else { return (xi == NA_INTEGER || yj == NA_INTEGER) ? NA_INTEGER : icmp(xi, yj); } } static inline int dbl_compare_scalar(const double* x, const double* y, bool na_equal) { double xi = *x; double yj = *y; if (na_equal) { enum vctrs_dbl_class x_class = dbl_classify(xi); enum vctrs_dbl_class y_class = dbl_classify(yj); switch (x_class) { case vctrs_dbl_number: { switch (y_class) { case vctrs_dbl_number: return dcmp(xi, yj); case vctrs_dbl_missing: return 1; case vctrs_dbl_nan: return 1; } } case vctrs_dbl_missing: { switch (y_class) { case vctrs_dbl_number: return -1; case vctrs_dbl_missing: return 0; case vctrs_dbl_nan: return 1; } } case vctrs_dbl_nan: { switch (y_class) { case vctrs_dbl_number: return -1; case vctrs_dbl_missing: return -1; case vctrs_dbl_nan: return 0; } } } } else { return (isnan(xi) || isnan(yj)) ? NA_INTEGER : dcmp(xi, yj); } never_reached("dbl_compare_scalar"); } static inline int chr_compare_scalar(const SEXP* x, const SEXP* y, bool na_equal) { const SEXP xi = *x; const SEXP yj = *y; if (na_equal) { if (xi == NA_STRING) { return (yj == NA_STRING) ? 0 : -1; } else { return (yj == NA_STRING) ? 1 : scmp(xi, yj); } } else { return (xi == NA_STRING || yj == NA_STRING) ? NA_INTEGER : scmp(xi, yj); } } static inline int df_compare_scalar(SEXP x, R_len_t i, SEXP y, R_len_t j, bool na_equal, int n_col) { int cmp; for (int k = 0; k < n_col; ++k) { SEXP col_x = VECTOR_ELT(x, k); SEXP col_y = VECTOR_ELT(y, k); cmp = compare_scalar(col_x, i, col_y, j, na_equal); if (cmp != 0) { return cmp; } } return cmp; } // ----------------------------------------------------------------------------- // [[ include("vctrs.h") ]] int compare_scalar(SEXP x, R_len_t i, SEXP y, R_len_t j, bool na_equal) { switch (TYPEOF(x)) { case LGLSXP: return lgl_compare_scalar(LOGICAL_RO(x) + i, LOGICAL_RO(y) + j, na_equal); case INTSXP: return int_compare_scalar(INTEGER_RO(x) + i, INTEGER_RO(y) + j, na_equal); case REALSXP: return dbl_compare_scalar(REAL_RO(x) + i, REAL_RO(y) + j, na_equal); case STRSXP: return chr_compare_scalar(STRING_PTR_RO(x) + i, STRING_PTR_RO(y) + j, na_equal); default: break; } switch (vec_proxy_typeof(x)) { case vctrs_type_list: stop_not_comparable(x, y, "lists are not comparable"); case vctrs_type_dataframe: { int n_col = Rf_length(x); if (n_col != Rf_length(y)) { stop_not_comparable(x, y, "must have the same number of columns"); } if (n_col == 0) { stop_not_comparable(x, y, "data frame with zero columns"); } return df_compare_scalar(x, i, y, j, na_equal, n_col); } default: break; } Rf_errorcall(R_NilValue, "Unsupported type %s", Rf_type2char(TYPEOF(x))); } // ----------------------------------------------------------------------------- static SEXP df_compare(SEXP x, SEXP y, bool na_equal, R_len_t size); #define COMPARE(CTYPE, CONST_DEREF, SCALAR_COMPARE) \ do { \ SEXP out = PROTECT(Rf_allocVector(INTSXP, size)); \ int* p_out = INTEGER(out); \ \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ for (R_len_t i = 0; i < size; ++i, ++p_x, ++p_y) { \ p_out[i] = SCALAR_COMPARE(p_x, p_y, na_equal); \ } \ \ UNPROTECT(3); \ return out; \ } \ while (0) // [[ register() ]] SEXP vctrs_compare(SEXP x, SEXP y, SEXP na_equal_) { bool na_equal = r_bool_as_int(na_equal_); R_len_t size = vec_size(x); enum vctrs_type type = vec_proxy_typeof(x); if (type != vec_proxy_typeof(y) || size != vec_size(y)) { stop_not_comparable(x, y, "must have the same types and lengths"); } x = PROTECT(vec_normalize_encoding(x)); y = PROTECT(vec_normalize_encoding(y)); switch (type) { case vctrs_type_logical: COMPARE(int, LOGICAL_RO, lgl_compare_scalar); case vctrs_type_integer: COMPARE(int, INTEGER_RO, int_compare_scalar); case vctrs_type_double: COMPARE(double, REAL_RO, dbl_compare_scalar); case vctrs_type_character: COMPARE(SEXP, STRING_PTR_RO, chr_compare_scalar); case vctrs_type_dataframe: { SEXP out = df_compare(x, y, na_equal, size); UNPROTECT(2); return out; } case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vctrs_compare()`"); case vctrs_type_list: Rf_errorcall(R_NilValue, "Can't compare lists with `vctrs_compare()`"); default: Rf_error("Unimplemented type in `vctrs_compare()`"); } } #undef COMPARE // ----------------------------------------------------------------------------- static void vec_compare_col(int* p_out, struct df_short_circuit_info* p_info, SEXP x, SEXP y, bool na_equal); static void df_compare_impl(int* p_out, struct df_short_circuit_info* p_info, SEXP x, SEXP y, bool na_equal); static SEXP df_compare(SEXP x, SEXP y, bool na_equal, R_len_t size) { int nprot = 0; SEXP out = PROTECT_N(Rf_allocVector(INTSXP, size), &nprot); int* p_out = INTEGER(out); // Initialize to "equality" value // and only change if we learn that it differs memset(p_out, 0, size * sizeof(int)); struct df_short_circuit_info info = new_df_short_circuit_info(size, false); struct df_short_circuit_info* p_info = &info; PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, &nprot); df_compare_impl(p_out, p_info, x, y, na_equal); UNPROTECT(nprot); return out; } static void df_compare_impl(int* p_out, struct df_short_circuit_info* p_info, SEXP x, SEXP y, bool na_equal) { int n_col = Rf_length(x); if (n_col == 0) { stop_not_comparable(x, y, "data frame with zero columns"); } if (n_col != Rf_length(y)) { stop_not_comparable(x, y, "must have the same number of columns"); } for (R_len_t i = 0; i < n_col; ++i) { SEXP x_col = VECTOR_ELT(x, i); SEXP y_col = VECTOR_ELT(y, i); vec_compare_col(p_out, p_info, x_col, y_col, na_equal); // If we know all comparison values, break if (p_info->remaining == 0) { break; } } } // ----------------------------------------------------------------------------- #define COMPARE_COL(CTYPE, CONST_DEREF, SCALAR_COMPARE) \ do { \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ for (R_len_t i = 0; i < p_info->size; ++i, ++p_x, ++p_y) { \ if (p_info->p_row_known[i]) { \ continue; \ } \ \ int cmp = SCALAR_COMPARE(p_x, p_y, na_equal); \ \ if (cmp != 0) { \ p_out[i] = cmp; \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ } \ } \ while (0) static void vec_compare_col(int* p_out, struct df_short_circuit_info* p_info, SEXP x, SEXP y, bool na_equal) { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: COMPARE_COL(int, LOGICAL_RO, lgl_compare_scalar); break; case vctrs_type_integer: COMPARE_COL(int, INTEGER_RO, int_compare_scalar); break; case vctrs_type_double: COMPARE_COL(double, REAL_RO, dbl_compare_scalar); break; case vctrs_type_character: COMPARE_COL(SEXP, STRING_PTR_RO, chr_compare_scalar); break; case vctrs_type_dataframe: df_compare_impl(p_out, p_info, x, y, na_equal); break; case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vctrs_compare()`"); case vctrs_type_list: Rf_errorcall(R_NilValue, "Can't compare lists with `vctrs_compare()`"); default: Rf_error("Unimplemented type in `vctrs_compare()`"); } } #undef COMPARE_COL vctrs/src/unspecified.c0000644000176200001440000000376214042540502014650 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "dim.h" // Initialised at load time static SEXP unspecified_attrib = NULL; SEXP vctrs_shared_empty_uns = NULL; // [[ include("vctrs.h") ]] SEXP vec_unspecified(R_len_t n) { SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); r_lgl_fill(out, NA_LOGICAL, n); SET_ATTRIB(out, unspecified_attrib); SET_OBJECT(out, 1); UNPROTECT(1); return out; } // [[ register ]] SEXP vctrs_unspecified(SEXP n) { if (Rf_length(n) != 1) { Rf_errorcall(R_NilValue, "`n` must be a single number"); } if (TYPEOF(n) != INTSXP) { n = vec_cast(n, vctrs_shared_empty_int, args_empty, args_empty); } int len = INTEGER(n)[0]; return vec_unspecified(len); } // [[ include("vctrs.h") ]] bool vec_is_unspecified(SEXP x) { if (TYPEOF(x) != LGLSXP) { return false; } SEXP attrib = ATTRIB(x); if (attrib == unspecified_attrib) { return true; } if (attrib != R_NilValue) { // The unspecified vector might have been created outside the // session (e.g. serialisation) if (Rf_inherits(x, "vctrs_unspecified")) { return true; } if (OBJECT(x)) { return false; } if (has_dim(x)) { return false; } } R_len_t n = Rf_length(x); if (n == 0) { return false; } R_len_t* p_x = LOGICAL(x); for (R_len_t i = 0; i < n; ++i) { if (p_x[i] != NA_LOGICAL) { return false; } } return true; } // [[ register ]] SEXP vctrs_is_unspecified(SEXP x) { return Rf_ScalarLogical(vec_is_unspecified(x)); } void vctrs_init_unspecified(SEXP ns) { { SEXP unspecified_class = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(unspecified_class, 0, Rf_mkChar("vctrs_unspecified")); unspecified_attrib = Rf_cons(unspecified_class, R_NilValue); R_PreserveObject(unspecified_attrib); SET_TAG(unspecified_attrib, R_ClassSymbol); UNPROTECT(1); } vctrs_shared_empty_uns = vec_unspecified(0); R_PreserveObject(vctrs_shared_empty_uns); MARK_NOT_MUTABLE(vctrs_shared_empty_uns); } vctrs/src/slice.h0000644000176200001440000000061713723213047013460 0ustar liggesusers#ifndef VCTRS_SLICE_H #define VCTRS_SLICE_H extern SEXP syms_vec_slice_dispatch_integer64; extern SEXP fns_vec_slice_dispatch_integer64; SEXP slice_names(SEXP names, SEXP subscript); SEXP slice_rownames(SEXP names, SEXP subscript); SEXP vec_slice_base(enum vctrs_type type, SEXP x, SEXP subscript); SEXP vec_slice_fallback(SEXP x, SEXP subscript); bool vec_is_restored(SEXP x, SEXP to); #endif vctrs/src/cast.c0000644000176200001440000002032214042540502013273 0ustar liggesusers#include "vctrs.h" #include "cast.h" #include "dim.h" #include "ptype2.h" #include "ptype-common.h" #include "type-data-frame.h" #include "utils.h" static SEXP vec_cast_switch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy); static SEXP vec_cast_dispatch_s3(const struct cast_opts* opts); // [[ register() ]] SEXP vctrs_cast(SEXP x, SEXP to, SEXP x_arg_, SEXP to_arg_) { struct vctrs_arg x_arg = vec_as_arg(x_arg_); struct vctrs_arg to_arg = vec_as_arg(to_arg_); return vec_cast(x, to, &x_arg, &to_arg); } // [[ include("cast.h") ]] SEXP vec_cast_opts(const struct cast_opts* opts) { SEXP x = opts->x; SEXP to = opts->to; struct vctrs_arg* x_arg = opts->x_arg; struct vctrs_arg* to_arg = opts->to_arg; if (x == R_NilValue) { if (!vec_is_partial(to)) { vec_assert(to, to_arg); } return x; } if (to == R_NilValue) { if (!vec_is_partial(x)) { vec_assert(x, x_arg); } return x; } enum vctrs_type x_type = vec_typeof(x); enum vctrs_type to_type = vec_typeof(to); if (x_type == vctrs_type_unspecified) { return vec_init(to, vec_size(x)); } if (x_type == vctrs_type_scalar) { stop_scalar_type(x, x_arg); } if (to_type == vctrs_type_scalar) { stop_scalar_type(to, to_arg); } if (has_dim(x) || has_dim(to)) { return vec_cast_dispatch_s3(opts); } SEXP out = R_NilValue; bool lossy = false; if (to_type == vctrs_type_s3 || x_type == vctrs_type_s3) { out = vec_cast_dispatch_native(opts, x_type, to_type, &lossy); } else { out = vec_cast_switch_native(opts, x_type, to_type, &lossy); } if (lossy || out == R_NilValue) { return vec_cast_dispatch_s3(opts); } else { return out; } } static SEXP vec_cast_switch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy) { SEXP x = opts->x; int dir = 0; enum vctrs_type2 type2 = vec_typeof2_impl(x_type, to_type, &dir); switch (type2) { case vctrs_type2_logical_logical: case vctrs_type2_double_double: case vctrs_type2_character_character: case vctrs_type2_integer_integer: return x; case vctrs_type2_logical_integer: if (dir == 0) { return lgl_as_integer(x, lossy); } else { return int_as_logical(x, lossy); } case vctrs_type2_logical_double: if (dir == 0) { return lgl_as_double(x, lossy); } else { return dbl_as_logical(x, lossy); } case vctrs_type2_integer_double: if (dir == 0) { return int_as_double(x, lossy); } else { return dbl_as_integer(x, lossy); } case vctrs_type2_dataframe_dataframe: return df_cast_opts(opts); default: break; } return R_NilValue; } static SEXP syms_vec_cast_default = NULL; // [[ include("cast.h") ]] SEXP vec_cast_default(SEXP x, SEXP to, SEXP x_arg, SEXP to_arg, const struct fallback_opts* opts) { SEXP df_fallback = PROTECT(r_int(opts->df)); SEXP s3_fallback = PROTECT(r_int(opts->s3)); SEXP out = vctrs_eval_mask7(syms_vec_cast_default, syms_x, x, syms_to, to, syms_x_arg, x_arg, syms_to_arg, to_arg, syms_from_dispatch, vctrs_shared_true, syms_df_fallback, df_fallback, syms_s3_fallback, s3_fallback); UNPROTECT(2); return out; } static SEXP vec_cast_dispatch_s3(const struct cast_opts* opts) { SEXP x = opts->x; SEXP to = opts->to; SEXP r_x_arg = PROTECT(vctrs_arg(opts->x_arg)); SEXP r_to_arg = PROTECT(vctrs_arg(opts->to_arg)); SEXP method_sym = R_NilValue; SEXP method = s3_find_method_xy("vec_cast", to, x, vctrs_method_table, &method_sym); // Compatibility with legacy double dispatch mechanism if (method == R_NilValue) { SEXP to_method_sym = R_NilValue; SEXP to_method = PROTECT(s3_find_method2("vec_cast", to, vctrs_method_table, &to_method_sym)); if (to_method != R_NilValue) { const char* to_method_str = CHAR(PRINTNAME(to_method_sym)); SEXP to_table = s3_get_table(CLOENV(to_method)); method = s3_find_method2(to_method_str, x, to_table, &method_sym); } UNPROTECT(1); } PROTECT(method); if (method == R_NilValue) { SEXP out = vec_cast_default(x, to, r_x_arg, r_to_arg, &(opts->fallback)); UNPROTECT(3); return out; } SEXP out = vec_invoke_coerce_method(method_sym, method, syms_x, x, syms_to, to, syms_x_arg, r_x_arg, syms_to_arg, r_to_arg, &(opts->fallback)); UNPROTECT(3); return out; } struct cast_err_data { const struct cast_opts* opts; SEXP out; }; static void vec_cast_e_cb(void* data_) { struct cast_err_data* data = (struct cast_err_data*) data_; data->out = vec_cast_opts(data->opts); } // [[ include("cast.h") ]] SEXP vec_cast_e(const struct cast_opts* opts, ERR* err) { struct cast_err_data data = { .opts = opts, .out = R_NilValue }; *err = r_try_catch(&vec_cast_e_cb, &data, syms_vctrs_error_cast_lossy, NULL, NULL); return data.out; } // [[ include("cast.h") ]] SEXP vec_cast_common_opts(SEXP xs, SEXP to, const struct fallback_opts* fallback_opts) { SEXP type = PROTECT(vec_ptype_common_opts(xs, to, fallback_opts)); R_len_t n = Rf_length(xs); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); for (R_len_t i = 0; i < n; ++i) { SEXP elt = VECTOR_ELT(xs, i); struct cast_opts opts = { .x = elt, .to = type, .fallback = *fallback_opts }; SET_VECTOR_ELT(out, i, vec_cast_opts(&opts)); } SEXP names = PROTECT(Rf_getAttrib(xs, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, names); UNPROTECT(3); return out; } // [[ include("cast.h") ]] SEXP vec_cast_common_params(SEXP xs, SEXP to, enum df_fallback df_fallback, enum s3_fallback s3_fallback) { struct fallback_opts opts = { .df = df_fallback, .s3 = s3_fallback }; return vec_cast_common_opts(xs, to, &opts); } // [[ include("vctrs.h") ]] SEXP vec_cast_common(SEXP xs, SEXP to) { return vec_cast_common_params(xs, to, DF_FALLBACK_DEFAULT, S3_FALLBACK_DEFAULT); } // [[ register(external = TRUE) ]] SEXP vctrs_cast_common(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP dots = PROTECT(rlang_env_dots_list(env)); SEXP to = PROTECT(Rf_eval(CAR(args), env)); SEXP out = vec_cast_common(dots, to); UNPROTECT(2); return out; } // [[ register(external = TRUE) ]] SEXP vctrs_cast_common_opts(SEXP call, SEXP op, SEXP args, SEXP env) { args = CDR(args); SEXP dots = PROTECT(rlang_env_dots_list(env)); SEXP to = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args); SEXP opts = PROTECT(Rf_eval(CAR(args), env)); const struct fallback_opts c_opts = new_fallback_opts(opts); SEXP out = vec_cast_common_opts(dots, to, &c_opts); UNPROTECT(3); return out; } // [[ include("cast.h") ]] struct cast_opts new_cast_opts(SEXP x, SEXP to, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg, SEXP opts) { return (struct cast_opts) { .x = x, .to = to, .x_arg = x_arg, .to_arg = to_arg, .fallback = { .df = r_int_get(r_list_get(opts, 0), 0), .s3 = r_int_get(r_list_get(opts, 1), 0) } }; } void vctrs_init_cast(SEXP ns) { syms_vec_cast_default = Rf_install("vec_default_cast"); } vctrs/src/owned.h0000644000176200001440000000153613712211241013466 0ustar liggesusers#ifndef VCTRS_OWNED_H #define VCTRS_OWNED_H #include "altrep.h" #include "utils.h" // Ownership is recursive enum vctrs_owned { VCTRS_OWNED_false = 0, VCTRS_OWNED_true }; static inline enum vctrs_owned vec_owned(SEXP x) { return NO_REFERENCES(x) ? VCTRS_OWNED_true : VCTRS_OWNED_false; } // Wrapper around `r_clone_referenced()` that only attempts to clone if // we indicate that we don't own `x`, or if `x` is ALTREP. // If `x` is ALTREP, we must unconditionally clone it before dereferencing, // otherwise we get a pointer into the ALTREP internals rather than into the // object it truly represents. static inline SEXP vec_clone_referenced(SEXP x, const enum vctrs_owned owned) { if (ALTREP(x)) { return r_clone_referenced(x); } if (owned == VCTRS_OWNED_false) { return r_clone_referenced(x); } else { return x; } } #endif vctrs/src/order-transform.h0000644000176200001440000000215614027045462015507 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_TRANSFORM_H #define VCTRS_ORDER_TRANSFORM_H #include "vctrs.h" // ----------------------------------------------------------------------------- /* * `proxy_chr_transform()` iterates over `proxy`, applying `chr_transform` * on any character vectors that it detects. * * It expects that: * - If `proxy` is a data frame, it has been flattened by its corresponding * `vec_proxy_*()` function. * - All character vectors in `proxy` have already been normalized to UTF-8 * by `vec_normalize_encoding()`. */ SEXP proxy_chr_transform(SEXP proxy, SEXP chr_transform); // ----------------------------------------------------------------------------- #endif vctrs/src/utils.h0000644000176200001440000004356114042540502013520 0ustar liggesusers#ifndef VCTRS_UTILS_H #define VCTRS_UTILS_H #include "arg-counter.h" #include "utils-rlang.h" #define SWAP(T, x, y) do { \ T tmp = x; \ x = y; \ y = tmp; \ } while (0) #define PROTECT_N(x, n) (++*n, PROTECT(x)) #define PROTECT2(x, y) (PROTECT(x), PROTECT(y)) enum vctrs_class_type { vctrs_class_list, vctrs_class_data_frame, vctrs_class_bare_data_frame, vctrs_class_bare_tibble, vctrs_class_bare_factor, vctrs_class_bare_ordered, vctrs_class_bare_date, vctrs_class_bare_posixct, vctrs_class_bare_posixlt, vctrs_class_unknown, vctrs_class_none }; bool r_is_bool(SEXP x); int r_bool_as_int(SEXP x); SEXP vctrs_eval_mask_n(SEXP fn, SEXP* syms, SEXP* args); SEXP vctrs_eval_mask1(SEXP fn, SEXP x_sym, SEXP x); SEXP vctrs_eval_mask2(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y); SEXP vctrs_eval_mask3(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); SEXP vctrs_eval_mask4(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4); SEXP vctrs_eval_mask5(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5); SEXP vctrs_eval_mask6(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6); SEXP vctrs_eval_mask7(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6, SEXP x7_sym, SEXP x7); SEXP vctrs_dispatch_n(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args); SEXP vctrs_dispatch1(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x); SEXP vctrs_dispatch2(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y); SEXP vctrs_dispatch3(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); SEXP vctrs_dispatch4(SEXP fn_sym, SEXP fn, SEXP w_sym, SEXP w, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); SEXP vctrs_dispatch6(SEXP fn_sym, SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6); __attribute__((noreturn)) void r_abort(const char* fmt, ...); __attribute__((noreturn)) void stop_internal(const char* fn, const char* fmt, ...); __attribute__((noreturn)) void stop_unimplemented_vctrs_type(const char* fn, enum vctrs_type); static inline __attribute__((noreturn)) void stop_unimplemented_type(const char* fn, SEXPTYPE type) { stop_internal(fn, "Unimplemented type `%s`.", Rf_type2char(type)); } SEXP map(SEXP x, SEXP (*fn)(SEXP)); SEXP map_with_data(SEXP x, SEXP (*fn)(SEXP, void*), void* data); SEXP df_map(SEXP df, SEXP (*fn)(SEXP)); SEXP bare_df_map(SEXP df, SEXP (*fn)(SEXP)); enum vctrs_class_type class_type(SEXP x); bool is_data_frame(SEXP x); bool is_bare_data_frame(SEXP x); bool is_bare_tibble(SEXP x); SEXP int_resize(SEXP x, r_ssize x_size, r_ssize size); SEXP raw_resize(SEXP x, r_ssize x_size, r_ssize size); SEXP chr_resize(SEXP x, r_ssize x_size, r_ssize size); SEXP vec_unique_names(SEXP x, bool quiet); SEXP vec_unique_colnames(SEXP x, bool quiet); // Returns S3 / S4 method for `generic` suitable for the class of `x`. The // inheritance hierarchy is explored except for the default method. SEXP s3_get_method(const char* generic, const char* cls, SEXP table); SEXP s3_sym_get_method(SEXP sym, SEXP table); SEXP s3_find_method(const char* generic, SEXP x, SEXP table); SEXP s3_class_find_method(const char* generic, SEXP class, SEXP table); SEXP s3_get_class(SEXP x); SEXP s3_find_method_xy(const char* generic, SEXP x, SEXP y, SEXP table, SEXP* method_sym_out); SEXP s3_find_method2(const char* generic, SEXP x, SEXP table, SEXP* method_sym_out); SEXP s3_paste_method_sym(const char* generic, const char* cls); SEXP s3_bare_class(SEXP x); SEXP s4_find_method(SEXP x, SEXP table); SEXP s4_class_find_method(SEXP class, SEXP table); bool vec_implements_ptype2(SEXP x); SEXP r_env_get(SEXP env, SEXP sym); static inline void r_env_poke(SEXP env, SEXP sym, SEXP value) { Rf_defineVar(sym, value, env); } extern SEXP syms_s3_methods_table; static inline SEXP s3_get_table(SEXP env) { return r_env_get(env, syms_s3_methods_table); } SEXP list_first_non_null(SEXP xs, R_len_t* non_null_i); bool list_is_homogeneously_classed(SEXP xs); // Destructive compacting SEXP node_compact_d(SEXP xs); extern struct vctrs_arg args_empty_; static struct vctrs_arg* const args_empty = &args_empty_; extern struct vctrs_arg args_dot_ptype_; static struct vctrs_arg* const args_dot_ptype = &args_dot_ptype_; extern struct vctrs_arg args_max_fill_; static struct vctrs_arg* const args_max_fill = &args_max_fill_; void never_reached(const char* fn) __attribute__((noreturn)); enum vctrs_type2 vec_typeof2_impl(enum vctrs_type type_x, enum vctrs_type type_y, int* left); enum vctrs_type2_s3 vec_typeof2_s3_impl(SEXP x, SEXP y, enum vctrs_type type_x, enum vctrs_type type_y, int* left); enum vctrs_class_type class_type(SEXP x); SEXP new_empty_factor(SEXP levels); SEXP new_empty_ordered(SEXP levels); bool list_has_inner_vec_names(SEXP x, R_len_t size); SEXP list_pluck(SEXP xs, R_len_t i); void init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing); SEXP compact_seq(R_len_t start, R_len_t size, bool increasing); bool is_compact_seq(SEXP x); void init_compact_rep(int* p, R_len_t i, R_len_t n); SEXP compact_rep(R_len_t i, R_len_t n); bool is_compact_rep(SEXP x); bool is_compact(SEXP x); SEXP compact_materialize(SEXP x); R_len_t vec_subscript_size(SEXP x); bool is_integer64(SEXP x); bool lgl_any_na(SEXP x); SEXP apply_name_spec(SEXP name_spec, SEXP outer, SEXP inner, R_len_t n); SEXP outer_names(SEXP names, SEXP outer, R_len_t n); SEXP vec_set_names(SEXP x, SEXP names); SEXP colnames(SEXP x); R_len_t size_validate(SEXP size, const char* arg); extern bool (*rlang_is_splice_box)(SEXP); extern SEXP (*rlang_unbox)(SEXP); extern SEXP (*rlang_env_dots_values)(SEXP); extern SEXP (*rlang_env_dots_list)(SEXP); void* r_vec_deref(SEXP x); const void* r_vec_deref_const(SEXP x); void* r_vec_deref_barrier(SEXP x); const void* r_vec_deref_barrier_const(SEXP x); void r_vec_fill(SEXPTYPE type, void* p_dest, r_ssize dest_i, const void* p_src, r_ssize src_i, r_ssize n); r_ssize r_lgl_sum(SEXP lgl, bool na_true); SEXP r_lgl_which(SEXP x, bool na_true); void r_lgl_fill(SEXP x, int value, R_len_t n); void r_int_fill(SEXP x, int value, R_len_t n); void r_chr_fill(SEXP x, SEXP value, R_len_t n); void r_p_lgl_fill(int* p_x, int value, R_len_t n); void r_p_int_fill(int* p_x, int value, R_len_t n); void r_p_chr_fill(SEXP* p_x, SEXP value, R_len_t n); void r_int_fill_seq(SEXP x, int start, R_len_t n); SEXP r_seq(R_len_t from, R_len_t to); bool r_int_any_na(SEXP x); R_len_t r_chr_find(SEXP x, SEXP value); #define r_resize Rf_xlengthgets int r_chr_max_len(SEXP x); SEXP r_chr_iota(R_len_t n, char* buf, int len, const char* prefix); #define R_LAZY_ALLOC(SYM, PI, R_TYPE, SIZE) do { \ if (SYM == R_NilValue) { \ SYM = Rf_allocVector(R_TYPE, SIZE); \ REPROTECT(SYM, PI); \ } \ } while (0); static inline SEXP r_new_logical(R_len_t n) { return Rf_allocVector(LGLSXP, n); } static inline SEXP r_new_integer(R_len_t n) { return Rf_allocVector(INTSXP, n); } static inline SEXP r_new_character(R_len_t n) { return Rf_allocVector(STRSXP, n); } static inline SEXP r_new_raw(R_len_t n) { return Rf_allocVector(RAWSXP, n); } static inline SEXP r_new_list(R_len_t n) { return Rf_allocVector(VECSXP, n); } static inline SEXP r_new_environment(SEXP parent) { SEXP env = Rf_allocSExp(ENVSXP); SET_ENCLOS(env, parent); return env; } static inline SEXP r_new_function(SEXP formals, SEXP body, SEXP env) { SEXP fn = Rf_allocSExp(CLOSXP); SET_FORMALS(fn, formals); SET_BODY(fn, body); SET_CLOENV(fn, env); return fn; } SEXP r_as_function(SEXP x, const char* arg); SEXP r_protect(SEXP x); bool r_is_true(SEXP x); bool r_is_string(SEXP x); bool r_is_number(SEXP x); bool r_is_positive_number(SEXP x); SEXP r_peek_option(const char* option); SEXP r_peek_frame(); SEXP r_clone_referenced(SEXP x); SEXP r_clone_shared(SEXP x); SEXP r_parse(const char* str); SEXP r_parse_eval(const char* str, SEXP env); static inline SEXP r_copy(SEXP x) { return Rf_duplicate(x); } static inline SEXP r_clone(SEXP x) { return Rf_shallow_duplicate(x); } SEXP r_pairlist(SEXP* tags, SEXP* cars); SEXP r_call(SEXP fn, SEXP* tags, SEXP* cars); static inline SEXP r_poke_names(SEXP x, SEXP names) { return Rf_setAttrib(x, R_NamesSymbol, names); } static inline SEXP r_poke_class(SEXP x, SEXP names) { return Rf_setAttrib(x, R_ClassSymbol, names); } static inline SEXP r_dim(SEXP x) { return Rf_getAttrib(x, R_DimSymbol); } static inline SEXP r_poke_dim(SEXP x, SEXP dim) { return Rf_setAttrib(x, R_DimSymbol, dim); } static inline SEXP r_mark_s4(SEXP x) { SET_S4_OBJECT(x); return(x); } static inline SEXP r_unmark_s4(SEXP x) { UNSET_S4_OBJECT(x); return(x); } bool r_has_name_at(SEXP names, R_len_t i); bool r_is_names(SEXP names); bool r_is_minimal_names(SEXP x); bool r_is_empty_names(SEXP x); bool r_is_function(SEXP x); bool r_chr_has_string(SEXP x, SEXP str); static inline const char* r_chr_get_c_string(SEXP chr, R_len_t i) { return CHAR(STRING_ELT(chr, i)); } static inline void r__vec_get_check(SEXP x, R_len_t i, const char* fn) { if ((Rf_length(x) - 1) < i) { stop_internal(fn, "Vector is too small."); } } static inline int r_lgl_get(SEXP x, R_len_t i) { r__vec_get_check(x, i, "r_lgl_get"); return LOGICAL(x)[i]; } static inline int r_int_get(SEXP x, R_len_t i) { r__vec_get_check(x, i, "r_int_get"); return INTEGER(x)[i]; } static inline double r_dbl_get(SEXP x, R_len_t i) { r__vec_get_check(x, i, "r_dbl_get"); return REAL(x)[i]; } #define r_chr_get STRING_ELT #define r_list_get VECTOR_ELT #define r_chr_poke SET_STRING_ELT #define r_list_poke SET_VECTOR_ELT static inline void r_int_poke(SEXP x, R_len_t i, int value) { r__vec_get_check(x, i, "r_int_poke"); INTEGER(x)[i] = value; } static inline void* r_vec_unwrap(SEXPTYPE type, SEXP x) { switch (type) { case INTSXP: return (void*) INTEGER(x); default: stop_unimplemented_type("r_vec_unwrap", type); } } #define r_lgl Rf_ScalarLogical #define r_int Rf_ScalarInteger #define r_str Rf_mkChar #define r_chr Rf_mkString #define r_sym Rf_install static inline SEXP r_list(SEXP x) { SEXP out = Rf_allocVector(VECSXP, 1); SET_VECTOR_ELT(out, 0, x); return out; } #define r_str_as_character Rf_ScalarString static inline SEXP r_sym_as_character(SEXP x) { return r_str_as_character(PRINTNAME(x)); } // This unserialises ASCII Unicode tags of the form `` extern SEXP (*rlang_sym_as_character)(SEXP x); SEXP r_as_data_frame(SEXP x); static inline void r_dbg_save(SEXP x, const char* name) { Rf_defineVar(Rf_install(name), x, R_GlobalEnv); } ERR r_try_catch(void (*fn)(void*), void* fn_data, SEXP cnd_sym, void (*hnd)(void*), void* hnd_data); extern SEXP vctrs_ns_env; extern SEXP syms_cnd_signal; static inline void r_cnd_signal(SEXP cnd) { SEXP call = PROTECT(Rf_lang2(syms_cnd_signal, cnd)); Rf_eval(call, vctrs_ns_env); UNPROTECT(1); } extern SEXP result_attrib; static inline SEXP r_result(SEXP x, ERR err) { if (!err) { err = R_NilValue; } SEXP result = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(result, 0, x); SET_VECTOR_ELT(result, 1, err); SET_ATTRIB(result, result_attrib); SET_OBJECT(result, 1); UNPROTECT(1); return result; } static inline SEXP r_result_get(SEXP x, ERR err) { if (err) { r_cnd_signal(err); } return x; } static inline struct vctrs_arg vec_as_arg(SEXP x) { if (x == R_NilValue) { return *args_empty; } if (!r_is_string(x)) { Rf_errorcall(R_NilValue, "Argument tag must be a string."); } return new_wrapper_arg(NULL, r_chr_get_c_string(x, 0)); } extern SEXP fns_quote; static inline SEXP expr_protect(SEXP x) { switch (TYPEOF(x)) { case SYMSXP: case LANGSXP: return Rf_lang2(fns_quote, x); default: return x; } } static inline const void* vec_type_missing_value(enum vctrs_type type) { switch (type) { case vctrs_type_logical: return &NA_LOGICAL; case vctrs_type_integer: return &NA_INTEGER; case vctrs_type_double: return &NA_REAL; case vctrs_type_complex: return &vctrs_shared_na_cpl; case vctrs_type_character: return &NA_STRING; case vctrs_type_list: return &R_NilValue; default: stop_unimplemented_vctrs_type("vec_type_missing_value", type); } } void c_print_backtrace(); void r_browse(SEXP x); // Adapted from CERT C coding standards static inline intmax_t intmax_add(intmax_t x, intmax_t y) { if ((y > 0 && x > (INTMAX_MAX - y)) || (y < 0 && x < (INTMAX_MIN - y))) { stop_internal("intmax_add", "Values too large to be added."); } return x + y; } static inline intmax_t intmax_subtract(intmax_t x, intmax_t y) { if ((y > 0 && x < (INTMAX_MIN + y)) || (y < 0 && x < (INTMAX_MAX + y))) { stop_internal("intmax_subtract", "Subtraction resulted in overflow or underflow."); } return x - y; } static inline r_ssize r_ssize_add(r_ssize x, r_ssize y) { intmax_t out = intmax_add(x, y); if (out > R_SSIZE_MAX) { stop_internal("r_ssize_safe_add", "Result too large for an `r_ssize`."); } return (r_ssize) out; } SEXP chr_c(SEXP x, SEXP y); extern SEXP vctrs_ns_env; extern SEXP vctrs_shared_empty_str; extern SEXP vctrs_shared_zero_int; extern SEXP classes_data_frame; extern SEXP classes_factor; extern SEXP classes_ordered; extern SEXP classes_date; extern SEXP classes_posixct; extern SEXP classes_tibble; extern SEXP classes_vctrs_group_rle; extern SEXP strings_dots; extern SEXP strings_empty; extern SEXP strings_tbl; extern SEXP strings_tbl_df; extern SEXP strings_data_frame; extern SEXP strings_date; extern SEXP strings_posixct; extern SEXP strings_posixlt; extern SEXP strings_posixt; extern SEXP strings_factor; extern SEXP strings_ordered; extern SEXP strings_list; extern SEXP strings_none; extern SEXP strings_minimal; extern SEXP strings_unique; extern SEXP strings_universal; extern SEXP strings_check_unique; extern SEXP strings_key; extern SEXP strings_loc; extern SEXP strings_val; extern SEXP strings_group; extern SEXP strings_length; extern SEXP strings_vctrs_vctr; extern SEXP strings_times; extern SEXP chrs_subset; extern SEXP chrs_extract; extern SEXP chrs_assign; extern SEXP chrs_rename; extern SEXP chrs_remove; extern SEXP chrs_negate; extern SEXP chrs_null; extern SEXP chrs_logical; extern SEXP chrs_integer; extern SEXP chrs_double; extern SEXP chrs_complex; extern SEXP chrs_character; extern SEXP chrs_raw; extern SEXP chrs_list; extern SEXP chrs_expression; extern SEXP chrs_numeric; extern SEXP chrs_function; extern SEXP chrs_empty; extern SEXP chrs_cast; extern SEXP chrs_error; extern SEXP chrs_combine; extern SEXP chrs_convert; extern SEXP syms_i; extern SEXP syms_n; extern SEXP syms_x; extern SEXP syms_y; extern SEXP syms_x_size; extern SEXP syms_y_size; extern SEXP syms_to; extern SEXP syms_dots; extern SEXP syms_bracket; extern SEXP syms_arg; extern SEXP syms_x_arg; extern SEXP syms_y_arg; extern SEXP syms_to_arg; extern SEXP syms_times_arg; extern SEXP syms_subscript_arg; extern SEXP syms_out; extern SEXP syms_value; extern SEXP syms_quiet; extern SEXP syms_dot_name_spec; extern SEXP syms_outer; extern SEXP syms_inner; extern SEXP syms_tilde; extern SEXP syms_dot_environment; extern SEXP syms_ptype; extern SEXP syms_missing; extern SEXP syms_size; extern SEXP syms_subscript_action; extern SEXP syms_subscript_type; extern SEXP syms_repair; extern SEXP syms_tzone; extern SEXP syms_data; extern SEXP syms_vctrs_error_incompatible_type; extern SEXP syms_vctrs_error_cast_lossy; extern SEXP syms_cnd_signal; extern SEXP syms_logical; extern SEXP syms_numeric; extern SEXP syms_character; extern SEXP syms_body; extern SEXP syms_parent; extern SEXP syms_from_dispatch; extern SEXP syms_df_fallback; extern SEXP syms_s3_fallback; extern SEXP syms_stop_incompatible_type; extern SEXP syms_stop_incompatible_size; extern SEXP syms_action; extern SEXP syms_vctrs_common_class_fallback; extern SEXP syms_fallback_class; extern SEXP syms_abort; extern SEXP syms_message; extern SEXP syms_chr_transform; static const char * const c_strs_vctrs_common_class_fallback = "vctrs:::common_class_fallback"; #define syms_names R_NamesSymbol extern SEXP fns_bracket; extern SEXP fns_quote; extern SEXP fns_names; extern SEXP vctrs_method_table; extern SEXP base_method_table; extern SEXP s4_c_method_table; #if defined(RLIB_DEBUG) SEXP R_inspect(SEXP x); SEXP R_inspect3(SEXP x, int deep, int pvec); #endif #endif vctrs/src/order-radix.h0000644000176200001440000000441614042540502014574 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_RADIX_H #define VCTRS_ORDER_RADIX_H #include "vctrs.h" // ----------------------------------------------------------------------------- SEXP parse_na_value(SEXP na_value); SEXP parse_direction(SEXP direction); // ----------------------------------------------------------------------------- /* * `order` is an integer vector intended to hold the ordering vector * in `vec_order()`. It is allocated eagerly, but the initialization of its * values is done lazily. Typically, it is initialized to a 1-based sequential * ordering which is rearranged by the internal algorithm. However, for the * counting order, the initialization is not required for the first integer * column, which can result in a nice performance improvement. */ struct order { SEXP self; SEXP data; int* p_data; r_ssize size; bool initialized; }; #define PROTECT_ORDER(p_order, p_n) do { \ PROTECT((p_order)->self); \ PROTECT((p_order)->data); \ *(p_n) += 2; \ } while (0) static inline struct order* new_order(r_ssize size) { SEXP self = PROTECT(r_new_raw(sizeof(struct order))); struct order* p_order = (struct order*) RAW(self); SEXP data = PROTECT(Rf_allocVector(INTSXP, size)); int* p_data = INTEGER(data); p_order->self = self; p_order->data = data; p_order->p_data = p_data; p_order->size = size; p_order->initialized = false; UNPROTECT(2); return p_order; } static inline int* init_order(struct order* p_order) { if (p_order->initialized) { return p_order->p_data; } // Initialize `x` with sequential 1-based ordering for (r_ssize i = 0; i < p_order->size; ++i) { p_order->p_data[i] = i + 1; } p_order->initialized = true; return p_order->p_data; } // ----------------------------------------------------------------------------- #endif vctrs/src/slice-assign.h0000644000176200001440000000174713717456727014770 0ustar liggesusers#ifndef VCTRS_SLICE_ASSIGN_H #define VCTRS_SLICE_ASSIGN_H #include "owned.h" struct vec_assign_opts { bool assign_names; bool ignore_outer_names; struct vctrs_arg* x_arg; struct vctrs_arg* value_arg; }; SEXP vec_assign_opts(SEXP x, SEXP index, SEXP value, const struct vec_assign_opts* opts); SEXP vec_proxy_assign_opts(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts); SEXP chr_assign(SEXP out, SEXP index, SEXP value, const enum vctrs_owned owned); SEXP list_assign(SEXP out, SEXP index, SEXP value, const enum vctrs_owned owned); SEXP df_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts); SEXP vec_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts); #endif vctrs/src/order-sortedness.c0000644000176200001440000003172714042540502015656 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "order-sortedness.h" #include "utils.h" // ----------------------------------------------------------------------------- static inline int dbl_cmp(double x, double y, const int direction, const int na_order); /* * Check if a double vector is ordered, handling `decreasing` and `na_last` * * If the double vector is in the expected ordering, no sorting needs to * occur. In these cases, if `p_x` is in exactly the expected ordering. * If `p_x` is in exactly the opposite ordering, the the ordering will later * be reversed (this only happens if it is strictly opposite of expected * ordering, ties would prevent the reversal from being stable). Group * information is also pushed in these cases for use in the next columns. */ enum vctrs_sortedness dbl_sortedness(const double* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; double previous = p_x[0]; r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { double current = p_x[i]; int cmp = dbl_cmp( current, previous, direction, na_order ); if (cmp >= 0) { break; } previous = current; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { double current = p_x[i]; int cmp = dbl_cmp( current, previous, direction, na_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } /* * Compare two doubles, handling `na_order` and `direction` */ static inline int dbl_cmp(double x, double y, const int direction, const int na_order) { if (isnan(x)) { if (isnan(y)) { return 0; } else { return na_order; } } if (isnan(y)) { return -na_order; } int cmp = (x > y) - (x < y); return cmp * direction; } // ----------------------------------------------------------------------------- static inline int int_cmp(int x, int y, const int direction, const int na_order); // Very similar to `dbl_sortedness()` enum vctrs_sortedness int_sortedness(const int* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; int previous = p_x[0]; r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { int current = p_x[i]; int cmp = int_cmp( current, previous, direction, na_order ); if (cmp >= 0) { break; } previous = current; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { int current = p_x[i]; int cmp = int_cmp( current, previous, direction, na_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } // Very similar to `dbl_cmp()` static inline int int_cmp(int x, int y, const int direction, const int na_order) { if (x == NA_INTEGER) { if (y == NA_INTEGER) { return 0; } else { return na_order; } } if (y == NA_INTEGER) { return -na_order; } int cmp = (x > y) - (x < y); return cmp * direction; } // ----------------------------------------------------------------------------- static inline int chr_cmp(SEXP x, SEXP y, const char* c_x, const char* c_y, const int direction, const int na_order); /* * Check if `p_x` is in the "expected" ordering as defined by `decreasing` and * `na_last`. If `p_x` is in the expected ordering, or if it is in the strictly * opposite of the expected ordering (with no ties), then groups are pushed, * and a `vctrs_sortedness` value is returned indicating how to finalize the * order. */ enum vctrs_sortedness chr_sortedness(const SEXP* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; SEXP previous = p_x[0]; const char* c_previous = CHAR(previous); r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { SEXP current = p_x[i]; const char* c_current = CHAR(current); int cmp = chr_cmp( current, previous, c_current, c_previous, direction, na_order ); if (cmp >= 0) { break; } previous = current; c_previous = c_current; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { SEXP current = p_x[i]; const char* c_current = CHAR(current); int cmp = chr_cmp( current, previous, c_current, c_previous, direction, na_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; c_previous = c_current; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } /* * `direction` is `1` for ascending and `-1` for descending. * `na_order` is `1` if `na_last = true` and `-1` if `na_last = false`. */ static inline int chr_cmp(SEXP x, SEXP y, const char* c_x, const char* c_y, const int direction, const int na_order) { // Same pointer - including `NA`s if (x == y) { return 0; } if (x == NA_STRING) { return na_order; } if (y == NA_STRING) { return -na_order; } return direction * strcmp(c_x, c_y); } // ----------------------------------------------------------------------------- static inline void int_incr(r_ssize size, int* p_x); static inline void ord_reverse(r_ssize size, int* p_o); /* * Resolve ordering based on the sortedness and whether or not `p_o` has * been initialized. For a vector / first column, this function has to * initialize the ordering (for reversed ordering this is faster than * initializing the order sequentially then reversing it). * * `size` will correspond to the size of `x` for the first column, but will * correspond to the size of the current group for subsequent columns. */ void ord_resolve_sortedness(enum vctrs_sortedness sortedness, r_ssize size, int* p_o) { switch (sortedness) { case VCTRS_SORTEDNESS_sorted: int_incr(size, p_o); return; case VCTRS_SORTEDNESS_reversed: ord_reverse(size, p_o); return; case VCTRS_SORTEDNESS_unsorted: Rf_errorcall(R_NilValue, "Internal error: Unsorted case should be handled elsewhere."); } never_reached("ord_resolve_sortedness"); } // Initialize with sequential 1-based ordering static inline void int_incr(r_ssize size, int* p_x) { for (r_ssize i = 0; i < size; ++i) { p_x[i] = i + 1; } } // Used when in strictly opposite of expected order and uninitialized. static inline void ord_reverse(r_ssize size, int* p_o) { const r_ssize half = size / 2; for (r_ssize i = 0; i < half; ++i) { r_ssize swap = size - 1 - i; p_o[i] = swap + 1; p_o[swap] = i + 1; } // Initialize center value if odd number if (size % 2 != 0) { p_o[half] = half + 1; } } static inline void ord_reverse_chunk(r_ssize size, int* p_o); void ord_resolve_sortedness_chunk(enum vctrs_sortedness sortedness, r_ssize size, int* p_o) { switch (sortedness) { case VCTRS_SORTEDNESS_sorted: return; case VCTRS_SORTEDNESS_reversed: ord_reverse_chunk(size, p_o); return; case VCTRS_SORTEDNESS_unsorted: Rf_errorcall(R_NilValue, "Internal error: Unsorted case should be handled elsewhere."); } never_reached("ord_resolve_sortedness_chunk"); } // Used when in strictly opposite of expected order and initialized. // No need to alter "center" value here, it will be initialized to a value // already and it won't be swapped. static inline void ord_reverse_chunk(r_ssize size, int* p_o) { const r_ssize half = size / 2; for (r_ssize i = 0; i < half; ++i) { r_ssize swap = size - 1 - i; const int temp = p_o[i]; p_o[i] = p_o[swap]; p_o[swap] = temp; } } vctrs/src/names.c0000644000176200001440000005673714042540502013467 0ustar liggesusers#include #include "vctrs.h" #include "type-data-frame.h" #include "utils.h" #include "dim.h" static void describe_repair(SEXP old_names, SEXP new_names); // 3 leading '.' + 1 trailing '\0' + 24 characters #define MAX_IOTA_SIZE 28 // Initialised at load time SEXP syms_as_universal_names = NULL; SEXP syms_validate_unique_names = NULL; SEXP fns_as_universal_names = NULL; SEXP fns_validate_unique_names = NULL; // Defined below SEXP vctrs_as_minimal_names(SEXP names); SEXP vec_as_universal_names(SEXP names, bool quiet); SEXP vec_validate_unique_names(SEXP names, struct vctrs_arg* arg); SEXP vec_as_custom_names(SEXP names, const struct name_repair_opts* opts); static void vec_validate_minimal_names(SEXP names, R_len_t n); // [[ include("names.h") ]] SEXP vec_as_names(SEXP names, const struct name_repair_opts* opts) { if (!opts) { return names; } switch (opts->type) { case name_repair_none: return names; case name_repair_minimal: return vctrs_as_minimal_names(names); case name_repair_unique: return vec_as_unique_names(names, opts->quiet); case name_repair_universal: return vec_as_universal_names(names, opts->quiet); case name_repair_check_unique: return vec_validate_unique_names(names, opts->arg); case name_repair_custom: return vec_as_custom_names(names, opts); } never_reached("vec_as_names"); } // [[ register() ]] SEXP vctrs_as_names(SEXP names, SEXP repair, SEXP repair_arg, SEXP quiet) { if (!r_is_bool(quiet)) { Rf_errorcall(R_NilValue, "`quiet` must a boolean value."); } bool quiet_ = LOGICAL(quiet)[0]; struct vctrs_arg arg_ = vec_as_arg(repair_arg); struct name_repair_opts repair_opts = new_name_repair_opts(repair, &arg_, quiet_); PROTECT_NAME_REPAIR_OPTS(&repair_opts); SEXP out = vec_as_names(names, &repair_opts); UNPROTECT(1); return out; } SEXP vec_as_universal_names(SEXP names, bool quiet) { SEXP quiet_obj = PROTECT(r_lgl(quiet)); SEXP out = vctrs_dispatch2(syms_as_universal_names, fns_as_universal_names, syms_names, names, syms_quiet, quiet_obj); UNPROTECT(1); return out; } SEXP vec_validate_unique_names(SEXP names, struct vctrs_arg* arg) { SEXP arg_obj = PROTECT(vctrs_arg(arg)); SEXP out = PROTECT(vctrs_dispatch2(syms_validate_unique_names, fns_validate_unique_names, syms_names, names, syms_arg, arg_obj)); // Restore visibility Rf_eval(R_NilValue, R_EmptyEnv); UNPROTECT(2); return out; } SEXP vec_as_custom_names(SEXP names, const struct name_repair_opts* opts) { names = PROTECT(vctrs_as_minimal_names(names)); // Don't use vctrs dispatch utils because we match argument positionally SEXP call = PROTECT(Rf_lang2(syms_repair, syms_names)); SEXP mask = PROTECT(r_new_environment(R_GlobalEnv)); Rf_defineVar(syms_repair, opts->fn, mask); Rf_defineVar(syms_names, names, mask); SEXP out = PROTECT(Rf_eval(call, mask)); vec_validate_minimal_names(out, Rf_length(names)); UNPROTECT(4); return out; } static SEXP vec_names_impl(SEXP x, bool proxy) { bool has_class = OBJECT(x); if (has_class && Rf_inherits(x, "data.frame")) { // Only return row names if they are character. Data frames with // automatic row names are treated as unnamed. SEXP rn = df_rownames(x); if (rownames_type(rn) == ROWNAMES_IDENTIFIERS) { return rn; } else { return R_NilValue; } } if (vec_bare_dim(x) == R_NilValue) { if (!proxy && has_class) { return vctrs_dispatch1(syms_names, fns_names, syms_x, x); } else { return r_names(x); } } SEXP dimnames = PROTECT(r_attrib_get(x, R_DimNamesSymbol)); if (dimnames == R_NilValue || Rf_length(dimnames) < 1) { UNPROTECT(1); return R_NilValue; } SEXP out = VECTOR_ELT(dimnames, 0); UNPROTECT(1); return out; } // [[ register(); include("vctrs.h") ]] SEXP vec_names(SEXP x) { return vec_names_impl(x, false); } // [[ include("vctrs.h") ]] SEXP vec_proxy_names(SEXP x) { return vec_names_impl(x, true); } // [[ register() ]] SEXP vctrs_as_minimal_names(SEXP names) { if (TYPEOF(names) != STRSXP) { Rf_errorcall(R_NilValue, "`names` must be a character vector"); } R_len_t i = 0; R_len_t n = Rf_length(names); const SEXP* ptr = STRING_PTR_RO(names); for (; i < n; ++i, ++ptr) { SEXP elt = *ptr; if (elt == NA_STRING) { break; } } if (i == n) { return names; } names = PROTECT(Rf_shallow_duplicate(names)); for (; i < n; ++i, ++ptr) { SEXP elt = *ptr; if (elt == NA_STRING) { SET_STRING_ELT(names, i, strings_empty); } } UNPROTECT(1); return names; } // [[ register() ]] SEXP vctrs_minimal_names(SEXP x) { SEXP names = PROTECT(vec_names(x)); if (names == R_NilValue) { names = Rf_allocVector(STRSXP, vec_size(x)); } else { names = vctrs_as_minimal_names(names); } UNPROTECT(1); return names; } // From dictionary.c SEXP vctrs_duplicated(SEXP x); static bool any_has_suffix(SEXP names); static SEXP as_unique_names_impl(SEXP names, bool quiet); static void stop_large_name(); static bool is_dotdotint(const char* name); static ptrdiff_t suffix_pos(const char* name); static bool needs_suffix(SEXP str); // [[ include("vctrs.h") ]] SEXP vec_as_unique_names(SEXP names, bool quiet) { if (is_unique_names(names) && !any_has_suffix(names)) { return names; } else { return(as_unique_names_impl(names, quiet)); } } // [[ include("vctrs.h") ]] bool is_unique_names(SEXP names) { if (TYPEOF(names) != STRSXP) { Rf_errorcall(R_NilValue, "`names` must be a character vector"); } R_len_t n = Rf_length(names); const SEXP* names_ptr = STRING_PTR_RO(names); if (duplicated_any(names)) { return false; } for (R_len_t i = 0; i < n; ++i) { SEXP elt = names_ptr[i]; if (needs_suffix(elt)) { return false; } } return true; } bool any_has_suffix(SEXP names) { R_len_t n = Rf_length(names); const SEXP* names_ptr = STRING_PTR_RO(names); for (R_len_t i = 0; i < n; ++i) { SEXP elt = names_ptr[i]; if (suffix_pos(CHAR(elt)) >= 0) { return true; } } return false; } SEXP as_unique_names_impl(SEXP names, bool quiet) { R_len_t n = Rf_length(names); SEXP new_names = PROTECT(Rf_shallow_duplicate(names)); const SEXP* new_names_ptr = STRING_PTR_RO(new_names); for (R_len_t i = 0; i < n; ++i) { SEXP elt = new_names_ptr[i]; // Set `NA` and dots values to "" so they get replaced by `...n` // later on if (needs_suffix(elt)) { elt = strings_empty; SET_STRING_ELT(new_names, i, elt); continue; } // Strip `...n` suffixes const char* nm = CHAR(elt); int pos = suffix_pos(nm); if (pos >= 0) { elt = Rf_mkCharLenCE(nm, pos, Rf_getCharCE(elt)); SET_STRING_ELT(new_names, i, elt); continue; } } // Append all duplicates with a suffix SEXP dups = PROTECT(vctrs_duplicated(new_names)); const int* dups_ptr = LOGICAL_RO(dups); for (R_len_t i = 0; i < n; ++i) { SEXP elt = new_names_ptr[i]; if (elt != strings_empty && !dups_ptr[i]) { continue; } const char* name = CHAR(elt); int size = strlen(name); int buf_size = size + MAX_IOTA_SIZE; R_CheckStack2(buf_size); char buf[buf_size]; buf[0] = '\0'; memcpy(buf, name, size); int remaining = buf_size - size; int needed = snprintf(buf + size, remaining, "...%d", i + 1); if (needed >= remaining) { stop_large_name(); } SET_STRING_ELT(new_names, i, Rf_mkCharLenCE(buf, size + needed, Rf_getCharCE(elt))); } if (!quiet) { describe_repair(names, new_names); } UNPROTECT(2); return new_names; } SEXP vctrs_as_unique_names(SEXP names, SEXP quiet) { SEXP out = PROTECT(vec_as_unique_names(names, LOGICAL(quiet)[0])); UNPROTECT(1); return out; } SEXP vctrs_is_unique_names(SEXP names) { bool out = is_unique_names(names); return Rf_ScalarLogical(out); } static bool is_dotdotint(const char* name) { int n = strlen(name); if (n < 3) { return false; } if (name[0] != '.' || name[1] != '.') { return false; } if (name[2] == '.') { name += 3; } else { name += 2; } return (bool) strtol(name, NULL, 10); } static ptrdiff_t suffix_pos(const char* name) { int n = strlen(name); const char* suffix_end = NULL; int in_dots = 0; bool in_digits = false; for (const char* ptr = name + n - 1; ptr >= name; --ptr) { char c = *ptr; if (in_digits) { if (c == '.') { in_digits = false; in_dots = 1; continue; } if (isdigit(c)) { continue; } goto done; } switch (in_dots) { case 0: if (isdigit(c)) { in_digits = true; continue; } goto done; case 1: case 2: if (c == '.') { ++in_dots; continue; } goto done; case 3: suffix_end = ptr + 1; if (isdigit(c)) { in_dots = 0; in_digits = true; continue; } goto done; default: stop_internal("suffix_pos", "Unexpected state."); }} done: if (suffix_end) { return suffix_end - name; } else { return -1; } } static void stop_large_name() { Rf_errorcall(R_NilValue, "Can't tidy up name because it is too large"); } static bool needs_suffix(SEXP str) { return str == NA_STRING || str == strings_dots || str == strings_empty || is_dotdotint(CHAR(str)); } static SEXP names_iota(R_len_t n); static SEXP vec_unique_names_impl(SEXP names, R_len_t n, bool quiet); // [[ register() ]] SEXP vctrs_unique_names(SEXP x, SEXP quiet) { return vec_unique_names(x, LOGICAL(quiet)[0]); } // [[ include("utils.h") ]] SEXP vec_unique_names(SEXP x, bool quiet) { SEXP names = PROTECT(vec_names(x)); SEXP out = vec_unique_names_impl(names, vec_size(x), quiet); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP vec_unique_colnames(SEXP x, bool quiet) { SEXP names = PROTECT(colnames(x)); SEXP out = vec_unique_names_impl(names, Rf_ncols(x), quiet); UNPROTECT(1); return out; } static SEXP vec_unique_names_impl(SEXP names, R_len_t n, bool quiet) { SEXP out; if (names == R_NilValue) { out = PROTECT(names_iota(n)); if (!quiet) { describe_repair(names, out); } } else { out = PROTECT(vec_as_unique_names(names, quiet)); } UNPROTECT(1); return(out); } static SEXP names_iota(R_len_t n) { char buf[MAX_IOTA_SIZE]; SEXP nms = r_chr_iota(n, buf, MAX_IOTA_SIZE, "..."); if (nms == R_NilValue) { Rf_errorcall(R_NilValue, "Too many names to repair."); } return nms; } static void describe_repair(SEXP old_names, SEXP new_names) { SEXP call = PROTECT(Rf_lang3(Rf_install("describe_repair"), old_names, new_names)); Rf_eval(call, vctrs_ns_env); // To reset visibility when called from a `.External2()` Rf_eval(R_NilValue, R_EmptyEnv); UNPROTECT(1); } // [[ register() ]] SEXP vctrs_outer_names(SEXP names, SEXP outer, SEXP n) { if (names != R_NilValue && TYPEOF(names) != STRSXP) { stop_internal("vctrs_outer_names", "`names` must be `NULL` or a string."); } if (!r_is_number(n)) { stop_internal("vctrs_outer_names", "`n` must be a single integer."); } if (outer != R_NilValue) { outer = r_chr_get(outer, 0); } return outer_names(names, outer, r_int_get(n, 0)); } // [[ include("utils.h") ]] SEXP outer_names(SEXP names, SEXP outer, R_len_t n) { if (outer == R_NilValue) { return names; } if (TYPEOF(outer) != CHARSXP) { stop_internal("outer_names", "`outer` must be a scalar string."); } if (outer == strings_empty || outer == NA_STRING) { return names; } if (r_is_empty_names(names)) { if (n == 1) { return r_str_as_character(outer); } else { return r_seq_chr(CHAR(outer), n); } } else { return r_chr_paste_prefix(names, CHAR(outer), ".."); } } // [[ register() ]] SEXP vctrs_apply_name_spec(SEXP name_spec, SEXP outer, SEXP inner, SEXP n) { return apply_name_spec(name_spec, r_chr_get(outer, 0), inner, r_int_get(n, 0)); } static SEXP glue_as_name_spec(SEXP spec); // [[ include("utils.h") ]] SEXP apply_name_spec(SEXP name_spec, SEXP outer, SEXP inner, R_len_t n) { if (Rf_inherits(name_spec, "rlang_zap")) { return R_NilValue; } if (outer == R_NilValue) { return inner; } if (TYPEOF(outer) != CHARSXP) { stop_internal("apply_name_spec", "`outer` must be a scalar string."); } if (outer == strings_empty || outer == NA_STRING) { if (inner == R_NilValue) { return chrs_empty; } else { return inner; } } if (r_is_empty_names(inner)) { if (n == 0) { return vctrs_shared_empty_chr; } if (n == 1) { return r_str_as_character(outer); } inner = PROTECT(r_seq(1, n + 1)); } else { inner = PROTECT(inner); } switch (TYPEOF(name_spec)) { case CLOSXP: break; case STRSXP: name_spec = glue_as_name_spec(name_spec); break; default: name_spec = r_as_function(name_spec, ".name_spec"); break; case NILSXP: Rf_errorcall(R_NilValue, "Can't merge the outer name `%s` with a vector of length > 1.\n" "Please supply a `.name_spec` specification.", CHAR(outer)); } PROTECT(name_spec); SEXP outer_chr = PROTECT(r_str_as_character(outer)); SEXP out = PROTECT(vctrs_dispatch2(syms_dot_name_spec, name_spec, syms_outer, outer_chr, syms_inner, inner)); out = vec_recycle(out, n, NULL); if (out != R_NilValue) { if (TYPEOF(out) != STRSXP) { Rf_errorcall(R_NilValue, "`.name_spec` must return a character vector."); } if (Rf_length(out) != n) { Rf_errorcall(R_NilValue, "`.name_spec` must return a character vector as long as `inner`."); } } UNPROTECT(4); return out; } static SEXP syms_glue_as_name_spec = NULL; static SEXP fns_glue_as_name_spec = NULL; static SEXP syms_internal_spec = NULL; static SEXP glue_as_name_spec(SEXP spec) { if (!r_is_string(spec)) { Rf_errorcall(R_NilValue, "Glue specification in `.name_spec` must be a single string."); } return vctrs_dispatch1(syms_glue_as_name_spec, fns_glue_as_name_spec, syms_internal_spec, spec); } #define VCTRS_PASTE_BUFFER_MAX_SIZE 4096 char vctrs_paste_buffer[VCTRS_PASTE_BUFFER_MAX_SIZE]; // [[ include("names.h") ]] SEXP r_chr_paste_prefix(SEXP names, const char* prefix, const char* sep) { int n_protect = 0; names = PROTECT_N(Rf_shallow_duplicate(names), &n_protect); R_len_t n = Rf_length(names); int outer_len = strlen(prefix); int names_len = r_chr_max_len(names); int sep_len = strlen(sep); int total_len = outer_len + names_len + sep_len + 1; char* buf = vctrs_paste_buffer; if (total_len > VCTRS_PASTE_BUFFER_MAX_SIZE) { SEXP buf_box = PROTECT_N( Rf_allocVector(RAWSXP, total_len * sizeof(char)), &n_protect ); buf = (char*) RAW(buf_box); } buf[total_len - 1] = '\0'; char* bufp = buf; memcpy(bufp, prefix, outer_len); bufp += outer_len; for (int i = 0; i < sep_len; ++i) { *bufp++ = sep[i]; } SEXP const* p_names = STRING_PTR_RO(names); for (R_len_t i = 0; i < n; ++i) { const char* inner = CHAR(p_names[i]); int inner_n = strlen(inner); memcpy(bufp, inner, inner_n); bufp[inner_n] = '\0'; SET_STRING_ELT(names, i, r_str(buf)); } UNPROTECT(n_protect); return names; } // [[ register() ]] SEXP vctrs_chr_paste_prefix(SEXP names, SEXP prefix, SEXP sep) { return r_chr_paste_prefix(names, r_chr_get_c_string(prefix, 0), r_chr_get_c_string(sep, 0)); } // [[ include("names.h") ]] SEXP r_seq_chr(const char* prefix, R_len_t n) { int total_len = 24 + strlen(prefix) + 1; R_CheckStack2(total_len); char buf[total_len]; return r_chr_iota(n, buf, total_len, prefix); } // Initialised at load time SEXP syms_set_rownames_fallback = NULL; SEXP fns_set_rownames_fallback = NULL; static SEXP set_rownames_fallback(SEXP x, SEXP names) { return vctrs_dispatch2(syms_set_rownames_fallback, fns_set_rownames_fallback, syms_x, x, syms_names, names); } // Initialised at load time SEXP syms_set_names_fallback = NULL; SEXP fns_set_names_fallback = NULL; static SEXP set_names_fallback(SEXP x, SEXP names) { return vctrs_dispatch2(syms_set_names_fallback, fns_set_names_fallback, syms_x, x, syms_names, names); } static void check_names(SEXP x, SEXP names) { if (names == R_NilValue) { return; } if (TYPEOF(names) != STRSXP) { Rf_errorcall( R_NilValue, "`names` must be a character vector, not a %s.", Rf_type2char(TYPEOF(names)) ); } R_len_t x_size = vec_size(x); R_len_t names_size = vec_size(names); if (x_size != names_size) { Rf_errorcall( R_NilValue, "The size of `names`, %i, must be the same as the size of `x`, %i.", names_size, x_size ); } } SEXP vec_set_rownames(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) { if (!proxy && OBJECT(x)) { return set_rownames_fallback(x, names); } int nprot = 0; SEXP dim_names = Rf_getAttrib(x, R_DimNamesSymbol); // Early exit when no new row names and no existing row names if (names == R_NilValue) { if (dim_names == R_NilValue || VECTOR_ELT(dim_names, 0) == R_NilValue) { return x; } } x = PROTECT_N(vec_clone_referenced(x, owned), &nprot); if (dim_names == R_NilValue) { dim_names = PROTECT_N(Rf_allocVector(VECSXP, vec_dim_n(x)), &nprot); } else { // Also clone attribute dim_names = PROTECT_N(Rf_shallow_duplicate(dim_names), &nprot); } SET_VECTOR_ELT(dim_names, 0, names); Rf_setAttrib(x, R_DimNamesSymbol, dim_names); UNPROTECT(nprot); return x; } SEXP vec_set_df_rownames(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) { if (names == R_NilValue) { if (rownames_type(df_rownames(x)) != ROWNAMES_IDENTIFIERS) { return(x); } x = PROTECT(vec_clone_referenced(x, owned)); init_compact_rownames(x, vec_size(x)); UNPROTECT(1); return x; } // Repair row names silently if (!proxy) { names = vec_as_names(names, p_unique_repair_silent_opts); } PROTECT(names); x = PROTECT(vec_clone_referenced(x, owned)); Rf_setAttrib(x, R_RowNamesSymbol, names); UNPROTECT(2); return x; } // FIXME: Do we need to get the vec_proxy() and only fall back if it doesn't // exist? See #526 and #531 for discussion and the related issue. SEXP vec_set_names_impl(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) { check_names(x, names); if (is_data_frame(x)) { return vec_set_df_rownames(x, names, proxy, owned); } if (has_dim(x)) { return vec_set_rownames(x, names, proxy, owned); } if (!proxy && OBJECT(x)) { return set_names_fallback(x, names); } // Early exit if no new names and no existing names if (names == R_NilValue && Rf_getAttrib(x, R_NamesSymbol) == R_NilValue) { return x; } x = PROTECT(vec_clone_referenced(x, owned)); Rf_setAttrib(x, R_NamesSymbol, names); UNPROTECT(1); return x; } // [[ include("utils.h"); register() ]] SEXP vec_set_names(SEXP x, SEXP names) { return vec_set_names_impl(x, names, false, VCTRS_OWNED_false); } // [[ include("utils.h") ]] SEXP vec_proxy_set_names(SEXP x, SEXP names, const enum vctrs_owned owned) { return vec_set_names_impl(x, names, true, owned); } SEXP vctrs_validate_name_repair_arg(SEXP arg) { struct name_repair_opts opts = new_name_repair_opts(arg, args_empty, true); if (opts.type == name_repair_custom) { return opts.fn; } else if (Rf_length(arg) != 1) { return r_str_as_character(r_str(name_repair_arg_as_c_string(opts.type))); } else { return arg; } } void stop_name_repair() { Rf_errorcall(R_NilValue, "`.name_repair` must be a string or a function. See `?vctrs::vec_as_names`."); } struct name_repair_opts new_name_repair_opts(SEXP name_repair, struct vctrs_arg* arg, bool quiet) { struct name_repair_opts opts = { .type = 0, .fn = R_NilValue, .arg = arg, .quiet = quiet }; switch (TYPEOF(name_repair)) { case STRSXP: { if (!Rf_length(name_repair)) { stop_name_repair(); } SEXP c = r_chr_get(name_repair, 0); if (c == strings_none) { opts.type = name_repair_none; } else if (c == strings_minimal) { opts.type = name_repair_minimal; } else if (c == strings_unique) { opts.type = name_repair_unique; } else if (c == strings_universal) { opts.type = name_repair_universal; } else if (c == strings_check_unique) { opts.type = name_repair_check_unique; } else { Rf_errorcall(R_NilValue, "`.name_repair` can't be \"%s\". See `?vctrs::vec_as_names`.", CHAR(c)); } return opts; } case LANGSXP: opts.fn = r_as_function(name_repair, ".name_repair"); opts.type = name_repair_custom; return opts; case CLOSXP: opts.fn = name_repair; opts.type = name_repair_custom; return opts; default: stop_name_repair(); } never_reached("new_name_repair_opts"); } // [[ include("vctrs.h") ]] const char* name_repair_arg_as_c_string(enum name_repair_type type) { switch (type) { case name_repair_none: return "none"; case name_repair_minimal: return "minimal"; case name_repair_unique: return "unique"; case name_repair_universal: return "universal"; case name_repair_check_unique: return "check_unique"; case name_repair_custom: return "custom"; } never_reached("name_repair_arg_as_c_string"); } static void vec_validate_minimal_names(SEXP names, R_len_t n) { if (names == R_NilValue) { Rf_errorcall(R_NilValue, "Names repair functions can't return `NULL`."); } if (TYPEOF(names) != STRSXP) { Rf_errorcall(R_NilValue, "Names repair functions must return a character vector."); } if (n >= 0 && Rf_length(names) != n) { Rf_errorcall(R_NilValue, "Repaired names have length %d instead of length %d.", Rf_length(names), n); } if (r_chr_has_string(names, NA_STRING)) { Rf_errorcall(R_NilValue, "Names repair functions can't return `NA` values."); } } SEXP vctrs_validate_minimal_names(SEXP names, SEXP n_) { R_len_t n = -1; if (TYPEOF(n_) == INTSXP) { if (Rf_length(n_) != 1) { stop_internal("vctrs_validate_minimal_names", "`n` must be a single number."); } n = INTEGER(n_)[0]; } vec_validate_minimal_names(names, n); return names; } struct name_repair_opts unique_repair_default_opts; struct name_repair_opts unique_repair_silent_opts; void vctrs_init_names(SEXP ns) { syms_set_rownames_fallback = Rf_install("set_rownames_fallback"); syms_set_names_fallback = Rf_install("set_names_fallback"); syms_as_universal_names = Rf_install("as_universal_names"); syms_validate_unique_names = Rf_install("validate_unique"); fns_set_rownames_fallback = r_env_get(ns, syms_set_rownames_fallback); fns_set_names_fallback = r_env_get(ns, syms_set_names_fallback); fns_as_universal_names = r_env_get(ns, syms_as_universal_names); fns_validate_unique_names = r_env_get(ns, syms_validate_unique_names); syms_glue_as_name_spec = Rf_install("glue_as_name_spec"); fns_glue_as_name_spec = r_env_get(ns, syms_glue_as_name_spec); syms_internal_spec = Rf_install("_spec"); unique_repair_default_opts.type = name_repair_unique; unique_repair_default_opts.fn = R_NilValue; unique_repair_default_opts.quiet = false; unique_repair_silent_opts.type = name_repair_unique; unique_repair_silent_opts.fn = R_NilValue; unique_repair_silent_opts.quiet = true; } vctrs/src/order-radix.c0000644000176200001440000036424614042540502014601 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" #include "utils.h" #include "lazy.h" #include "type-data-frame.h" #include "translate.h" #include "order-radix.h" #include "order-groups.h" #include "order-truelength.h" #include "order-sortedness.h" #include "order-transform.h" // ----------------------------------------------------------------------------- /* * High level description of `vec_order()` * * Heavily inspired by `radixsort.c` in base R and `forder()` from data.table * https://github.com/wch/r-source/blob/trunk/src/main/radixsort.c * https://github.com/Rdatatable/data.table/blob/master/src/forder.c * * Additional resources about radix sorting: * http://codercorner.com/RadixSortRevisited.htm * http://stereopsis.com/radix.html * * The very end of this has a MSB radix sort implementation * https://eternallyconfuzzled.com/sorting-c-introduction-to-the-automatic-ordering-of-data * * ----------------------------------------------------------------------------- * Integers * * This uses a combination of 3 ordering algorithms. * * - `int_order_insertion()` - An insertion sort is used when `x` is very * small. This has less overhead than the counting or radix sort and is * faster for small input. * * - `int_order_counting()` - A counting sort is used when `x` has a range * of less than `INT_ORDER_COUNTING_RANGE_BOUNDARY`. For integers with a * small range like this, the bucketing in the counting sort can be very * fast when compared with the recursive multipass approach of the radix sort. * * - `int_order_radix()` - A radix sort is used for everything else. * This is a MSB radix sort. It orders the vector 1 byte (8 bits) at a time, * so for a 4 byte int this makes a maximum of 4 passes over each integer. * It orders from most significant byte to least significant. After each * pass, there are 256 buckets (1 for each possible byte value). Each bucket * is then ordered separately on the next byte. This happens recursively for * the 4 passes. When the buckets get small enough in size, the insertion * sort is used to finish them off. * * For radix sorting, we have to use unsigned types for bit shifting to * work reliably. We map `int` to `uint32_t` in a way that preserves order, * and also handle `na_last` and `decreasing` in this mapping. This all happens * in `int_adjust()`. It is assumed and checked at load time that * `sizeof(int) == 4`. * * ----------------------------------------------------------------------------- * Doubles * * This uses a combination of 2 ordering algorithms: * * - `dbl_order_insertion()` - An insertion sort is used when `x` is very small. * * - `dbl_order_radix()` - This is similar to `int_order_radix()`, see above, * but makes a max of 8 passes over the data. * * For doubles, we assume `sizeof(double) == 8`, which should pretty much be * ensured by IEEE 754 specifications. * * For the mapping here, it is possible to map `double -> uint64_t` in an * order preserving way. This is very cool, and involves always flipping the * sign bit of the value, and flipping all other bits if the value was negative. * This is described more in: http://stereopsis.com/radix.html. * This is implemented in `dbl_adjust()` which also handles `na_last` and * `decreasing`. For `na_last`, we treat `NA_real_` and `NaN` equivalently. * Base R does as well, but data.table does not. * * ----------------------------------------------------------------------------- * Characters * * Character vector ordering is a bit trickier than integers or doubles. It * uses two internal details in R: * * - CHARSXP values in R are in a global string pool, and * repeated strings like `c("hi", "hi")` both point to the same CHARSXP. * * - There is a property that all vectors have called the TRUELENGTH. This is * used to overallocate in lists, but is otherwise unused * in CHARSXPs. The truelength is stored as a `r_ssize`. * * Character ordering is achieved by first iterating through `x` and extracting * the unique values by using the TRUELENGTH. If the truelength is positive * or zero, we save it as an unseen string and set the truelength to `-1`. * Otherwise if it is negative we assume we have seen it already. At the * end of this we have a vector of unique strings. We order this with * `chr_order_radix()`. Then we iterate over the now sorted unique strings * and set their truelength to `-i - 1` (where `i` is the index while * iterating). This marks the unique strings with their ordering (as a negative * value to be different from R) and also updates the original character vector. * We then iterate through the original vector again, plucking off the now * updated truelength integer values. This gives us an integer proxy for the * ordering, which we can run through `int_order_chunk()` to get the final * ordering. * * The ordering of unique characters uses a combination of 2 ordering * algorithms: * * - `chr_order_insertion()` - Used when `x` is small. * * - `chr_order_radix()` - Same principle as integer/double ordering, but * we iterate 1 character at a time. We assume a C locale here. Any non-ASCII * and non-UTF-8 strings are translated up front by * `vec_normalize_encoding()`. * * ----------------------------------------------------------------------------- * Logicals * * Uses the same infrastructure as integers. Because the number of possible * unique values is low, this will always use either an insertion sort for * small vectors, or a counting sort for large ones. * * ----------------------------------------------------------------------------- * Complex * * We treat complex as a data frame of two double columns. We order the * real part first using `dbl_order_chunk()`, then order the imaginary part also * using `dbl_order_chunk()`. * * ----------------------------------------------------------------------------- * Data frames * * Multi-column data frame ordering uses the same principle as MSB ordering. * It starts with the first column (the most "significant" one) and orders it. * While ordering the column, group sizes are tracked ("groups" are duplicate * values in the column). The next column is broken into chunks corresponding * to these group sizes from the first column, and the chunks are ordered * individually. While ordering the chunks of the 2nd column, group sizes are * again tracked to use in subsequent columns. */ // ----------------------------------------------------------------------------- #define UINT8_MAX_SIZE (UINT8_MAX + 1) /* * Maximum number of passes required to completely sort ints and doubles */ #define INT_MAX_RADIX_PASS 4 #define DBL_MAX_RADIX_PASS 8 /* * Maximum range allowed when deciding whether or not to use a counting sort * vs a radix sort. Counting sort is somewhat faster when less than this * boundary value. */ #define INT_ORDER_COUNTING_RANGE_BOUNDARY 100000 /* * Size of `x` that determines when an insertion sort should be used. Seems * to work better than 256 (from limited testing), base R uses 200. * Somewhat based on this post: * https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/ */ #define ORDER_INSERTION_BOUNDARY 128 /* * Adjustments for translating current `pass` into the current `radix` byte * that we need to shift to. */ #define PASS_TO_RADIX(X, MAX) (MAX - 1 - X) #define SHIFT_ADJUSTMENT -CHAR_BIT // ----------------------------------------------------------------------------- static SEXP vec_order(SEXP x, SEXP decreasing, SEXP na_last, SEXP chr_transform); // [[ register() ]] SEXP vctrs_order(SEXP x, SEXP direction, SEXP na_value, SEXP chr_transform) { SEXP decreasing = PROTECT(parse_direction(direction)); SEXP na_last = PROTECT(parse_na_value(na_value)); SEXP out = vec_order(x, decreasing, na_last, chr_transform); UNPROTECT(2); return out; } static SEXP vec_order_impl(SEXP x, SEXP decreasing, SEXP na_last, SEXP chr_transform, bool locations); static SEXP vec_order(SEXP x, SEXP decreasing, SEXP na_last, SEXP chr_transform) { return vec_order_impl(x, decreasing, na_last, chr_transform, false); } // ----------------------------------------------------------------------------- static SEXP vec_order_locs(SEXP x, SEXP decreasing, SEXP na_last, SEXP chr_transform); // [[ register() ]] SEXP vctrs_order_locs(SEXP x, SEXP direction, SEXP na_value, SEXP chr_transform) { SEXP decreasing = PROTECT(parse_direction(direction)); SEXP na_last = PROTECT(parse_na_value(na_value)); SEXP out = vec_order_locs(x, decreasing, na_last, chr_transform); UNPROTECT(2); return out; } static SEXP vec_order_locs(SEXP x, SEXP decreasing, SEXP na_last, SEXP chr_transform) { return vec_order_impl(x, decreasing, na_last, chr_transform, true); } // ----------------------------------------------------------------------------- static SEXP vec_order_locs_impl(SEXP x, const int* p_o, const int* p_sizes, r_ssize n_groups); static inline size_t vec_compute_n_bytes_lazy_raw(SEXP x, const enum vctrs_type type); static inline size_t vec_compute_n_bytes_lazy_counts(SEXP x, const enum vctrs_type type); static SEXP vec_order_expand_args(SEXP x, SEXP decreasing, SEXP na_last); static void vec_order_switch(SEXP x, SEXP decreasing, SEXP na_last, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); /* * Returns an integer vector of the ordering unless `locations` is true. In * that case it returns a data frame with two columns. The first is the * `key` which is a slice of `x` containing the ordered unique values, and * the second is `loc` which is a list column of integer vectors containing * the locations in `x` corresponding to each key. */ static SEXP vec_order_impl(SEXP x, SEXP decreasing, SEXP na_last, SEXP chr_transform, bool locations) { int n_prot = 0; // Call on `x` before potentially flattening cols with `vec_proxy_order()` SEXP args = PROTECT_N(vec_order_expand_args(x, decreasing, na_last), &n_prot); decreasing = VECTOR_ELT(args, 0); na_last = VECTOR_ELT(args, 1); SEXP proxy = PROTECT_N(vec_proxy_order(x), &n_prot); proxy = PROTECT_N(vec_normalize_encoding(proxy), &n_prot); proxy = PROTECT_N(proxy_chr_transform(proxy, chr_transform), &n_prot); r_ssize size = vec_size(proxy); const enum vctrs_type type = vec_proxy_typeof(proxy); // Compute the maximum size required for auxiliary working memory const size_t n_bytes_lazy_raw = vec_compute_n_bytes_lazy_raw(proxy, type); // Auxiliary vectors to hold intermediate results while ordering. // If `x` is a data frame we allocate enough room for the largest column type. struct lazy_raw* p_lazy_x_chunk = new_lazy_raw(size, n_bytes_lazy_raw); PROTECT_LAZY_VEC(p_lazy_x_chunk, &n_prot); struct lazy_raw* p_lazy_x_aux = new_lazy_raw(size, n_bytes_lazy_raw); PROTECT_LAZY_VEC(p_lazy_x_aux, &n_prot); struct lazy_raw* p_lazy_o_aux = new_lazy_raw(size, sizeof(int)); PROTECT_LAZY_VEC(p_lazy_o_aux, &n_prot); struct lazy_raw* p_lazy_bytes = new_lazy_raw(size, sizeof(uint8_t)); PROTECT_LAZY_VEC(p_lazy_bytes, &n_prot); // Compute the maximum size of the `counts` vector needed during radix // ordering. 4 * 256 for integers, 8 * 256 for doubles. size_t n_bytes_lazy_counts = vec_compute_n_bytes_lazy_counts(proxy, type); r_ssize size_lazy_counts = UINT8_MAX_SIZE * n_bytes_lazy_counts; struct lazy_raw* p_lazy_counts = new_lazy_raw(size_lazy_counts, sizeof(r_ssize)); PROTECT_LAZY_VEC(p_lazy_counts, &n_prot); // Determine if group tracking can be turned off. // We turn if off if ordering non-data frame input as long as // locations haven't been requested by the user. // It is more efficient to ignore it when possible. bool force_groups = locations; bool ignore_groups = force_groups ? false : (is_data_frame(proxy) ? false : true); // Construct the two sets of group info needed for tracking groups. // We switch between them after each data frame column is processed. struct group_info* p_group_info0 = new_group_info(); PROTECT_GROUP_INFO(p_group_info0, &n_prot); struct group_info* p_group_info1 = new_group_info(); PROTECT_GROUP_INFO(p_group_info1, &n_prot); struct group_infos* p_group_infos = new_group_infos( p_group_info0, p_group_info1, size, force_groups, ignore_groups ); PROTECT_GROUP_INFOS(p_group_infos, &n_prot); // Used for character ordering - lazily generated to be fast // when not ordering character vectors struct truelength_info* p_truelength_info = new_truelength_info(size); PROTECT_TRUELENGTH_INFO(p_truelength_info, &n_prot); struct order* p_order = new_order(size); PROTECT_ORDER(p_order, &n_prot); vec_order_switch( proxy, decreasing, na_last, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); // Return ordered location info rather than ordering if (locations) { struct group_info* p_group_info = groups_current(p_group_infos); const int* p_sizes = p_group_info->p_data; r_ssize n_groups = p_group_info->n_groups; const int* p_o = p_order->p_data; SEXP out = vec_order_locs_impl(x, p_o, p_sizes, n_groups); UNPROTECT(n_prot); return out; } UNPROTECT(n_prot); return p_order->data; } // ----------------------------------------------------------------------------- static SEXP vec_order_locs_impl(SEXP x, const int* p_o, const int* p_sizes, r_ssize n_groups) { SEXP loc = PROTECT(Rf_allocVector(VECSXP, n_groups)); SEXP key_loc = PROTECT(Rf_allocVector(INTSXP, n_groups)); int* p_key_loc = INTEGER(key_loc); int start = 0; for (r_ssize i = 0; i < n_groups; ++i) { p_key_loc[i] = p_o[start]; const int size = p_sizes[i]; SEXP elt = Rf_allocVector(INTSXP, size); SET_VECTOR_ELT(loc, i, elt); int* p_elt = INTEGER(elt); R_len_t k = 0; for (int j = 0; j < size; ++j) { p_elt[k] = p_o[start]; ++start; ++k; } } SEXP key = PROTECT(vec_slice(x, key_loc)); // Construct output data frame SEXP out = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(out, 0, key); SET_VECTOR_ELT(out, 1, loc); SEXP names = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(names, 0, strings_key); SET_STRING_ELT(names, 1, strings_loc); Rf_setAttrib(out, R_NamesSymbol, names); out = new_data_frame(out, n_groups); UNPROTECT(5); return out; } // ----------------------------------------------------------------------------- static void df_order(SEXP x, SEXP decreasing, SEXP na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static void vec_order_base_switch(SEXP x, bool decreasing, bool na_last, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static void vec_order_switch(SEXP x, SEXP decreasing, SEXP na_last, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { if (type == vctrs_type_dataframe) { df_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); return; } if (r_length(decreasing) != 1) { Rf_errorcall( R_NilValue, "Internal error: Size of decreasing != 1, but " "`vec_order_expand_args()` didn't catch it." ); } if (r_length(na_last) != 1) { Rf_errorcall( R_NilValue, "Internal error: Size of na_last != 1, but " "`vec_order_expand_args()` didn't catch it." ); } bool c_decreasing = LOGICAL(decreasing)[0]; bool c_na_last = LOGICAL(na_last)[0]; vec_order_base_switch( x, c_decreasing, c_na_last, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); } // ----------------------------------------------------------------------------- static void int_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void lgl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void dbl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void cpl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void chr_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); // Used on bare vectors and the first column of data frame `x`s static void vec_order_base_switch(SEXP x, bool decreasing, bool na_last, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { switch (type) { case vctrs_type_integer: { int_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_logical: { lgl_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_double: { dbl_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_complex: { cpl_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_character: { chr_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); break; } case vctrs_type_dataframe: { Rf_errorcall(R_NilValue, "Internal error: Data frames should have been handled by now"); } default: { Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } } // ----------------------------------------------------------------------------- static void int_order_chunk_impl(bool decreasing, bool na_last, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); /* * These are the main entry points for integer ordering. They are nearly * identical except `int_order()` assumes that `p_x` cannot be * modified directly and is user input. * * `int_order_chunk()` assumes `p_x` is modifiable by reference. It is called * when iterating over data frame columns and `p_x` is the 2nd or greater * column, in which case `p_x` is really a chunk of that column that has been * copied into `x_chunk`. * * `int_order()` assumes `p_x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly unless a * counting sort is going to be used, in which case `p_x` can be used directly. */ static void int_order_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { void* p_x_chunk = p_lazy_x_chunk->p_data; const enum vctrs_sortedness sortedness = int_sortedness( p_x_chunk, size, decreasing, na_last, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } int_order_chunk_impl( decreasing, na_last, size, p_x_chunk, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void int_order_impl(const int* p_x, bool decreasing, bool na_last, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void int_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const int* p_x = INTEGER_RO(x); const enum vctrs_sortedness sortedness = int_sortedness( p_x, size, decreasing, na_last, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } int_order_impl( p_x, decreasing, na_last, size, true, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void int_adjust(const bool decreasing, const bool na_last, const r_ssize size, void* p_x); static void int_compute_range(const int* p_x, r_ssize size, int* p_x_min, uint32_t* p_range); static void int_order_counting(const int* p_x, r_ssize size, int x_min, uint32_t range, bool initialized, bool decreasing, bool na_last, int* p_o, int* p_o_aux, struct group_infos* p_group_infos); static void int_order_insertion(const r_ssize size, uint32_t* p_x, int* p_o, struct group_infos* p_group_infos); static void int_order_radix(const r_ssize size, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos); /* * `int_order_chunk_impl()` is used by both `int_order_chunk()` and by * `chr_order_chunk()` */ static void int_order_chunk_impl(bool decreasing, bool na_last, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { if (size <= ORDER_INSERTION_BOUNDARY) { int_adjust(decreasing, na_last, size, p_x); int_order_insertion(size, p_x, p_o, p_group_infos); return; } int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint32_t range; int x_min; int_compute_range(p_x, size, &x_min, &range); /* * If in counting order range and on the second or higher column, we will * need `p_o_aux` as working memory. At this point, `p_o` will have been * initialized from ordering the first column. */ if (range < INT_ORDER_COUNTING_RANGE_BOUNDARY) { const bool initialized = true; int_order_counting( p_x, size, x_min, range, initialized, decreasing, na_last, p_o, p_o_aux, p_group_infos ); return; } uint32_t* p_x_aux = (uint32_t*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); int_adjust(decreasing, na_last, size, p_x); int_order_radix( size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } /* * `int_order_impl()` is used by both `int_order()` and by * `chr_order()`. * * For `chr_order()`, the TRUELENGTH values will already be extracted into * `p_lazy_x_chunk`. In this case, we set `copy = false` to tell * `int_order_impl()` to use `p_lazy_x_chunk` directly rather than copying `p_x` * over to `p_x_chunk`. */ static void int_order_impl(const int* p_x, bool decreasing, bool na_last, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { if (size <= ORDER_INSERTION_BOUNDARY) { int* p_o = init_order(p_order); void* p_x_chunk; if (copy) { p_x_chunk = init_lazy_raw(p_lazy_x_chunk); memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); } else { p_x_chunk = p_lazy_x_chunk->p_data; } int_adjust(decreasing, na_last, size, p_x_chunk); int_order_insertion(size, p_x_chunk, p_o, p_group_infos); return; } uint32_t range; int x_min; int_compute_range(p_x, size, &x_min, &range); /* * If in counting order range and on the first column / single vector, * `p_o_aux` won't be used, so no need to initialize it. * * Also, `p_o` will be filled directly, so for performance we don't * initialize its order. */ if (range < INT_ORDER_COUNTING_RANGE_BOUNDARY) { const bool initialized = false; int* p_o = p_order->p_data; int* p_o_aux = (int*) p_lazy_o_aux->p_data; int_order_counting( p_x, size, x_min, range, initialized, decreasing, na_last, p_o, p_o_aux, p_group_infos ); p_order->initialized = true; return; } int* p_o = init_order(p_order); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint32_t* p_x_aux = (uint32_t*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); void* p_x_chunk; if (copy) { p_x_chunk = init_lazy_raw(p_lazy_x_chunk); memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); } else { p_x_chunk = p_lazy_x_chunk->p_data; } int_adjust(decreasing, na_last, size, p_x_chunk); int_order_radix( size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } // ----------------------------------------------------------------------------- static inline uint32_t int_map_to_uint32(int x); /* * - Shifts the integer elements of `p_x` in a way that correctly maintains * ordering for `na_last` and `decreasing` * * - After shifting, also maps the elements from `int32_t` to `uint32_t` and * stores them back in `p_x`. * * - Used before both the integer insertion sort and radix sort, which both * expect their input to already have been "adjusted" for `na_last` and * `decreasing` and expect a `uint32_t` pointer input. * * - If `na_last = true`, `NA` is always the maximum element, so we set it to * `UINT32_MAX`. In that case, we also shift all non-NA values down by 1 to * make room for it (defined by `na_shift`). * * - If `na_last = false`, we set `NA` to the minimum element of 0. * * - The multiplication by `direction` applies to non-NA values and correctly * orders inputs based on whether we are in a decreasing order or not. */ static void int_adjust(const bool decreasing, const bool na_last, const r_ssize size, void* p_x) { const int direction = decreasing ? -1 : 1; const uint32_t na_u32 = na_last ? UINT32_MAX : 0; const int na_shift = na_last ? -1 : 0; const int* p_x_int = (const int*) p_x; uint32_t* p_x_u32 = (uint32_t*) p_x; for (r_ssize i = 0; i < size; ++i) { int elt = p_x_int[i]; if (elt == NA_INTEGER) { p_x_u32[i] = na_u32; continue; } elt = elt * direction + na_shift; p_x_u32[i] = int_map_to_uint32(elt); } } #define HEX_UINT32_SIGN_BIT 0x80000000u // Flipping the sign bit is all we need to do to map in an order preserving way. // [INT32_MIN, INT32_MAX] => [0, UINT32_MAX] static inline uint32_t int_map_to_uint32(int x) { return ((uint32_t) x) ^ HEX_UINT32_SIGN_BIT; } #undef HEX_UINT32_SIGN_BIT // ----------------------------------------------------------------------------- /* * `int_compute_range()` computes the range of all values in `p_x`. * It is used by counting sort to computes buckets with `p_x[i] - x_min`. * * - `p_range` and `p_x_min` are updated on the way out to retain both the * range and the minimum value. * * - `NA` values are skipped over. If all values are `NA`, we defer to radix * sort (which definitely can handle that case) by returning a `range` of the * maximum uint32 value (which will be greater than * INT_ORDER_COUNTING_RANGE_BOUNDARY). */ static void int_compute_range(const int* p_x, r_ssize size, int* p_x_min, uint32_t* p_range) { uint32_t range = UINT32_MAX; int x_min = NA_INTEGER; int x_max = NA_INTEGER; r_ssize i = 0; // Find first non-NA value while (i < size) { const int elt = p_x[i]; if (elt == NA_INTEGER) { ++i; continue; } x_min = elt; x_max = elt; range = 0; // Bump to next `i` since we know this one's value ++i; break; } // All NAs - Return max range to signal to use radix sort if (x_min == NA_INTEGER) { *p_x_min = x_min; *p_range = range; return; } // Now that we have initial values, iterate through the rest // to compute the final min/max. for (r_ssize j = i; j < size; ++j) { const int elt = p_x[j]; if (elt == NA_INTEGER) { continue; } if (elt > x_max) { x_max = elt; } else if (elt < x_min) { x_min = elt; } } /* * - Max possible range is from * `c(.Machine$integer.max, -.Machine$integer.max)` which is exactly the * max of a `uint32_t`. * - We need to go up to `intmax_t` to avoid intermediate overflow. * - `+ 1` to get an inclusive range on both ends. */ range = (uint32_t) intmax_add(intmax_subtract(x_max, x_min), 1); *p_x_min = x_min; *p_range = range; } // ----------------------------------------------------------------------------- /* * The counting sort expects `p_x` to be unadjusted (i.e. `int_adjust()` has * not been used). It handles `decreasing` and `na_last` internally. * * Counting sort is used when `p_x` has a range less than * `INT_ORDER_COUNTING_RANGE_BOUNDARY`. In these cases radix sort * doesn't spread out values as much when looking at individual radixes. * * Counting sort does not modify `p_x` in any way. */ static void int_order_counting(const int* p_x, r_ssize size, int x_min, uint32_t range, bool initialized, bool decreasing, bool na_last, int* p_o, int* p_o_aux, struct group_infos* p_group_infos) { // - Only allocate this once (counts are reset to 0 at end) // - Allocating as static allows us to allocate an array this large // - `+ 1` to ensure there is room for the extra `NA` bucket static r_ssize p_counts[INT_ORDER_COUNTING_RANGE_BOUNDARY + 1] = { 0 }; // `NA` values get counted in the last used bucket uint32_t na_bucket = range; r_ssize na_count = 0; // Sanity check if (range > INT_ORDER_COUNTING_RANGE_BOUNDARY) { Rf_errorcall(R_NilValue, "Internal error: `range > INT_ORDER_COUNTING_RANGE_BOUNDARY`."); } // Histogram pass for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; if (elt == NA_INTEGER) { ++na_count; } else { const uint32_t bucket = elt - x_min; ++p_counts[bucket]; } } // Add `NA` counts once at the end p_counts[na_bucket] = na_count; r_ssize cumulative = 0; // Handle decreasing/increasing by altering the order in which // counts are accumulated const int direction = decreasing ? -1 : 1; r_ssize j = decreasing ? range - 1 : 0; // `na_last = false` pushes NA counts to the front if (!na_last && na_count != 0) { p_counts[na_bucket] = cumulative; cumulative += na_count; groups_size_maybe_push(na_count, p_group_infos); } // Accumulate counts, skip zeros for (uint32_t i = 0; i < range; ++i) { r_ssize count = p_counts[j]; if (count == 0) { j += direction; continue; } // Insert current cumulative value, then increment p_counts[j] = cumulative; cumulative += count; // At this point we will handle this group completely groups_size_maybe_push(count, p_group_infos); j += direction; } // `na_last = true` pushes NA counts to the back if (na_last && na_count != 0) { p_counts[na_bucket] = cumulative; groups_size_maybe_push(na_count, p_group_infos); } // If order is not initialized, we are on the first column / atomic vector // and can place the order directly into the result. Much faster than // initializing, placing in `p_o_aux`, and copying back over. if (initialized) { for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; uint32_t bucket = (elt == NA_INTEGER) ? na_bucket : elt - x_min; const r_ssize loc = p_counts[bucket]++; p_o_aux[loc] = p_o[i]; } memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); } else { for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; uint32_t bucket = (elt == NA_INTEGER) ? na_bucket : elt - x_min; const r_ssize loc = p_counts[bucket]++; p_o[loc] = i + 1; } } // Reset counts for next column. // Only reset what we might have touched. // `+ 1` to reset the NA bucket too. memset(p_counts, 0, (range + 1) * sizeof(r_ssize)); } // ----------------------------------------------------------------------------- /* * `int_order_insertion()` is used in two ways: * - It is how we "finish off" radix sorts rather than deep recursion. * - If we have an original `x` input that is small enough, we just immediately * insertion sort it. * * For small inputs, it is much faster than deeply recursing with * radix ordering. * * Insertion ordering expects that `p_x` has been adjusted with `int_adjust()` * which takes care of `na_last` and `decreasing` and also maps `int32_t` to * `uint32_t` ahead of time. */ static void int_order_insertion(const r_ssize size, uint32_t* p_x, int* p_o, struct group_infos* p_group_infos) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const uint32_t x_elt = p_x[i]; const int o_elt = p_o[i]; r_ssize j = i - 1; while (j >= 0) { const uint32_t x_cmp_elt = p_x[j]; if (x_elt >= x_cmp_elt) { break; } int o_cmp_elt = p_o[j]; // Swap p_x[j + 1] = x_cmp_elt; p_o[j + 1] = o_cmp_elt; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_o[j + 1] = o_elt; } // We've ordered a small chunk, we need to push at least one group size. // Depends on the post-ordered results so we have to do this // in a separate loop. r_ssize group_size = 1; uint32_t previous = p_x[0]; for (r_ssize i = 1; i < size; ++i) { const uint32_t current = p_x[i]; // Continue the current group run if (current == previous) { ++group_size; continue; } // Push current run size and reset size tracker groups_size_maybe_push(group_size, p_group_infos); group_size = 1; previous = current; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); } // ----------------------------------------------------------------------------- static uint8_t int_compute_skips(const uint32_t* p_x, r_ssize size, bool* p_skips); static void int_order_radix_recurse(const r_ssize size, const uint8_t pass, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos); /* * Integer radix ordering entry point * * Expects that `int_adjust()` has been called on `p_x`, which takes care * of `na_last` and `decreasing` and also maps `int32_t` to `uint32_t` once * up front so we don't have to do it for each radix pass. * * Sorts `p_x` and `p_o` in place */ static void int_order_radix(const r_ssize size, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos) { bool p_skips[INT_MAX_RADIX_PASS]; uint8_t pass = int_compute_skips(p_x, size, p_skips); // Skipped all passes - Happens when `x` is 1 value repeated if (pass == INT_MAX_RADIX_PASS) { groups_size_maybe_push(size, p_group_infos); return; } int_order_radix_recurse( size, pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_skips, p_group_infos ); } // ----------------------------------------------------------------------------- static inline uint8_t int_extract_uint32_byte(uint32_t x, uint8_t shift); /* * Recursive function for radix ordering. Orders the current byte, then iterates * over the sub groups and recursively calls itself on each subgroup to order * the next byte. */ static void int_order_radix_recurse(const r_ssize size, const uint8_t pass, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { int_order_insertion(size, p_x, p_o, p_group_infos); return; } // Skip passes where our up front check told us that all bytes were the same uint8_t next_pass = pass + 1; r_ssize* p_counts_next_pass = p_counts + UINT8_MAX_SIZE; while (next_pass < INT_MAX_RADIX_PASS && p_skips[next_pass]) { ++next_pass; p_counts_next_pass += UINT8_MAX_SIZE; } const uint8_t radix = PASS_TO_RADIX(pass, INT_MAX_RADIX_PASS); const uint8_t shift = radix * 8; uint8_t byte = 0; // Histogram for this pass for (r_ssize i = 0; i < size; ++i) { const uint32_t x_elt = p_x[i]; byte = int_extract_uint32_byte(x_elt, shift); p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass == INT_MAX_RADIX_PASS) { // If we are already at the last pass, we are done groups_size_maybe_push(size, p_group_infos); } else { // Otherwise, recurse on next byte using the same `size` since // the group size hasn't changed int_order_radix_recurse( size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Replace with `cumulative` first, then bump `cumulative`. // `p_counts` now represents starting locations for each radix group. p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_o_aux[loc] = p_o[i]; p_x_aux[loc] = p_x[i]; } // Copy back over memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } // Set to zero to clear for subsequent groups p_counts[i] = 0; // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { groups_size_maybe_push(1, p_group_infos); ++p_x; ++p_o; continue; } // Can get here in the case of ties, like c(1L, 1L), which have a // `group_size` of 2 in the last radix, but there is nothing left to // compare so we are done. if (next_pass == INT_MAX_RADIX_PASS) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // Order next byte of this subgroup int_order_radix_recurse( group_size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); p_x += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- /* * Do a parallel histogram run over all 4 passes to determine if any passes * can be skipped (because all bytes were the same) */ static uint8_t int_compute_skips(const uint32_t* p_x, r_ssize size, bool* p_skips) { uint8_t radix_start = PASS_TO_RADIX(0, INT_MAX_RADIX_PASS); uint8_t shift_start = radix_start * 8; for (uint8_t i = 0; i < INT_MAX_RADIX_PASS; ++i) { p_skips[i] = true; } uint8_t p_bytes[INT_MAX_RADIX_PASS]; const uint32_t elt0 = p_x[0]; // Get bytes of first element in MSD->LSD order. // Placed in `p_bytes` in a way that aligns with the `pass` variable for (uint8_t pass = 0, shift = shift_start; pass < INT_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { p_bytes[pass] = int_extract_uint32_byte(elt0, shift); } // Check to see which passes are skippable for (r_ssize i = 1; i < size; ++i) { uint8_t n_skips = INT_MAX_RADIX_PASS; const uint32_t elt = p_x[i]; for (uint8_t pass = 0, shift = shift_start; pass < INT_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { bool skip = p_skips[pass]; if (skip) { p_skips[pass] = (p_bytes[pass] == int_extract_uint32_byte(elt, shift)); } else { --n_skips; } } // No passes are skippable if (n_skips == 0) { break; } } uint8_t pass = 0; // Shift forward to the first pass with varying bytes while (pass < INT_MAX_RADIX_PASS && p_skips[pass]) { ++pass; } return pass; } // ----------------------------------------------------------------------------- // Bytes will be extracted 8 bits at a time. // This is a MSB radix sort, so they are extracted MSB->LSB. static inline uint8_t int_extract_uint32_byte(uint32_t x, uint8_t shift) { return (x >> shift) & UINT8_MAX; } // ----------------------------------------------------------------------------- /* * Entry points for logical ordering. These just use integer infrastructure. */ static void lgl_order_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { int_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void lgl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { int_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } // ----------------------------------------------------------------------------- static void dbl_order_chunk_impl(bool decreasing, bool na_last, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); /* * These are the main entry points for double ordering. They are nearly * identical except `dbl_order()` assumes that `p_x` cannot be * modified directly and is user input. * * `dbl_order_chunk()` assumes `p_x` is modifiable by reference. It is called * when iterating over data frame columns and `p_x` is the 2nd or greater * column, in which case `p_x` is really a chunk of that column that has been * copied into `x_chunk`. * * `dbl_order()` assumes `p_x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly. * * Unlike `int_order_chunk()`, there is no intermediate counting sort, as it is * sort of unclear how to compute the range of a double vector in the same * way, and even after adjusting to a `uint64_t`, it is unlikely that they * have a very small range of values. */ static void dbl_order_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { void* p_x_chunk = p_lazy_x_chunk->p_data; dbl_order_chunk_impl( decreasing, na_last, size, p_x_chunk, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void dbl_order_impl(const double* p_x, bool decreasing, bool na_last, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void dbl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const double* p_x = REAL_RO(x); dbl_order_impl( p_x, decreasing, na_last, size, true, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void dbl_adjust(const bool decreasing, const bool na_last, const r_ssize size, void* p_x); static void dbl_order_insertion(const r_ssize size, uint64_t* p_x, int* p_o, struct group_infos* p_group_infos); static void dbl_order_radix(const r_ssize size, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos); /* * Used by `dbl_order_chunk()` and by `cpl_order()` * * Unlike `int_order_chunk_impl()`, `dbl_order_chunk_impl()` also deals with * sortedness since we don't have an up front sortedness check on complex * vectors. */ static void dbl_order_chunk_impl(bool decreasing, bool na_last, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const enum vctrs_sortedness sortedness = dbl_sortedness( p_x, size, decreasing, na_last, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } dbl_adjust(decreasing, na_last, size, p_x); if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x, p_o, p_group_infos); return; } uint64_t* p_x_aux = (uint64_t*) init_lazy_raw(p_lazy_x_aux); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); dbl_order_radix( size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } /* * Used by `dbl_order()` and by `cpl_order()` * * Unlike `int_order_impl()`, `dbl_order_impl()` also deals with sortedness * since we don't have an up front sortedness check on complex vectors. * * When dealing with complex vectors, `p_x` and `p_lazy_x_chunk->p_data` will * already point to the same memory. In this case, we don't need to copy `p_x` * into `p_lazy_x_chunk`, so we set `copy = false` which tells * `dbl_order_impl()` to just use `p_lazy_x_chunk` directly. */ static void dbl_order_impl(const double* p_x, bool decreasing, bool na_last, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const enum vctrs_sortedness sortedness = dbl_sortedness( p_x, size, decreasing, na_last, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } int* p_o = init_order(p_order); void* p_x_chunk; if (copy) { p_x_chunk = init_lazy_raw(p_lazy_x_chunk); memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); } else { p_x_chunk = p_lazy_x_chunk->p_data; } dbl_adjust(decreasing, na_last, size, p_x_chunk); if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x_chunk, p_o, p_group_infos); return; } uint64_t* p_x_aux = (uint64_t*) init_lazy_raw(p_lazy_x_aux); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); dbl_order_radix( size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } // ----------------------------------------------------------------------------- static inline uint64_t dbl_map_to_uint64(double x); /* * When mapping double -> uint64_t: * * Smallest possible value comes from: * dbl_map_to_uint64(-Inf) -> 4503599627370495 * * One larger is: * dbl_map_to_uint64(-.Machine$double.xmax) -> 4503599627370496 * * Largest possible value comes from: * dbl_map_to_uint64(Inf) -> 18442240474082181120 * * One smaller is: * dbl_map_to_uint64(.Machine$double.xmax) -> 18442240474082181119 * * This gives us room to manually map (depending on `na_last`): * dbl_map_to_uint64(NA_real_) -> UINT64_MAX (or 0 if `na_last = false`) * dbl_map_to_uint64(NaN) -> UINT64_MAX (or 0 if `na_last = false`) */ static void dbl_adjust(const bool decreasing, const bool na_last, const r_ssize size, void* p_x) { const int direction = decreasing ? -1 : 1; const uint64_t na_u64 = na_last ? UINT64_MAX : 0; double* p_x_dbl = (double*) p_x; uint64_t* p_x_u64 = (uint64_t*) p_x; for (r_ssize i = 0; i < size; ++i) { // Flip direction ahead of time. Won't affect `NA_real`, `NaN` values. double elt = p_x_dbl[i] * direction; if (isnan(elt)) { p_x_u64[i] = na_u64; continue; } p_x_u64[i] = dbl_map_to_uint64(elt); } } static inline uint64_t dbl_flip_uint64(uint64_t x); static union { double d; uint64_t u64; } d_u64; /* * Map `double -> `uint64_t` retaining ordering. * * Assumes `x` is not a `NA_real_` or `NaN` value. * Correctly handles `Inf` and `-Inf`. */ static inline uint64_t dbl_map_to_uint64(double x) { // Catch `-0` vs `0` if (x == 0) { x = 0; } // Reinterpret as uint64_t without changing bytes d_u64.d = x; d_u64.u64 = dbl_flip_uint64(d_u64.u64); return d_u64.u64; } #define HEX_UINT64_SIGN 0x8000000000000000u #define HEX_UINT64_ONES 0xffffffffffffffffu // To retain ordering in mapping from double -> uint64_t we always have to // flip the sign bit, and for negative numbers we also flip all of the other // bits. Described more here: http://stereopsis.com/radix.html static inline uint64_t dbl_flip_uint64(uint64_t x) { const uint64_t mask = (x & HEX_UINT64_SIGN) ? HEX_UINT64_ONES : HEX_UINT64_SIGN; return x ^ mask; } #undef HEX_UINT64_SIGN #undef HEX_UINT64_ONES // ----------------------------------------------------------------------------- /* * `dbl_order_insertion()` is used in two ways: * - It is how we "finish off" radix sorts rather than deep recursion. * - If we have an original `x` input that is small enough, we just immediately * insertion sort it. * * For small inputs, it is much faster than deeply recursing with * radix ordering. * * Insertion ordering expects that `p_x` has been adjusted with `dbl_adjust()` * which takes care of `na_last` and `decreasing` and also maps `double` to * `uint64_t` ahead of time. * * It is essentially the same as `int_order_insertion()` with different types. */ static void dbl_order_insertion(const r_ssize size, uint64_t* p_x, int* p_o, struct group_infos* p_group_infos) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const uint64_t x_elt = p_x[i]; const int o_elt = p_o[i]; r_ssize j = i - 1; while (j >= 0) { const uint64_t x_cmp_elt = p_x[j]; if (x_elt >= x_cmp_elt) { break; } int o_cmp_elt = p_o[j]; // Swap p_x[j + 1] = x_cmp_elt; p_o[j + 1] = o_cmp_elt; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_o[j + 1] = o_elt; } // We've ordered a small chunk, we need to push at least one group size. // Depends on the post-ordered results so we have to do this // in a separate loop. r_ssize group_size = 1; uint64_t previous = p_x[0]; for (r_ssize i = 1; i < size; ++i) { const uint64_t current = p_x[i]; // Continue the current group run if (current == previous) { ++group_size; continue; } // Push current run size and reset size tracker groups_size_maybe_push(group_size, p_group_infos); group_size = 1; previous = current; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); } // ----------------------------------------------------------------------------- static uint8_t dbl_compute_skips(const uint64_t* p_x, r_ssize size, bool* p_skips); static void dbl_order_radix_recurse(const r_ssize size, const uint8_t pass, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos); /* * Double radix ordering entry point * * Expects that `dbl_adjust()` has been called on `p_x`, which takes care * of `na_last` and `decreasing` and also maps `double` to `uint64_t` once * up front so we don't have to do it for each radix pass. * * Sorts `p_x` and `p_o` in place */ static void dbl_order_radix(const r_ssize size, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos) { bool p_skips[DBL_MAX_RADIX_PASS]; uint8_t pass = dbl_compute_skips(p_x, size, p_skips); // Skipped all passes - Happens when `x` is 1 value repeated if (pass == DBL_MAX_RADIX_PASS) { groups_size_maybe_push(size, p_group_infos); return; } dbl_order_radix_recurse( size, pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_skips, p_group_infos ); } // ----------------------------------------------------------------------------- static inline uint8_t dbl_extract_uint64_byte(uint64_t x, uint8_t shift); /* * Recursive function for radix ordering. Orders the current byte, then iterates * over the sub groups and recursively calls itself on each subgroup to order * the next byte. * * This needs 8 passes, unlike the 4 required by `int_order_radix()`. */ static void dbl_order_radix_recurse(const r_ssize size, const uint8_t pass, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x, p_o, p_group_infos); return; } // Skip passes where our up front check told us that all bytes were the same uint8_t next_pass = pass + 1; r_ssize* p_counts_next_pass = p_counts + UINT8_MAX_SIZE; while (next_pass < DBL_MAX_RADIX_PASS && p_skips[next_pass]) { ++next_pass; p_counts_next_pass += UINT8_MAX_SIZE; } const uint8_t radix = PASS_TO_RADIX(pass, DBL_MAX_RADIX_PASS); const uint8_t shift = radix * 8; uint8_t byte = 0; // Histogram for (r_ssize i = 0; i < size; ++i) { const uint64_t x_elt = p_x[i]; byte = dbl_extract_uint64_byte(x_elt, shift); p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass == DBL_MAX_RADIX_PASS) { // If we are already at the last pass, we are done groups_size_maybe_push(size, p_group_infos); } else { // Otherwise, recurse on next byte using the same `size` since // the group size hasn't changed dbl_order_radix_recurse( size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Replace with `cumulative` first, then bump `cumulative`. // `p_counts` now represents starting locations for each radix group. p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_o_aux[loc] = p_o[i]; p_x_aux[loc] = p_x[i]; } // Copy back over memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } p_counts[i] = 0; // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { groups_size_maybe_push(1, p_group_infos); ++p_x; ++p_o; continue; } // Can get here in the case of ties, like c(1, 1), which have a // `group_size` of 2 in the last radix, but there is nothing left to // compare so we are done. if (next_pass == DBL_MAX_RADIX_PASS) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // Order next byte of this subgroup dbl_order_radix_recurse( group_size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); p_x += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- /* * Detect completely skippable bytes * * There are 8 passes over a double, 1 for each byte. Often times for the entire * `x` vector a few of those passes are useless because all of the bytes are * the same. This does an up front computation in 1 pass over the data to * determine which bytes are completely skippable. * * It is worth noting that just because byte 0 wasn't skippable doesn't mean * that byte 1 isn't. With the way that doubles are mapped to uint64_t, it * is often the case that, for small doubles, bytes 0-2 aren't skippable but * the rest of them are (for example, this happens with doubles in the range * of 1:128). This provides a nice performance increase there. */ static uint8_t dbl_compute_skips(const uint64_t* p_x, r_ssize size, bool* p_skips) { uint8_t radix_start = PASS_TO_RADIX(0, DBL_MAX_RADIX_PASS); uint8_t shift_start = radix_start * 8; for (uint8_t i = 0; i < DBL_MAX_RADIX_PASS; ++i) { p_skips[i] = true; } uint8_t p_bytes[DBL_MAX_RADIX_PASS]; const uint64_t elt0 = p_x[0]; // Get bytes of first element in MSD->LSD order. // Placed in `p_bytes` in a way that aligns with the `pass` variable for (uint8_t pass = 0, shift = shift_start; pass < DBL_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { p_bytes[pass] = dbl_extract_uint64_byte(elt0, shift); } // Check to see which passes are skippable for (r_ssize i = 1; i < size; ++i) { uint8_t n_skips = DBL_MAX_RADIX_PASS; const uint64_t elt = p_x[i]; for (uint8_t pass = 0, shift = shift_start; pass < DBL_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { bool skip = p_skips[pass]; if (skip) { p_skips[pass] = (p_bytes[pass] == dbl_extract_uint64_byte(elt, shift)); } else { --n_skips; } } // No passes are skippable if (n_skips == 0) { break; } } uint8_t pass = 0; // Shift forward to the first pass with varying bytes while (pass < DBL_MAX_RADIX_PASS && p_skips[pass]) { ++pass; } return pass; } // ----------------------------------------------------------------------------- // Bytes will be extracted 8 bits at a time. // This is a MSB radix sort, so they are extracted MSB->LSB. static inline uint8_t dbl_extract_uint64_byte(uint64_t x, uint8_t shift) { return (x >> shift) & UINT8_MAX; } // ----------------------------------------------------------------------------- /* * `cpl_order()` uses the fact that Rcomplex is really just a rcrd * type of two double vectors. It orders first on the real vector, and then on * the imaginary vector. * * `cpl_order_chunk()` isn't required. It would only be called from data frames * when there is a complex column, but in those cases we split the column * into two double vectors (real / imaginary) and "rerun" the column using * `dbl_order_chunk()`. */ static void cpl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { // We treat complex as a two column data frame, so we have to use group // information for at least the first column. // - If a complex atomic vector is used, `ignore_groups` will be true unless // the user also requested group information. // - If the first column of a df is a complex column, `ignore_groups` will // be false. bool reset_ignore_groups = false; if (p_group_infos->ignore_groups) { p_group_infos->ignore_groups = false; reset_ignore_groups = true; } const Rcomplex* p_x_cpl = COMPLEX_RO(x); // When a complex column is present, // `lazy_x_chunk` and `lazy_x_aux` are created to have the // size of a double vector. double* p_x_chunk_dbl = (double*) init_lazy_raw(p_lazy_x_chunk); // Handle the real portion first for (r_ssize i = 0; i < size; ++i) { p_x_chunk_dbl[i] = p_x_cpl[i].r; } /* * Call double ordering algorithm on real section. * * In this case, both `p_x_chunk_dbl` and `p_lazy_x_chunk` are passed through, * but we set `copy = false` which tells `dbl_order_impl()` not to copy * the input (`p_x_chunk_dbl`) over to the chunk vector of (`p_lazy_x_chunk`). * It has already been done when we extracted the real section. */ dbl_order_impl( p_x_chunk_dbl, decreasing, na_last, size, false, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); // Ordering will now be initialized int* p_o = p_order->p_data; // Reset `ignore_groups` for the second pass if we don't need to track groups. // This happens if an atomic complex vector is passed in and the user // hasn't requested group information. if (reset_ignore_groups) { p_group_infos->ignore_groups = true; } // Get the number of group chunks from the first pass struct group_info* p_group_info_pre = groups_current(p_group_infos); r_ssize n_groups = p_group_info_pre->n_groups; // If there were no ties, we are completely done if (n_groups == size) { return; } // Swap to other group info to prepare for the imaginary section groups_swap(p_group_infos); // Fill with the imaginary portion. // Uses updated ordering to place it in sequential order. for (r_ssize i = 0; i < size; ++i) { const int loc = p_o[i] - 1; p_x_chunk_dbl[i] = p_x_cpl[loc].i; } // Iterate over the group chunks from the first pass for (r_ssize group = 0; group < n_groups; ++group) { r_ssize group_size = p_group_info_pre->p_data[group]; // Fast handling of simplest case if (group_size == 1) { ++p_x_chunk_dbl; ++p_o; groups_size_maybe_push(1, p_group_infos); continue; } dbl_order_chunk_impl( decreasing, na_last, group_size, p_x_chunk_dbl, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); p_x_chunk_dbl += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- static void chr_mark_sorted_uniques(const SEXP* p_x, r_ssize size, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_bytes, struct truelength_info* p_truelength_info); static inline void chr_extract_ordering(const SEXP* p_x, r_ssize size, int* p_x_aux); static void chr_order_radix(const r_ssize size, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes); /* * These are the main entry points for character ordering. * * `chr_order_chunk()` assumes `p_x` is modifiable by reference. It also * assumes that `chr_mark_sorted_uniques()` has already been called. For data * frame columns where `chr_order_chunk()` is called on each group chunk, * `chr_mark_sorted_uniques()` is only called once on the entire column. * * `chr_order()` assumes `x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly. * * `chr_order_chunk()` essentially calls `int_order_chunk()`, however we can't * call it directly because we don't have access to all the required arguments. * Specifically we reuse `p_x` here as the auxiliary data structure for integer * ordering, but to call `int_order_chunk()` we would need the lazy wrapper * for it. * * Because these functions modify TRUELENGTHs, we have to reset them on the * way out. `chr_order()` does it directly, but `chr_order_chunk()` relies * on `df_order()` to do it after the entire column is processed. It is * important to not error inside these functions because the TRUELENGTHs won't * be reset if we do. */ static void chr_order_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { void* p_x_chunk = p_lazy_x_chunk->p_data; const enum vctrs_sortedness sortedness = chr_sortedness( p_x_chunk, size, decreasing, na_last, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } void* p_x_aux = init_lazy_raw(p_lazy_x_aux); // Move integer ordering into `p_x_aux`. // `p_x_aux` is allocated as the larger of `int` and `SEXP*`. chr_extract_ordering(p_x_chunk, size, p_x_aux); /* * Call integer ordering algorithm on TRUELENGTHs. Reuse the chunk memory of * `p_lazy_x_chunk` that held the current `SEXP*` chunk as the new auxiliary * memory since those are no longer needed. It is allocated as the * larger of `int` and `SEXP*` so it is large enough. */ int_order_chunk_impl( decreasing, na_last, size, p_x_aux, p_o, p_lazy_x_chunk, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } struct chr_order_info { SEXP x; bool decreasing; bool na_last; r_ssize size; struct order* p_order; struct lazy_raw* p_lazy_x_chunk; struct lazy_raw* p_lazy_x_aux; struct lazy_raw* p_lazy_o_aux; struct lazy_raw* p_lazy_bytes; struct lazy_raw* p_lazy_counts; struct group_infos* p_group_infos; struct truelength_info* p_truelength_info; }; struct chr_order_cleanup_info { struct truelength_info* p_truelength_info; }; static SEXP chr_order_exec(void* p_data); static void chr_order_cleanup(void* p_data); /* * `chr_order()` directly modifies the `TRUELENGTH()` values of the CHARSXPs * in `x`. These must be reset after the call with `truelength_reset()`. To * ensure that this function is called (even on a longjump), * `R_ExecWithCleanup()` is used. */ static void chr_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { struct chr_order_info info = { .x = x, .decreasing = decreasing, .na_last = na_last, .size = size, .p_order = p_order, .p_lazy_x_chunk = p_lazy_x_chunk, .p_lazy_x_aux = p_lazy_x_aux, .p_lazy_o_aux = p_lazy_o_aux, .p_lazy_bytes = p_lazy_bytes, .p_lazy_counts = p_lazy_counts, .p_group_infos = p_group_infos, .p_truelength_info = p_truelength_info }; struct chr_order_cleanup_info cleanup_info = { .p_truelength_info = p_truelength_info }; R_ExecWithCleanup( chr_order_exec, &info, chr_order_cleanup, &cleanup_info ); } static void chr_order_internal(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static SEXP chr_order_exec(void* p_data) { struct chr_order_info* p_info = (struct chr_order_info*) p_data; chr_order_internal( p_info->x, p_info->decreasing, p_info->na_last, p_info->size, p_info->p_order, p_info->p_lazy_x_chunk, p_info->p_lazy_x_aux, p_info->p_lazy_o_aux, p_info->p_lazy_bytes, p_info->p_lazy_counts, p_info->p_group_infos, p_info->p_truelength_info ); return R_NilValue; } static void chr_order_cleanup(void* p_data) { struct chr_order_cleanup_info* p_info = (struct chr_order_cleanup_info*) p_data; truelength_reset(p_info->p_truelength_info); } static void chr_order_internal(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { const SEXP* p_x = STRING_PTR_RO(x); const enum vctrs_sortedness sortedness = chr_sortedness( p_x, size, decreasing, na_last, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } // Sort unique strings and mark their truelengths with ordering. // Use `p_lazy_x_chunk` as auxiliary memory for `chr_order_radix()` so we // hopefully don't have to also allocate `p_lazy_x_aux`. chr_mark_sorted_uniques( p_x, size, p_lazy_x_chunk, p_lazy_bytes, p_truelength_info ); void* p_x_chunk = init_lazy_raw(p_lazy_x_chunk); // Move integer ordering into `p_x_chunk`. // `p_x_chunk` is allocated as the larger of `int` and `SEXP*`. chr_extract_ordering(p_x, size, p_x_chunk); /* * Call integer ordering algorithm on TRUELENGTHs. * * In this case, both `p_x_chunk` and `p_lazy_x_chunk` are passed through, * but we set `copy = false` which tells `int_order_impl()` not to copy * the input (`p_x_chunk`) over to the chunk vector of (`p_lazy_x_chunk`). * It has already been done when we extracted the truelength ordering. */ int_order_impl( p_x_chunk, decreasing, na_last, size, false, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } // ----------------------------------------------------------------------------- /* * Pull ordering off of marked `p_x` and place it into `p_x_aux` working memory. * We mark the CHARSXP TRUELENGTHs with negative ordering to be different from * what R might use, so that gets reversed here to get the true ordering back. */ static inline void chr_extract_ordering(const SEXP* p_x, r_ssize size, int* p_x_aux) { for (r_ssize i = 0; i < size; ++i) { SEXP elt = p_x[i]; if (elt == NA_STRING) { p_x_aux[i] = NA_INTEGER; continue; } // Negative to flip where we set the order using a negative value. // Cast to `int` because `TRUELENGTH()` returns a `r_ssize`. p_x_aux[i] = (int) -TRUELENGTH(elt); } } // ----------------------------------------------------------------------------- static void chr_mark_uniques(const SEXP* p_x, r_ssize size, struct truelength_info* p_truelength_info); /* * `chr_mark_sorted_uniques()` runs through the strings in `p_x` and places the * unique strings in `p_truelength_info->p_uniques`. It marks the unique ones * with a negative TRUELENGTH as it goes. Since identical strings share the * same CHARSXP, this marks all strings in the vector at once. * * After detecting all unique strings, it sorts them in place with * `chr_order_radix()`. * * Finally, it loops over the now sorted unique strings and marks them with * their ordering (as a negative value). This allows `chr_order_chunk()` to loop * through `p_x` and just pluck off the TRUELENGTH value, which will be an * integer proxy for the value's ordering. * * `truelength_save()` also saves the unique strings and their original * TRUELENGTH values so they can be reset after each column with * `truelength_reset()`. */ static void chr_mark_sorted_uniques(const SEXP* p_x, r_ssize size, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_bytes, struct truelength_info* p_truelength_info) { chr_mark_uniques(p_x, size, p_truelength_info); r_ssize n_uniques = p_truelength_info->size_used; SEXP* p_x_aux = (SEXP*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); // Sorts uniques in ascending order using `p_x_aux` for working memory. // Assumes no `NA`! chr_order_radix( n_uniques, p_truelength_info->max_string_size, p_truelength_info->p_uniques, p_x_aux, p_truelength_info->p_sizes, p_truelength_info->p_sizes_aux, p_bytes ); // Mark unique sorted strings with their order. // Use a negative value to differentiate with R. for (r_ssize i = 0; i < n_uniques; ++i) { SEXP elt = p_truelength_info->p_uniques[i]; SET_TRUELENGTH(elt, -i - 1); } } static void chr_mark_uniques(const SEXP* p_x, r_ssize size, struct truelength_info* p_truelength_info) { for (r_ssize i = 0; i < size; ++i) { SEXP elt = p_x[i]; // `NA_STRING` is replaced by `NA_INTEGER` for use in integer ordering if (elt == NA_STRING) { continue; } r_ssize truelength = TRUELENGTH(elt); // We have already seen and saved this string if (truelength < 0) { continue; } r_ssize elt_size = r_length(elt); // Track max string size to know how deep to recurse if (p_truelength_info->max_string_size < elt_size) { p_truelength_info->max_string_size = elt_size; } // Save the truelength so we can reset it later. // Also saves this unique value so we can order uniques. truelength_save(elt, truelength, elt_size, p_truelength_info); // Mark as negative to note that we have seen this string. // R uses positive or zero truelengths. SET_TRUELENGTH(elt, -1); } } // ----------------------------------------------------------------------------- static bool chr_str_ge(SEXP x, SEXP y, int x_size, const R_len_t pass); /* * Insertion order for character vectors. This occurs in the radix ordering * once we drop below a certain chunk size. * * One optimization done here is to take advantage of the `pass` info, which * will indicate that all characters before this pass are identical already * and don't need to be checked by `strcmp()`. */ static void chr_order_insertion(const r_ssize size, const R_len_t pass, SEXP* p_x, int* p_sizes) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const SEXP x_elt = p_x[i]; const int x_size = p_sizes[i]; r_ssize j = i - 1; while (j >= 0) { const SEXP x_cmp_elt = p_x[j]; if (chr_str_ge(x_elt, x_cmp_elt, x_size, pass)) { break; } int x_cmp_size = p_sizes[j]; // Swap p_x[j + 1] = x_cmp_elt; p_sizes[j + 1] = x_cmp_size; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_sizes[j + 1] = x_size; } } // ----------------------------------------------------------------------------- static void chr_order_radix_recurse(const r_ssize size, const R_len_t pass, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes); /* * Entry point for radix ordering of characters. * * This is different from with integers / doubles because: * - `p_x` will contain only unique strings * - `p_x` will not contain any `NA` strings * - We just need to sort `p_x` in place, no need to track group information, * which is instead done by `int_order_chunk()` later * - The number of passes is variable here, because strings have a variable * length. * - We also track the character sizes because repeated `r_length()` calls * can get expensive over just indexing into the array. */ static void chr_order_radix(const r_ssize size, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes) { R_len_t pass = 0; chr_order_radix_recurse( size, pass, max_size, p_x, p_x_aux, p_sizes, p_sizes_aux, p_bytes ); } // ----------------------------------------------------------------------------- /* * Recursive function for ordering the `p_x` unique strings * * For ASCII strings, 1 character aligns with 1 byte, so we can order them * 1 character at a time from left to right (MSB to LSB). * * For UTF-8 strings, the implementation of UTF-8 is done so that UTF-8 * characters are made up of between 1-4 bytes. Luckily, treating them as * a sequence of single bytes like we do for ASCII orders identically to * treating them as their full 1-4 byte sequence. * * Because these are variable length, some strings are shorter than others. * Shorter strings should order lower than longer strings if they are otherwise * equivalent, so we reserve the 0-th bucket of `p_counts` for counting * implicit empty strings. Normally this would be an issue because this is * the bucket for ASCII value 0, but this is the null value, which is not * allowed in R strings! * * Additionally, we don't have to worry about having an `NA` bucket because * there will be no missing values in the unique set. */ static void chr_order_radix_recurse(const r_ssize size, const R_len_t pass, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { chr_order_insertion(size, pass, p_x, p_sizes); return; } // We don't carry along `p_counts` from an up front allocation since // the strings have variable length r_ssize p_counts[UINT8_MAX_SIZE] = { 0 }; const int next_pass = pass + 1; // NA values won't be in `p_x` so we can reserve the 0th bucket for "" const uint8_t missing_bucket = 0; uint8_t byte = 0; // Histogram for (r_ssize i = 0; i < size; ++i) { const R_len_t x_elt_size = p_sizes[i]; // Check if there are characters left in the string and extract the next // one if so, otherwise assume implicit "". if (pass < x_elt_size) { const SEXP x_elt = p_x[i]; const char* c_x_elt = CHAR(x_elt); byte = (uint8_t) c_x_elt[pass]; } else { byte = missing_bucket; } p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass != max_size) { // If we are not at the last pass, recurse on next byte using // the same `size` since the group size hasn't changed chr_order_radix_recurse( size, next_pass, max_size, p_x, p_x_aux, p_sizes, p_sizes_aux, p_bytes ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Insert current cumulative value, then increment p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_x_aux[loc] = p_x[i]; p_sizes_aux[loc] = p_sizes[i]; } // Copy back over memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); memcpy(p_sizes, p_sizes_aux, size * sizeof(*p_sizes_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { ++p_x; ++p_sizes; continue; } if (next_pass == max_size) { p_x += group_size; p_sizes += group_size; continue; } // Order next byte of this subgroup chr_order_radix_recurse( group_size, next_pass, max_size, p_x, p_x_aux, p_sizes, p_sizes_aux, p_bytes ); p_x += group_size; p_sizes += group_size; } } // ----------------------------------------------------------------------------- /* * Check if `x` is greater than `y` lexicographically in a C-locale. * * - `x` and `y` are guaranteed to be different and not `NA`, so we don't gain * anything from pointer comparisons. * * - This is called from `chr_order_insertion()` from inside the radix ordering, * so we can use information about the current `pass` to only compare * characters that are actually different. */ static bool chr_str_ge(SEXP x, SEXP y, int x_size, const R_len_t pass) { // Pure insertion sort - we know nothing yet if (pass == 0) { const char* c_x = CHAR(x); const char* c_y = CHAR(y); int cmp = strcmp(c_x, c_y); return cmp >= 0; } // Otherwise we know they are equal up to the position before `pass`, but // it might have been equality with implicit "" so we need to check the // length of one of them const int last_pass = pass - 1; // We are comparing length with C 0-based indexing so we have to do +1. if (x_size < last_pass + 1) { return true; } const char* c_x = CHAR(x); const char* c_y = CHAR(y); // Now start the comparison at `last_pass`, which we know exists c_x = c_x + last_pass; c_y = c_y + last_pass; int cmp = strcmp(c_x, c_y); return cmp >= 0; } // ----------------------------------------------------------------------------- struct df_order_info { SEXP x; SEXP decreasing; SEXP na_last; r_ssize size; struct order* p_order; struct lazy_raw* p_lazy_x_chunk; struct lazy_raw* p_lazy_x_aux; struct lazy_raw* p_lazy_o_aux; struct lazy_raw* p_lazy_bytes; struct lazy_raw* p_lazy_counts; struct group_infos* p_group_infos; struct truelength_info* p_truelength_info; }; struct df_order_cleanup_info { struct truelength_info* p_truelength_info; }; static SEXP df_order_exec(void* p_data); static void df_order_cleanup(void* p_data); /* * `df_order()` is the main user of `p_group_infos`. It uses the grouping * of the current column to break up the next column into sub groups. That * process is continued until either all columns have been processed or we * can tell all of the values apart. * * Internally `df_order()` may call `chr_order_chunk()` to order character * columns. The TRUELENGTHs of the column are marked with * `chr_mark_sorted_uniques()`, and generally they are reset after each * column is processed by using `truelength_reset()`. However, if a longjump * occurs after the column is marked but before it is reset, then the * truelengths won't be reset. This might happen if an allocation fails, or * if an error is thrown. To carefully handle this case, * `R_ExecWithCleanup()` is used to ensure that `truelength_reset()` is * always called. When there aren't any character columns or if there are * character columns and the truelengths were reset normally, this does * nothing. */ static void df_order(SEXP x, SEXP decreasing, SEXP na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { struct df_order_info info = { .x = x, .decreasing = decreasing, .na_last = na_last, .size = size, .p_order = p_order, .p_lazy_x_chunk = p_lazy_x_chunk, .p_lazy_x_aux = p_lazy_x_aux, .p_lazy_o_aux = p_lazy_o_aux, .p_lazy_bytes = p_lazy_bytes, .p_lazy_counts = p_lazy_counts, .p_group_infos = p_group_infos, .p_truelength_info = p_truelength_info }; struct df_order_cleanup_info cleanup_info = { .p_truelength_info = p_truelength_info }; R_ExecWithCleanup( df_order_exec, &info, df_order_cleanup, &cleanup_info ); } static void df_order_internal(SEXP x, SEXP decreasing, SEXP na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static SEXP df_order_exec(void* p_data) { struct df_order_info* p_info = (struct df_order_info*) p_data; df_order_internal( p_info->x, p_info->decreasing, p_info->na_last, p_info->size, p_info->p_order, p_info->p_lazy_x_chunk, p_info->p_lazy_x_aux, p_info->p_lazy_o_aux, p_info->p_lazy_bytes, p_info->p_lazy_counts, p_info->p_group_infos, p_info->p_truelength_info ); return R_NilValue; } static void df_order_cleanup(void* p_data) { struct df_order_cleanup_info* p_info = (struct df_order_cleanup_info*) p_data; truelength_reset(p_info->p_truelength_info); } static void vec_order_chunk_switch(bool decreasing, bool na_last, r_ssize size, const enum vctrs_type type, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); #define DF_ORDER_EXTRACT_CHUNK(CONST_DEREF, CTYPE) do { \ const CTYPE* p_col = CONST_DEREF(col); \ CTYPE* p_x_chunk_col = (CTYPE*) p_x_chunk; \ \ /* Extract the next group chunk and place in */ \ /* sequential order for cache friendliness */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk_col[j] = p_col[loc]; \ } \ } while (0) #define DF_ORDER_EXTRACT_CHUNK_CPL() do { \ const Rcomplex* p_col = COMPLEX_RO(col); \ double* p_x_chunk_col = (double*) p_x_chunk; \ \ if (rerun_complex) { \ /* First pass - real */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk_col[j] = p_col[loc].r; \ } \ \ /* Decrement `i` to rerun column */ \ --i; \ } else { \ /* Second pass - imaginary */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk_col[j] = p_col[loc].i; \ } \ } \ } while (0) static void df_order_internal(SEXP x, SEXP decreasing, SEXP na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { r_ssize n_cols = r_length(x); bool recycle_decreasing; r_ssize n_decreasing = r_length(decreasing); int* p_decreasing = LOGICAL(decreasing); if (n_decreasing == 1) { recycle_decreasing = true; } else if (n_decreasing == n_cols) { recycle_decreasing = false; } else { Rf_errorcall( R_NilValue, "Internal error: `vec_order_expand_args()` should expand " "`decreasing` to have length 1 or length equal " "to the number of columns of `x` after calling `vec_proxy_order()`." ); } bool recycle_na_last; r_ssize n_na_last = r_length(na_last); int* p_na_last = LOGICAL(na_last); if (n_na_last == 1) { recycle_na_last = true; } else if (n_na_last == n_cols) { recycle_na_last = false; } else { Rf_errorcall( R_NilValue, "Internal error: `vec_order_expand_args()` should expand " "`na_last` to have length 1 or length equal " "to the number of columns of `x` after calling `vec_proxy_order()`." ); } // Special case no columns if (n_cols == 0) { init_order(p_order); return; } SEXP col = VECTOR_ELT(x, 0); bool col_decreasing = p_decreasing[0]; bool col_na_last = p_na_last[0]; enum vctrs_type type = vec_proxy_typeof(col); // Apply on one column to fill `p_group_infos`. // First column is immutable and we must copy into `x_chunk`. vec_order_base_switch( col, col_decreasing, col_na_last, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); // For complex, we have to rerun the column a second time on the // imaginary part. This is done by decrementing `i` after processing // the real part so the column is rerun. bool rerun_complex = false; // Iterate over remaining columns by group chunk for (r_ssize i = 1; i < n_cols; ++i) { // Get the number of group chunks from previous column group info struct group_info* p_group_info_pre = groups_current(p_group_infos); r_ssize n_groups = p_group_info_pre->n_groups; // If there were no ties, we are completely done if (n_groups == size) { break; } if (!recycle_decreasing) { col_decreasing = p_decreasing[i]; } if (!recycle_na_last) { col_na_last = p_na_last[i]; } // Reset pointer between columns since we increment it as // we iterate through the groups, but need it to start from the beginning // on the next column. `p_o` is initialized now that we have already // processed at least one column. int* p_o_col = p_order->p_data; col = VECTOR_ELT(x, i); type = vec_proxy_typeof(col); // If we are on the rerun pass, flip this back off so the // imaginary part is extracted below. if (type == vctrs_type_complex) { rerun_complex = rerun_complex ? false : true; } // Pre-sort unique characters once for the whole column if (type == vctrs_type_character) { const SEXP* p_col = STRING_PTR_RO(col); chr_mark_sorted_uniques( p_col, size, p_lazy_x_aux, p_lazy_bytes, p_truelength_info ); } // Turn off group tracking if: // - We are on the last column // - The user didn't request group information // - That column isn't the first pass of a complex column if (i == n_cols - 1 && !p_group_infos->force_groups && !rerun_complex) { p_group_infos->ignore_groups = true; } // Swap to other group info to prepare for this column groups_swap(p_group_infos); // Ensure `x_chunk` is initialized to hold chunks void* p_x_chunk = init_lazy_raw(p_lazy_x_chunk); // Iterate over this column's group chunks for (r_ssize group = 0; group < n_groups; ++group) { r_ssize group_size = p_group_info_pre->p_data[group]; // Fast handling of simplest case if (group_size == 1) { ++p_o_col; groups_size_maybe_push(1, p_group_infos); continue; } // Extract current chunk and place into `x_chunk` in sequential order switch (type) { case vctrs_type_integer: DF_ORDER_EXTRACT_CHUNK(INTEGER_RO, int); break; case vctrs_type_logical: DF_ORDER_EXTRACT_CHUNK(LOGICAL_RO, int); break; case vctrs_type_double: DF_ORDER_EXTRACT_CHUNK(REAL_RO, double); break; case vctrs_type_character: DF_ORDER_EXTRACT_CHUNK(STRING_PTR_RO, SEXP); break; case vctrs_type_complex: DF_ORDER_EXTRACT_CHUNK_CPL(); break; default: Rf_errorcall(R_NilValue, "Unknown data frame column type in `vec_order()`."); } vec_order_chunk_switch( col_decreasing, col_na_last, group_size, type, p_o_col, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); p_o_col += group_size; } // Reset TRUELENGTHs between columns if (type == vctrs_type_character) { truelength_reset(p_truelength_info); } } } #undef DF_ORDER_EXTRACT_CHUNK #undef DF_ORDER_EXTRACT_CHUNK_CPL // ----------------------------------------------------------------------------- /* * Switch function specifically for column chunks generated when * processing a data frame */ static void vec_order_chunk_switch(bool decreasing, bool na_last, r_ssize size, const enum vctrs_type type, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { switch (type) { case vctrs_type_integer: { int_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_logical: { lgl_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_double: { dbl_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_complex: { // Complex types are run in two passes, once over real then over imaginary dbl_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_character: { chr_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case vctrs_type_dataframe: { Rf_errorcall(R_NilValue, "Internal error: df-cols should have already been flattened."); break; } default: { Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`"); } } } // ----------------------------------------------------------------------------- static inline size_t df_compute_n_bytes_lazy_raw(SEXP x); /* * Compute the minimum size required for `lazy_x_aux` and `lazy_x_chunk`. * * For complex, we split the vector into two double vectors. We only need to * allocate 1 double vector though, and it will be reused for both the real * and imaginary parts. */ static inline size_t vec_compute_n_bytes_lazy_raw(SEXP x, const enum vctrs_type type) { switch (type) { case vctrs_type_integer: case vctrs_type_logical: return sizeof(int); case vctrs_type_double: return sizeof(double); case vctrs_type_complex: // Complex types will be split into two double vectors return sizeof(double); case vctrs_type_character: // Auxiliary data will store SEXP and ints, so return the larger return sizeof(SEXP) > sizeof(int) ? sizeof(SEXP) : sizeof(int); case vctrs_type_dataframe: return df_compute_n_bytes_lazy_raw(x); default: Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } // `x` should be a flattened df with no df-cols static inline size_t df_compute_n_bytes_lazy_raw(SEXP x) { r_ssize n_cols = r_length(x); size_t multiplier = 0; for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); const enum vctrs_type type = vec_proxy_typeof(col); size_t col_multiplier = vec_compute_n_bytes_lazy_raw(col, type); if (col_multiplier > multiplier) { multiplier = col_multiplier; } } return multiplier; } // ----------------------------------------------------------------------------- static size_t df_compute_n_bytes_lazy_counts(SEXP x); /* * Compute the minimum size required for `p_counts` * * - For integer, we use 4 passes. * - For double, we use 8 passes. * - Logical uses integer radix sorting. * - Character uses integer radix sorting. * - Complex uses double radix sorting. */ static inline size_t vec_compute_n_bytes_lazy_counts(SEXP x, const enum vctrs_type type) { switch (type) { case vctrs_type_integer: case vctrs_type_logical: case vctrs_type_character: return INT_MAX_RADIX_PASS; case vctrs_type_double: case vctrs_type_complex: return DBL_MAX_RADIX_PASS; case vctrs_type_dataframe: return df_compute_n_bytes_lazy_counts(x); default: Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } // `x` should be a flattened df with no df-cols static size_t df_compute_n_bytes_lazy_counts(SEXP x) { r_ssize n_cols = r_length(x); size_t multiplier = 0; for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); const enum vctrs_type type = vec_proxy_typeof(col); size_t col_multiplier = vec_compute_n_bytes_lazy_counts(col, type); if (col_multiplier > multiplier) { multiplier = col_multiplier; } } return multiplier; } // ----------------------------------------------------------------------------- static SEXP df_expand_args(SEXP x, SEXP args); /* * `vec_order_expand_args()` checks the type and length of `decreasing` and * `na_last` and possibly expands them. * * `x` is expected to be the original input, before `vec_proxy_order()` is * called on it. * * If `x` is not a data frame, `decreasing` and `na_last` must be boolean * values. If `x` is something like a rcrd type with a multi-column data frame * proxy, then restricting to a boolean argument is correct, and works because * the single value will be recycled across the columns. * * If `x` is a data frame, and `decreasing` or `na_last` is size 1, we return * it untouched and it will be recycled correctly. * * If `x` is a data frame and the size of the arg matches the number of * columns of `x`, we have to be careful to "expand" the arg to match * the number of columns of `x` that will exist after `vec_proxy_order()` * is called. It flattens df-cols which might either already exist in `x`, * or may arise from rcrd columns that have data frame proxies. The majority * of the code here is for tracking this expansion. */ static SEXP vec_order_expand_args(SEXP x, SEXP decreasing, SEXP na_last) { SEXP args = PROTECT(r_new_list(2)); SET_VECTOR_ELT(args, 0, decreasing); SET_VECTOR_ELT(args, 1, na_last); // Don't check length here. These might be vectorized if `x` is a data frame. if (TYPEOF(decreasing) != LGLSXP) { Rf_errorcall(R_NilValue, "Internal error: `decreasing` must be logical"); } if (lgl_any_na(decreasing)) { Rf_errorcall(R_NilValue, "Internal error: `decreasing` can't contain missing values."); } if (TYPEOF(na_last) != LGLSXP) { Rf_errorcall(R_NilValue, "Internal error: `na_last` must be logical"); } if (lgl_any_na(na_last)) { Rf_errorcall(R_NilValue, "Internal error: `na_last` can't contain missing values."); } if (is_data_frame(x)) { args = df_expand_args(x, args); UNPROTECT(1); return args; } if (r_length(decreasing) != 1) { Rf_errorcall(R_NilValue, "`direction` must be a single value when `x` is not a data frame."); } if (r_length(na_last) != 1) { Rf_errorcall(R_NilValue, "`na_value` must be a single value when `x` is not a data frame."); } UNPROTECT(1); return args; } static SEXP expand_arg(SEXP arg, const int* p_expansions, r_ssize arg_size, r_ssize size); static int vec_decreasing_expansion(SEXP x); static SEXP df_expand_args(SEXP x, SEXP args) { SEXP decreasing = VECTOR_ELT(args, 0); SEXP na_last = VECTOR_ELT(args, 1); r_ssize n_decreasing = r_length(decreasing); r_ssize n_na_last = r_length(na_last); r_ssize n_cols = r_length(x); // They will be recycled correctly even if columns get flattened if (n_decreasing == 1 && n_na_last == 1) { return args; } // Must start out with the same length as the number of columns if (n_decreasing != 1 && n_decreasing != n_cols) { Rf_errorcall( R_NilValue, "`direction` should have length 1 or length equal to the number of " "columns of `x` when `x` is a data frame." ); } if (n_na_last != 1 && n_na_last != n_cols) { Rf_errorcall( R_NilValue, "`na_value` should have length 1 or length equal to the number of " "columns of `x` when `x` is a data frame." ); } SEXP expansions = PROTECT(Rf_allocVector(INTSXP, n_cols)); int* p_expansions = INTEGER(expansions); int size = 0; bool needs_expansion = false; // Compute expansion factor for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); int expansion = vec_decreasing_expansion(col); if (expansion != 1) { needs_expansion = true; } p_expansions[i] = expansion; size += expansion; } if (!needs_expansion) { UNPROTECT(1); return args; } decreasing = expand_arg(decreasing, p_expansions, n_decreasing, size); SET_VECTOR_ELT(args, 0, decreasing); na_last = expand_arg(na_last, p_expansions, n_na_last, size); SET_VECTOR_ELT(args, 1, na_last); UNPROTECT(1); return args; } static SEXP expand_arg(SEXP arg, const int* p_expansions, r_ssize n_arg, r_ssize size) { if (n_arg == 1) { return arg; } SEXP out = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_out = LOGICAL(out); int* p_arg = LOGICAL(arg); int k = 0; // Fill `out` with repeated `arg` values to match expanded size for (r_ssize i = 0; i < n_arg; ++i) { int col_arg = p_arg[i]; int expansion = p_expansions[i]; for (r_ssize j = 0; j < expansion; ++j) { p_out[k] = col_arg; ++k; } } UNPROTECT(1); return out; } static int df_decreasing_expansion(SEXP x); static int vec_decreasing_expansion(SEXP x) { // Bare columns if (!OBJECT(x)) { return 1; } // Compute number of cols in df-cols, // and do proxy-compare on the cols as needed if (is_data_frame(x)) { return df_decreasing_expansion(x); } int expansion; // Otherwise we have an S3 column that could have a data frame // ordering proxy containing multiple columns, so we need to check for that SEXP proxy = PROTECT(vec_proxy_order(x)); // If the `proxy` is a data frame, the expansion factor is the // number of columns. Otherwise it is 1. if (is_data_frame(proxy)) { expansion = Rf_length(proxy); } else { expansion = 1; } UNPROTECT(1); return expansion; } // 0-col df-cols get dropped from the comparison proxy, so returning `0` here // when a df-col has no columns should be correct static int df_decreasing_expansion(SEXP x) { r_ssize n_cols = r_length(x); int out = 0; // Accumulate the expansion factors of the cols of the df-col for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); out += vec_decreasing_expansion(col); } return out; } // ----------------------------------------------------------------------------- static int parse_na_value_one(SEXP x); // [[ include("order-radix.h") ]] SEXP parse_na_value(SEXP na_value) { // Don't care about length here, checked later if (TYPEOF(na_value) != STRSXP) { Rf_errorcall(R_NilValue, "`na_value` must be a character vector."); } R_len_t size = Rf_length(na_value); const SEXP* p_na_value = STRING_PTR_RO(na_value); SEXP na_last = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_na_last = LOGICAL(na_last); for (R_len_t i = 0; i < size; ++i) { p_na_last[i] = parse_na_value_one(p_na_value[i]); } UNPROTECT(1); return na_last; } static int parse_na_value_one(SEXP x) { if (x == NA_STRING) { Rf_errorcall(R_NilValue, "`na_value` can't be missing."); } const char* c_x = CHAR(x); if (!strcmp(c_x, "largest")) return 1; if (!strcmp(c_x, "smallest")) return 0; Rf_errorcall( R_NilValue, "`na_value` must contain only \"largest\" or \"smallest\"." ); } static int parse_direction_one(SEXP x); // [[ include("order-radix.h") ]] SEXP parse_direction(SEXP direction) { // Don't care about length here, checked later if (TYPEOF(direction) != STRSXP) { Rf_errorcall(R_NilValue, "`direction` must be a character vector."); } R_len_t size = Rf_length(direction); const SEXP* p_direction = STRING_PTR_RO(direction); SEXP decreasing = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_decreasing = LOGICAL(decreasing); for (R_len_t i = 0; i < size; ++i) { p_decreasing[i] = parse_direction_one(p_direction[i]); } UNPROTECT(1); return decreasing; } static int parse_direction_one(SEXP x) { if (x == NA_STRING) { Rf_errorcall(R_NilValue, "`direction` can't be missing."); } const char* c_x = CHAR(x); if (!strcmp(c_x, "asc")) return 0; if (!strcmp(c_x, "desc")) return 1; Rf_errorcall( R_NilValue, "`direction` must contain only \"asc\" or \"desc\"." ); } vctrs/src/group.c0000644000176200001440000001352714042540502013506 0ustar liggesusers#include "vctrs.h" #include "dictionary.h" #include "translate.h" #include "type-data-frame.h" #include "utils.h" // [[ register() ]] SEXP vctrs_group_id(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_out = INTEGER(out); R_len_t g = 1; for (int i = 0; i < n; ++i) { int32_t hash = dict_hash_scalar(d, i); R_len_t key = d->key[hash]; if (key == DICT_EMPTY) { dict_put(d, hash, i); p_out[i] = g; ++g; } else { p_out[i] = p_out[key]; } } SEXP n_groups = PROTECT_N(Rf_ScalarInteger(d->used), &nprot); Rf_setAttrib(out, syms_n, n_groups); UNPROTECT(nprot); return out; } // ----------------------------------------------------------------------------- static SEXP new_group_rle(SEXP g, SEXP l, R_len_t n); // [[ register() ]] SEXP vctrs_group_rle(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); const void* p_vec = d->p_poly_vec->p_vec; SEXP g = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_g = INTEGER(g); SEXP l = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_l = INTEGER(l); if (n == 0) { SEXP out = PROTECT_N(new_group_rle(g, l, 0), &nprot); UNPROTECT(nprot); return out; } // Integer vector that maps `hash` values to locations in `g` SEXP map = PROTECT_N(Rf_allocVector(INTSXP, d->size), &nprot); int* p_map = INTEGER(map); // Initialize first value int32_t hash = dict_hash_scalar(d, 0); dict_put(d, hash, 0); p_map[hash] = 0; *p_g = 1; *p_l = 1; int loc = 1; for (int i = 1; i < n; ++i) { if (d->p_equal_na_equal(p_vec, i - 1, p_vec, i)) { ++(*p_l); continue; } ++p_l; *p_l = 1; // Check if we have seen this value before int32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); p_map[hash] = loc; p_g[loc] = d->used; } else { p_g[loc] = p_g[p_map[hash]]; } ++loc; } g = PROTECT_N(Rf_lengthgets(g, loc), &nprot); l = PROTECT_N(Rf_lengthgets(l, loc), &nprot); SEXP out = new_group_rle(g, l, d->used); UNPROTECT(nprot); return out; } static SEXP new_group_rle(SEXP g, SEXP l, R_len_t n) { SEXP out = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(out, 0, g); SET_VECTOR_ELT(out, 1, l); SEXP names = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(names, 0, strings_group); SET_STRING_ELT(names, 1, strings_length); Rf_setAttrib(out, R_NamesSymbol, names); SEXP n_groups = PROTECT(Rf_ScalarInteger(n)); Rf_setAttrib(out, syms_n, n_groups); Rf_setAttrib(out, R_ClassSymbol, classes_vctrs_group_rle); UNPROTECT(3); return out; } // ----------------------------------------------------------------------------- // [[ include("vctrs.h"); register() ]] SEXP vec_group_loc(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); SEXP proxy = PROTECT_N(vec_proxy_equal(x), &nprot); proxy = PROTECT_N(vec_normalize_encoding(proxy), &nprot); struct dictionary* d = new_dictionary(proxy); PROTECT_DICT(d, &nprot); SEXP groups = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_groups = INTEGER(groups); R_len_t g = 0; // Identify groups, this is essentially `vec_group_id()` for (int i = 0; i < n; ++i) { const int32_t hash = dict_hash_scalar(d, i); const R_len_t key = d->key[hash]; if (key == DICT_EMPTY) { dict_put(d, hash, i); p_groups[i] = g; ++g; } else { p_groups[i] = p_groups[key]; } } const int n_groups = d->used; // Location of first occurence of each group in `x` SEXP key_loc = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_key_loc = INTEGER(key_loc); int key_loc_current = 0; // Count of the number of elements in each group SEXP counts = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_counts = INTEGER(counts); memset(p_counts, 0, n_groups * sizeof(int)); for (int i = 0; i < n; ++i) { const int group = p_groups[i]; if (group == key_loc_current) { p_key_loc[key_loc_current] = i + 1; ++key_loc_current; } ++p_counts[group]; } SEXP out_loc = PROTECT_N(Rf_allocVector(VECSXP, n_groups), &nprot); // Direct pointer to the location vectors we store in `out_loc` int** p_elt_loc = (int**) R_alloc(n_groups, sizeof(int*)); // Initialize `out_loc` to a list of integers with sizes corresponding // to the number of elements in that group for (int i = 0; i < n_groups; ++i) { SEXP elt_loc = Rf_allocVector(INTSXP, p_counts[i]); p_elt_loc[i] = INTEGER(elt_loc); SET_VECTOR_ELT(out_loc, i, elt_loc); } // The current location we are updating, each group has its own counter SEXP locations = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_locations = INTEGER(locations); memset(p_locations, 0, n_groups * sizeof(int)); // Fill in the location values for each group for (int i = 0; i < n; ++i) { const int group = p_groups[i]; const int location = p_locations[group]; p_elt_loc[group][location] = i + 1; ++p_locations[group]; } SEXP out_key = PROTECT_N(vec_slice(x, key_loc), &nprot); // Construct output data frame SEXP out = PROTECT_N(Rf_allocVector(VECSXP, 2), &nprot); SET_VECTOR_ELT(out, 0, out_key); SET_VECTOR_ELT(out, 1, out_loc); SEXP names = PROTECT_N(Rf_allocVector(STRSXP, 2), &nprot); SET_STRING_ELT(names, 0, strings_key); SET_STRING_ELT(names, 1, strings_loc); Rf_setAttrib(out, R_NamesSymbol, names); out = new_data_frame(out, n_groups); UNPROTECT(nprot); return out; } vctrs/src/Makevars0000644000176200001440000000003514042540502013670 0ustar liggesusersPKG_CFLAGS = $(C_VISIBILITY) vctrs/src/conditions.c0000644000176200001440000000733014042540502014516 0ustar liggesusers#include "vctrs.h" #include "utils.h" void stop_scalar_type(SEXP x, struct vctrs_arg* arg) { SEXP call = PROTECT(Rf_lang3(Rf_install("stop_scalar_type"), PROTECT(r_protect(x)), PROTECT(vctrs_arg(arg)))); Rf_eval(call, vctrs_ns_env); never_reached("stop_scalar_type"); } void vec_assert(SEXP x, struct vctrs_arg* arg) { if (!vec_is_vector(x)) { stop_scalar_type(x, arg); } } // [[ include("vctrs.h") ]] void stop_incompatible_type(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, bool cast) { SEXP syms[6] = { syms_x, syms_y, syms_x_arg, syms_y_arg, syms_action, NULL }; SEXP args[6] = { PROTECT(r_protect(x)), PROTECT(r_protect(y)), PROTECT(vctrs_arg(x_arg)), PROTECT(vctrs_arg(y_arg)), cast ? chrs_convert : chrs_combine, NULL }; SEXP call = PROTECT(r_call(syms_stop_incompatible_type, syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_incompatible_type"); } // [[ include("vctrs.h") ]] void stop_incompatible_size(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg) { SEXP syms[7] = { syms_x, syms_y, syms_x_size, syms_y_size, syms_x_arg, syms_y_arg, NULL }; SEXP args[7] = { PROTECT(r_protect(x)), PROTECT(r_protect(y)), PROTECT(r_int(x_size)), PROTECT(r_int(y_size)), PROTECT(vctrs_arg(x_arg)), PROTECT(vctrs_arg(y_arg)), NULL }; SEXP call = PROTECT(r_call(syms_stop_incompatible_size, syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_incompatible_size"); } void stop_recycle_incompatible_size(R_len_t x_size, R_len_t size, struct vctrs_arg* x_arg) { SEXP syms[4] = { r_sym("x_size"), r_sym("size"), r_sym("x_arg"), NULL }; SEXP args[4] = { PROTECT(r_int(x_size)), PROTECT(r_int(size)), PROTECT(vctrs_arg(x_arg)), NULL }; SEXP call = PROTECT(r_call(r_sym("stop_recycle_incompatible_size"), syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_recycle_incompatible_size"); } void stop_incompatible_shape(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, int axis, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { SEXP syms[8] = { r_sym("x"), r_sym("y"), r_sym("x_size"), r_sym("y_size"), r_sym("axis"), r_sym("x_arg"), r_sym("y_arg"), NULL }; SEXP args[8] = { PROTECT(r_protect(x)), PROTECT(r_protect(y)), PROTECT(r_int(x_size)), PROTECT(r_int(y_size)), PROTECT(r_int(axis)), PROTECT(vctrs_arg(p_x_arg)), PROTECT(vctrs_arg(p_y_arg)), NULL }; SEXP call = PROTECT(r_call(r_sym("stop_incompatible_shape"), syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_incompatible_shape"); } void stop_corrupt_factor_levels(SEXP x, struct vctrs_arg* arg) { SEXP call = PROTECT(Rf_lang3(Rf_install("stop_corrupt_factor_levels"), PROTECT(r_protect(x)), PROTECT(vctrs_arg(arg)))); Rf_eval(call, vctrs_ns_env); never_reached("stop_corrupt_factor_levels"); } void stop_corrupt_ordered_levels(SEXP x, struct vctrs_arg* arg) { SEXP call = PROTECT(Rf_lang3(Rf_install("stop_corrupt_ordered_levels"), PROTECT(r_protect(x)), PROTECT(vctrs_arg(arg)))); Rf_eval(call, vctrs_ns_env); never_reached("stop_corrupt_ordered_levels"); } vctrs/src/names.h0000644000176200001440000000236113717456727013503 0ustar liggesusers#ifndef VCTRS_NAMES_H #define VCTRS_NAMES_H enum name_repair_type { name_repair_none = 0, name_repair_minimal, name_repair_unique, name_repair_universal, name_repair_check_unique, name_repair_custom = 99 }; struct name_repair_opts { enum name_repair_type type; struct vctrs_arg* arg; SEXP fn; bool quiet; }; extern struct name_repair_opts unique_repair_default_opts; extern struct name_repair_opts unique_repair_silent_opts; static struct name_repair_opts const * const p_unique_repair_default_opts = &unique_repair_default_opts; static struct name_repair_opts const * const p_unique_repair_silent_opts = &unique_repair_silent_opts; #define PROTECT_NAME_REPAIR_OPTS(opts) PROTECT((opts)->fn) SEXP vec_as_names(SEXP names, const struct name_repair_opts* opts); struct name_repair_opts new_name_repair_opts(SEXP name_repair, struct vctrs_arg* arg, bool quiet); const char* name_repair_arg_as_c_string(enum name_repair_type type); bool is_unique_names(SEXP names); SEXP vec_as_unique_names(SEXP names, bool quiet); SEXP r_seq_chr(const char* prefix, R_len_t n); SEXP r_chr_paste_prefix(SEXP names, const char* prefix, const char* sep); #include "owned.h" SEXP vec_proxy_set_names(SEXP x, SEXP names, const enum vctrs_owned owned); #endif vctrs/src/size.c0000644000176200001440000001026114042540502013314 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "utils.h" #include "slice.h" static inline R_len_t vec_raw_size(SEXP x) { SEXP dimensions = r_dim(x); if (dimensions == R_NilValue || Rf_length(dimensions) == 0) { return Rf_length(x); } if (TYPEOF(dimensions) != INTSXP) { Rf_errorcall(R_NilValue, "Corrupt vector, `dim` attribute is not an integer vector."); } R_len_t size = INTEGER(dimensions)[0]; return size; } // [[ include("vctrs.h") ]] R_len_t vec_size(SEXP x) { int nprot = 0; struct vctrs_proxy_info info = vec_proxy_info(x); PROTECT_PROXY_INFO(&info, &nprot); SEXP data = info.proxy; R_len_t size; switch (info.type) { case vctrs_type_null: size = 0; break; case vctrs_type_logical: case vctrs_type_integer: case vctrs_type_double: case vctrs_type_complex: case vctrs_type_character: case vctrs_type_raw: case vctrs_type_list: size = vec_raw_size(data); break; case vctrs_type_dataframe: size = df_size(data); break; default: stop_scalar_type(x, NULL); } UNPROTECT(nprot); return size; } // [[ register() ]] SEXP vctrs_size(SEXP x) { return Rf_ScalarInteger(vec_size(x)); } SEXP list_sizes(SEXP x) { if (!vec_is_list(x)) { Rf_errorcall(R_NilValue, "`x` must be a list."); } R_len_t size = vec_size(x); SEXP out = PROTECT(Rf_allocVector(INTSXP, size)); int* p_out = INTEGER(out); for (R_len_t i = 0; i < size; ++i) { SEXP elt = VECTOR_ELT(x, i); p_out[i] = vec_size(elt); } UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_list_sizes(SEXP x) { return list_sizes(x); } R_len_t df_rownames_size(SEXP x) { for (SEXP attr = ATTRIB(x); attr != R_NilValue; attr = CDR(attr)) { if (TAG(attr) != R_RowNamesSymbol) { continue; } return rownames_size(CAR(attr)); } return -1; } // For performance, avoid Rf_getAttrib() because it automatically transforms // the rownames into an integer vector R_len_t df_size(SEXP x) { R_len_t n = df_rownames_size(x); if (n < 0) { Rf_errorcall(R_NilValue, "Corrupt data frame: row.names are missing"); } return n; } // Supports bare lists as well R_len_t df_raw_size(SEXP x) { R_len_t n = df_rownames_size(x); if (n >= 0) { return n; } return df_raw_size_from_list(x); } // [[ include("vctrs.h") ]] R_len_t df_raw_size_from_list(SEXP x) { if (Rf_length(x) >= 1) { return vec_size(VECTOR_ELT(x, 0)); } else { return 0; } } // [[ register() ]] SEXP vctrs_df_size(SEXP x) { return r_int(df_raw_size(x)); } // [[ include("vctrs.h") ]] SEXP vec_recycle(SEXP x, R_len_t size, struct vctrs_arg* x_arg) { if (x == R_NilValue) { return R_NilValue; } R_len_t n_x = vec_size(x); if (n_x == size) { return x; } if (n_x == 1L) { SEXP i = PROTECT(compact_rep(1, size)); SEXP out = vec_slice_impl(x, i); UNPROTECT(1); return out; } stop_recycle_incompatible_size(n_x, size, x_arg); } // [[ register() ]] SEXP vctrs_recycle(SEXP x, SEXP size_obj, SEXP x_arg) { if (x == R_NilValue || size_obj == R_NilValue) { return R_NilValue; } size_obj = PROTECT(vec_cast(size_obj, vctrs_shared_empty_int, args_empty, args_empty)); R_len_t size = r_int_get(size_obj, 0); UNPROTECT(1); struct vctrs_arg x_arg_ = vec_as_arg(x_arg); return vec_recycle(x, size, &x_arg_); } // [[ include("vctrs.h") ]] SEXP vec_recycle_fallback(SEXP x, R_len_t size, struct vctrs_arg* x_arg) { if (x == R_NilValue) { return R_NilValue; } R_len_t x_size = vec_size(x); if (x_size == size) { return x; } if (x_size == 1) { SEXP subscript = PROTECT(Rf_allocVector(INTSXP, size)); r_int_fill(subscript, 1, size); SEXP out = vec_slice_fallback(x, subscript); UNPROTECT(1); return out; } stop_recycle_incompatible_size(x_size, size, x_arg); } // [[ include("utils.h") ]] R_len_t size_validate(SEXP size, const char* arg) { size = vec_cast(size, vctrs_shared_empty_int, args_empty, args_empty); if (Rf_length(size) != 1) { Rf_errorcall(R_NilValue, "`%s` must be a single integer.", arg); } int out = r_int_get(size, 0); if (out == NA_INTEGER) { Rf_errorcall(R_NilValue, "`%s` can't be missing.", arg); } return out; } vctrs/src/order-groups.h0000644000176200001440000001233514042540502015003 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_GROUPS_H #define VCTRS_ORDER_GROUPS_H #include "vctrs.h" // ----------------------------------------------------------------------------- // This seems to be a reasonable default to start with for tracking group sizes // and is what base R uses. It is expanded by 2x every time we need to // reallocate. #define GROUP_DATA_SIZE_DEFAULT 100000 // ----------------------------------------------------------------------------- /* * Info related to 1 column / vector worth of groupings * * @member self A RAWSXP for the struct memory. * @member data An integer vector of group sizes. * @member p_data A pointer to `data`. * @member data_pi The protection index for `data` which allows us to * `REPROTECT()` on the fly. * @member data_size The current allocated size of `data`. * @member n_groups The current number of groups seen so far. * Always `<= data_size`. * @member max_group_size The maximum group size seen so far. */ struct group_info { SEXP self; SEXP data; int* p_data; PROTECT_INDEX data_pi; r_ssize data_size; r_ssize n_groups; r_ssize max_group_size; }; #define PROTECT_GROUP_INFO(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->data, &(p_info)->data_pi); \ *(p_n) += 2; \ } while(0) // ----------------------------------------------------------------------------- /* * `group_infos` contains information about 2 `group_info` structs. It contains * a pointer which points to 2 `group_info` pointers. * * For a single atomic vector, `current = 0` is always set and only one of the * structs is ever used. * * For a data frame with multiple columns, after every column `current` is * flipped between 0 and 1, giving us a chance to read the group information * off the previous column (which allows us to chunk the current column into * groups) while also updating the group information of the chunks of * the current one. * * @member self A RAWSXP for the struct memory. * @member p_p_group_info_data A RAWSXP for the p_p_group_info array memory. * @member p_p_group_info A pointer to two `group_info` pointers. * @member max_data_size The maximum data size that can be allocated when * reallocating an individual `p_group_info`. This is set to the size of * `x`. * @member current The current `group_info` pointer we are using. This is * either 0 or 1. * @member force_groups Was group information requested by the user? If so, we * always have to track group information. * @member ignore_groups Should group tracking be ignored? This is the default * for atomic vectors unless groups information is explicitly requested. For * data frames, this is true over all columns except the last one (for * performance) unless `force_groups` is true. */ struct group_infos { SEXP self; SEXP p_p_group_info_data; struct group_info** p_p_group_info; r_ssize max_data_size; int current; bool force_groups; bool ignore_groups; }; #define PROTECT_GROUP_INFOS(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT((p_info)->p_p_group_info_data); \ *(p_n) += 2; \ PROTECT_GROUP_INFO((p_info)->p_p_group_info[0], (p_n)); \ PROTECT_GROUP_INFO((p_info)->p_p_group_info[1], (p_n)); \ } while(0) // ----------------------------------------------------------------------------- struct group_info* new_group_info(); struct group_infos* new_group_infos(struct group_info* p_group_info0, struct group_info* p_group_info1, r_ssize max_data_size, bool force_groups, bool ignore_groups); void groups_swap(struct group_infos* p_group_infos); // ----------------------------------------------------------------------------- /* * Extract the current `group_info*` */ static inline struct group_info* groups_current(struct group_infos* p_group_infos) { return p_group_infos->p_p_group_info[p_group_infos->current]; } // ----------------------------------------------------------------------------- void groups_size_push(r_ssize size, struct group_infos* p_group_infos); /* * Inline version of `groups_size_push()` that only attempts to push if * we aren't ignoring groups. Important for this to be inline for performance, * especially with atomic vectors where order generally isn't required. */ static inline void groups_size_maybe_push(r_ssize size, struct group_infos* p_group_infos) { if (p_group_infos->ignore_groups) { return; } else { groups_size_push(size, p_group_infos); } } // ----------------------------------------------------------------------------- #endif vctrs/src/utils-dispatch.c0000644000176200001440000000574514042540502015312 0ustar liggesusers#include "vctrs.h" #include "utils.h" // Defined below enum vctrs_class_type class_type(SEXP x); static enum vctrs_class_type class_type_impl(SEXP class); static const char* class_type_as_str(enum vctrs_class_type type); // [[ register() ]] SEXP vctrs_class_type(SEXP x) { return Rf_mkString(class_type_as_str(class_type(x))); } // [[ include("utils.h") ]] enum vctrs_class_type class_type(SEXP x) { if (!OBJECT(x)) { return vctrs_class_none; } SEXP class = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); // Avoid corrupt objects where `x` is an OBJECT(), but the class is NULL if (class == R_NilValue) { UNPROTECT(1); return vctrs_class_none; } enum vctrs_class_type type = class_type_impl(class); UNPROTECT(1); return type; } static enum vctrs_class_type class_type_impl(SEXP class) { int n = Rf_length(class); SEXP const* p = STRING_PTR_RO(class); // First check for bare types for which we know how many strings are // the classes composed of switch (n) { case 1: { SEXP p0 = p[0]; if (p0 == strings_data_frame) { return vctrs_class_bare_data_frame; } else if (p0 == strings_factor) { return vctrs_class_bare_factor; } else if (p0 == strings_date) { return vctrs_class_bare_date; } break; } case 2: { SEXP p0 = p[0]; SEXP p1 = p[1]; if (p0 == strings_ordered && p1 == strings_factor) { return vctrs_class_bare_ordered; } if (p1 == strings_posixt) { if (p0 == strings_posixct) { return vctrs_class_bare_posixct; } else if (p0 == strings_posixlt) { return vctrs_class_bare_posixlt; } } break; } case 3: { if (p[0] == strings_tbl_df && p[1] == strings_tbl && p[2] == strings_data_frame) { return vctrs_class_bare_tibble; } break; }} // Now check for inherited classes p = p + n - 1; SEXP last = *p; if (last == strings_data_frame) { return vctrs_class_data_frame; } else if (last == strings_list) { return vctrs_class_list; } return vctrs_class_unknown; } static const char* class_type_as_str(enum vctrs_class_type type) { switch (type) { case vctrs_class_list: return "list"; case vctrs_class_data_frame: return "data_frame"; case vctrs_class_bare_data_frame: return "bare_data_frame"; case vctrs_class_bare_tibble: return "bare_tibble"; case vctrs_class_bare_factor: return "bare_factor"; case vctrs_class_bare_ordered: return "bare_ordered"; case vctrs_class_bare_date: return "bare_date"; case vctrs_class_bare_posixct: return "bare_posixct"; case vctrs_class_bare_posixlt: return "bare_posixlt"; case vctrs_class_unknown: return "unknown"; case vctrs_class_none: return "none"; } never_reached("class_type_as_str"); } // [[ include("vctrs.h") ]] bool vec_is_partial(SEXP x) { return x == R_NilValue || (TYPEOF(x) == VECSXP && Rf_inherits(x, "vctrs_partial")); } // [[ register() ]] SEXP vctrs_is_partial(SEXP x) { return Rf_ScalarLogical(vec_is_partial(x)); } vctrs/src/typeof2.c0000644000176200001440000004631614042540502013744 0ustar liggesusers#include "vctrs.h" #include "utils.h" /** * Type for symmetric binary dispatch. * * Permuting `x` and `y` does not change the typeof2. * * After adding entries in `vec_typeof2()`, adjust the list of types * in helper-types.R. This will ensure the consistency of the new * entries. */ /** * [[ include("utils.h") ]] * * @param left Output parameter. Set to 1 when the common type comes * from the left, 0 when it comes from the right, and -1 when it * comes from both sides. This means that "left" is the default * when coerced to a boolean value. */ enum vctrs_type2 vec_typeof2_impl(enum vctrs_type type_x, enum vctrs_type type_y, int* left) { switch (type_x) { case vctrs_type_null: { switch (type_y) { case vctrs_type_null: *left = -1; return vctrs_type2_null_null; case vctrs_type_unspecified: *left = 0; return vctrs_type2_null_unspecified; case vctrs_type_logical: *left = 0; return vctrs_type2_null_logical; case vctrs_type_integer: *left = 0; return vctrs_type2_null_integer; case vctrs_type_double: *left = 0; return vctrs_type2_null_double; case vctrs_type_complex: *left = 0; return vctrs_type2_null_complex; case vctrs_type_character: *left = 0; return vctrs_type2_null_character; case vctrs_type_raw: *left = 0; return vctrs_type2_null_raw; case vctrs_type_list: *left = 0; return vctrs_type2_null_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_null_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_null_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_null_scalar; } } case vctrs_type_unspecified: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_unspecified; case vctrs_type_unspecified: *left = -1; return vctrs_type2_unspecified_unspecified; case vctrs_type_logical: *left = 0; return vctrs_type2_unspecified_logical; case vctrs_type_integer: *left = 0; return vctrs_type2_unspecified_integer; case vctrs_type_double: *left = 0; return vctrs_type2_unspecified_double; case vctrs_type_complex: *left = 0; return vctrs_type2_unspecified_complex; case vctrs_type_character: *left = 0; return vctrs_type2_unspecified_character; case vctrs_type_raw: *left = 0; return vctrs_type2_unspecified_raw; case vctrs_type_list: *left = 0; return vctrs_type2_unspecified_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_unspecified_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_unspecified_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_unspecified_scalar; } } case vctrs_type_logical: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_logical; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_logical; case vctrs_type_logical: *left = -1; return vctrs_type2_logical_logical; case vctrs_type_integer: *left = 0; return vctrs_type2_logical_integer; case vctrs_type_double: *left = 0; return vctrs_type2_logical_double; case vctrs_type_complex: *left = 0; return vctrs_type2_logical_complex; case vctrs_type_character: *left = 0; return vctrs_type2_logical_character; case vctrs_type_raw: *left = 0; return vctrs_type2_logical_raw; case vctrs_type_list: *left = 0; return vctrs_type2_logical_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_logical_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_logical_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_logical_scalar; } } case vctrs_type_integer: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_integer; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_integer; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_integer; case vctrs_type_integer: *left = -1; return vctrs_type2_integer_integer; case vctrs_type_double: *left = 0; return vctrs_type2_integer_double; case vctrs_type_complex: *left = 0; return vctrs_type2_integer_complex; case vctrs_type_character: *left = 0; return vctrs_type2_integer_character; case vctrs_type_raw: *left = 0; return vctrs_type2_integer_raw; case vctrs_type_list: *left = 0; return vctrs_type2_integer_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_integer_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_integer_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_integer_scalar; } } case vctrs_type_double: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_double; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_double; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_double; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_double; case vctrs_type_double: *left = -1; return vctrs_type2_double_double; case vctrs_type_complex: *left = 0; return vctrs_type2_double_complex; case vctrs_type_character: *left = 0; return vctrs_type2_double_character; case vctrs_type_raw: *left = 0; return vctrs_type2_double_raw; case vctrs_type_list: *left = 0; return vctrs_type2_double_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_double_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_double_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_double_scalar; } } case vctrs_type_complex: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_complex; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_complex; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_complex; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_complex; case vctrs_type_double: *left = 1; return vctrs_type2_double_complex; case vctrs_type_complex: *left = -1; return vctrs_type2_complex_complex; case vctrs_type_character: *left = 0; return vctrs_type2_complex_character; case vctrs_type_raw: *left = 0; return vctrs_type2_complex_raw; case vctrs_type_list: *left = 0; return vctrs_type2_complex_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_complex_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_complex_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_complex_scalar; } } case vctrs_type_character: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_character; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_character; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_character; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_character; case vctrs_type_double: *left = 1; return vctrs_type2_double_character; case vctrs_type_complex: *left = 1; return vctrs_type2_complex_character; case vctrs_type_character: *left = -1; return vctrs_type2_character_character; case vctrs_type_raw: *left = 0; return vctrs_type2_character_raw; case vctrs_type_list: *left = 0; return vctrs_type2_character_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_character_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_character_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_character_scalar; } } case vctrs_type_raw: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_raw; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_raw; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_raw; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_raw; case vctrs_type_double: *left = 1; return vctrs_type2_double_raw; case vctrs_type_complex: *left = 1; return vctrs_type2_complex_raw; case vctrs_type_character: *left = 1; return vctrs_type2_character_raw; case vctrs_type_raw: *left = -1; return vctrs_type2_raw_raw; case vctrs_type_list: *left = 0; return vctrs_type2_raw_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_raw_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_raw_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_raw_scalar; } } case vctrs_type_list: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_list; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_list; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_list; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_list; case vctrs_type_double: *left = 1; return vctrs_type2_double_list; case vctrs_type_complex: *left = 1; return vctrs_type2_complex_list; case vctrs_type_character: *left = 1; return vctrs_type2_character_list; case vctrs_type_raw: *left = 1; return vctrs_type2_raw_list; case vctrs_type_list: *left = -1; return vctrs_type2_list_list; case vctrs_type_dataframe: *left = 0; return vctrs_type2_list_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_list_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_list_scalar; } } case vctrs_type_dataframe: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_dataframe; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_dataframe; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_dataframe; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_dataframe; case vctrs_type_double: *left = 1; return vctrs_type2_double_dataframe; case vctrs_type_complex: *left = 1; return vctrs_type2_complex_dataframe; case vctrs_type_character: *left = 1; return vctrs_type2_character_dataframe; case vctrs_type_raw: *left = 1; return vctrs_type2_raw_dataframe; case vctrs_type_list: *left = 1; return vctrs_type2_list_dataframe; case vctrs_type_dataframe: *left = -1; return vctrs_type2_dataframe_dataframe; case vctrs_type_s3: *left = 0; return vctrs_type2_dataframe_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_dataframe_scalar; } } case vctrs_type_s3: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_s3; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_s3; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_s3; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_s3; case vctrs_type_double: *left = 1; return vctrs_type2_double_s3; case vctrs_type_complex: *left = 1; return vctrs_type2_complex_s3; case vctrs_type_character: *left = 1; return vctrs_type2_character_s3; case vctrs_type_raw: *left = 1; return vctrs_type2_raw_s3; case vctrs_type_list: *left = 1; return vctrs_type2_list_s3; case vctrs_type_dataframe: *left = 1; return vctrs_type2_dataframe_s3; case vctrs_type_s3: *left = -1; return vctrs_type2_s3_s3; case vctrs_type_scalar: *left = 0; return vctrs_type2_s3_scalar; } } case vctrs_type_scalar: { switch (type_y) { case vctrs_type_null: *left = 1; return vctrs_type2_null_scalar; case vctrs_type_unspecified: *left = 1; return vctrs_type2_unspecified_scalar; case vctrs_type_logical: *left = 1; return vctrs_type2_logical_scalar; case vctrs_type_integer: *left = 1; return vctrs_type2_integer_scalar; case vctrs_type_double: *left = 1; return vctrs_type2_double_scalar; case vctrs_type_complex: *left = 1; return vctrs_type2_complex_scalar; case vctrs_type_character: *left = 1; return vctrs_type2_character_scalar; case vctrs_type_raw: *left = 1; return vctrs_type2_raw_scalar; case vctrs_type_list: *left = 1; return vctrs_type2_list_scalar; case vctrs_type_dataframe: *left = 1; return vctrs_type2_dataframe_scalar; case vctrs_type_s3: *left = 1; return vctrs_type2_s3_scalar; case vctrs_type_scalar: *left = -1; return vctrs_type2_scalar_scalar; } }} never_reached("vec_typeof2_impl()"); } // [[ include("vctrs.h") ]] enum vctrs_type2 vec_typeof2(SEXP x, SEXP y) { int _; return vec_typeof2_impl(vec_typeof(x), vec_typeof(y), &_); } const char* vctrs_type2_as_str(enum vctrs_type2 type) { switch (type) { case vctrs_type2_null_null: return "vctrs_type2_null_null"; case vctrs_type2_null_logical: return "vctrs_type2_null_logical"; case vctrs_type2_null_integer: return "vctrs_type2_null_integer"; case vctrs_type2_null_double: return "vctrs_type2_null_double"; case vctrs_type2_null_complex: return "vctrs_type2_null_complex"; case vctrs_type2_null_character: return "vctrs_type2_null_character"; case vctrs_type2_null_raw: return "vctrs_type2_null_raw"; case vctrs_type2_null_list: return "vctrs_type2_null_list"; case vctrs_type2_null_dataframe: return "vctrs_type2_null_dataframe"; case vctrs_type2_null_s3: return "vctrs_type2_null_s3"; case vctrs_type2_null_unspecified: return "vctrs_type2_null_unspecified"; case vctrs_type2_null_scalar: return "vctrs_type2_null_scalar"; case vctrs_type2_unspecified_logical: return "vctrs_type2_unspecified_logical"; case vctrs_type2_unspecified_integer: return "vctrs_type2_unspecified_integer"; case vctrs_type2_unspecified_double: return "vctrs_type2_unspecified_double"; case vctrs_type2_unspecified_complex: return "vctrs_type2_unspecified_complex"; case vctrs_type2_unspecified_character: return "vctrs_type2_unspecified_character"; case vctrs_type2_unspecified_raw: return "vctrs_type2_unspecified_raw"; case vctrs_type2_unspecified_list: return "vctrs_type2_unspecified_list"; case vctrs_type2_unspecified_dataframe: return "vctrs_type2_unspecified_dataframe"; case vctrs_type2_unspecified_s3: return "vctrs_type2_unspecified_s3"; case vctrs_type2_unspecified_unspecified: return "vctrs_type2_unspecified_unspecified"; case vctrs_type2_unspecified_scalar: return "vctrs_type2_unspecified_scalar"; case vctrs_type2_logical_logical: return "vctrs_type2_logical_logical"; case vctrs_type2_logical_integer: return "vctrs_type2_logical_integer"; case vctrs_type2_logical_double: return "vctrs_type2_logical_double"; case vctrs_type2_logical_complex: return "vctrs_type2_logical_complex"; case vctrs_type2_logical_character: return "vctrs_type2_logical_character"; case vctrs_type2_logical_raw: return "vctrs_type2_logical_raw"; case vctrs_type2_logical_list: return "vctrs_type2_logical_list"; case vctrs_type2_logical_dataframe: return "vctrs_type2_logical_dataframe"; case vctrs_type2_logical_s3: return "vctrs_type2_logical_s3"; case vctrs_type2_logical_scalar: return "vctrs_type2_logical_scalar"; case vctrs_type2_integer_integer: return "vctrs_type2_integer_integer"; case vctrs_type2_integer_double: return "vctrs_type2_integer_double"; case vctrs_type2_integer_complex: return "vctrs_type2_integer_complex"; case vctrs_type2_integer_character: return "vctrs_type2_integer_character"; case vctrs_type2_integer_raw: return "vctrs_type2_integer_raw"; case vctrs_type2_integer_list: return "vctrs_type2_integer_list"; case vctrs_type2_integer_dataframe: return "vctrs_type2_integer_dataframe"; case vctrs_type2_integer_s3: return "vctrs_type2_integer_s3"; case vctrs_type2_integer_scalar: return "vctrs_type2_integer_scalar"; case vctrs_type2_double_double: return "vctrs_type2_double_double"; case vctrs_type2_double_complex: return "vctrs_type2_double_complex"; case vctrs_type2_double_character: return "vctrs_type2_double_character"; case vctrs_type2_double_raw: return "vctrs_type2_double_raw"; case vctrs_type2_double_list: return "vctrs_type2_double_list"; case vctrs_type2_double_dataframe: return "vctrs_type2_double_dataframe"; case vctrs_type2_double_s3: return "vctrs_type2_double_s3"; case vctrs_type2_double_scalar: return "vctrs_type2_double_scalar"; case vctrs_type2_complex_complex: return "vctrs_type2_complex_complex"; case vctrs_type2_complex_character: return "vctrs_type2_complex_character"; case vctrs_type2_complex_raw: return "vctrs_type2_complex_raw"; case vctrs_type2_complex_list: return "vctrs_type2_complex_list"; case vctrs_type2_complex_dataframe: return "vctrs_type2_complex_dataframe"; case vctrs_type2_complex_s3: return "vctrs_type2_complex_s3"; case vctrs_type2_complex_scalar: return "vctrs_type2_complex_scalar"; case vctrs_type2_character_character: return "vctrs_type2_character_character"; case vctrs_type2_character_raw: return "vctrs_type2_character_raw"; case vctrs_type2_character_list: return "vctrs_type2_character_list"; case vctrs_type2_character_dataframe: return "vctrs_type2_character_dataframe"; case vctrs_type2_character_s3: return "vctrs_type2_character_s3"; case vctrs_type2_character_scalar: return "vctrs_type2_character_scalar"; case vctrs_type2_raw_raw: return "vctrs_type2_raw_raw"; case vctrs_type2_raw_list: return "vctrs_type2_raw_list"; case vctrs_type2_raw_dataframe: return "vctrs_type2_raw_dataframe"; case vctrs_type2_raw_s3: return "vctrs_type2_raw_s3"; case vctrs_type2_raw_scalar: return "vctrs_type2_raw_scalar"; case vctrs_type2_list_list: return "vctrs_type2_list_list"; case vctrs_type2_list_dataframe: return "vctrs_type2_list_dataframe"; case vctrs_type2_list_s3: return "vctrs_type2_list_s3"; case vctrs_type2_list_scalar: return "vctrs_type2_list_scalar"; case vctrs_type2_dataframe_dataframe: return "vctrs_type2_dataframe_dataframe"; case vctrs_type2_dataframe_s3: return "vctrs_type2_dataframe_s3"; case vctrs_type2_dataframe_scalar: return "vctrs_type2_dataframe_scalar"; case vctrs_type2_s3_s3: return "vctrs_type2_s3_s3"; case vctrs_type2_s3_scalar: return "vctrs_type2_s3_scalar"; case vctrs_type2_scalar_scalar: return "vctrs_type2_scalar_scalar"; } never_reached("vctrs_type2_as_str"); } SEXP vctrs_typeof2(SEXP x, SEXP y) { enum vctrs_type2 type = vec_typeof2(x, y); return Rf_mkString(vctrs_type2_as_str(type)); } vctrs/src/c-unchop.c0000644000176200001440000001623314042540502014063 0ustar liggesusers#include "vctrs.h" #include "c.h" #include "ptype-common.h" #include "slice.h" #include "slice-assign.h" #include "owned.h" #include "utils.h" // Defined in slice-chop.c SEXP vec_as_indices(SEXP indices, R_len_t n, SEXP names); static SEXP vec_unchop(SEXP x, SEXP indices, SEXP ptype, SEXP name_spec, const struct name_repair_opts* name_repair); // [[ register() ]] SEXP vctrs_unchop(SEXP x, SEXP indices, SEXP ptype, SEXP name_spec, SEXP name_repair) { struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false); PROTECT_NAME_REPAIR_OPTS(&name_repair_opts); SEXP out = vec_unchop(x, indices, ptype, name_spec, &name_repair_opts); UNPROTECT(1); return out; } enum fallback_homogeneous { FALLBACK_HOMOGENEOUS_false = 0, FALLBACK_HOMOGENEOUS_true }; static SEXP vec_unchop_fallback(SEXP ptype, SEXP x, SEXP indices, SEXP name_spec, const struct name_repair_opts* name_repair, enum fallback_homogeneous homogenous); static SEXP vec_unchop(SEXP xs, SEXP indices, SEXP ptype, SEXP name_spec, const struct name_repair_opts* name_repair) { if (!vec_is_list(xs)) { Rf_errorcall(R_NilValue, "`x` must be a list"); } if (indices == R_NilValue) { return vec_c(xs, ptype, name_spec, name_repair); } R_len_t xs_size = vec_size(xs); // Apply size/type checking to `indices` before possibly exiting early from // having a `NULL` common type if (xs_size != vec_size(indices)) { Rf_errorcall(R_NilValue, "`x` and `indices` must be lists of the same size"); } if (!vec_is_list(indices)) { Rf_errorcall(R_NilValue, "`indices` must be a list of integers, or `NULL`"); } ptype = PROTECT(vec_ptype_common_params(xs, ptype, DF_FALLBACK_DEFAULT, S3_FALLBACK_true)); if (needs_vec_c_fallback(ptype)) { SEXP out = vec_unchop_fallback(ptype, xs, indices, name_spec, name_repair, FALLBACK_HOMOGENEOUS_false); UNPROTECT(1); return out; } // FIXME: Needed for dplyr::summarise() which passes a non-fallback ptype if (needs_vec_c_homogeneous_fallback(xs, ptype)) { SEXP out = vec_unchop_fallback(ptype, xs, indices, name_spec, name_repair, FALLBACK_HOMOGENEOUS_true); UNPROTECT(1); return out; } if (ptype == R_NilValue) { UNPROTECT(1); return R_NilValue; } xs = PROTECT(vec_cast_common(xs, ptype)); bool assign_names = !Rf_inherits(name_spec, "rlang_zap"); SEXP xs_names = PROTECT(r_names(xs)); bool xs_is_named = xs_names != R_NilValue && !is_data_frame(ptype); R_len_t out_size = 0; // `out_size` is computed from `indices` for (R_len_t i = 0; i < xs_size; ++i) { SEXP x = VECTOR_ELT(xs, i); if (x == R_NilValue) { continue; } R_len_t index_size = Rf_length(VECTOR_ELT(indices, i)); out_size += index_size; // Each element of `xs` is recycled to its corresponding index's size x = vec_recycle(x, index_size, args_empty); SET_VECTOR_ELT(xs, i, x); } SEXP locs = PROTECT(vec_as_indices(indices, out_size, R_NilValue)); SEXP proxy = vec_proxy(ptype); PROTECT_INDEX proxy_pi; PROTECT_WITH_INDEX(proxy, &proxy_pi); proxy = vec_init(proxy, out_size); REPROTECT(proxy, proxy_pi); SEXP out_names = R_NilValue; PROTECT_INDEX out_names_pi; PROTECT_WITH_INDEX(out_names, &out_names_pi); const struct vec_assign_opts unchop_assign_opts = { .assign_names = assign_names, .ignore_outer_names = true }; for (R_len_t i = 0; i < xs_size; ++i) { SEXP x = VECTOR_ELT(xs, i); if (x == R_NilValue) { continue; } SEXP loc = VECTOR_ELT(locs, i); if (assign_names) { R_len_t size = Rf_length(loc); SEXP outer = xs_is_named ? STRING_ELT(xs_names, i) : R_NilValue; SEXP inner = PROTECT(vec_names(x)); SEXP x_nms = PROTECT(apply_name_spec(name_spec, outer, inner, size)); if (x_nms != R_NilValue) { R_LAZY_ALLOC(out_names, out_names_pi, STRSXP, out_size); // If there is no name to assign, skip the assignment since // `out_names` already contains empty strings if (x_nms != chrs_empty) { out_names = chr_assign(out_names, loc, x_nms, VCTRS_OWNED_true); REPROTECT(out_names, out_names_pi); } } UNPROTECT(2); } // Total ownership of `proxy` because it was freshly created with `vec_init()` proxy = vec_proxy_assign_opts(proxy, loc, x, VCTRS_OWNED_true, &unchop_assign_opts); REPROTECT(proxy, proxy_pi); } SEXP out_size_sexp = PROTECT(r_int(out_size)); SEXP out = PROTECT(vec_restore(proxy, ptype, out_size_sexp, VCTRS_OWNED_true)); if (out_names != R_NilValue) { out_names = PROTECT(vec_as_names(out_names, name_repair)); out = vec_set_names(out, out_names); UNPROTECT(1); } else if (!assign_names) { // FIXME: `vec_ptype2()` doesn't consistently zaps names, so `out` // might have been initialised with names. This branch can be // removed once #1020 is resolved. out = vec_set_names(out, R_NilValue); } UNPROTECT(8); return out; } // This is essentially: // vec_slice_fallback(vec_c_fallback_invoke(!!!x), order(vec_c(!!!indices))) // with recycling of each element of `x` to the corresponding index size static SEXP vec_unchop_fallback(SEXP ptype, SEXP x, SEXP indices, SEXP name_spec, const struct name_repair_opts* name_repair, enum fallback_homogeneous homogeneous) { R_len_t x_size = vec_size(x); x = PROTECT(r_clone_referenced(x)); R_len_t out_size = 0; // Recycle `x` elements to the size of their corresponding index for (R_len_t i = 0; i < x_size; ++i) { SEXP elt = VECTOR_ELT(x, i); R_len_t index_size = vec_size(VECTOR_ELT(indices, i)); out_size += index_size; SET_VECTOR_ELT(x, i, vec_recycle_fallback(elt, index_size, args_empty)); } indices = PROTECT(vec_as_indices(indices, out_size, R_NilValue)); SEXP out = R_NilValue; if (homogeneous) { out = PROTECT(vec_c_fallback_invoke(x, name_spec)); } else { out = PROTECT(vec_c_fallback(ptype, x, name_spec, name_repair)); } const struct name_repair_opts name_repair_opts = { .type = name_repair_none, .fn = R_NilValue }; indices = PROTECT(vec_c( indices, vctrs_shared_empty_int, R_NilValue, &name_repair_opts )); const int* p_indices = INTEGER(indices); SEXP locations = PROTECT(Rf_allocVector(INTSXP, out_size)); int* p_locations = INTEGER(locations); // Initialize with missing to handle locations that are never selected for (R_len_t i = 0; i < out_size; ++i) { p_locations[i] = NA_INTEGER; } for (R_len_t i = 0; i < out_size; ++i) { const int index = p_indices[i]; if (index == NA_INTEGER) { continue; } p_locations[index - 1] = i + 1; } out = PROTECT(vec_slice_fallback(out, locations)); UNPROTECT(6); return out; } vctrs/src/decl/0000755000176200001440000000000014042540502013105 5ustar liggesusersvctrs/src/decl/ptype-decl.h0000644000176200001440000000024614042540502015326 0ustar liggesusers#ifndef VCTRS_PTYPE_DECL_H #define VCTRS_PTYPE_DECL_H static inline SEXP vec_ptype_method(SEXP x); static inline SEXP vec_ptype_invoke(SEXP x, SEXP method); #endif vctrs/src/ptype2.h0000644000176200001440000000520613671672047013616 0ustar liggesusers#ifndef VCTRS_PTYPE2_H #define VCTRS_PTYPE2_H // Sync with R constants in ptype2.R #define DF_FALLBACK_DEFAULT 0 enum df_fallback { DF_FALLBACK_warn_maybe = 0, DF_FALLBACK_warn, DF_FALLBACK_none, DF_FALLBACK_quiet }; #define S3_FALLBACK_DEFAULT 0 enum s3_fallback { S3_FALLBACK_false = 0, S3_FALLBACK_true }; struct fallback_opts { enum df_fallback df; enum s3_fallback s3; }; struct ptype2_opts { SEXP x; SEXP y; struct vctrs_arg* x_arg; struct vctrs_arg* y_arg; struct fallback_opts fallback; }; SEXP vec_ptype2_dispatch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left); SEXP vec_ptype2_opts(const struct ptype2_opts* opts, int* left); static inline SEXP vec_ptype2_params(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, enum df_fallback df_fallback, int* left) { const struct ptype2_opts opts = { .x = x, .y = y, .x_arg = x_arg, .y_arg = y_arg, .fallback = { .df = df_fallback } }; return vec_ptype2_opts(&opts, left); } static inline SEXP vec_ptype2(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, int* left) { const struct ptype2_opts opts = { .x = x, .y = y, .x_arg = x_arg, .y_arg = y_arg }; return vec_ptype2_opts(&opts, left); } SEXP vec_ptype2_dispatch_s3(const struct ptype2_opts* opts); bool vec_is_coercible(const struct ptype2_opts* opts, int* dir); struct ptype2_opts new_ptype2_opts(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, SEXP opts); SEXP new_fallback_r_opts(const struct ptype2_opts* opts); struct fallback_opts new_fallback_opts(SEXP opts); SEXP vec_invoke_coerce_method(SEXP method_sym, SEXP method, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP x_arg_sym, SEXP x_arg, SEXP y_arg_sym, SEXP y_arg, const struct fallback_opts* opts); SEXP vec_ptype2_from_unspecified(const struct ptype2_opts* opts, enum vctrs_type other_type, SEXP other, struct vctrs_arg* other_arg); #endif vctrs/src/cast-bare.c0000644000176200001440000000715614042540502014214 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "utils.h" // [[ include("cast.h") ]] SEXP int_as_logical(SEXP x, bool* lossy) { int* data = INTEGER(x); R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); int* out_data = LOGICAL(out); for (R_len_t i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; if (elt == NA_INTEGER) { *out_data = NA_LOGICAL; continue; } if (elt != 0 && elt != 1) { *lossy = true; UNPROTECT(1); return R_NilValue; } *out_data = elt; } UNPROTECT(1); return out; } // [[ include("cast.h") ]] SEXP dbl_as_logical(SEXP x, bool* lossy) { double* data = REAL(x); R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); int* out_data = LOGICAL(out); for (R_len_t i = 0; i < n; ++i, ++data, ++out_data) { double elt = *data; if (isnan(elt)) { *out_data = NA_LOGICAL; continue; } if (elt != 0 && elt != 1) { *lossy = true; UNPROTECT(1); return R_NilValue; } *out_data = (int) elt; } UNPROTECT(1); return out; } // [[ include("cast.h") ]] SEXP chr_as_logical(SEXP x, bool* lossy) { SEXP const* x_p = STRING_PTR_RO(x); R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); int* p_out = LOGICAL(out); for (R_len_t i = 0; i < n; ++i) { SEXP str = x_p[i]; if (str == NA_STRING) { p_out[i] = NA_LOGICAL; continue; } const char* elt = CHAR(str); switch (elt[0]) { case 'T': if (elt[1] == '\0' || strcmp(elt, "TRUE") == 0) { p_out[i] = 1; continue; } break; case 'F': if (elt[1] == '\0' || strcmp(elt, "FALSE") == 0) { p_out[i] = 0; continue; } break; case 't': if (strcmp(elt, "true") == 0) { p_out[i] = 1; continue; } break; case 'f': if (strcmp(elt, "false") == 0) { p_out[i] = 0; continue; } break; default: break; } *lossy = true; UNPROTECT(1); return R_NilValue; } UNPROTECT(1); return out; } // [[ include("cast.h") ]] SEXP lgl_as_integer(SEXP x, bool* lossy) { return Rf_coerceVector(x, INTSXP); } // [[ include("cast.h") ]] SEXP dbl_as_integer(SEXP x, bool* lossy) { double* data = REAL(x); R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(INTSXP, n)); int* out_data = INTEGER(out); for (R_len_t i = 0; i < n; ++i, ++data, ++out_data) { double elt = *data; if (elt <= INT_MIN || elt >= INT_MAX + 1.0) { *lossy = true; UNPROTECT(1); return R_NilValue; } if (isnan(elt)) { *out_data = NA_INTEGER; continue; } int value = (int) elt; if (value != elt) { *lossy = true; UNPROTECT(1); return R_NilValue; } *out_data = value; } UNPROTECT(1); return out; } // [[ include("cast.h") ]] SEXP lgl_as_double(SEXP x, bool* lossy) { int* data = LOGICAL(x); R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(REALSXP, n)); double* out_data = REAL(out); for (R_len_t i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; *out_data = (elt == NA_LOGICAL) ? NA_REAL : elt; } UNPROTECT(1); return out; } // [[ include("cast.h") ]] SEXP int_as_double(SEXP x, bool* lossy) { int* data = INTEGER(x); R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(REALSXP, n)); double* out_data = REAL(out); for (R_len_t i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; *out_data = (elt == NA_INTEGER) ? NA_REAL : elt; } UNPROTECT(1); return out; } vctrs/src/utils-rlang.h0000644000176200001440000000577714042540502014630 0ustar liggesusers#ifndef VCTRS_UTILS_RLANG_H #define VCTRS_UTILS_RLANG_H typedef struct SEXPREC sexp; #define r_null R_NilValue #define r_syms_names R_NamesSymbol #define r_syms_class R_ClassSymbol #define KEEP PROTECT #define FREE UNPROTECT // node.h ------------------------------------------------------------ static inline sexp* r_node_car(sexp* x) { return CAR(x); } static inline sexp* r_node_cdr(sexp* x) { return CDR(x); } static inline sexp* r_node_tag(sexp* x) { return TAG(x); } static inline sexp* r_node_caar(sexp* x) { return CAAR(x); } static inline sexp* r_node_cadr(sexp* x) { return CADR(x); } static inline sexp* r_node_cdar(sexp* x) { return CDAR(x); } static inline sexp* r_node_cddr(sexp* x) { return CDDR(x); } static inline sexp* r_node_poke_car(sexp* x, sexp* newcar) { SETCAR(x, newcar); return x; } static inline sexp* r_node_poke_cdr(sexp* x, sexp* newcdr) { SETCDR(x, newcdr); return x; } static inline sexp* r_node_poke_tag(sexp* x, sexp* tag) { SET_TAG(x, tag); return x; } static inline sexp* r_node_poke_caar(sexp* x, sexp* newcaar) { SETCAR(CAR(x), newcaar); return x; } static inline sexp* r_node_poke_cadr(sexp* x, sexp* newcar) { SETCADR(x, newcar); return x; } static inline sexp* r_node_poke_cdar(sexp* x, sexp* newcdar) { SETCDR(CAR(x), newcdar); return x; } static inline sexp* r_node_poke_cddr(sexp* x, sexp* newcdr) { SETCDR(CDR(x), newcdr); return x; } static inline sexp* r_new_node(sexp* car, sexp* cdr) { return Rf_cons(car, cdr); } static inline sexp* r_new_node3(sexp* car, sexp* cdr, sexp* tag) { sexp* out = Rf_cons(car, cdr); SET_TAG(out, tag); return out; } sexp* r_pairlist_find(sexp* node, sexp* tag); sexp* r_pairlist_rev(sexp* node); static inline sexp* r_pairlist_get(sexp* node, sexp* tag) { return r_node_car(r_pairlist_find(node, tag)); } static inline sexp* r_pairlist_poke(sexp* node, sexp* tag, sexp* value) { sexp* x = r_pairlist_find(node, tag); if (x == R_NilValue) { node = r_new_node(value, node); r_node_poke_tag(node, tag); return node; } else { r_node_poke_car(x, value); return node; } } static inline sexp* r_pairlist_find_last(sexp* x) { while (CDR(x) != R_NilValue) x = CDR(x); return x; } // attrs.h ----------------------------------------------------------- static inline sexp* r_attrib(sexp* x) { return ATTRIB(x); } static inline sexp* r_poke_attrib(sexp* x, sexp* attrs) { SET_ATTRIB(x, attrs); return x; } // Unlike Rf_getAttrib(), this never allocates. This also doesn't bump // refcounts or namedness. static inline sexp* r_attrib_get(sexp* x, sexp* tag) { return r_pairlist_get(r_attrib(x), tag); } SEXP r_clone_shared(SEXP x); static inline void r_attrib_poke(sexp* x, sexp* tag, sexp* value) { sexp* attrib = KEEP(r_clone_shared(r_attrib(x))); r_poke_attrib(x, r_pairlist_poke(attrib, tag, value)); FREE(1); return; } static inline sexp* r_names(sexp* x) { return r_attrib_get(x, r_syms_names); } static inline sexp* r_class(sexp* x) { return r_attrib_get(x, r_syms_class); } #endif vctrs/src/utils-rlang.c0000644000176200001440000000050414042540502014602 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "utils-rlang.h" // node.h ------------------------------------------------------------ sexp* r_pairlist_find(sexp* node, sexp* tag) { while (node != r_null) { if (r_node_tag(node) == tag) { return node; } node = r_node_cdr(node); } return r_null; } vctrs/src/type-data-frame.c0000644000176200001440000005704514042540502015335 0ustar liggesusers#include "vctrs.h" #include "ptype-common.h" #include "ptype2.h" #include "type-data-frame.h" #include "utils.h" static SEXP syms_df_lossy_cast = NULL; static SEXP fns_df_lossy_cast = NULL; static SEXP new_compact_rownames(R_len_t n); // [[ include("type-data-frame.h") ]] bool is_data_frame(SEXP x) { if (TYPEOF(x) != VECSXP) { return false; } enum vctrs_class_type type = class_type(x); return type == vctrs_class_bare_data_frame || type == vctrs_class_bare_tibble || type == vctrs_class_data_frame; } // [[ include("type-data-frame.h") ]] bool is_native_df(SEXP x) { enum vctrs_class_type type = class_type(x); return type == vctrs_class_bare_data_frame || type == vctrs_class_bare_tibble; } // [[ include("type-data-frame.h") ]] bool is_bare_data_frame(SEXP x) { return class_type(x) == vctrs_class_bare_data_frame; } // [[ include("type-data-frame.h") ]] bool is_bare_tibble(SEXP x) { return class_type(x) == vctrs_class_bare_tibble; } // [[ include("type-data-frame.h") ]] SEXP new_data_frame(SEXP x, R_len_t n) { x = PROTECT(r_clone_referenced(x)); init_data_frame(x, n); UNPROTECT(1); return x; } static R_len_t df_size_from_list(SEXP x, SEXP n); static R_len_t df_size_from_n(SEXP n); static SEXP c_data_frame_class(SEXP cls); // [[ register() ]] SEXP vctrs_new_data_frame(SEXP args) { args = CDR(args); SEXP x = CAR(args); args = CDR(args); SEXP n = CAR(args); args = CDR(args); SEXP cls = CAR(args); args = CDR(args); SEXP attrib = args; PROTECT_INDEX pi; PROTECT_WITH_INDEX(attrib, &pi); if (TYPEOF(x) != VECSXP) { Rf_errorcall(R_NilValue, "`x` must be a list"); } bool has_names = false; bool has_rownames = false; R_len_t size = df_size_from_list(x, n); SEXP out = PROTECT(r_clone_referenced(x)); for (SEXP node = attrib; node != R_NilValue; node = CDR(node)) { SEXP tag = TAG(node); // We might add dynamic dots later on if (tag == R_ClassSymbol) { stop_internal("new_data_frame", "Can't supply `class` in `...`."); } if (tag == R_NamesSymbol) { has_names = true; continue; } if (tag == R_RowNamesSymbol) { // "row.names" is checked for consistency with n (if provided) if (size != rownames_size(CAR(node)) && n != R_NilValue) { Rf_errorcall(R_NilValue, "`n` and `row.names` must be consistent."); } has_rownames = true; continue; } } // Take names from `x` if `attrib` doesn't have any if (!has_names) { SEXP nms = vctrs_shared_empty_chr; if (Rf_length(out)) { nms = r_names(out); } PROTECT(nms); if (nms != R_NilValue) { attrib = Rf_cons(nms, attrib); SET_TAG(attrib, R_NamesSymbol); REPROTECT(attrib, pi); } UNPROTECT(1); } if (!has_rownames) { SEXP rn = PROTECT(new_compact_rownames(size)); attrib = Rf_cons(rn, attrib); SET_TAG(attrib, R_RowNamesSymbol); UNPROTECT(1); REPROTECT(attrib, pi); } if (cls == R_NilValue) { cls = classes_data_frame; } else { cls = c_data_frame_class(cls); } PROTECT(cls); attrib = Rf_cons(cls, attrib); SET_TAG(attrib, R_ClassSymbol); UNPROTECT(1); REPROTECT(attrib, pi); SET_ATTRIB(out, attrib); SET_OBJECT(out, 1); UNPROTECT(2); return out; } static R_len_t df_size_from_list(SEXP x, SEXP n) { if (n == R_NilValue) { if (is_data_frame(x)) { return df_size(x); } else { return df_raw_size_from_list(x); } } else { return df_size_from_n(n); } } static R_len_t df_size_from_n(SEXP n) { if (TYPEOF(n) != INTSXP || Rf_length(n) != 1) { Rf_errorcall(R_NilValue, "`n` must be an integer of size 1"); } return r_int_get(n, 0); } static SEXP c_data_frame_class(SEXP cls) { if (TYPEOF(cls) != STRSXP) { Rf_errorcall(R_NilValue, "`class` must be NULL or a character vector"); } return chr_c(cls, classes_data_frame); } SEXP data_frame(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts); // [[ register() ]] SEXP vctrs_data_frame(SEXP x, SEXP size, SEXP name_repair) { struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false); PROTECT_NAME_REPAIR_OPTS(&name_repair_opts); r_ssize c_size = 0; if (size == R_NilValue) { c_size = vec_size_common(x, 0); } else { c_size = size_validate(size, ".size"); } SEXP out = data_frame(x, c_size, &name_repair_opts); UNPROTECT(1); return out; } SEXP df_list(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts); SEXP data_frame(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts) { SEXP out = PROTECT(df_list(x, size, p_name_repair_opts)); out = new_data_frame(out, size); UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_df_list(SEXP x, SEXP size, SEXP name_repair) { struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false); PROTECT_NAME_REPAIR_OPTS(&name_repair_opts); r_ssize c_size = 0; if (size == R_NilValue) { c_size = vec_size_common(x, 0); } else { c_size = size_validate(size, ".size"); } SEXP out = df_list(x, c_size, &name_repair_opts); UNPROTECT(1); return out; } static SEXP df_list_drop_null(SEXP x); static SEXP df_list_splice(SEXP x); SEXP df_list(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts) { if (TYPEOF(x) != VECSXP) { stop_internal("df_list", "`x` must be a list."); } x = PROTECT(vec_recycle_common(x, size)); r_ssize n_cols = r_length(x); // Unnamed columns are auto-named with `""` if (r_names(x) == R_NilValue) { SEXP names = PROTECT(r_new_character(n_cols)); r_poke_names(x, names); UNPROTECT(1); } x = PROTECT(df_list_drop_null(x)); x = PROTECT(df_list_splice(x)); SEXP names = PROTECT(r_names(x)); names = PROTECT(vec_as_names(names, p_name_repair_opts)); r_poke_names(x, names); UNPROTECT(5); return x; } static SEXP df_list_drop_null(SEXP x) { r_ssize n_cols = r_length(x); r_ssize count = 0; for (r_ssize i = 0; i < n_cols; ++i) { count += VECTOR_ELT(x, i) == R_NilValue; } if (count == 0) { return x; } SEXP names = PROTECT(r_names(x)); const SEXP* p_names = STRING_PTR_RO(names); r_ssize n_out = n_cols - count; SEXP out = PROTECT(Rf_allocVector(VECSXP, n_out)); SEXP out_names = PROTECT(Rf_allocVector(STRSXP, n_out)); r_ssize out_i = 0; for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); if (col != R_NilValue) { SET_VECTOR_ELT(out, out_i, col); SET_STRING_ELT(out_names, out_i, p_names[i]); ++out_i; } } r_poke_names(out, out_names); UNPROTECT(3); return out; } static SEXP df_list_splice(SEXP x) { SEXP names = PROTECT(r_names(x)); const SEXP* p_names = STRING_PTR_RO(names); bool any_needs_splice = false; r_ssize n_cols = r_length(x); r_ssize i = 0; for (; i < n_cols; ++i) { // Only splice unnamed data frames if (p_names[i] != strings_empty) { continue; } SEXP col = VECTOR_ELT(x, i); if (is_data_frame(col)) { any_needs_splice = true; break; } } if (!any_needs_splice) { UNPROTECT(1); return x; } SEXP splice = PROTECT(r_new_logical(n_cols)); int* p_splice = LOGICAL(splice); for (r_ssize j = 0; j < n_cols; ++j) { p_splice[j] = 0; } r_ssize width = i; for (; i < n_cols; ++i) { // Only splice unnamed data frames if (p_names[i] != strings_empty) { ++width; continue; } SEXP col = VECTOR_ELT(x, i); if (is_data_frame(col)) { width += r_length(col); p_splice[i] = 1; } else { ++width; } } SEXP out = PROTECT(r_new_list(width)); SEXP out_names = PROTECT(r_new_character(width)); r_ssize loc = 0; // Splice loop for (r_ssize i = 0; i < n_cols; ++i) { if (!p_splice[i]) { SET_VECTOR_ELT(out, loc, VECTOR_ELT(x, i)); SET_STRING_ELT(out_names, loc, p_names[i]); ++loc; continue; } SEXP col = VECTOR_ELT(x, i); SEXP col_names = PROTECT(r_names(col)); if (TYPEOF(col_names) != STRSXP) { stop_internal( "df_splice", "Encountered corrupt data frame. " "Data frames must have character column names." ); } const SEXP* p_col_names = STRING_PTR_RO(col_names); r_ssize col_i = 0; r_ssize stop = loc + r_length(col); for (; loc < stop; ++loc, ++col_i) { SET_VECTOR_ELT(out, loc, VECTOR_ELT(col, col_i)); SET_STRING_ELT(out_names, loc, p_col_names[col_i]); } loc = stop; UNPROTECT(1); } r_poke_names(out, out_names); UNPROTECT(4); return out; } // [[ include("type-data-frame.h") ]] enum rownames_type rownames_type(SEXP x) { switch (TYPEOF(x)) { case STRSXP: return ROWNAMES_IDENTIFIERS; case INTSXP: if (Rf_length(x) == 2 && INTEGER(x)[0] == NA_INTEGER) { return ROWNAMES_AUTOMATIC_COMPACT; } else { return ROWNAMES_AUTOMATIC; } default: Rf_error("Corrupt data in `rownames_type()`: Unexpected type `%s`.", Rf_type2char(TYPEOF(x))); } } static R_len_t compact_rownames_length(SEXP x) { return abs(INTEGER(x)[1]); } // [[ include("type-data-frame.h") ]] R_len_t rownames_size(SEXP rn) { switch (rownames_type(rn)) { case ROWNAMES_IDENTIFIERS: case ROWNAMES_AUTOMATIC: return Rf_length(rn); case ROWNAMES_AUTOMATIC_COMPACT: return compact_rownames_length(rn); } never_reached("rownames_size"); } static void init_bare_data_frame(SEXP x, R_len_t n); // [[ include("type-data-frame.h") ]] void init_data_frame(SEXP x, R_len_t n) { Rf_setAttrib(x, R_ClassSymbol, classes_data_frame); init_bare_data_frame(x, n); } // [[ include("type-data-frame.h") ]] void init_tibble(SEXP x, R_len_t n) { Rf_setAttrib(x, R_ClassSymbol, classes_tibble); init_bare_data_frame(x, n); } static void init_bare_data_frame(SEXP x, R_len_t n) { if (Rf_length(x) == 0) { Rf_setAttrib(x, R_NamesSymbol, vctrs_shared_empty_chr); } init_compact_rownames(x, n); } // [[ include("type-data-frame.h") ]] void init_compact_rownames(SEXP x, R_len_t n) { SEXP rn = PROTECT(new_compact_rownames(n)); Rf_setAttrib(x, R_RowNamesSymbol, rn); UNPROTECT(1); } static SEXP new_compact_rownames(R_len_t n) { if (n <= 0) { return vctrs_shared_empty_int; } SEXP out = Rf_allocVector(INTSXP, 2); int* out_data = INTEGER(out); out_data[0] = NA_INTEGER; out_data[1] = -n; return out; } // vctrs type methods ------------------------------------------------ // [[ register() ]] SEXP vctrs_df_ptype2_opts(SEXP x, SEXP y, SEXP opts, SEXP x_arg, SEXP y_arg) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_y_arg = vec_as_arg(y_arg); const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &c_x_arg, &c_y_arg, opts); return df_ptype2(&c_opts); } static SEXP df_ptype2_match(const struct ptype2_opts* opts, SEXP x_names, SEXP y_names); static SEXP df_ptype2_loop(const struct ptype2_opts* opts, SEXP y_names); // [[ include("type-data-frame.h") ]] SEXP df_ptype2(const struct ptype2_opts* opts) { SEXP x_names = PROTECT(r_names(opts->x)); SEXP y_names = PROTECT(r_names(opts->y)); SEXP out = R_NilValue; if (equal_object(x_names, y_names)) { out = df_ptype2_loop(opts, x_names); } else { out = df_ptype2_match(opts, x_names, y_names); } UNPROTECT(2); return out; } SEXP df_ptype2_match(const struct ptype2_opts* opts, SEXP x_names, SEXP y_names) { SEXP x = opts->x; SEXP y = opts->y; SEXP x_dups_pos = PROTECT(vec_match(x_names, y_names)); SEXP y_dups_pos = PROTECT(vec_match(y_names, x_names)); int* x_dups_pos_data = INTEGER(x_dups_pos); int* y_dups_pos_data = INTEGER(y_dups_pos); R_len_t x_len = Rf_length(x_names); R_len_t y_len = Rf_length(y_names); // Count columns that are only in `y` R_len_t rest_len = 0; for (R_len_t i = 0; i < y_len; ++i) { if (y_dups_pos_data[i] == NA_INTEGER) { ++rest_len; } } R_len_t out_len = x_len + rest_len; SEXP out = PROTECT(Rf_allocVector(VECSXP, out_len)); SEXP nms = PROTECT(Rf_allocVector(STRSXP, out_len)); Rf_setAttrib(out, R_NamesSymbol, nms); R_len_t i = 0; // Fill in prototypes of all the columns that are in `x`, in order for (; i < x_len; ++i) { R_len_t dup = x_dups_pos_data[i]; struct arg_data_index x_arg_data = new_index_arg_data(r_chr_get_c_string(x_names, i), opts->x_arg); struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data); SEXP col = VECTOR_ELT(x, i); struct ptype2_opts col_opts = *opts; col_opts.x = col; col_opts.x_arg = &named_x_arg; SEXP type; if (dup == NA_INTEGER) { col_opts.y = vctrs_shared_empty_uns; col_opts.y_arg = NULL; type = vec_ptype2_from_unspecified(&col_opts, vec_typeof(col), col, &named_x_arg); } else { --dup; // 1-based index struct arg_data_index y_arg_data = new_index_arg_data(r_chr_get_c_string(y_names, dup), opts->y_arg); struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data); col_opts.y = VECTOR_ELT(y, dup); col_opts.y_arg = &named_y_arg; int _left; type = vec_ptype2_opts(&col_opts, &_left); } SET_VECTOR_ELT(out, i, type); SET_STRING_ELT(nms, i, STRING_ELT(x_names, i)); } // Fill in prototypes of the columns that are only in `y` for (R_len_t j = 0; i < out_len; ++j) { R_len_t dup = y_dups_pos_data[j]; if (dup == NA_INTEGER) { SEXP col = VECTOR_ELT(y, j); struct arg_data_index y_arg_data = new_index_arg_data(r_chr_get_c_string(y_names, j), opts->y_arg); struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data); struct ptype2_opts col_opts = *opts; col_opts.y = col; col_opts.y_arg = &named_y_arg; col_opts.x = vctrs_shared_empty_uns; col_opts.x_arg = NULL; SEXP type = vec_ptype2_from_unspecified(&col_opts, vec_typeof(col), col, &named_y_arg); SET_VECTOR_ELT(out, i, type); SET_STRING_ELT(nms, i, STRING_ELT(y_names, j)); ++i; } } init_data_frame(out, 0); UNPROTECT(4); return out; } static SEXP df_ptype2_loop(const struct ptype2_opts* opts, SEXP names) { SEXP x = opts->x; SEXP y = opts->y; R_len_t len = Rf_length(names); SEXP out = PROTECT(Rf_allocVector(VECSXP, len)); Rf_setAttrib(out, R_NamesSymbol, names); for (R_len_t i = 0; i < len; ++i) { const char* name = r_chr_get_c_string(names, i); struct arg_data_index x_arg_data = new_index_arg_data(name, opts->x_arg); struct arg_data_index y_arg_data = new_index_arg_data(name, opts->y_arg); struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data); struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data); struct ptype2_opts col_opts = *opts; col_opts.x = VECTOR_ELT(x, i); col_opts.y = VECTOR_ELT(y, i); col_opts.x_arg = &named_x_arg; col_opts.y_arg = &named_y_arg; int _left; SEXP type = vec_ptype2_opts(&col_opts, &_left); SET_VECTOR_ELT(out, i, type); } init_data_frame(out, 0); UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_df_cast_opts(SEXP x, SEXP to, SEXP opts, SEXP x_arg, SEXP to_arg) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_to_arg = vec_as_arg(to_arg); const struct cast_opts c_opts = new_cast_opts(x, to, &c_x_arg, &c_to_arg, opts); return df_cast_opts(&c_opts); } static SEXP df_cast_match(const struct cast_opts* opts, SEXP x_names, SEXP to_names); static SEXP df_cast_loop(const struct cast_opts* opts, SEXP names); // Take all columns of `to` and preserve the order. Common columns are // cast to their types in `to`. Extra `x` columns are dropped and // cause a lossy cast. Extra `to` columns are filled with missing // values. // [[ include("cast.h") ]] SEXP df_cast_opts(const struct cast_opts* opts) { SEXP x_names = PROTECT(r_names(opts->x)); SEXP to_names = PROTECT(r_names(opts->to)); if (x_names == R_NilValue || to_names == R_NilValue) { stop_internal("df_cast_opts", "Data frame must have names."); } SEXP out = R_NilValue; if (equal_object(x_names, to_names)) { out = df_cast_loop(opts, x_names); } else { out = df_cast_match(opts, x_names, to_names); } UNPROTECT(2); return out; } static SEXP df_cast_match(const struct cast_opts* opts, SEXP x_names, SEXP to_names) { SEXP x = opts->x; SEXP to = opts->to; SEXP to_dups_pos = PROTECT(vec_match(to_names, x_names)); int* to_dups_pos_data = INTEGER(to_dups_pos); R_len_t to_len = Rf_length(to_dups_pos); SEXP out = PROTECT(Rf_allocVector(VECSXP, to_len)); Rf_setAttrib(out, R_NamesSymbol, to_names); R_len_t size = df_size(x); R_len_t common_len = 0; for (R_len_t i = 0; i < to_len; ++i) { R_len_t pos = to_dups_pos_data[i]; SEXP col; if (pos == NA_INTEGER) { SEXP to_col = VECTOR_ELT(to, i); col = vec_init(to_col, size); // FIXME: Need to initialise the vector because we currently use // `vec_assign()` in `vec_rbind()` before falling back. Attach // an attribute to recognise unspecified vectors in // `base_c_invoke()`. if (opts->fallback.s3 && vec_is_common_class_fallback(to_col)) { PROTECT(col); Rf_setAttrib(col, Rf_install("vctrs:::unspecified"), vctrs_shared_true); UNPROTECT(1); } } else { --pos; // 1-based index ++common_len; struct arg_data_index x_arg_data = new_index_arg_data(r_chr_get_c_string(x_names, pos), opts->x_arg); struct arg_data_index to_arg_data = new_index_arg_data(r_chr_get_c_string(to_names, i), opts->to_arg); struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data); struct vctrs_arg named_to_arg = new_index_arg(opts->to_arg, &to_arg_data); struct cast_opts col_opts = { .x = VECTOR_ELT(x, pos), .to = VECTOR_ELT(to, i), .x_arg = &named_x_arg, .to_arg = &named_to_arg, .fallback = opts->fallback }; col = vec_cast_opts(&col_opts); } SET_VECTOR_ELT(out, i, col); } // Restore data frame size before calling `vec_restore()`. `x` and // `to` might not have any columns to compute the original size. init_data_frame(out, size); Rf_setAttrib(out, R_RowNamesSymbol, df_rownames(x)); R_len_t extra_len = Rf_length(x) - common_len; if (extra_len) { out = vctrs_dispatch3(syms_df_lossy_cast, fns_df_lossy_cast, syms_out, out, syms_x, x, syms_to, to); } UNPROTECT(2); return out; } static SEXP df_cast_loop(const struct cast_opts* opts, SEXP names) { SEXP x = opts->x; SEXP to = opts->to; R_len_t len = Rf_length(names); SEXP out = PROTECT(Rf_allocVector(VECSXP, len)); Rf_setAttrib(out, R_NamesSymbol, names); R_len_t size = df_size(x); for (R_len_t i = 0; i < len; ++i) { const char* name = r_chr_get_c_string(names, i); struct arg_data_index x_arg_data = new_index_arg_data(name, opts->x_arg); struct arg_data_index to_arg_data = new_index_arg_data(name, opts->to_arg); struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data); struct vctrs_arg named_to_arg = new_index_arg(opts->to_arg, &to_arg_data); struct cast_opts col_opts = { .x = VECTOR_ELT(x, i), .to = VECTOR_ELT(to, i), .x_arg = &named_x_arg, .to_arg = &named_to_arg, .fallback = opts->fallback }; SEXP col = vec_cast_opts(&col_opts); SET_VECTOR_ELT(out, i, col); } // Restore data frame size before calling `vec_restore()`. `x` and // `to` might not have any columns to compute the original size. init_data_frame(out, size); Rf_setAttrib(out, R_RowNamesSymbol, df_rownames(x)); UNPROTECT(1); return out; } // If negative index, value is appended SEXP df_poke(SEXP x, R_len_t i, SEXP value) { if (i >= 0) { SET_VECTOR_ELT(x, i, value); return x; } R_len_t ncol = Rf_length(x); SEXP tmp = PROTECT(r_resize(x, ncol + 1)); Rf_copyMostAttrib(x, tmp); x = tmp; SET_VECTOR_ELT(x, ncol, value); UNPROTECT(1); return x; } SEXP df_poke_at(SEXP x, SEXP name, SEXP value) { SEXP names = PROTECT(r_names(x)); R_len_t i = r_chr_find(names, name); UNPROTECT(1); x = PROTECT(df_poke(x, i, value)); if (i < 0) { SEXP names = PROTECT(r_names(x)); SET_STRING_ELT(names, Rf_length(x) - 1, name); UNPROTECT(1); } UNPROTECT(1); return x; } static inline R_len_t df_flat_width(SEXP x) { R_len_t n = Rf_length(x); R_len_t out = n; const SEXP* v_x = VECTOR_PTR_RO(x); for (R_len_t i = 0; i < n; ++i) { SEXP col = v_x[i]; if (is_data_frame(col)) { out = out + df_flat_width(col) - 1; } } return out; } struct flatten_info { bool flatten; R_len_t width; }; static inline struct flatten_info df_flatten_info(SEXP x) { bool flatten = false; R_len_t n = Rf_length(x); R_len_t width = n; const SEXP* v_x = VECTOR_PTR_RO(x); for (R_len_t i = 0; i < n; ++i) { SEXP col = v_x[i]; if (is_data_frame(col)) { flatten = true; width = width + df_flat_width(col) - 1; } } return (struct flatten_info){flatten, width}; } // [[ register() ]] SEXP vctrs_df_flatten_info(SEXP x) { struct flatten_info info = df_flatten_info(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(out, 0, r_lgl(info.flatten)); SET_VECTOR_ELT(out, 1, r_int(info.width)); UNPROTECT(1); return out; } static R_len_t df_flatten_loop(SEXP x, SEXP out, SEXP out_names, R_len_t counter); // Might return duplicate names. Currently only used for equality // proxy so this doesn't matter. A less bare bone version would repair // names. // // [[ register(); include("type-data-frame.h") ]] SEXP df_flatten(SEXP x) { struct flatten_info info = df_flatten_info(x); if (!info.flatten) { return x; } SEXP out = PROTECT(Rf_allocVector(VECSXP, info.width)); SEXP out_names = PROTECT(Rf_allocVector(STRSXP, info.width)); r_poke_names(out, out_names); df_flatten_loop(x, out, out_names, 0); init_data_frame(out, df_size(x)); UNPROTECT(2); return out; } static R_len_t df_flatten_loop(SEXP x, SEXP out, SEXP out_names, R_len_t counter) { R_len_t n = Rf_length(x); SEXP x_names = PROTECT(r_names(x)); for (R_len_t i = 0; i < n; ++i) { SEXP col = VECTOR_ELT(x, i); if (is_data_frame(col)) { counter = df_flatten_loop(col, out, out_names, counter); } else { SET_VECTOR_ELT(out, counter, col); SET_STRING_ELT(out_names, counter, STRING_ELT(x_names, i)); ++counter; } } UNPROTECT(1); return counter; } SEXP df_repair_names(SEXP x, struct name_repair_opts* name_repair) { SEXP nms = PROTECT(r_names(x)); SEXP repaired = PROTECT(vec_as_names(nms, name_repair)); // Should this go through proxy and restore so that classes can // update metadata and check invariants when special columns are // renamed? if (nms != repaired) { x = PROTECT(r_clone_referenced(x)); r_poke_names(x, repaired); UNPROTECT(1); } UNPROTECT(2); return x; } void vctrs_init_type_data_frame(SEXP ns) { syms_df_lossy_cast = Rf_install("df_lossy_cast"); fns_df_lossy_cast = Rf_findVar(syms_df_lossy_cast, ns); } vctrs/src/vctrs.h0000644000176200001440000004726614042540502013527 0ustar liggesusers#ifndef VCTRS_H #define VCTRS_H #define R_NO_REMAP #include #include #include #include #include extern bool vctrs_debug_verbose; #define VCTRS_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) typedef R_xlen_t r_ssize; #define R_SSIZE_MAX R_XLEN_T_MAX #define r_length Rf_xlength #define r_new_vector Rf_allocVector // An ERR indicates either a C NULL in case of no error, or a // condition object otherwise #define ERR SEXP // Vector types ------------------------------------------------- enum vctrs_type { vctrs_type_null = 0, vctrs_type_unspecified, vctrs_type_logical, vctrs_type_integer, vctrs_type_double, vctrs_type_complex, vctrs_type_character, vctrs_type_raw, vctrs_type_list, vctrs_type_dataframe, vctrs_type_scalar, vctrs_type_s3 = 255 }; /** * @member type The vector type of the original data. * @member proxy_method The function of the `vec_proxy()` method, if * any. This method is looked up with [vec_proxy_method()]. */ struct vctrs_type_info { enum vctrs_type type; SEXP proxy_method; }; /** * @inheritMembers vctrs_type_info * @member type If `proxy_method` was found, the vector type of the * proxy data. Otherwise, the vector type of the original data. * This is never `vctrs_type_s3`. * @member proxy If `proxy_method` was found, the result of invoking * the method. Otherwise, the original data. */ struct vctrs_proxy_info { enum vctrs_type type; SEXP proxy_method; SEXP proxy; }; /** * Return the type information of a vector or its proxy * * `vec_type_info()` returns the vctrs type of `x`. `vec_proxy_info()` * returns the vctrs type of `x` or its proxy if it has one. The * former returns `vctrs_type_s3` with S3 objects (expect for native * types like bare data frames). The latter returns the bare type of * the proxy, if any. It never returns `vctrs_type_s3`. * * `vec_proxy_info()` returns both the proxy method and the proxy * data. `vec_type_info()` only returns the proxy method, which it * needs to determine whether S3 lists and non-vector base types are * scalars or proxied vectors. * * Use `PROTECT_PROXY_INFO()` and `PROTECT_TYPE_INFO()` to protect the * members of the return value. These helpers take a pointer to a * protection counter that can be passed to `UNPROTECT()`. */ struct vctrs_type_info vec_type_info(SEXP x); struct vctrs_proxy_info vec_proxy_info(SEXP x); #define PROTECT_PROXY_INFO(info, n) do { \ PROTECT((info)->proxy); \ PROTECT((info)->proxy_method); \ *n += 2; \ } while (0) #define PROTECT_TYPE_INFO(info, n) do { \ PROTECT((info)->proxy_method); \ *n += 1; \ } while (0) enum vctrs_type vec_typeof(SEXP x); enum vctrs_type vec_proxy_typeof(SEXP x); const char* vec_type_as_str(enum vctrs_type type); bool vec_is_list(SEXP x); bool vec_is_vector(SEXP x); bool vec_is_partial(SEXP x); // After adding a new `vctrs_dispatch` type, add the missing entries // in `vec_typeof2()` enum vctrs_type2 { vctrs_type2_null_null, vctrs_type2_null_unspecified, vctrs_type2_null_logical, vctrs_type2_null_integer, vctrs_type2_null_double, vctrs_type2_null_complex, vctrs_type2_null_character, vctrs_type2_null_raw, vctrs_type2_null_list, vctrs_type2_null_dataframe, vctrs_type2_null_s3, vctrs_type2_null_scalar, vctrs_type2_unspecified_unspecified, vctrs_type2_unspecified_logical, vctrs_type2_unspecified_integer, vctrs_type2_unspecified_double, vctrs_type2_unspecified_complex, vctrs_type2_unspecified_character, vctrs_type2_unspecified_raw, vctrs_type2_unspecified_list, vctrs_type2_unspecified_dataframe, vctrs_type2_unspecified_s3, vctrs_type2_unspecified_scalar, vctrs_type2_logical_logical, vctrs_type2_logical_integer, vctrs_type2_logical_double, vctrs_type2_logical_complex, vctrs_type2_logical_character, vctrs_type2_logical_raw, vctrs_type2_logical_list, vctrs_type2_logical_dataframe, vctrs_type2_logical_s3, vctrs_type2_logical_scalar, vctrs_type2_integer_integer, vctrs_type2_integer_double, vctrs_type2_integer_complex, vctrs_type2_integer_character, vctrs_type2_integer_raw, vctrs_type2_integer_list, vctrs_type2_integer_dataframe, vctrs_type2_integer_s3, vctrs_type2_integer_scalar, vctrs_type2_double_double, vctrs_type2_double_complex, vctrs_type2_double_character, vctrs_type2_double_raw, vctrs_type2_double_list, vctrs_type2_double_dataframe, vctrs_type2_double_s3, vctrs_type2_double_scalar, vctrs_type2_complex_complex, vctrs_type2_complex_character, vctrs_type2_complex_raw, vctrs_type2_complex_list, vctrs_type2_complex_dataframe, vctrs_type2_complex_s3, vctrs_type2_complex_scalar, vctrs_type2_character_character, vctrs_type2_character_raw, vctrs_type2_character_list, vctrs_type2_character_dataframe, vctrs_type2_character_s3, vctrs_type2_character_scalar, vctrs_type2_raw_raw, vctrs_type2_raw_list, vctrs_type2_raw_dataframe, vctrs_type2_raw_s3, vctrs_type2_raw_scalar, vctrs_type2_list_list, vctrs_type2_list_dataframe, vctrs_type2_list_s3, vctrs_type2_list_scalar, vctrs_type2_dataframe_dataframe, vctrs_type2_dataframe_s3, vctrs_type2_dataframe_scalar, vctrs_type2_s3_s3, vctrs_type2_s3_scalar, vctrs_type2_scalar_scalar }; enum vctrs_type2_s3 { vctrs_type2_s3_null_bare_factor, vctrs_type2_s3_null_bare_ordered, vctrs_type2_s3_null_bare_date, vctrs_type2_s3_null_bare_posixct, vctrs_type2_s3_null_bare_posixlt, vctrs_type2_s3_null_bare_tibble, vctrs_type2_s3_null_unknown, vctrs_type2_s3_unspecified_bare_factor, vctrs_type2_s3_unspecified_bare_ordered, vctrs_type2_s3_unspecified_bare_date, vctrs_type2_s3_unspecified_bare_posixct, vctrs_type2_s3_unspecified_bare_posixlt, vctrs_type2_s3_unspecified_bare_tibble, vctrs_type2_s3_unspecified_unknown, vctrs_type2_s3_logical_bare_factor, vctrs_type2_s3_logical_bare_ordered, vctrs_type2_s3_logical_bare_date, vctrs_type2_s3_logical_bare_posixct, vctrs_type2_s3_logical_bare_posixlt, vctrs_type2_s3_logical_bare_tibble, vctrs_type2_s3_logical_unknown, vctrs_type2_s3_integer_bare_factor, vctrs_type2_s3_integer_bare_ordered, vctrs_type2_s3_integer_bare_date, vctrs_type2_s3_integer_bare_posixct, vctrs_type2_s3_integer_bare_posixlt, vctrs_type2_s3_integer_bare_tibble, vctrs_type2_s3_integer_unknown, vctrs_type2_s3_double_bare_factor, vctrs_type2_s3_double_bare_ordered, vctrs_type2_s3_double_bare_date, vctrs_type2_s3_double_bare_posixct, vctrs_type2_s3_double_bare_posixlt, vctrs_type2_s3_double_bare_tibble, vctrs_type2_s3_double_unknown, vctrs_type2_s3_complex_bare_factor, vctrs_type2_s3_complex_bare_ordered, vctrs_type2_s3_complex_bare_date, vctrs_type2_s3_complex_bare_posixct, vctrs_type2_s3_complex_bare_posixlt, vctrs_type2_s3_complex_bare_tibble, vctrs_type2_s3_complex_unknown, vctrs_type2_s3_character_bare_factor, vctrs_type2_s3_character_bare_ordered, vctrs_type2_s3_character_bare_date, vctrs_type2_s3_character_bare_posixct, vctrs_type2_s3_character_bare_posixlt, vctrs_type2_s3_character_bare_tibble, vctrs_type2_s3_character_unknown, vctrs_type2_s3_raw_bare_factor, vctrs_type2_s3_raw_bare_ordered, vctrs_type2_s3_raw_bare_date, vctrs_type2_s3_raw_bare_posixct, vctrs_type2_s3_raw_bare_posixlt, vctrs_type2_s3_raw_bare_tibble, vctrs_type2_s3_raw_unknown, vctrs_type2_s3_list_bare_factor, vctrs_type2_s3_list_bare_ordered, vctrs_type2_s3_list_bare_date, vctrs_type2_s3_list_bare_posixct, vctrs_type2_s3_list_bare_posixlt, vctrs_type2_s3_list_bare_tibble, vctrs_type2_s3_list_unknown, vctrs_type2_s3_dataframe_bare_factor, vctrs_type2_s3_dataframe_bare_ordered, vctrs_type2_s3_dataframe_bare_date, vctrs_type2_s3_dataframe_bare_posixct, vctrs_type2_s3_dataframe_bare_posixlt, vctrs_type2_s3_dataframe_bare_tibble, vctrs_type2_s3_dataframe_unknown, vctrs_type2_s3_scalar_bare_factor, vctrs_type2_s3_scalar_bare_ordered, vctrs_type2_s3_scalar_bare_date, vctrs_type2_s3_scalar_bare_posixct, vctrs_type2_s3_scalar_bare_posixlt, vctrs_type2_s3_scalar_bare_tibble, vctrs_type2_s3_scalar_unknown, vctrs_type2_s3_bare_factor_bare_factor, vctrs_type2_s3_bare_factor_bare_ordered, vctrs_type2_s3_bare_factor_bare_date, vctrs_type2_s3_bare_factor_bare_posixct, vctrs_type2_s3_bare_factor_bare_posixlt, vctrs_type2_s3_bare_factor_bare_tibble, vctrs_type2_s3_bare_factor_unknown, vctrs_type2_s3_bare_ordered_bare_ordered, vctrs_type2_s3_bare_ordered_bare_date, vctrs_type2_s3_bare_ordered_bare_posixct, vctrs_type2_s3_bare_ordered_bare_posixlt, vctrs_type2_s3_bare_ordered_bare_tibble, vctrs_type2_s3_bare_ordered_unknown, vctrs_type2_s3_bare_date_bare_date, vctrs_type2_s3_bare_date_bare_posixct, vctrs_type2_s3_bare_date_bare_posixlt, vctrs_type2_s3_bare_date_bare_tibble, vctrs_type2_s3_bare_date_unknown, vctrs_type2_s3_bare_posixct_bare_posixct, vctrs_type2_s3_bare_posixct_bare_posixlt, vctrs_type2_s3_bare_posixct_bare_tibble, vctrs_type2_s3_bare_posixct_unknown, vctrs_type2_s3_bare_posixlt_bare_posixlt, vctrs_type2_s3_bare_posixlt_bare_tibble, vctrs_type2_s3_bare_posixlt_unknown, vctrs_type2_s3_bare_tibble_bare_tibble, vctrs_type2_s3_bare_tibble_unknown, vctrs_type2_s3_unknown_unknown }; enum vctrs_type2 vec_typeof2(SEXP x, SEXP y); const char* vctrs_type2_as_str(enum vctrs_type2 type); extern SEXP vctrs_shared_empty_lgl; extern SEXP vctrs_shared_empty_int; extern SEXP vctrs_shared_empty_dbl; extern SEXP vctrs_shared_empty_cpl; extern SEXP vctrs_shared_empty_chr; extern SEXP vctrs_shared_empty_raw; extern SEXP vctrs_shared_empty_list; extern SEXP vctrs_shared_empty_date; extern SEXP vctrs_shared_empty_uns; extern SEXP vctrs_shared_true; extern SEXP vctrs_shared_false; extern Rcomplex vctrs_shared_na_cpl; extern SEXP vctrs_shared_na_lgl; extern SEXP vctrs_shared_na_list; SEXP vec_unspecified(R_len_t n); bool vec_is_unspecified(SEXP x); // Vector methods ------------------------------------------------ #include "arg.h" #include "names.h" #include "owned.h" enum vctrs_proxy_kind { VCTRS_PROXY_KIND_default, VCTRS_PROXY_KIND_equal, VCTRS_PROXY_KIND_compare, VCTRS_PROXY_KIND_order, VCTRS_PROXY_KIND_complete }; SEXP vec_proxy(SEXP x); SEXP vec_proxy_equal(SEXP x); SEXP vec_proxy_compare(SEXP x); SEXP vec_proxy_order(SEXP x); SEXP vec_proxy_complete(SEXP x); SEXP vec_restore(SEXP x, SEXP to, SEXP n, const enum vctrs_owned owned); SEXP vec_restore_default(SEXP x, SEXP to, const enum vctrs_owned owned); R_len_t vec_size(SEXP x); R_len_t vec_size_common(SEXP xs, R_len_t absent); SEXP vec_cast_common(SEXP xs, SEXP to); SEXP vec_slice(SEXP x, SEXP subscript); SEXP vec_slice_impl(SEXP x, SEXP subscript); SEXP vec_chop(SEXP x, SEXP indices); SEXP vec_slice_shaped(enum vctrs_type type, SEXP x, SEXP index); SEXP vec_proxy_assign(SEXP proxy, SEXP index, SEXP value); bool vec_requires_fallback(SEXP x, struct vctrs_proxy_info info); SEXP vec_init(SEXP x, R_len_t n); SEXP vec_ptype(SEXP x, struct vctrs_arg* x_arg); SEXP vec_ptype_finalise(SEXP x); bool vec_is_unspecified(SEXP x); SEXP vec_recycle(SEXP x, R_len_t size, struct vctrs_arg* x_arg); SEXP vec_recycle_fallback(SEXP x, R_len_t size, struct vctrs_arg* x_arg); SEXP vec_recycle_common(SEXP xs, R_len_t size); SEXP vec_names(SEXP x); SEXP vec_proxy_names(SEXP x); SEXP vec_group_loc(SEXP x); SEXP vec_identify_runs(SEXP x); SEXP vec_match_params(SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg); #include "cast.h" static inline SEXP vec_cast(SEXP x, SEXP to, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg) { struct cast_opts opts = { .x = x, .to = to, .x_arg = x_arg, .to_arg = to_arg }; return vec_cast_opts(&opts); } static inline SEXP vec_match(SEXP needles, SEXP haystack) { return vec_match_params(needles, haystack, true, NULL, NULL); } SEXP vec_c(SEXP xs, SEXP ptype, SEXP name_spec, const struct name_repair_opts* name_repair); bool is_data_frame(SEXP x); R_len_t df_size(SEXP x); R_len_t df_rownames_size(SEXP x); R_len_t df_raw_size(SEXP x); R_len_t df_raw_size_from_list(SEXP x); SEXP vec_bare_df_restore(SEXP x, SEXP to, SEXP n, const enum vctrs_owned owned); SEXP vec_df_restore(SEXP x, SEXP to, SEXP n, const enum vctrs_owned owned); // equal_object() never propagates missingness, so // it can return a `bool` bool equal_object(SEXP x, SEXP y); bool equal_object_normalized(SEXP x, SEXP y); bool equal_names(SEXP x, SEXP y); int compare_scalar(SEXP x, R_len_t i, SEXP y, R_len_t j, bool na_equal); uint32_t hash_object(SEXP x); void hash_fill(uint32_t* p, R_len_t n, SEXP x, bool na_equal); SEXP vec_unique(SEXP x); bool duplicated_any(SEXP names); // Data frame column iteration ---------------------------------- // Used in functions that treat data frames as vectors of rows, but // iterate over columns. Examples are `vec_equal()` and // `vec_compare()`. /** * @member row_known A boolean array of size `n_row`. Allocated on the R heap. * Initially, all values are initialized to `false`. As we iterate along the * columns, we flip the corresponding row's `row_known` value to `true` if we * can determine the `out` value for that row from the current columns. * Once a row's `row_known` value is `true`, we never check that row again * as we continue through the columns. * @member p_row_known A pointer to the boolean array stored in `row_known`. * Initialized with `(bool*) RAW(info.row_known)`. * @member remaining The number of `row_known` values that are still `false`. * If this hits `0` before we traverse the entire data frame, we can exit * immediately because all `out` values are already known. * @member size The number of rows in the data frame. */ struct df_short_circuit_info { SEXP row_known; bool* p_row_known; PROTECT_INDEX row_known_pi; R_len_t remaining; R_len_t size; }; #define PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, p_n) do { \ PROTECT_WITH_INDEX((p_info)->row_known, &(p_info)->row_known_pi); \ *(p_n) += 1; \ } while (0) static inline struct df_short_circuit_info new_df_short_circuit_info(R_len_t size, bool lazy) { SEXP row_known; bool* p_row_known; if (lazy) { row_known = PROTECT(R_NilValue); p_row_known = NULL; } else { row_known = PROTECT(Rf_allocVector(RAWSXP, size * sizeof(bool))); p_row_known = (bool*) RAW(row_known); // To begin with, no rows have a known comparison value memset(p_row_known, false, size * sizeof(bool)); } struct df_short_circuit_info info = { .row_known = row_known, .p_row_known = p_row_known, .remaining = size, .size = size }; UNPROTECT(1); return info; } static inline void init_lazy_df_short_circuit_info(struct df_short_circuit_info* p_info) { if (p_info->row_known != R_NilValue) { return; } p_info->row_known = Rf_allocVector(RAWSXP, p_info->size * sizeof(bool)); REPROTECT(p_info->row_known, p_info->row_known_pi); p_info->p_row_known = (bool*) RAW(p_info->row_known); } // Missing values ----------------------------------------------- // Annex F of C99 specifies that `double` should conform to the IEEE 754 // type `binary64`, which is defined as: // * 1 bit : sign // * 11 bits: exponent // * 52 bits: significand // // R stores the value "1954" in the last 32 bits: this payload marks // the value as a NA, not a regular NaN. // // On big endian systems, this corresponds to the second element of an // integer array of size 2. On little endian systems, this is flipped // and the NA marker is in the first element. // // The type assumptions made here are asserted in `vctrs_init_utils()` #ifdef WORDS_BIGENDIAN static const int vctrs_indicator_pos = 1; #else static const int vctrs_indicator_pos = 0; #endif union vctrs_dbl_indicator { double value; // 8 bytes unsigned int key[2]; // 4 * 2 bytes }; enum vctrs_dbl_class { vctrs_dbl_number, vctrs_dbl_missing, vctrs_dbl_nan }; enum vctrs_dbl_class dbl_classify(double x); // Factor methods ----------------------------------------------- SEXP chr_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg); SEXP chr_as_ordered(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg); SEXP fct_as_character(SEXP x, struct vctrs_arg* x_arg); SEXP fct_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg); SEXP ord_as_character(SEXP x, struct vctrs_arg* x_arg); // Datetime methods --------------------------------------------- SEXP date_as_date(SEXP x); SEXP date_as_posixct(SEXP x, SEXP to); SEXP date_as_posixlt(SEXP x, SEXP to); SEXP posixct_as_date(SEXP x, bool* lossy); SEXP posixlt_as_date(SEXP x, bool* lossy); SEXP posixct_as_posixct(SEXP x, SEXP to); SEXP posixlt_as_posixct(SEXP x, SEXP to); SEXP posixct_as_posixlt(SEXP x, SEXP to); SEXP posixlt_as_posixlt(SEXP x, SEXP to); SEXP vec_date_restore(SEXP x, SEXP to, const enum vctrs_owned owned); SEXP vec_posixct_restore(SEXP x, SEXP to, const enum vctrs_owned owned); SEXP vec_posixlt_restore(SEXP x, SEXP to, const enum vctrs_owned owned); SEXP date_datetime_ptype2(SEXP x, SEXP y); SEXP datetime_datetime_ptype2(SEXP x, SEXP y); // Growable vector ---------------------------------------------- struct growable { SEXP x; SEXPTYPE type; void* array; PROTECT_INDEX idx; int n; int capacity; }; struct growable new_growable(SEXPTYPE type, int capacity); SEXP growable_values(struct growable* g); static inline void growable_push_int(struct growable* g, int i) { if (g->n == g->capacity) { g->capacity *= 2; g->x = Rf_lengthgets(g->x, g->capacity); REPROTECT(g->x, g->idx); g->array = INTEGER(g->x); } int* p = (int*) g->array; p[g->n] = i; ++(g->n); } #define PROTECT_GROWABLE(g, n) do { \ PROTECT_WITH_INDEX((g)->x, &((g)->idx)); \ *n += 1; \ } while(0) #define UNPROTECT_GROWABLE(g) do { UNPROTECT(1);} while(0) // Conditions --------------------------------------------------- void stop_scalar_type(SEXP x, struct vctrs_arg* arg) __attribute__((noreturn)); void vec_assert(SEXP x, struct vctrs_arg* arg); __attribute__((noreturn)) void stop_incompatible_size(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg); __attribute__((noreturn)) void stop_incompatible_type(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, bool cast); __attribute__((noreturn)) void stop_recycle_incompatible_size(R_len_t x_size, R_len_t size, struct vctrs_arg* x_arg); __attribute__((noreturn)) void stop_incompatible_shape(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, int axis, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); void stop_corrupt_factor_levels(SEXP x, struct vctrs_arg* arg) __attribute__((noreturn)); void stop_corrupt_ordered_levels(SEXP x, struct vctrs_arg* arg) __attribute__((noreturn)); // Compatibility ------------------------------------------------ #if (R_VERSION < R_Version(3, 5, 0)) # define LOGICAL_RO(x) ((const int*) LOGICAL(x)) # define INTEGER_RO(x) ((const int*) INTEGER(x)) # define REAL_RO(x) ((const double*) REAL(x)) # define COMPLEX_RO(x) ((const Rcomplex*) COMPLEX(x)) # define STRING_PTR_RO(x) ((const SEXP*) STRING_PTR(x)) # define RAW_RO(x) ((const Rbyte*) RAW(x)) # define DATAPTR_RO(x) ((const void*) STRING_PTR(x)) #endif #define VECTOR_PTR_RO(x) ((const SEXP*) DATAPTR_RO(x)) #endif vctrs/src/equal.c0000644000176200001440000004742614042540502013466 0ustar liggesusers#include #include "equal.h" #include "vctrs.h" #include "utils.h" #include "translate.h" // ----------------------------------------------------------------------------- static SEXP vec_equal(SEXP x, SEXP y, bool na_equal); // [[ register() ]] SEXP vctrs_equal(SEXP x, SEXP y, SEXP na_equal) { bool c_na_equal = r_bool_as_int(na_equal); return vec_equal(x, y, c_na_equal); } static SEXP lgl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static SEXP int_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static SEXP dbl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static SEXP cpl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static SEXP chr_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static SEXP raw_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static SEXP list_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static SEXP df_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); /* * Recycling and casting is done at the R level */ static SEXP vec_equal(SEXP x, SEXP y, bool na_equal) { SEXP x_proxy = PROTECT(vec_proxy_equal(x)); SEXP y_proxy = PROTECT(vec_proxy_equal(y)); x_proxy = PROTECT(vec_normalize_encoding(x_proxy)); y_proxy = PROTECT(vec_normalize_encoding(y_proxy)); R_len_t size = vec_size(x_proxy); enum vctrs_type type = vec_proxy_typeof(x_proxy); if (type != vec_proxy_typeof(y_proxy) || size != vec_size(y_proxy)) { Rf_errorcall(R_NilValue, "`x` and `y` must have same types and lengths."); } SEXP out; switch (type) { case vctrs_type_logical: out = lgl_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_integer: out = int_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_double: out = dbl_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_complex: out = cpl_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_character: out = chr_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_raw: out = raw_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_list: out = list_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_dataframe: out = df_equal(x_proxy, y_proxy, size, na_equal); break; case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", type); } UNPROTECT(4); return out; } // ----------------------------------------------------------------------------- #define EQUAL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE) \ SEXP out = PROTECT(r_new_logical(size)); \ int* p_out = LOGICAL(out); \ \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ if (na_equal) { \ for (R_len_t i = 0; i < size; ++i) { \ p_out[i] = EQUAL_NA_EQUAL(p_x[i], p_y[i]); \ } \ } else { \ for (R_len_t i = 0; i < size; ++i) { \ p_out[i] = EQUAL_NA_PROPAGATE(p_x[i], p_y[i]); \ } \ } \ \ UNPROTECT(1); \ return out; static SEXP lgl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(int, LOGICAL_RO, lgl_equal_na_equal, lgl_equal_na_propagate); } static SEXP int_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(int, INTEGER_RO, int_equal_na_equal, int_equal_na_propagate); } static SEXP dbl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(double, REAL_RO, dbl_equal_na_equal, dbl_equal_na_propagate); } static SEXP cpl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal, cpl_equal_na_propagate); } static SEXP chr_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(SEXP, STRING_PTR_RO, chr_equal_na_equal, chr_equal_na_propagate); } static SEXP raw_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(Rbyte, RAW_RO, raw_equal_na_equal, raw_equal_na_propagate); } static SEXP list_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(SEXP, VECTOR_PTR_RO, list_equal_na_equal, list_equal_na_propagate); } #undef EQUAL // ----------------------------------------------------------------------------- static void vec_equal_col_na_equal(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info); static void vec_equal_col_na_propagate(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info); static SEXP df_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { int nprot = 0; SEXP out = PROTECT_N(r_new_logical(size), &nprot); int* p_out = LOGICAL(out); // Initialize to "equality" value // and only change if we learn that it differs for (R_len_t i = 0; i < size; ++i) { p_out[i] = 1; } struct df_short_circuit_info info = new_df_short_circuit_info(size, false); struct df_short_circuit_info* p_info = &info; PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, &nprot); R_len_t n_col = Rf_length(x); if (n_col != Rf_length(y)) { Rf_errorcall(R_NilValue, "`x` and `y` must have the same number of columns"); } void (*vec_equal_col)(SEXP, SEXP, int*, struct df_short_circuit_info*); if (na_equal) { vec_equal_col = vec_equal_col_na_equal; } else { vec_equal_col = vec_equal_col_na_propagate; } const SEXP* p_x = VECTOR_PTR_RO(x); const SEXP* p_y = VECTOR_PTR_RO(y); for (R_len_t i = 0; i < n_col; ++i) { vec_equal_col(p_x[i], p_y[i], p_out, p_info); if (p_info->remaining == 0) { break; } } UNPROTECT(nprot); return out; } // ----------------------------------------------------------------------------- #define EQUAL_COL(CTYPE, CONST_DEREF, EQUAL) do { \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ for (R_len_t i = 0; i < p_info->size; ++i) { \ if (p_info->p_row_known[i]) { \ continue; \ } \ \ int eq = EQUAL(p_x[i], p_y[i]); \ \ if (eq <= 0) { \ p_out[i] = eq; \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ } \ } while (0) static void vec_equal_col_na_equal(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info) { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: EQUAL_COL(int, LOGICAL_RO, lgl_equal_na_equal); break; case vctrs_type_integer: EQUAL_COL(int, INTEGER_RO, int_equal_na_equal); break; case vctrs_type_double: EQUAL_COL(double, REAL_RO, dbl_equal_na_equal); break; case vctrs_type_complex: EQUAL_COL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal); break; case vctrs_type_character: EQUAL_COL(SEXP, STRING_PTR_RO, chr_equal_na_equal); break; case vctrs_type_raw: EQUAL_COL(Rbyte, RAW_RO, raw_equal_na_equal); break; case vctrs_type_list: EQUAL_COL(SEXP, VECTOR_PTR_RO, list_equal_na_equal); break; case vctrs_type_dataframe: stop_internal("vec_equal", "Data frame columns should be flattened already."); case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", vec_proxy_typeof(x)); } } static void vec_equal_col_na_propagate(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info) { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: EQUAL_COL(int, LOGICAL_RO, lgl_equal_na_propagate); break; case vctrs_type_integer: EQUAL_COL(int, INTEGER_RO, int_equal_na_propagate); break; case vctrs_type_double: EQUAL_COL(double, REAL_RO, dbl_equal_na_propagate); break; case vctrs_type_complex: EQUAL_COL(Rcomplex, COMPLEX_RO, cpl_equal_na_propagate); break; case vctrs_type_character: EQUAL_COL(SEXP, STRING_PTR_RO, chr_equal_na_propagate); break; case vctrs_type_raw: EQUAL_COL(Rbyte, RAW_RO, raw_equal_na_propagate); break; case vctrs_type_list: EQUAL_COL(SEXP, VECTOR_PTR_RO, list_equal_na_propagate); break; case vctrs_type_dataframe: stop_internal("vec_equal", "Data frame columns should be flattened already."); case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", vec_proxy_typeof(x)); } } #undef EQUAL_COL // ----------------------------------------------------------------------------- // Missingness is never propagated through objects, // so `na_equal` is always `true` in these macros #define EQUAL_ALL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL) \ do { \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ for (R_len_t i = 0; i < n; ++i) { \ if (!EQUAL_NA_EQUAL(p_x[i], p_y[i])) { \ return false; \ } \ } \ return true; \ } \ while (0) static inline bool vec_equal_attrib(SEXP x, SEXP y); // [[ include("vctrs.h") ]] bool equal_object(SEXP x, SEXP y) { x = PROTECT(vec_normalize_encoding(x)); y = PROTECT(vec_normalize_encoding(y)); bool out = equal_object_normalized(x, y); UNPROTECT(2); return out; } // Assumes `vec_normalize_encoding()` has already been called // [[ include("vctrs.h") ]] bool equal_object_normalized(SEXP x, SEXP y) { SEXPTYPE type = TYPEOF(x); if (type != TYPEOF(y)) { return false; } // Pointer comparison is all that is required for these types switch (type) { case NILSXP: case SYMSXP: case SPECIALSXP: case BUILTINSXP: case CHARSXP: case ENVSXP: case EXTPTRSXP: return x == y; } // For other types, try a pointer comparison first before // performing an in depth equality check if (x == y) { return true; } switch (type) { // Handled below case LGLSXP: case INTSXP: case REALSXP: case STRSXP: case RAWSXP: case CPLXSXP: case EXPRSXP: case VECSXP: break; case DOTSXP: case LANGSXP: case LISTSXP: case BCODESXP: { if (!equal_object_normalized(ATTRIB(x), ATTRIB(y))) { return false; } if (!equal_object_normalized(CAR(x), CAR(y))) { return false; } x = CDR(x); y = CDR(y); if (!equal_object_normalized(x, y)) { return false; } return true; } case CLOSXP: if (!equal_object_normalized(ATTRIB(x), ATTRIB(y))) { return false; } if (!equal_object_normalized(BODY(x), BODY(y))) { return false; } if (!equal_object_normalized(CLOENV(x), CLOENV(y))) { return false; } if (!equal_object_normalized(FORMALS(x), FORMALS(y))) { return false; } return true; case NILSXP: case SYMSXP: case SPECIALSXP: case BUILTINSXP: case CHARSXP: case ENVSXP: case EXTPTRSXP: // These are handled above with pointer comparison stop_internal("equal_object_normalized", "Unexpected reference type."); default: stop_unimplemented_type("equal_object_normalized", TYPEOF(x)); } R_len_t n = Rf_length(x); if (n != Rf_length(y)) { return false; } if (!vec_equal_attrib(x, y)) { return false; } switch (type) { case LGLSXP: EQUAL_ALL(int, LOGICAL_RO, lgl_equal_na_equal); case INTSXP: EQUAL_ALL(int, INTEGER_RO, int_equal_na_equal); case REALSXP: EQUAL_ALL(double, REAL_RO, dbl_equal_na_equal); case STRSXP: EQUAL_ALL(SEXP, STRING_PTR_RO, chr_equal_na_equal); case RAWSXP: EQUAL_ALL(Rbyte, RAW_RO, raw_equal_na_equal); case CPLXSXP: EQUAL_ALL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal); case EXPRSXP: case VECSXP: EQUAL_ALL(SEXP, VECTOR_PTR_RO, list_equal_na_equal); default: stop_unimplemented_type("equal_object", type); } } #undef EQUAL_ALL // [[ register() ]] SEXP vctrs_equal_object(SEXP x, SEXP y) { return Rf_ScalarLogical(equal_object(x, y)); } // TODO: Sort attributes by tag before comparison static inline bool vec_equal_attrib(SEXP x, SEXP y) { SEXP x_attrs = ATTRIB(x); SEXP y_attrs = ATTRIB(y); while (x_attrs != R_NilValue) { if (y_attrs == R_NilValue) { return false; } SEXP x_tag = TAG(x_attrs); SEXP y_tag = TAG(x_attrs); if (x_tag != y_tag) { return false; } if (!equal_object_normalized(CAR(x_attrs), CAR(y_attrs))) { return false; } x_attrs = CDR(x_attrs); y_attrs = CDR(y_attrs); } return true; } // [[ include("vctrs.h") ]] bool equal_names(SEXP x, SEXP y) { SEXP x_names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); SEXP y_names = PROTECT(Rf_getAttrib(y, R_NamesSymbol)); bool out = equal_object(x_names, y_names); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_equal_na(SEXP x) { return vec_equal_na(x); } #define EQUAL_NA(CTYPE, CONST_DEREF, IS_MISSING) \ do { \ SEXP out = PROTECT(Rf_allocVector(LGLSXP, size)); \ int* p_out = LOGICAL(out); \ \ const CTYPE* p_x = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i) { \ p_out[i] = IS_MISSING(p_x[i]); \ } \ \ UNPROTECT(2); \ return out; \ } \ while (0) static SEXP df_equal_na(SEXP x, R_len_t size); // [[ include("equal.h") ]] SEXP vec_equal_na(SEXP x) { R_len_t size = vec_size(x); x = PROTECT(vec_proxy_equal(x)); enum vctrs_type type = vec_proxy_typeof(x); switch (type) { case vctrs_type_logical: EQUAL_NA(int, LOGICAL_RO, lgl_is_missing); case vctrs_type_integer: EQUAL_NA(int, INTEGER_RO, int_is_missing); case vctrs_type_double: EQUAL_NA(double, REAL_RO, dbl_is_missing); case vctrs_type_complex: EQUAL_NA(Rcomplex, COMPLEX_RO, cpl_is_missing); case vctrs_type_raw: EQUAL_NA(Rbyte, RAW_RO, raw_is_missing); case vctrs_type_character: EQUAL_NA(SEXP, STRING_PTR_RO, chr_is_missing); case vctrs_type_list: EQUAL_NA(SEXP, VECTOR_PTR_RO, list_is_missing); case vctrs_type_dataframe: { SEXP out = df_equal_na(x, size); UNPROTECT(1); return out; } case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't detect `NA` values in scalars with `vctrs_equal_na()`."); default: Rf_error("Unimplemented type in `vctrs_equal_na()`."); } } #undef EQUAL_NA // ----------------------------------------------------------------------------- static void vec_equal_na_col(int* p_out, struct df_short_circuit_info* p_info, SEXP x); static void df_equal_na_impl(int* p_out, struct df_short_circuit_info* p_info, SEXP x) { int n_col = Rf_length(x); for (R_len_t i = 0; i < n_col; ++i) { SEXP col = VECTOR_ELT(x, i); vec_equal_na_col(p_out, p_info, col); // If all rows have at least one non-missing value, break if (p_info->remaining == 0) { break; } } } static SEXP df_equal_na(SEXP x, R_len_t size) { int nprot = 0; SEXP out = PROTECT_N(Rf_allocVector(LGLSXP, size), &nprot); int* p_out = LOGICAL(out); // Initialize to "equality" value // and only change if we learn that it differs for (R_len_t i = 0; i < size; ++i) { p_out[i] = 1; } struct df_short_circuit_info info = new_df_short_circuit_info(size, false); struct df_short_circuit_info* p_info = &info; PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, &nprot); df_equal_na_impl(p_out, p_info, x); UNPROTECT(nprot); return out; } // ----------------------------------------------------------------------------- #define EQUAL_NA_COL(CTYPE, CONST_DEREF, IS_MISSING) \ do { \ const CTYPE* p_x = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < p_info->size; ++i) { \ if (p_info->p_row_known[i]) { \ continue; \ } \ \ if (!IS_MISSING(p_x[i])) { \ p_out[i] = 0; \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ } \ } \ while (0) static void vec_equal_na_col(int* p_out, struct df_short_circuit_info* p_info, SEXP x) { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: EQUAL_NA_COL(int, LOGICAL_RO, lgl_is_missing); break; case vctrs_type_integer: EQUAL_NA_COL(int, INTEGER_RO, int_is_missing); break; case vctrs_type_double: EQUAL_NA_COL(double, REAL_RO, dbl_is_missing); break; case vctrs_type_complex: EQUAL_NA_COL(Rcomplex, COMPLEX_RO, cpl_is_missing); break; case vctrs_type_raw: EQUAL_NA_COL(Rbyte, RAW_RO, raw_is_missing); break; case vctrs_type_character: EQUAL_NA_COL(SEXP, STRING_PTR_RO, chr_is_missing); break; case vctrs_type_list: EQUAL_NA_COL(SEXP, VECTOR_PTR_RO, list_is_missing); break; case vctrs_type_dataframe: df_equal_na_impl(p_out, p_info, x); break; case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal_na()`"); default: Rf_error("Unimplemented type in `vec_equal_na()`"); } } #undef EQUAL_NA_COL vctrs/src/type-tibble.h0000644000176200001440000000026613671672047014614 0ustar liggesusers#ifndef VCTRS_TYPE_TIBBLE_H #define VCTRS_TYPE_TIBBLE_H #include "ptype2.h" SEXP tib_ptype2(const struct ptype2_opts* opts); SEXP tib_cast(const struct cast_opts* opts); #endif vctrs/src/arg.h0000644000176200001440000000350113712211241013115 0ustar liggesusers#ifndef VCTRS_ARG_H #define VCTRS_ARG_H /** * Structure for argument tags * * Argument tags are used in error messages to provide information * about which elements of nested data structures (such as tibbles) * fail to match a given type. They are generated lazily by the `fill` * method in order to avoid any cost when there is no error. * * @member parent The previously active argument tag. * @member fill Takes a pointer to data, and a buffer to fill. If the * buffer is too small according to the `remaining` argument, * `fill()` must return a negative error value. */ struct vctrs_arg { struct vctrs_arg* parent; r_ssize (*fill)(void* data, char* buf, r_ssize remaining); void* data; }; // Simple wrapper around a string struct vctrs_arg new_wrapper_arg(struct vctrs_arg* parent, const char* arg); // Wrapper around a counter representing the current position of the // argument struct arg_data_counter { R_len_t* i; SEXP* names; R_len_t* names_i; }; struct vctrs_arg new_counter_arg(struct vctrs_arg* parent, struct arg_data_counter* data); struct arg_data_counter new_counter_arg_data(R_len_t* i, SEXP* names, R_len_t* names_i); // Wrapper around a string that should be prefixed with `$`, unless // that's the first argument of the chain struct arg_data_index { const char* arg; struct vctrs_arg* parent; }; struct vctrs_arg new_index_arg(struct vctrs_arg* parent, struct arg_data_index* data); struct arg_data_index new_index_arg_data(const char* arg, struct vctrs_arg* parent); // Materialise an argument tag as a CHARSXP. SEXP vctrs_arg(struct vctrs_arg* arg); #endif vctrs/src/subscript.h0000644000176200001440000000376413650511520014401 0ustar liggesusers#ifndef VCTRS_SUBSCRIPT_H #define VCTRS_SUBSCRIPT_H #include "utils.h" enum subscript_action { SUBSCRIPT_ACTION_DEFAULT = 0, SUBSCRIPT_ACTION_SUBSET, SUBSCRIPT_ACTION_EXTRACT, SUBSCRIPT_ACTION_ASSIGN, SUBSCRIPT_ACTION_RENAME, SUBSCRIPT_ACTION_REMOVE, SUBSCRIPT_ACTION_NEGATE }; enum subscript_type_action { SUBSCRIPT_TYPE_ACTION_CAST = 0, SUBSCRIPT_TYPE_ACTION_ERROR }; struct subscript_opts { enum subscript_action action; enum subscript_type_action logical; enum subscript_type_action numeric; enum subscript_type_action character; struct vctrs_arg* subscript_arg; }; static const struct subscript_opts subscript_default_opts = { .action = SUBSCRIPT_ACTION_DEFAULT, .logical = SUBSCRIPT_TYPE_ACTION_CAST, .numeric = SUBSCRIPT_TYPE_ACTION_CAST, .character = SUBSCRIPT_TYPE_ACTION_CAST, .subscript_arg = NULL }; static const struct subscript_opts subscript_default_assign_opts = { .action = SUBSCRIPT_ACTION_ASSIGN, .logical = SUBSCRIPT_TYPE_ACTION_CAST, .numeric = SUBSCRIPT_TYPE_ACTION_CAST, .character = SUBSCRIPT_TYPE_ACTION_CAST, .subscript_arg = NULL }; SEXP vec_as_subscript_opts(SEXP subscript, const struct subscript_opts* opts, ERR* err); static inline SEXP subscript_type_action_chr(enum subscript_type_action action) { switch (action) { case SUBSCRIPT_TYPE_ACTION_CAST: return chrs_cast; case SUBSCRIPT_TYPE_ACTION_ERROR: return chrs_error; } never_reached("subscript_type_action_chr"); } static inline SEXP get_opts_action(const struct subscript_opts* opts) { switch (opts->action) { case SUBSCRIPT_ACTION_DEFAULT: return R_NilValue; case SUBSCRIPT_ACTION_SUBSET: return chrs_subset; case SUBSCRIPT_ACTION_EXTRACT: return chrs_extract; case SUBSCRIPT_ACTION_ASSIGN: return chrs_assign; case SUBSCRIPT_ACTION_RENAME: return chrs_rename; case SUBSCRIPT_ACTION_REMOVE: return chrs_remove; case SUBSCRIPT_ACTION_NEGATE: return chrs_negate; } never_reached("get_opts_action"); } #endif vctrs/src/translate.h0000644000176200001440000000170613753021253014355 0ustar liggesusers#ifndef VCTRS_TRANSLATE_H #define VCTRS_TRANSLATE_H #include "vctrs.h" // ----------------------------------------------------------------------------- // Vector translation SEXP vec_normalize_encoding(SEXP x); // ----------------------------------------------------------------------------- // Low-level string translation #define MASK_ASCII 8 #define MASK_UTF8 64 // The first 128 values are ASCII, and are the same regardless of the encoding. // Otherwise we enforce UTF-8. static inline bool string_is_ascii_or_utf8(SEXP x) { const int levels = LEVELS(x); return (levels & MASK_ASCII) || (levels & MASK_UTF8); } #undef MASK_ASCII #undef MASK_UTF8 static inline SEXP string_normalize(SEXP x) { return Rf_mkCharCE(Rf_translateCharUTF8(x), CE_UTF8); } static inline bool string_is_normalized(SEXP x) { return string_is_ascii_or_utf8(x) || (x == NA_STRING); } // ----------------------------------------------------------------------------- #endif vctrs/src/poly-op.c0000644000176200001440000001526714042540502013754 0ustar liggesusers#include "poly-op.h" #include "vctrs.h" #include "equal.h" #include "utils.h" // ----------------------------------------------------------------------------- struct poly_df_data { enum vctrs_type* col_types; const void** col_ptrs; r_ssize n_col; }; // ----------------------------------------------------------------------------- static int p_df_equal_na_equal(const void* x, r_ssize i, const void* y, r_ssize j); // [[ include("poly-op.h") ]] poly_binary_int_fn_ptr new_poly_p_equal_na_equal(enum vctrs_type type) { switch (type) { case vctrs_type_null: return p_nil_equal_na_equal; case vctrs_type_logical: return p_lgl_equal_na_equal; case vctrs_type_integer: return p_int_equal_na_equal; case vctrs_type_double: return p_dbl_equal_na_equal; case vctrs_type_complex: return p_cpl_equal_na_equal; case vctrs_type_character: return p_chr_equal_na_equal; case vctrs_type_raw: return p_raw_equal_na_equal; case vctrs_type_list: return p_list_equal_na_equal; case vctrs_type_dataframe: return p_df_equal_na_equal; default: stop_unimplemented_vctrs_type("new_poly_p_equal_na_equal", type); } } static int p_df_equal_na_equal(const void* x, r_ssize i, const void* y, r_ssize j) { struct poly_df_data* x_data = (struct poly_df_data*) x; struct poly_df_data* y_data = (struct poly_df_data*) y; r_ssize n_col = x_data->n_col; if (n_col != y_data->n_col) { stop_internal("p_df_equal_na_equal", "`x` and `y` must have the same number of columns."); } enum vctrs_type* types = x_data->col_types; const void** x_ptrs = x_data->col_ptrs; const void** y_ptrs = y_data->col_ptrs; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { if (!p_equal_na_equal(x_ptrs[col], i, y_ptrs[col], j, types[col])) { return false; } } return true; } // ----------------------------------------------------------------------------- static bool p_df_is_missing(const void* x, r_ssize i); // [[ include("poly-op.h") ]] poly_unary_bool_fn_ptr new_poly_p_is_missing(enum vctrs_type type) { switch (type) { case vctrs_type_null: return p_nil_is_missing; case vctrs_type_logical: return p_lgl_is_missing; case vctrs_type_integer: return p_int_is_missing; case vctrs_type_double: return p_dbl_is_missing; case vctrs_type_complex: return p_cpl_is_missing; case vctrs_type_character: return p_chr_is_missing; case vctrs_type_raw: return p_raw_is_missing; case vctrs_type_list: return p_list_is_missing; case vctrs_type_dataframe: return p_df_is_missing; default: stop_unimplemented_vctrs_type("new_poly_p_is_missing", type); } } static bool p_df_is_missing(const void* x, r_ssize i) { struct poly_df_data* x_data = (struct poly_df_data*) x; enum vctrs_type* types = x_data->col_types; const void** x_ptrs = x_data->col_ptrs; r_ssize n_col = x_data->n_col; for (r_ssize col = 0; col < n_col; ++col) { if (p_is_missing(x_ptrs[col], i, types[col])) { return true; } } return false; } // ----------------------------------------------------------------------------- static void init_nil_poly_vec(struct poly_vec* p_poly_vec); static void init_lgl_poly_vec(struct poly_vec* p_poly_vec); static void init_int_poly_vec(struct poly_vec* p_poly_vec); static void init_dbl_poly_vec(struct poly_vec* p_poly_vec); static void init_cpl_poly_vec(struct poly_vec* p_poly_vec); static void init_chr_poly_vec(struct poly_vec* p_poly_vec); static void init_raw_poly_vec(struct poly_vec* p_poly_vec); static void init_list_poly_vec(struct poly_vec* p_poly_vec); static void init_df_poly_vec(struct poly_vec* p_poly_vec); // [[ include("poly-op.h") ]] struct poly_vec* new_poly_vec(SEXP proxy, enum vctrs_type type) { SEXP self = PROTECT(Rf_allocVector(RAWSXP, sizeof(struct poly_vec))); struct poly_vec* p_poly_vec = (struct poly_vec*) RAW(self); p_poly_vec->self = self; p_poly_vec->vec = proxy; switch (type) { case vctrs_type_null: init_nil_poly_vec(p_poly_vec); break; case vctrs_type_logical: init_lgl_poly_vec(p_poly_vec); break; case vctrs_type_integer: init_int_poly_vec(p_poly_vec); break; case vctrs_type_double: init_dbl_poly_vec(p_poly_vec); break; case vctrs_type_complex: init_cpl_poly_vec(p_poly_vec); break; case vctrs_type_character: init_chr_poly_vec(p_poly_vec); break; case vctrs_type_raw: init_raw_poly_vec(p_poly_vec); break; case vctrs_type_list: init_list_poly_vec(p_poly_vec); break; case vctrs_type_dataframe: init_df_poly_vec(p_poly_vec); break; default: stop_unimplemented_vctrs_type("new_poly_vec", type); } // `init_*_poly_vec()` functions may allocate PROTECT(p_poly_vec->self); UNPROTECT(2); return p_poly_vec; } static void init_nil_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = NULL; } static void init_lgl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) LOGICAL_RO(p_poly_vec->vec); } static void init_int_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) INTEGER_RO(p_poly_vec->vec); } static void init_dbl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) REAL_RO(p_poly_vec->vec); } static void init_cpl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) COMPLEX_RO(p_poly_vec->vec); } static void init_chr_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) STRING_PTR_RO(p_poly_vec->vec); } static void init_raw_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) RAW_RO(p_poly_vec->vec); } static void init_list_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) VECTOR_PTR_RO(p_poly_vec->vec); } static void init_df_poly_vec(struct poly_vec* p_poly_vec) { SEXP df = p_poly_vec->vec; r_ssize n_col = Rf_xlength(df); SEXP self = PROTECT(Rf_allocVector(VECSXP, 4)); SET_VECTOR_ELT(self, 0, p_poly_vec->self); p_poly_vec->self = self; SEXP data_handle = PROTECT(Rf_allocVector(RAWSXP, sizeof(struct poly_df_data))); struct poly_df_data* data = (struct poly_df_data*) RAW(data_handle); SET_VECTOR_ELT(self, 1, data_handle); SEXP col_types_handle = PROTECT(Rf_allocVector(RAWSXP, n_col * sizeof(enum vctrs_type))); enum vctrs_type* col_types = (enum vctrs_type*) RAW(col_types_handle); SET_VECTOR_ELT(self, 2, col_types_handle); SEXP col_ptrs_handle = PROTECT(Rf_allocVector(RAWSXP, n_col * sizeof(void*))); const void** col_ptrs = (const void**) RAW(col_ptrs_handle); SET_VECTOR_ELT(self, 3, col_ptrs_handle); for (r_ssize i = 0; i < n_col; ++i) { SEXP col = VECTOR_ELT(df, i); col_types[i] = vec_proxy_typeof(col); col_ptrs[i] = r_vec_deref_const(col); } data->col_types = col_types; data->col_ptrs = col_ptrs; data->n_col = n_col; p_poly_vec->p_vec = (void*) data; UNPROTECT(4); } vctrs/src/dictionary.h0000644000176200001440000000336613753021253014531 0ustar liggesusers#include "poly-op.h" #define DICT_EMPTY -1 // The dictionary structure is a little peculiar since R has no notion of // a scalar, so the `key`s are indexes into vector `x`. This means we can // only store values from a single vector, but we can still lookup using // another vector, provided that they're of the same type (which is ensured // at the R-level). struct dictionary { SEXP protect; poly_binary_int_fn_ptr p_equal_na_equal; poly_unary_bool_fn_ptr p_is_missing; struct poly_vec* p_poly_vec; uint32_t* hash; R_len_t* key; uint32_t size; uint32_t used; }; /** * Initialise a dictionary * * - `new_dictionary()` creates a dictionary and precaches the hashes for * each element of `x`. * * - `new_dictionary_partial()` creates a dictionary with precached hashes * as well, but does not allocate an array of keys. This is useful * for finding a key in another dictionary with `dict_hash_with()`. */ struct dictionary_opts { bool partial; bool na_equal; }; struct dictionary* new_dictionary(SEXP x); struct dictionary* new_dictionary_partial(SEXP x); #define PROTECT_DICT(d, n) do { \ struct dictionary* d_ = (d); \ PROTECT_POLY_VEC(d_->p_poly_vec, n); \ PROTECT(d_->protect); \ *(n) += 1; \ } while(0) /** * Find key hash for a vector element * * - `dict_hash_scalar()` returns the key hash for element `i`. * * - `dict_hash_with()` finds the hash for indexing into `d` with * element `i` of `x`. */ uint32_t dict_hash_scalar(struct dictionary* d, R_len_t i); uint32_t dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i); bool dict_is_missing(struct dictionary* d, R_len_t i); void dict_put(struct dictionary* d, uint32_t k, R_len_t i); vctrs/src/runs.c0000644000176200001440000003745014042540502013342 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "equal.h" #include "translate.h" // ----------------------------------------------------------------------------- static SEXP vec_locate_runs(SEXP x, bool start); // [[register()]] SEXP vctrs_locate_runs(SEXP x, SEXP start) { bool c_start = (bool) r_bool_as_int(start); return vec_locate_runs(x, c_start); } static void vec_locate_run_starts(const int* p_id, r_ssize size, int* p_out); static void vec_locate_run_ends(const int* p_id, r_ssize size, int* p_out); static SEXP vec_locate_runs(SEXP x, bool start) { SEXP id = PROTECT(vec_identify_runs(x)); const int* p_id = INTEGER(id); r_ssize size = r_length(id); int n = r_int_get(r_attrib_get(id, syms_n), 0); SEXP out = PROTECT(r_new_integer(n)); int* p_out = INTEGER(out); if (n == 0) { UNPROTECT(2); return out; } if (start) { vec_locate_run_starts(p_id, size, p_out); } else { vec_locate_run_ends(p_id, size, p_out); } UNPROTECT(2); return out; } static void vec_locate_run_starts(const int* p_id, r_ssize size, int* p_out) { r_ssize loc = 0; // Handle first case int ref = p_id[0]; p_out[loc] = 1; ++loc; for (r_ssize i = 1; i < size; ++i) { const int elt = p_id[i]; if (elt == ref) { continue; } ref = elt; p_out[loc] = i + 1; ++loc; } } static void vec_locate_run_ends(const int* p_id, r_ssize size, int* p_out) { r_ssize loc = 0; int ref = p_id[0]; for (r_ssize i = 1; i < size; ++i) { const int elt = p_id[i]; if (elt == ref) { continue; } ref = elt; p_out[loc] = i; ++loc; } // Handle last case p_out[loc] = size; } // ----------------------------------------------------------------------------- static SEXP vec_detect_runs(SEXP x, bool start); // [[register()]] SEXP vctrs_detect_runs(SEXP x, SEXP start) { bool c_start = (bool) r_bool_as_int(start); return vec_detect_runs(x, c_start); } static void vec_detect_run_starts(const int* p_id, r_ssize size, int* p_out); static void vec_detect_run_ends(const int* p_id, r_ssize size, int* p_out); static SEXP vec_detect_runs(SEXP x, bool start) { SEXP id = PROTECT(vec_identify_runs(x)); const int* p_id = INTEGER(id); r_ssize size = r_length(id); SEXP out = PROTECT(r_new_logical(size)); int* p_out = LOGICAL(out); memset(p_out, 0, size * sizeof(int)); if (size == 0) { UNPROTECT(2); return out; } if (start) { vec_detect_run_starts(p_id, size, p_out); } else { vec_detect_run_ends(p_id, size, p_out); } UNPROTECT(2); return out; } static void vec_detect_run_starts(const int* p_id, r_ssize size, int* p_out) { // Handle first case int ref = p_id[0]; p_out[0] = 1; for (r_ssize i = 1; i < size; ++i) { const int elt = p_id[i]; if (elt == ref) { continue; } ref = elt; p_out[i] = 1; } } static void vec_detect_run_ends(const int* p_id, r_ssize size, int* p_out) { int ref = p_id[0]; for (r_ssize i = 1; i < size; ++i) { const int elt = p_id[i]; if (elt == ref) { continue; } ref = elt; p_out[i - 1] = 1; } // Handle last case p_out[size - 1] = 1; } // ----------------------------------------------------------------------------- // [[register()]] SEXP vctrs_identify_runs(SEXP x) { return vec_identify_runs(x); } static int lgl_identify_runs(SEXP x, R_len_t size, int* p_out); static int int_identify_runs(SEXP x, R_len_t size, int* p_out); static int dbl_identify_runs(SEXP x, R_len_t size, int* p_out); static int cpl_identify_runs(SEXP x, R_len_t size, int* p_out); static int chr_identify_runs(SEXP x, R_len_t size, int* p_out); static int raw_identify_runs(SEXP x, R_len_t size, int* p_out); static int list_identify_runs(SEXP x, R_len_t size, int* p_out); static int df_identify_runs(SEXP x, R_len_t size, int* p_out); // [[ include("vctrs.h") ]] SEXP vec_identify_runs(SEXP x) { SEXP proxy = PROTECT(vec_proxy_equal(x)); R_len_t size = vec_size(proxy); proxy = PROTECT(vec_normalize_encoding(proxy)); SEXP out = PROTECT(Rf_allocVector(INTSXP, size)); int* p_out = INTEGER(out); // Handle size 0 up front. // All implementations assume at least 1 element. if (size == 0) { SEXP n = PROTECT(r_int(0)); r_attrib_poke(out, syms_n, n); UNPROTECT(4); return out; } enum vctrs_type type = vec_proxy_typeof(proxy); int n; switch (type) { case vctrs_type_logical: n = lgl_identify_runs(proxy, size, p_out); break; case vctrs_type_integer: n = int_identify_runs(proxy, size, p_out); break; case vctrs_type_double: n = dbl_identify_runs(proxy, size, p_out); break; case vctrs_type_complex: n = cpl_identify_runs(proxy, size, p_out); break; case vctrs_type_character: n = chr_identify_runs(proxy, size, p_out); break; case vctrs_type_raw: n = raw_identify_runs(proxy, size, p_out); break; case vctrs_type_list: n = list_identify_runs(proxy, size, p_out); break; case vctrs_type_dataframe: n = df_identify_runs(proxy, size, p_out); break; default: stop_unimplemented_vctrs_type("vec_identify_runs", type); } SEXP r_n = PROTECT(r_int(n)); r_attrib_poke(out, syms_n, r_n); UNPROTECT(4); return out; } // ----------------------------------------------------------------------------- #define VEC_IDENTIFY_RUNS(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL) { \ int id = 1; \ const CTYPE* p_x = CONST_DEREF(x); \ \ /* Handle first case */ \ CTYPE ref = p_x[0]; \ p_out[0] = id; \ \ for (R_len_t i = 1; i < size; ++i) { \ const CTYPE elt = p_x[i]; \ \ if (EQUAL_NA_EQUAL(elt, ref) == 0) { \ ++id; \ ref = elt; \ } \ \ p_out[i] = id; \ } \ \ return id; \ } static int lgl_identify_runs(SEXP x, R_len_t size, int* p_out) { VEC_IDENTIFY_RUNS(int, LOGICAL_RO, lgl_equal_na_equal); } static int int_identify_runs(SEXP x, R_len_t size, int* p_out) { VEC_IDENTIFY_RUNS(int, INTEGER_RO, int_equal_na_equal); } static int dbl_identify_runs(SEXP x, R_len_t size, int* p_out) { VEC_IDENTIFY_RUNS(double, REAL_RO, dbl_equal_na_equal); } static int cpl_identify_runs(SEXP x, R_len_t size, int* p_out) { VEC_IDENTIFY_RUNS(Rcomplex, COMPLEX_RO, cpl_equal_na_equal); } static int chr_identify_runs(SEXP x, R_len_t size, int* p_out) { VEC_IDENTIFY_RUNS(SEXP, STRING_PTR_RO, chr_equal_na_equal); } static int raw_identify_runs(SEXP x, R_len_t size, int* p_out) { VEC_IDENTIFY_RUNS(Rbyte, RAW_RO, raw_equal_na_equal); } static int list_identify_runs(SEXP x, R_len_t size, int* p_out) { VEC_IDENTIFY_RUNS(SEXP, VECTOR_PTR_RO, list_equal_na_equal); } #undef VEC_IDENTIFY_RUNS // ----------------------------------------------------------------------------- static inline int vec_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static int df_identify_runs(SEXP x, R_len_t size, int* p_out) { int nprot = 0; const SEXP* p_x = VECTOR_PTR_RO(x); struct df_short_circuit_info info = new_df_short_circuit_info(size, false); PROTECT_DF_SHORT_CIRCUIT_INFO(&info, &nprot); int id = 1; R_len_t n_col = Rf_length(x); // Define 0 column case to be a single run if (n_col == 0) { r_p_int_fill(p_out, id, size); UNPROTECT(nprot); return id; } // Handle first case p_out[0] = id; info.p_row_known[0] = true; --info.remaining; // Compute non-sequential run IDs for (R_len_t i = 0; i < n_col; ++i) { SEXP col = p_x[i]; id = vec_identify_runs_col(col, id, &info, p_out); // All values are unique if (info.remaining == 0) { break; } } id = 1; int previous = p_out[0]; // Overwrite with sequential IDs for (R_len_t i = 1; i < size; ++i) { const int current = p_out[i]; if (current != previous) { ++id; previous = current; } p_out[i] = id; } UNPROTECT(nprot); return id; } // ----------------------------------------------------------------------------- static int lgl_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static int int_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static int dbl_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static int cpl_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static int chr_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static int raw_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static int list_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out); static inline int vec_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out) { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: return lgl_identify_runs_col(x, id, p_info, p_out); case vctrs_type_integer: return int_identify_runs_col(x, id, p_info, p_out); case vctrs_type_double: return dbl_identify_runs_col(x, id, p_info, p_out); case vctrs_type_complex: return cpl_identify_runs_col(x, id, p_info, p_out); case vctrs_type_character: return chr_identify_runs_col(x, id, p_info, p_out); case vctrs_type_raw: return raw_identify_runs_col(x, id, p_info, p_out); case vctrs_type_list: return list_identify_runs_col(x, id, p_info, p_out); case vctrs_type_dataframe: stop_internal("vec_identify_runs_col", "Data frame columns should be flattened."); case vctrs_type_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_identify_runs()`"); default: Rf_error("Unimplemented type in `vec_identify_runs()`"); } } // ----------------------------------------------------------------------------- #define VEC_IDENTIFY_RUNS_COL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL) { \ const CTYPE* p_x = CONST_DEREF(x); \ \ /* First row is always known, so `run_val` and `run_id` */ \ /* will always be overwritten immediately below. */ \ /* But for gcc11 we have to initialize these variables. */ \ CTYPE run_val = p_x[0]; \ int run_id = 0; \ \ for (R_len_t i = 0; i < p_info->size; ++i) { \ /* Start of new run */ \ if (p_info->p_row_known[i]) { \ run_val = p_x[i]; \ run_id = p_out[i]; \ continue; \ } \ \ const CTYPE elt = p_x[i]; \ const int eq = EQUAL_NA_EQUAL(elt, run_val); \ \ /* Update ID of identical values */ \ if (eq != 0) { \ p_out[i] = run_id; \ continue; \ } \ \ ++id; \ run_val = elt; \ run_id = id; \ p_out[i] = id; \ \ /* This is a run change, so don't check this row again */ \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ \ return id; \ } static int lgl_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out) { VEC_IDENTIFY_RUNS_COL(int, LOGICAL_RO, lgl_equal_na_equal); } static int int_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out) { VEC_IDENTIFY_RUNS_COL(int, INTEGER_RO, int_equal_na_equal); } static int dbl_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out) { VEC_IDENTIFY_RUNS_COL(double, REAL_RO, dbl_equal_na_equal); } static int cpl_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out) { VEC_IDENTIFY_RUNS_COL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal); } static int chr_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out) { VEC_IDENTIFY_RUNS_COL(SEXP, STRING_PTR_RO, chr_equal_na_equal); } static int raw_identify_runs_col(SEXP x, R_len_t id, struct df_short_circuit_info* p_info, int* p_out) { VEC_IDENTIFY_RUNS_COL(Rbyte, RAW_RO, raw_equal_na_equal); } static int list_identify_runs_col(SEXP x, int id, struct df_short_circuit_info* p_info, int* p_out) { VEC_IDENTIFY_RUNS_COL(SEXP, VECTOR_PTR_RO, list_equal_na_equal); } #undef VEC_IDENTIFY_RUNS_COL vctrs/src/altrep-rle.c0000644000176200001440000001060014042540502014406 0ustar liggesusers#include "vctrs.h" #include "altrep-rle.h" #include "altrep.h" #if (!HAS_ALTREP) #include void vctrs_init_altrep_rle(DllInfo* dll) { } SEXP altrep_rle_Make(SEXP input) { Rf_error("Need R 3.5+ for Altrep support."); return R_NilValue; } #else // Initialised at load time R_altrep_class_t altrep_rle_class; SEXP altrep_rle_Make(SEXP input) { SEXP res = R_new_altrep(altrep_rle_class, input, R_NilValue); MARK_NOT_MUTABLE(res); return res; } // ALTREP methods ------------------- // The length of the object inline R_xlen_t altrep_rle_Length(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return Rf_xlength(data2); } R_xlen_t sz = 0; SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); for (R_xlen_t i = 0; i < Rf_xlength(rle); ++i) { sz += rle_p[i]; } return sz; } // What gets printed when .Internal(inspect()) is used Rboolean altrep_rle_Inspect(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)) { Rprintf("vctrs_altrep_rle (len=%d, materialized=%s)\n", altrep_rle_Length(x), R_altrep_data2(x) != R_NilValue ? "T" : "F"); return TRUE; } // ALTSTRING methods ----------------- // the element at the index `i` SEXP altrep_rle_string_Elt(SEXP vec, R_xlen_t i) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return STRING_ELT(data2, i); } SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); SEXP nms = Rf_getAttrib(rle, Rf_install("names")); R_xlen_t idx = 0; while (i >= 0 && idx < Rf_xlength(rle)) { i -= rle_p[idx++]; } return STRING_ELT(nms, idx - 1); } R_xlen_t find_rle_index(int* rle_data, R_xlen_t i, R_xlen_t size) { R_xlen_t idx = 0; while (i >= 0 && idx < size) { i -= rle_data[idx++]; } return idx - 1; } // This is a simple implementation, a more complex one would produce a // altrep_rle object as well SEXP altrep_rle_Extract_subset(SEXP x, SEXP indx, SEXP call) { SEXP data2 = R_altrep_data2(x); // If the vector is already materialized, just fall back to the default // implementation if (data2 != R_NilValue) { return NULL; } SEXP data1 = R_altrep_data1(x); int* index_data = INTEGER(indx); R_xlen_t index_n = Rf_length(indx); int* rle_data = INTEGER(data1); R_xlen_t rle_n = Rf_length(data1); SEXP nms = PROTECT(Rf_getAttrib(data1, Rf_install("names"))); SEXP out = PROTECT(Rf_allocVector(STRSXP, index_n)); for (R_len_t i = 0; i < index_n; ++i) { int index_elt = index_data[i]; if (index_elt == NA_INTEGER) { SET_STRING_ELT(out, i, NA_STRING); continue; } --index_elt; R_xlen_t rle_idx = find_rle_index(rle_data, index_elt, rle_n); SET_STRING_ELT(out, i, STRING_ELT(nms, rle_idx)); } UNPROTECT(2); return out; } // --- Altvec SEXP altrep_rle_string_Materialize(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return data2; } R_xlen_t sz = altrep_rle_Length(vec); SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); SEXP out = PROTECT(Rf_allocVector(STRSXP, sz)); R_xlen_t idx = 0; SEXP nms = Rf_getAttrib(rle, Rf_install("names")); for (R_xlen_t i = 0; i < Rf_xlength(rle); ++i) { for (R_xlen_t j = 0; j < rle_p[i]; ++j) { SET_STRING_ELT(out, idx++, STRING_ELT(nms, i)); } } UNPROTECT(1); R_set_altrep_data2(vec, out); return out; } void* altrep_rle_Dataptr(SEXP vec, Rboolean writeable) { return STDVEC_DATAPTR(altrep_rle_string_Materialize(vec)); } const void* altrep_rle_Dataptr_or_null(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 == R_NilValue) return NULL; return STDVEC_DATAPTR(data2); } void vctrs_init_altrep_rle(DllInfo* dll) { altrep_rle_class = R_make_altstring_class("altrep_rle", "vctrs", dll); // altrep R_set_altrep_Length_method(altrep_rle_class, altrep_rle_Length); R_set_altrep_Inspect_method(altrep_rle_class, altrep_rle_Inspect); // altvec R_set_altvec_Dataptr_method(altrep_rle_class, altrep_rle_Dataptr); R_set_altvec_Dataptr_or_null_method(altrep_rle_class, altrep_rle_Dataptr_or_null); R_set_altvec_Extract_subset_method(altrep_rle_class, altrep_rle_Extract_subset); // altstring R_set_altstring_Elt_method(altrep_rle_class, altrep_rle_string_Elt); } #endif // R version >= 3.5.0 vctrs/src/type-factor.c0000644000176200001440000002170114042540502014600 0ustar liggesusers#include "vctrs.h" #include "ptype2.h" #include "utils.h" static SEXP levels_union(SEXP x, SEXP y); // [[ include("type-factor.h") ]] SEXP fct_ptype2(const struct ptype2_opts* opts) { SEXP x = opts->x; SEXP y = opts->y; SEXP x_levels = Rf_getAttrib(x, R_LevelsSymbol); SEXP y_levels = Rf_getAttrib(y, R_LevelsSymbol); if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_factor_levels(x, opts->x_arg); } if (TYPEOF(y_levels) != STRSXP) { stop_corrupt_factor_levels(y, opts->y_arg); } // Quick early exit for identical levels pointing to the same SEXP if (x_levels == y_levels) { return new_empty_factor(x_levels); } SEXP levels = PROTECT(levels_union(x_levels, y_levels)); SEXP out = new_empty_factor(levels); UNPROTECT(1); return out; } static SEXP ord_ptype2_validate(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, bool cast) { SEXP x_levels = Rf_getAttrib(x, R_LevelsSymbol); SEXP y_levels = Rf_getAttrib(y, R_LevelsSymbol); if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_ordered_levels(x, x_arg); } if (TYPEOF(y_levels) != STRSXP) { stop_corrupt_ordered_levels(y, y_arg); } if (!equal_object(x_levels, y_levels)) { stop_incompatible_type(x, y, x_arg, y_arg, cast); } return x_levels; } // [[ include("type-factor.h") ]] SEXP ord_ptype2(const struct ptype2_opts* opts) { SEXP levels = PROTECT(ord_ptype2_validate(opts->x, opts->y, opts->x_arg, opts->y_arg, false)); SEXP out = new_empty_ordered(levels); return UNPROTECT(1), out; } static SEXP levels_union(SEXP x, SEXP y) { SEXP args = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(args, 0, x); SET_VECTOR_ELT(args, 1, y); const struct name_repair_opts name_repair_opts = { .type = name_repair_none, .fn = R_NilValue }; // Combine with known ptype // No name repair because this is just combining factor levels SEXP xy = PROTECT(vec_c( args, vctrs_shared_empty_chr, R_NilValue, &name_repair_opts )); SEXP out = vec_unique(xy); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- static void init_factor(SEXP x, SEXP levels); static void init_ordered(SEXP x, SEXP levels); // [[ include("vctrs.h") ]] SEXP fct_as_character(SEXP x, struct vctrs_arg* x_arg) { SEXP levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_factor_levels(x, x_arg); } UNPROTECT(1); return Rf_asCharacterFactor(x); } // [[ include("vctrs.h") ]] SEXP ord_as_character(SEXP x, struct vctrs_arg* x_arg) { return fct_as_character(x, x_arg); } static SEXP chr_as_factor_from_self(SEXP x, bool ordered); static SEXP chr_as_factor_impl(SEXP x, SEXP levels, bool* lossy, bool ordered); // [[ include("vctrs.h") ]] SEXP chr_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg) { SEXP levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_factor_levels(to, to_arg); } SEXP out; // When `to` has no levels, it is treated as a template and the // levels come from `x` if (vec_size(levels) == 0) { out = chr_as_factor_from_self(x, false); } else { out = chr_as_factor_impl(x, levels, lossy, false); } UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP chr_as_ordered(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg) { SEXP levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_ordered_levels(to, to_arg); } SEXP out; // When `to` has no levels, it is treated as a template and the // levels come from `x` if (vec_size(levels) == 0) { out = chr_as_factor_from_self(x, true); } else { out = chr_as_factor_impl(x, levels, lossy, true); } UNPROTECT(1); return out; } static SEXP chr_as_factor_impl(SEXP x, SEXP levels, bool* lossy, bool ordered) { SEXP out = PROTECT(vec_match(x, levels)); const int* p_out = INTEGER(out); R_len_t size = vec_size(x); const SEXP* p_x = STRING_PTR_RO(x); // Detect lossy no-matches, but allow `NA` values from `x` for (R_len_t i = 0; i < size; ++i) { if (p_out[i] == NA_INTEGER && p_x[i] != NA_STRING) { *lossy = true; UNPROTECT(1); return R_NilValue; } } if (ordered) { init_ordered(out, levels); } else { init_factor(out, levels); } UNPROTECT(1); return out; } static SEXP remove_na_levels(SEXP levels); // Factor levels are added in order of appearance. // `NA` values in `x` are not considered factor levels. static SEXP chr_as_factor_from_self(SEXP x, bool ordered) { SEXP levels = PROTECT(vec_unique(x)); levels = PROTECT(remove_na_levels(levels)); // `NA` values in `x` correctly become `NA` values in the result SEXP out = PROTECT(vec_match(x, levels)); if (ordered) { init_ordered(out, levels); } else { init_factor(out, levels); } UNPROTECT(3); return out; } static SEXP remove_na_levels(SEXP levels) { R_len_t size = vec_size(levels); const SEXP* p_levels = STRING_PTR_RO(levels); // There might only ever be 1 `NA` level. // Remove it if it exists. for (R_len_t i = 0; i < size; ++i) { if (p_levels[i] == NA_STRING) { int na_loc = (i + 1) * -1; SEXP na_loc_obj = PROTECT(r_int(na_loc)); SEXP out = vec_slice(levels, na_loc_obj); UNPROTECT(1); return out; } } return levels; } static SEXP fct_as_factor_impl(SEXP x, SEXP x_levels, SEXP to_levels, bool* lossy, bool ordered); // [[ include("vctrs.h") ]] SEXP fct_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg) { SEXP x_levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); SEXP to_levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_factor_levels(x, x_arg); } if (TYPEOF(to_levels) != STRSXP) { stop_corrupt_factor_levels(to, to_arg); } SEXP out = fct_as_factor_impl(x, x_levels, to_levels, lossy, false); UNPROTECT(2); return out; } // [[ include("factor.h") ]] SEXP ord_as_ordered(const struct cast_opts* opts) { ord_ptype2_validate(opts->x, opts->to, opts->x_arg, opts->to_arg, true); return opts->x; } static SEXP fct_as_factor_impl(SEXP x, SEXP x_levels, SEXP to_levels, bool* lossy, bool ordered) { // Early exit if levels are identical if (x_levels == to_levels) { return x; } R_len_t x_levels_size = vec_size(x_levels); R_len_t to_levels_size = vec_size(to_levels); // Early exit if `to` has no levels. In this case it is being used as // a template if (to_levels_size == 0) { return x; } // Always lossy if there are more levels in `x` than in `to` if (x_levels_size > to_levels_size) { *lossy = true; return R_NilValue; } R_len_t x_size = vec_size(x); const SEXP* p_x_levels = STRING_PTR_RO(x_levels); const SEXP* p_to_levels = STRING_PTR_RO(to_levels); bool is_contiguous_subset = true; for (R_len_t i = 0; i < x_levels_size; ++i) { if (p_x_levels[i] != p_to_levels[i]) { is_contiguous_subset = false; break; } } // No recoding required if contiguous subset. // Duplicate, strip non-factor attributes, and re-initialize with new levels. // Using `r_clone_referenced()` avoids an immediate copy using ALTREP wrappers. if (is_contiguous_subset) { SEXP out = PROTECT(r_clone_referenced(x)); SET_ATTRIB(out, R_NilValue); if (ordered) { init_ordered(out, to_levels); } else { init_factor(out, to_levels); } UNPROTECT(1); return out; } const int* p_x = INTEGER_RO(x); SEXP out = PROTECT(Rf_allocVector(INTSXP, x_size)); int* p_out = INTEGER(out); if (ordered) { init_ordered(out, to_levels); } else { init_factor(out, to_levels); } SEXP recode = PROTECT(vec_match(x_levels, to_levels)); const int* p_recode = INTEGER_RO(recode); // Detect if there are any levels in `x` that aren't in `to` for (R_len_t i = 0; i < x_levels_size; ++i) { if (p_recode[i] == NA_INTEGER) { *lossy = true; UNPROTECT(2); return R_NilValue; } } // Recode `x` int values into `to` level ordering for (R_len_t i = 0; i < x_size; ++i) { const int elt = p_x[i]; if (elt == NA_INTEGER) { p_out[i] = NA_INTEGER; continue; } p_out[i] = p_recode[elt - 1]; } UNPROTECT(2); return out; } static void init_factor(SEXP x, SEXP levels) { if (TYPEOF(x) != INTSXP) { stop_internal("init_factor", "Only integers can be made into factors."); } Rf_setAttrib(x, R_LevelsSymbol, levels); Rf_setAttrib(x, R_ClassSymbol, classes_factor); } static void init_ordered(SEXP x, SEXP levels) { if (TYPEOF(x) != INTSXP) { stop_internal("init_ordered", "Only integers can be made into ordered factors."); } Rf_setAttrib(x, R_LevelsSymbol, levels); Rf_setAttrib(x, R_ClassSymbol, classes_ordered); } vctrs/src/type-tibble.c0000644000176200001440000000221214042540502014557 0ustar liggesusers#include "vctrs.h" #include "cast.h" #include "ptype2.h" #include "type-data-frame.h" #include "utils.h" // [[ include("vctrs.h") ]] SEXP tib_ptype2(const struct ptype2_opts* opts) { SEXP out = PROTECT(df_ptype2(opts)); Rf_setAttrib(out, R_ClassSymbol, classes_tibble); UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_tib_ptype2(SEXP x, SEXP y, SEXP x_arg_, SEXP y_arg_) { struct vctrs_arg x_arg = vec_as_arg(x_arg_); struct vctrs_arg y_arg = vec_as_arg(y_arg_); const struct ptype2_opts opts = { .x = x, .y = y, .x_arg = &x_arg, .y_arg = &y_arg }; return tib_ptype2(&opts); } // [[ include("type-tibble.h") ]] SEXP tib_cast(const struct cast_opts* opts) { SEXP out = PROTECT(df_cast_opts(opts)); Rf_setAttrib(out, R_ClassSymbol, classes_tibble); UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_tib_cast(SEXP x, SEXP to, SEXP x_arg, SEXP to_arg) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_to_arg = vec_as_arg(to_arg); const struct cast_opts opts = { .x = x, .to = to, .x_arg = &c_x_arg, .to_arg = &c_to_arg }; return tib_cast(&opts); } vctrs/src/altrep-rle.h0000644000176200001440000000121413712211241014412 0ustar liggesusers#ifndef ALTREP_RLE_H #define ALTREP_RLE_H #include "altrep.h" #if (HAS_ALTREP) SEXP altrep_rle_Make(SEXP input); R_xlen_t altrep_rle_Length(SEXP vec); Rboolean altrep_rle_Inspect( SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)); SEXP altrep_rle_string_Elt(SEXP vec, R_xlen_t i); SEXP altrep_rle_Extract_subset(SEXP x, SEXP indx, SEXP call); SEXP altrep_rle_string_Materialize(SEXP vec); void* altrep_rle_Dataptr(SEXP vec, Rboolean writeable); const void* altrep_rle_Dataptr_or_null(SEXP vec); void vctrs_init_altrep_rle(DllInfo* dll); extern R_altrep_class_t altrep_rle_class; #endif #endif vctrs/src/lazy.h0000644000176200001440000000574313753021253013344 0ustar liggesusers#ifndef VCTRS_LAZY_H #define VCTRS_LAZY_H #include "vctrs.h" // ----------------------------------------------------------------------------- /* * @member self A RAWSXP for the struct memory. * @member data The RAWSXP that gets allocated lazily. * @member p_data A void pointer to the RAWSXP. * @member data_pi A protection index to `data` so it can reprotect itself * upon allocation. * @member size The total size of the RAWSXP to allocate. * This is computed as `size * n_bytes` in `new_lazy_raw()`, where `n_bytes` * is from `sizeof()`. */ struct lazy_raw { SEXP self; SEXP data; void* p_data; PROTECT_INDEX data_pi; r_ssize size; }; /* * @param size The size of the type you want to interpret the memory as. * @param n_bytes A `sizeof()` result for the type you are allocating * memory for. */ static inline struct lazy_raw* new_lazy_raw(r_ssize size, size_t n_bytes) { SEXP self = PROTECT(r_new_raw(sizeof(struct lazy_raw))); struct lazy_raw* p_out = (struct lazy_raw*) RAW(self); p_out->self = self; p_out->data = R_NilValue; p_out->size = size * n_bytes; UNPROTECT(1); return p_out; } /* * Allocate the lazy vector if it hasn't already been allocated. * This reprotects itself using the protection index. */ static inline void* init_lazy_raw(struct lazy_raw* p_x) { if (p_x->data != R_NilValue) { return p_x->p_data; } p_x->data = Rf_allocVector(RAWSXP, p_x->size); REPROTECT(p_x->data, p_x->data_pi); p_x->p_data = (void*) RAW(p_x->data); return p_x->p_data; } // ----------------------------------------------------------------------------- /* * @member self A RAWSXP for the struct memory. * @member data The STRSXP that gets allocated lazily. * @member p_data A constant pointer to `data`. Modification to `data` should * be done using `SET_STRING_ELT()`. * @member data_pi A protection index to `data` so it can reprotect itself * upon allocation. * @member size The total size of the STRSXP to allocate. */ struct lazy_chr { SEXP self; SEXP data; const SEXP* p_data; PROTECT_INDEX data_pi; r_ssize size; }; static inline struct lazy_chr* new_lazy_chr(r_ssize size) { SEXP self = PROTECT(r_new_raw(sizeof(struct lazy_chr))); struct lazy_chr* p_out = (struct lazy_chr*) RAW(self); p_out->self = self; p_out->data = R_NilValue; p_out->size = size; UNPROTECT(1); return p_out; } static inline const SEXP* init_lazy_chr(struct lazy_chr* p_x) { if (p_x->data != R_NilValue) { return p_x->p_data; } p_x->data = Rf_allocVector(STRSXP, p_x->size); REPROTECT(p_x->data, p_x->data_pi); p_x->p_data = STRING_PTR_RO(p_x->data); return p_x->p_data; } // ----------------------------------------------------------------------------- #define PROTECT_LAZY_VEC(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->data, &(p_info)->data_pi); \ *(p_n) += 2; \ } while (0) #endif vctrs/src/subscript-loc.h0000644000176200001440000000216413650511520015145 0ustar liggesusers#ifndef VCTRS_SUBSCRIPT_LOC_H #define VCTRS_SUBSCRIPT_LOC_H #include "utils.h" #include "subscript.h" enum subscript_missing { SUBSCRIPT_MISSING_PROPAGATE, SUBSCRIPT_MISSING_ERROR }; enum num_loc_negative { LOC_NEGATIVE_INVERT, LOC_NEGATIVE_ERROR, LOC_NEGATIVE_IGNORE }; enum num_loc_oob { LOC_OOB_EXTEND, LOC_OOB_ERROR }; enum num_loc_zero { LOC_ZERO_REMOVE, LOC_ZERO_ERROR, LOC_ZERO_IGNORE }; struct location_opts { const struct subscript_opts* subscript_opts; enum num_loc_negative loc_negative; enum num_loc_oob loc_oob; enum num_loc_zero loc_zero; enum subscript_missing missing; }; extern struct location_opts location_default_opts_obj; extern struct location_opts location_default_assign_opts_obj; static const struct location_opts* const location_default_opts = &location_default_opts_obj; static const struct location_opts* const location_default_assign_opts = &location_default_assign_opts_obj; SEXP vec_as_location(SEXP i, R_len_t n, SEXP names); SEXP vec_as_location_opts(SEXP subscript, R_len_t n, SEXP names, const struct location_opts* location_opts); #endif vctrs/src/fill.c0000644000176200001440000001322714042540502013275 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "equal.h" #define INFINITE_FILL -1 static void parse_direction(SEXP x, bool* p_down, bool* p_leading); static int parse_max_fill(SEXP x); static SEXP vec_fill_missing(SEXP x, bool down, bool leading, int max_fill); // [[ register() ]] SEXP vctrs_fill_missing(SEXP x, SEXP direction, SEXP max_fill) { bool down; bool leading; parse_direction(direction, &down, &leading); int c_max_fill = parse_max_fill(max_fill); return vec_fill_missing(x, down, leading, c_max_fill); } static void vec_fill_missing_down(const int* p_na, r_ssize size, bool leading, int* p_loc); static void vec_fill_missing_down_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc); static void vec_fill_missing_up(const int* p_na, r_ssize size, bool leading, int* p_loc); static void vec_fill_missing_up_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc); static SEXP vec_fill_missing(SEXP x, bool down, bool leading, int max_fill) { r_ssize size = vec_size(x); SEXP na = PROTECT(vec_equal_na(x)); const int* p_na = LOGICAL_RO(na); SEXP loc = PROTECT(r_new_integer(size)); int* p_loc = INTEGER(loc); const bool has_max_fill = max_fill != INFINITE_FILL; if (down) { if (has_max_fill) { vec_fill_missing_down_with_max_fill(p_na, size, leading, max_fill, p_loc); } else { vec_fill_missing_down(p_na, size, leading, p_loc); } } else { if (has_max_fill) { vec_fill_missing_up_with_max_fill(p_na, size, leading, max_fill, p_loc); } else { vec_fill_missing_up(p_na, size, leading, p_loc); } } SEXP out = vec_slice_impl(x, loc); UNPROTECT(2); return out; } static void vec_fill_missing_down(const int* p_na, r_ssize size, bool leading, int* p_loc) { r_ssize loc = 0; if (leading) { // Increment `loc` to the first non-missing value for (r_ssize i = 0; i < size; ++i) { if (!p_na[i]) { loc = i; break; } } // Back-fill with first non-missing value for (r_ssize i = loc - 1; i >= 0; --i) { p_loc[i] = loc + 1; } } for (r_ssize i = loc; i < size; ++i) { if (!p_na[i]) { loc = i; } p_loc[i] = loc + 1; } } static void vec_fill_missing_down_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc) { r_ssize loc = 0; if (leading) { // Increment `loc` to the first non-missing value for (r_ssize i = 0; i < size; ++i) { if (!p_na[i]) { loc = i; break; } } // Back-fill with first non-missing value with a max_fill r_ssize n_fill = 0; for (r_ssize i = loc - 1; i >= 0; --i) { if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } r_ssize n_fill = 0; for (r_ssize i = loc; i < size; ++i) { if (!p_na[i]) { loc = i; n_fill = 0; p_loc[i] = i + 1; continue; } if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } static void vec_fill_missing_up(const int* p_na, r_ssize size, bool leading, int* p_loc) { r_ssize loc = size - 1; if (leading) { // Decrement `loc` to the last non-missing value for (r_ssize i = size - 1; i >= 0; --i) { if (!p_na[i]) { loc = i; break; } } // Forward-fill with last non-missing value for (r_ssize i = loc + 1; i < size; ++i) { p_loc[i] = loc + 1; } } for (r_ssize i = loc; i >= 0; --i) { if (!p_na[i]) { loc = i; } p_loc[i] = loc + 1; } } static void vec_fill_missing_up_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc) { r_ssize loc = size - 1; if (leading) { // Decrement `loc` to the last non-missing value for (r_ssize i = size - 1; i >= 0; --i) { if (!p_na[i]) { loc = i; break; } } // Forward-fill with last non-missing value with a max_fill r_ssize n_fill = 0; for (r_ssize i = loc + 1; i < size; ++i) { if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } r_ssize n_fill = 0; for (r_ssize i = loc; i >= 0; --i) { if (!p_na[i]) { loc = i; n_fill = 0; p_loc[i] = i + 1; continue; } if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } // ----------------------------------------------------------------------------- static void stop_bad_direction(); static void parse_direction(SEXP x, bool* p_down, bool* p_leading) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_bad_direction(); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "down")) { *p_down = true; *p_leading = false; return; } if (!strcmp(str, "up")) { *p_down = false; *p_leading = false; return; } if (!strcmp(str, "downup")) { *p_down = true; *p_leading = true; return; } if (!strcmp(str, "updown")) { *p_down = false; *p_leading = true; return; } stop_bad_direction(); never_reached("parse_direction"); } static void stop_bad_direction() { r_abort("`direction` must be one of \"down\", \"up\", \"downup\", or \"updown\"."); } static int parse_max_fill(SEXP x) { if (x == R_NilValue) { return INFINITE_FILL; } x = PROTECT(vec_cast(x, vctrs_shared_empty_int, args_max_fill, args_empty)); if (!r_is_positive_number(x)) { r_abort("`max_fill` must be `NULL` or a single positive integer."); } int out = r_int_get(x, 0); UNPROTECT(1); return out; } vctrs/src/order-sortedness.h0000644000176200001440000000417214042540502015655 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_SORTEDNESS_H #define VCTRS_ORDER_SORTEDNESS_H #include "vctrs.h" #include "order-groups.h" // ----------------------------------------------------------------------------- enum vctrs_sortedness { VCTRS_SORTEDNESS_unsorted, VCTRS_SORTEDNESS_sorted, VCTRS_SORTEDNESS_reversed, }; // ----------------------------------------------------------------------------- enum vctrs_sortedness dbl_sortedness(const double* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos); enum vctrs_sortedness int_sortedness(const int* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos); enum vctrs_sortedness chr_sortedness(const SEXP* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos); // ----------------------------------------------------------------------------- void ord_resolve_sortedness(enum vctrs_sortedness sortedness, r_ssize size, int* p_o); void ord_resolve_sortedness_chunk(enum vctrs_sortedness sortedness, r_ssize size, int* p_o); // ----------------------------------------------------------------------------- #endif vctrs/src/order-truelength.h0000644000176200001440000000713714042540502015651 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_TRUELENGTH_H #define VCTRS_ORDER_TRUELENGTH_H #include "vctrs.h" // ----------------------------------------------------------------------------- // This seems to be a reasonable default to start with for tracking the original // truelengths of the unique strings, and is what base R uses. It is expanded // by 2x every time we need to reallocate. #define TRUELENGTH_SIZE_ALLOC_DEFAULT 10000 // ----------------------------------------------------------------------------- /* * Struct of information required to track truelengths of character vectors * when ordering them * * @member self A RAWSXP for the struct memory. * * @members strings,p_strings,strings_pi The unique CHARSXP seen during * ordering. * @members lengths,p_lengths,lengths_pi The original truelengths of the * `strings`. * @members uniques,p_uniques,uniques_pi At first, this is the same as `strings` * until `chr_mark_sorted_uniques()` is called, which reorders them in place * and sorts them. * @members sizes,p_sizes,sizes_pi The sizes of each individual CHARSXP in * `uniques`. Kept in the same ordering as `uniques` while sorting. * @members sizes_aux, p_sizes_aux, sizes_aux_pi Auxiliary vector of sizes * that is used as working memory when sorting `uniques`. * * @member size_alloc The current allocated size of the SEXP objects in this * struct * @member max_size_alloc The maximum allowed allocation size for the SEXP * objects in this struct. Set to the size of `x`, which would occur if * all strings were unique. * @member size_used The number of unique strings currently in `strings`. * @member max_string_size The maximum string size of the unique strings stored * in `strings`. This controls the depth of recursion in `chr_radix_order()`. */ struct truelength_info { SEXP self; SEXP strings; SEXP* p_strings; PROTECT_INDEX strings_pi; SEXP lengths; r_ssize* p_lengths; PROTECT_INDEX lengths_pi; SEXP uniques; SEXP* p_uniques; PROTECT_INDEX uniques_pi; SEXP sizes; int* p_sizes; PROTECT_INDEX sizes_pi; SEXP sizes_aux; int* p_sizes_aux; PROTECT_INDEX sizes_aux_pi; r_ssize size_alloc; r_ssize max_size_alloc; r_ssize size_used; r_ssize max_string_size; }; #define PROTECT_TRUELENGTH_INFO(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->strings, &(p_info)->strings_pi); \ PROTECT_WITH_INDEX((p_info)->lengths, &(p_info)->lengths_pi); \ PROTECT_WITH_INDEX((p_info)->uniques, &(p_info)->uniques_pi); \ PROTECT_WITH_INDEX((p_info)->sizes, &(p_info)->sizes_pi); \ PROTECT_WITH_INDEX((p_info)->sizes_aux, &(p_info)->sizes_aux_pi); \ *(p_n) += 6; \ } while(0) struct truelength_info* new_truelength_info(r_ssize max_size_alloc); void truelength_reset(struct truelength_info* p_truelength_info); void truelength_save(SEXP x, r_ssize truelength, r_ssize size, struct truelength_info* p_truelength_info); // ----------------------------------------------------------------------------- #endif vctrs/src/arg.c0000644000176200001440000001170714042540502013121 0ustar liggesusers#include "vctrs.h" #include "utils.h" // Materialising argument tags ------------------------------------------ #define DEFAULT_ARG_BUF_SIZE 100 static int fill_arg_buffer(struct vctrs_arg* arg, char* buf, r_ssize cur_size, r_ssize tot_size); /** * This takes a `struct vctrs_arg{}` linked list and calls the * recursive function `fill_arg_buffer()`. It allocates a buffer in a * RAWSXP of size 100, which is grown by a factor of 1.5 each time the * `fill()` methods return a negative value. Returns a character * vector of size 1 containing the materialised argument tag. */ SEXP vctrs_arg(struct vctrs_arg* arg) { if (!arg) { return chrs_empty; } r_ssize next_size = DEFAULT_ARG_BUF_SIZE; r_ssize size; SEXP buf_holder = PROTECT(R_NilValue); char* buf; do { size = next_size; UNPROTECT(1); buf_holder = PROTECT(Rf_allocVector(RAWSXP, size)); buf = (char*) RAW(buf_holder); // Reallocate a larger buffer at the next iteration if the current // buffer turns out too small next_size *= 1.5; } while (fill_arg_buffer(arg, buf, 0, size) < 0); SEXP out = Rf_mkString(buf); UNPROTECT(1); return out; } /** * Takes a `struct vctrs_arg{}` linked list and a buffer and calls the * `fill()` method on each of those, recursively. Unless an error * occurred, it returns the current size written to the buffer so we * can track the remaining memory available in the buffer after * recursion. */ static int fill_arg_buffer(struct vctrs_arg* arg, char* buf, r_ssize cur_size, r_ssize tot_size) { if (arg->parent) { cur_size = fill_arg_buffer(arg->parent, buf, cur_size, tot_size); if (cur_size < 0) { return cur_size; } } r_ssize written = arg->fill(arg->data, buf + cur_size, tot_size - cur_size); if (written < 0) { return written; } else { return cur_size + written; } } // Objects ------------------------------------------------------------- // Simple wrapper around a `const char*` argument tag static r_ssize wrapper_arg_fill(void* data, char* buf, r_ssize remaining); struct vctrs_arg new_wrapper_arg(struct vctrs_arg* parent, const char* arg) { return (struct vctrs_arg) { .parent = parent, .fill = &wrapper_arg_fill, .data = (void*) arg }; } static r_ssize wrapper_arg_fill(void* data, char* buf, r_ssize remaining) { const char* src = (const char*) data; size_t len = strlen(src); if (len >= remaining) { return -1; } memcpy(buf, src, len); buf[len] = '\0'; return len; } // Wrapper around a counter representing the current position of the // argument static r_ssize counter_arg_fill(void* data, char* buf, r_ssize remaining); struct vctrs_arg new_counter_arg(struct vctrs_arg* parent, struct arg_data_counter* data) { return (struct vctrs_arg) { .parent = parent, .fill = &counter_arg_fill, .data = (void*) data }; } struct arg_data_counter new_counter_arg_data(R_len_t* i, SEXP* names, R_len_t* names_i) { return (struct arg_data_counter) { .i = i, .names = names, .names_i = names_i }; } static r_ssize counter_arg_fill(void* data_, char* buf, r_ssize remaining) { struct arg_data_counter* data = (struct arg_data_counter*) data_; R_len_t i = *data->i; SEXP names = *data->names; R_len_t names_i = *data->names_i; int len; if (r_has_name_at(names, names_i)) { // FIXME: Check for syntactic names len = snprintf(buf, remaining, "%s", r_chr_get_c_string(names, names_i)); } else { len = snprintf(buf, remaining, "..%d", i + 1); } if (len >= remaining) { return -1; } else { return len; } } // Indexing tag that materialises as `$rhs`. The `$` is only written when // the arg has a parent. static r_ssize index_arg_fill(void* data, char* buf, r_ssize remaining); static bool is_empty_arg(struct vctrs_arg* arg); struct vctrs_arg new_index_arg(struct vctrs_arg* parent, struct arg_data_index* data) { return (struct vctrs_arg) { .parent = parent, .fill = &index_arg_fill, .data = (void*) data }; } struct arg_data_index new_index_arg_data(const char* arg, struct vctrs_arg* parent) { return (struct arg_data_index) { .arg = arg, .parent = parent }; } static r_ssize index_arg_fill(void* data_, char* buf, r_ssize remaining) { struct arg_data_index* data = (struct arg_data_index*) data_; const char* src = data->arg; size_t len = strlen(src); bool child = is_empty_arg(data->parent); if (child) { ++len; } if (len >= remaining) { return -1; } if (child) { *buf++ = '$'; } memcpy(buf, src, len); buf[len] = '\0'; return len; } static bool is_empty_arg(struct vctrs_arg* arg) { if (!arg) { return true; } char tmp[1]; return arg->fill(arg->data, tmp, 1) != 0;; } vctrs/src/type-factor.h0000644000176200001440000000037613653027721014624 0ustar liggesusers#ifndef VCTRS_TYPE_FACTOR_H #define VCTRS_TYPE_FACTOR_H #include "cast.h" #include "ptype2.h" SEXP fct_ptype2(const struct ptype2_opts* opts); SEXP ord_ptype2(const struct ptype2_opts* opts); SEXP ord_as_ordered(const struct cast_opts* opts); #endif vctrs/src/subscript-loc.c0000644000176200001440000004272114042540502015141 0ustar liggesusers#include "vctrs.h" #include "utils.h" #include "subscript.h" #include "subscript-loc.h" static SEXP int_invert_location(SEXP subscript, R_len_t n, const struct location_opts* opts); static SEXP int_filter_zero(SEXP subscript, R_len_t n_zero); static void int_check_consecutive(SEXP subscript, R_len_t n, R_len_t n_extend, const struct location_opts* opts); static void stop_subscript_missing(SEXP i); static void stop_subscript_oob_location(SEXP i, R_len_t size, const struct location_opts* opts); static void stop_subscript_oob_name(SEXP i, SEXP names, const struct location_opts* opts); static void stop_location_negative(SEXP i, const struct location_opts* opts); static void stop_location_zero(SEXP i, const struct location_opts* opts); static void stop_indicator_size(SEXP i, SEXP n, const struct location_opts* opts); static void stop_location_negative_missing(SEXP i, const struct location_opts* opts); static void stop_location_negative_positive(SEXP i, const struct location_opts* opts); static void stop_location_oob_non_consecutive(SEXP i, R_len_t size, const struct location_opts* opts); static SEXP int_as_location(SEXP subscript, R_len_t n, const struct location_opts* opts) { const int* data = INTEGER_RO(subscript); R_len_t loc_n = Rf_length(subscript); // Zeros need to be filtered out from the subscript vector. // `int_invert_location()` filters them out for negative indices, but // positive indices need to go through and `int_filter_zero()`. R_len_t n_zero = 0; R_len_t n_extend = 0; for (R_len_t i = 0; i < loc_n; ++i, ++data) { int elt = *data; if (elt == NA_INTEGER) { if (opts->missing == SUBSCRIPT_MISSING_ERROR) { stop_subscript_missing(subscript); } } else { if (elt < 0) { switch (opts->loc_negative) { case LOC_NEGATIVE_INVERT: return int_invert_location(subscript, n, opts); case LOC_NEGATIVE_ERROR: stop_location_negative(subscript, opts); case LOC_NEGATIVE_IGNORE: break; } } if (elt == 0) { switch (opts->loc_zero) { case LOC_ZERO_REMOVE: ++n_zero; break; case LOC_ZERO_ERROR: stop_location_zero(subscript, opts); case LOC_ZERO_IGNORE: break; } } else if (abs(elt) > n) { if (opts->loc_oob == LOC_OOB_ERROR) { stop_subscript_oob_location(subscript, n, opts); } ++n_extend; } } } if (n_zero) { subscript = int_filter_zero(subscript, n_zero); } PROTECT(subscript); if (n_extend > 0) { int_check_consecutive(subscript, n, n_extend, opts); } UNPROTECT(1); return subscript; } static SEXP lgl_as_location(SEXP subscript, R_len_t n, const struct location_opts* opts); static SEXP int_invert_location(SEXP subscript, R_len_t n, const struct location_opts* opts) { const int* data = INTEGER_RO(subscript); R_len_t loc_n = Rf_length(subscript); SEXP sel = PROTECT(Rf_allocVector(LGLSXP, n)); r_lgl_fill(sel, 1, n); int* sel_data = LOGICAL(sel); for (R_len_t i = 0; i < loc_n; ++i, ++data) { int j = *data; if (j == NA_INTEGER) { stop_location_negative_missing(subscript, opts); } if (j >= 0) { if (j == 0) { continue; } else { stop_location_negative_positive(subscript, opts); } } j = -j; if (j > n) { struct location_opts updated_opts = *opts; struct subscript_opts updated_subscript_opts = *updated_opts.subscript_opts; updated_subscript_opts.action = SUBSCRIPT_ACTION_NEGATE; updated_opts.subscript_opts = &updated_subscript_opts; stop_subscript_oob_location(subscript, n, &updated_opts); } sel_data[j - 1] = 0; } SEXP out = lgl_as_location(sel, n, opts); UNPROTECT(1); return out; } static SEXP int_filter_zero(SEXP subscript, R_len_t n_zero) { R_len_t loc_n = vec_size(subscript); const int* data = INTEGER_RO(subscript); SEXP out = PROTECT(Rf_allocVector(INTSXP, loc_n - n_zero)); int* out_data = INTEGER(out); for (R_len_t i = 0; i < loc_n; ++i, ++data) { int elt = *data; if (elt != 0) { *out_data = elt; ++out_data; } } UNPROTECT(1); return out; } // From compare.c int qsort_icmp(const void* x, const void* y); static void int_check_consecutive(SEXP subscript, R_len_t n, R_len_t n_extend, const struct location_opts* opts) { SEXP extended = PROTECT(Rf_allocVector(INTSXP, n_extend)); int* p_extended = INTEGER(extended); int i_extend = 0; int new_n = n; int* p_subscript = INTEGER(subscript); R_len_t n_subscript = Rf_length(subscript); for (R_len_t i = 0; i < n_subscript; ++i) { int elt = p_subscript[i]; // Missing value also covered here if (elt <= n) { continue; } // Special case: appending in ascending sequence at the end // should not require any sorting if (elt - 1 == new_n) { ++new_n; --n_extend; } else { p_extended[i_extend++] = elt - 1; } } if (n_extend != i_extend) { stop_internal("int_check_consecutive", "n_extend (%d) != i_extend (%d).", n_extend, i_extend); } if (i_extend == 0) { UNPROTECT(1); return; } // Only the first i_extend entries of the array are populated, // the rest is never touched. qsort(p_extended, i_extend, sizeof(int), &qsort_icmp); for (R_len_t i = 0; i < i_extend; ++i) { int elt = p_extended[i]; if (elt != new_n + i) { stop_location_oob_non_consecutive(subscript, n, opts); } } UNPROTECT(1); } static SEXP dbl_as_location(SEXP subscript, R_len_t n, const struct location_opts* opts) { subscript = PROTECT(vec_cast(subscript, vctrs_shared_empty_int, args_empty, args_empty)); subscript = int_as_location(subscript, n, opts); UNPROTECT(1); return subscript; } static SEXP lgl_as_location(SEXP subscript, R_len_t n, const struct location_opts* opts) { R_len_t subscript_n = Rf_length(subscript); if (subscript_n == n) { SEXP out = PROTECT(r_lgl_which(subscript, true)); SEXP nms = PROTECT(r_names(subscript)); if (nms != R_NilValue) { nms = PROTECT(vec_slice(nms, out)); r_poke_names(out, nms); UNPROTECT(1); } UNPROTECT(2); return out; } /* A single `TRUE` or `FALSE` index is recycled_nms to the full vector * size. This means `TRUE` is synonym for missing index (subscript.e. no * subsetting) and `FALSE` is synonym for empty index. * * We could return the missing argument as sentinel to avoid * materialising the index vector for the `TRUE` case but this would * make `vec_as_location()` an option type just to optimise a rather * uncommon case. */ if (subscript_n == 1) { int elt = LOGICAL(subscript)[0]; SEXP out; if (elt == NA_LOGICAL) { out = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill(out, NA_INTEGER, n); } else if (elt) { out = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill_seq(out, 1, n); } else { return vctrs_shared_empty_int; } SEXP nms = PROTECT(r_names(subscript)); if (nms != R_NilValue) { SEXP recycled_nms = PROTECT(Rf_allocVector(STRSXP, n)); r_chr_fill(recycled_nms, r_chr_get(nms, 0), n); r_poke_names(out, recycled_nms); UNPROTECT(1); } UNPROTECT(2); return out; } SEXP n_obj = PROTECT(Rf_ScalarInteger(n)); stop_indicator_size(subscript, n_obj, opts); never_reached("lgl_as_location"); } static SEXP chr_as_location(SEXP subscript, SEXP names, const struct location_opts* opts) { if (names == R_NilValue) { Rf_errorcall(R_NilValue, "Can't use character names to index an unnamed vector."); } if (TYPEOF(names) != STRSXP) { Rf_errorcall(R_NilValue, "`names` must be a character vector."); } SEXP matched = PROTECT(Rf_match(names, subscript, NA_INTEGER)); R_len_t n = Rf_length(matched); const int* p = INTEGER_RO(matched); const SEXP* ip = STRING_PTR_RO(subscript); for (R_len_t k = 0; k < n; ++k) { if (p[k] == NA_INTEGER && ip[k] != NA_STRING) { stop_subscript_oob_name(subscript, names, opts); } } r_poke_names(matched, PROTECT(r_names(subscript))); UNPROTECT(1); UNPROTECT(1); return matched; } SEXP vec_as_location(SEXP subscript, R_len_t n, SEXP names) { return vec_as_location_opts(subscript, n, names, location_default_opts); } SEXP vec_as_location_opts(SEXP subscript, R_len_t n, SEXP names, const struct location_opts* opts) { ERR err = NULL; subscript = vec_as_subscript_opts(subscript, opts->subscript_opts, &err); PROTECT2(subscript, err); if (err) { r_cnd_signal(err); never_reached("vec_as_location_opts"); } SEXP out = R_NilValue; switch (TYPEOF(subscript)) { case NILSXP: out = vctrs_shared_empty_int; break; case INTSXP: out = int_as_location(subscript, n, opts); break; case REALSXP: out = dbl_as_location(subscript, n, opts); break; case LGLSXP: out = lgl_as_location(subscript, n, opts); break; case STRSXP: out = chr_as_location(subscript, names, opts); break; default: stop_unimplemented_type("vec_as_location_opts", TYPEOF(subscript)); } UNPROTECT(2); return out; } static void stop_subscript_arg_missing() { Rf_errorcall(R_NilValue, "`missing` must be one of \"propagate\" or \"error\"."); } static void stop_bad_negative() { Rf_errorcall(R_NilValue, "`negative` must be one of \"invert\", \"error\", or \"ignore\"."); } static void stop_bad_oob() { Rf_errorcall(R_NilValue, "`oob` must be one of \"error\" or \"extend\"."); } static void stop_bad_zero() { Rf_errorcall(R_NilValue, "`zero` must be one of \"remove\", \"error\", or \"ignore\"."); } static enum subscript_missing parse_subscript_arg_missing(SEXP x) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_subscript_arg_missing(); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "propagate")) return SUBSCRIPT_MISSING_PROPAGATE; if (!strcmp(str, "error")) return SUBSCRIPT_MISSING_ERROR; stop_subscript_arg_missing(); never_reached("stop_subscript_arg_missing"); } static enum num_loc_negative parse_loc_negative(SEXP x) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_bad_negative(); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "invert")) return LOC_NEGATIVE_INVERT; if (!strcmp(str, "error")) return LOC_NEGATIVE_ERROR; if (!strcmp(str, "ignore")) return LOC_NEGATIVE_IGNORE; stop_bad_negative(); never_reached("stop_bad_negative"); } static enum num_loc_oob parse_loc_oob(SEXP x) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_bad_oob(); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "error")) return LOC_OOB_ERROR; if (!strcmp(str, "extend")) return LOC_OOB_EXTEND; stop_bad_oob(); never_reached("stop_bad_oob"); } static enum num_loc_zero parse_loc_zero(SEXP x) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_bad_zero(); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "remove")) return LOC_ZERO_REMOVE; if (!strcmp(str, "error")) return LOC_ZERO_ERROR; if (!strcmp(str, "ignore")) return LOC_ZERO_IGNORE; stop_bad_zero(); never_reached("parse_loc_zero"); } // [[ register() ]] SEXP vctrs_as_location(SEXP subscript, SEXP n_, SEXP names, SEXP loc_negative, SEXP loc_oob, SEXP loc_zero, SEXP missing, SEXP arg_) { R_len_t n = 0; if (n_ == R_NilValue && TYPEOF(subscript) == STRSXP) { n = Rf_length(subscript); } else { if (OBJECT(n_) || TYPEOF(n_) != INTSXP) { n_ = vec_cast(n_, vctrs_shared_empty_int, args_empty, args_empty); } PROTECT(n_); if (Rf_length(n_) != 1) { stop_internal("vctrs_as_location", "`n` must be a scalar number."); } n = r_int_get(n_, 0); UNPROTECT(1); } struct vctrs_arg arg = vec_as_arg(arg_); struct subscript_opts subscript_opts = { .subscript_arg = &arg }; struct location_opts opts = { .subscript_opts = &subscript_opts, .missing = parse_subscript_arg_missing(missing), .loc_negative = parse_loc_negative(loc_negative), .loc_oob = parse_loc_oob(loc_oob), .loc_zero = parse_loc_zero(loc_zero) }; return vec_as_location_opts(subscript, n, names, &opts); } static void stop_subscript_missing(SEXP i) { vctrs_eval_mask1(Rf_install("stop_subscript_missing"), syms_i, i); never_reached("stop_subscript_missing"); } static void stop_location_negative_missing(SEXP i, const struct location_opts* opts) { SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask3(Rf_install("stop_location_negative_missing"), syms_i, i, syms_subscript_arg, arg, syms_subscript_action, get_opts_action(opts->subscript_opts)); never_reached("stop_location_negative_missing"); } static void stop_location_negative_positive(SEXP i, const struct location_opts* opts) { SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask3(Rf_install("stop_location_negative_positive"), syms_i, i, syms_subscript_arg, arg, syms_subscript_action, get_opts_action(opts->subscript_opts)); never_reached("stop_location_negative_positive"); } static void stop_subscript_oob_location(SEXP i, R_len_t size, const struct location_opts* opts) { SEXP size_obj = PROTECT(r_int(size)); SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask5(Rf_install("stop_subscript_oob"), syms_i, i, syms_subscript_type, chrs_numeric, syms_size, size_obj, syms_subscript_action, get_opts_action(opts->subscript_opts), syms_subscript_arg, arg); UNPROTECT(1); never_reached("stop_subscript_oob_location"); } static void stop_subscript_oob_name(SEXP i, SEXP names, const struct location_opts* opts) { SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask5(Rf_install("stop_subscript_oob"), syms_i, i, syms_subscript_type, chrs_character, syms_names, names, syms_subscript_action, get_opts_action(opts->subscript_opts), syms_subscript_arg, arg); never_reached("stop_subscript_oob_name"); } static void stop_location_negative(SEXP i, const struct location_opts* opts) { SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask3(Rf_install("stop_location_negative"), syms_i, i, syms_subscript_action, get_opts_action(opts->subscript_opts), syms_subscript_arg, arg); never_reached("stop_location_negative"); } static void stop_location_zero(SEXP i, const struct location_opts* opts) { SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask3(Rf_install("stop_location_zero"), syms_i, i, syms_subscript_action, get_opts_action(opts->subscript_opts), syms_subscript_arg, arg); never_reached("stop_location_zero"); } static void stop_indicator_size(SEXP i, SEXP n, const struct location_opts* opts) { SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask4(Rf_install("stop_indicator_size"), syms_i, i, syms_n, n, syms_subscript_action, get_opts_action(opts->subscript_opts), syms_subscript_arg, arg); never_reached("stop_indicator_size"); } static void stop_location_oob_non_consecutive(SEXP i, R_len_t size, const struct location_opts* opts) { SEXP size_obj = PROTECT(r_int(size)); SEXP arg = PROTECT(vctrs_arg(opts->subscript_opts->subscript_arg)); vctrs_eval_mask4(Rf_install("stop_location_oob_non_consecutive"), syms_i, i, syms_size, size_obj, syms_subscript_action, get_opts_action(opts->subscript_opts), syms_subscript_arg, arg); UNPROTECT(1); never_reached("stop_location_oob_non_consecutive"); } struct location_opts location_default_opts_obj; struct location_opts location_default_assign_opts_obj; void vctrs_init_subscript_loc(SEXP ns) { location_default_opts_obj.subscript_opts = &subscript_default_opts; location_default_opts_obj.loc_negative = LOC_NEGATIVE_INVERT; location_default_opts_obj.loc_oob = LOC_OOB_ERROR; location_default_opts_obj.loc_zero = LOC_ZERO_REMOVE; location_default_opts_obj.missing = SUBSCRIPT_MISSING_PROPAGATE; location_default_assign_opts_obj = location_default_opts_obj; location_default_assign_opts_obj.subscript_opts = &subscript_default_assign_opts; } vctrs/src/altrep.h0000644000176200001440000000411414000045262013633 0ustar liggesusers#ifndef ALTREP_H #define ALTREP_H #include "Rversion.h" #if (R_VERSION < R_Version(3, 5, 0)) || \ (defined(_WIN32) && R_VERSION == R_Version(3, 5, 0)) # define HAS_ALTREP 0 #else # define HAS_ALTREP 1 #endif #if !HAS_ALTREP # define ALTREP(x) false # define ALTVEC_EXTRACT_SUBSET_PROXY(x, indx, call) \ ((void) x, (void) indx, (void) call, NULL) #else #include "R_ext/Altrep.h" #define ALTREP_METHODS \ R_altrep_UnserializeEX_method_t UnserializeEX; \ R_altrep_Unserialize_method_t Unserialize; \ R_altrep_Serialized_state_method_t Serialized_state; \ R_altrep_DuplicateEX_method_t DuplicateEX; \ R_altrep_Duplicate_method_t Duplicate; \ R_altrep_Coerce_method_t Coerce; \ R_altrep_Inspect_method_t Inspect; \ R_altrep_Length_method_t Length #define ALTVEC_METHODS \ ALTREP_METHODS; \ R_altvec_Dataptr_method_t Dataptr; \ R_altvec_Dataptr_or_null_method_t Dataptr_or_null; \ R_altvec_Extract_subset_method_t Extract_subset typedef struct { ALTREP_METHODS; } altrep_methods_t; typedef struct { ALTVEC_METHODS; } altvec_methods_t; #define CLASS_METHODS_TABLE(type_class) STDVEC_DATAPTR(type_class) #define GENERIC_METHODS_TABLE(x, type_class) \ ((type_class##_methods_t *) CLASS_METHODS_TABLE(ALTREP_CLASS(x))) #define ALTREP_METHODS_TABLE(x) GENERIC_METHODS_TABLE(x, altrep) #define ALTVEC_METHODS_TABLE(x) GENERIC_METHODS_TABLE(x, altvec) #define DISPATCH_TARGET_HELPER(x, ...) x #define DISPATCH_TARGET(...) DISPATCH_TARGET_HELPER(__VA_ARGS__, dummy) #define DO_DISPATCH(type, fun, ...) \ type##_METHODS_TABLE(DISPATCH_TARGET(__VA_ARGS__))->fun(__VA_ARGS__) #define ALTREP_DISPATCH(fun, ...) DO_DISPATCH(ALTREP, fun, __VA_ARGS__) #define ALTVEC_DISPATCH(fun, ...) DO_DISPATCH(ALTVEC, fun, __VA_ARGS__) static inline SEXP ALTVEC_EXTRACT_SUBSET_PROXY(SEXP x, SEXP indx, SEXP call) { return ALTVEC_DISPATCH(Extract_subset, x, indx, call); } #endif #endif vctrs/src/complete.c0000644000176200001440000001361614042540502014161 0ustar liggesusers#include "vctrs.h" #include "equal.h" #include "type-data-frame.h" // ----------------------------------------------------------------------------- static SEXP vec_slice_complete(SEXP x); // [[ register() ]] SEXP vctrs_slice_complete(SEXP x) { return vec_slice_complete(x); } static SEXP vec_locate_complete(SEXP x); static SEXP vec_slice_complete(SEXP x) { SEXP loc = PROTECT(vec_locate_complete(x)); // Skip `vec_as_location()` in `vec_slice()` SEXP out = vec_slice_impl(x, loc); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_locate_complete(SEXP x) { return vec_locate_complete(x); } static SEXP vec_detect_complete(SEXP x); static SEXP vec_locate_complete(SEXP x) { SEXP where = PROTECT(vec_detect_complete(x)); SEXP out = r_lgl_which(where, false); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_detect_complete(SEXP x) { return vec_detect_complete(x); } static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out); static SEXP vec_detect_complete(SEXP x) { SEXP proxy = PROTECT(vec_proxy_complete(x)); R_len_t size = vec_size(proxy); SEXP out = PROTECT(r_new_logical(size)); int* p_out = LOGICAL(out); // Initialize assuming fully complete for (R_len_t i = 0; i < size; ++i) { p_out[i] = 1; } vec_detect_complete_switch(proxy, size, p_out); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- static inline void lgl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void int_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void dbl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void cpl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void chr_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void raw_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void list_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void df_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out) { switch (vec_proxy_typeof(x)) { case vctrs_type_logical: lgl_detect_complete(x, size, p_out); break; case vctrs_type_integer: int_detect_complete(x, size, p_out); break; case vctrs_type_double: dbl_detect_complete(x, size, p_out); break; case vctrs_type_complex: cpl_detect_complete(x, size, p_out); break; case vctrs_type_character: chr_detect_complete(x, size, p_out); break; case vctrs_type_raw: raw_detect_complete(x, size, p_out); break; case vctrs_type_list: list_detect_complete(x, size, p_out); break; case vctrs_type_dataframe: df_detect_complete(x, size, p_out); break; case vctrs_type_scalar: stop_internal("vec_detect_complete", "Can't detect missing values in scalars."); default: stop_unimplemented_vctrs_type("vec_detect_complete", vec_proxy_typeof(x)); } } // ----------------------------------------------------------------------------- /* * Avoid the temptation to add an extra if branch at the start of the for * loop like: * * ``` * if (!p_out[i]) { * continue; * } * ``` * * In theory this avoids calculations if we already know the row is incomplete, * but in practice it can wreck performance. I imagine it is due to the cost * of the extra branch + the volatility of this value, causing the result of * the branch to be "guessed" incorrectly many times. For example, the vctrs * result here gets 6x slower (i.e. slower than the R solution) by adding that * branch. * * ``` * # Place many NA values randomly in the first column * first <- sample(c(1, NA, 3), size = 1e6, replace = TRUE) * cols <- rep_len(list(rep(1, 1e6)), 100) * cols <- c(list(first), cols) * names(cols) <- paste0("a", 1:length(cols)) * df <- new_data_frame(cols) * bench::mark(vec_detect_complete(df), complete.cases(df)) * ``` */ #define VEC_DETECT_COMPLETE(CTYPE, CONST_DEREF, IS_MISSING) { \ const CTYPE* p_x = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i) { \ const CTYPE elt = p_x[i]; \ \ if (IS_MISSING(elt)) { \ p_out[i] = 0; \ } \ } \ } static inline void lgl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(int, LOGICAL_RO, lgl_is_missing); } static inline void int_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(int, INTEGER_RO, int_is_missing); } static inline void dbl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(double, REAL_RO, dbl_is_missing); } static inline void cpl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(Rcomplex, COMPLEX_RO, cpl_is_missing); } static inline void chr_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(SEXP, STRING_PTR_RO, chr_is_missing); } static inline void raw_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(Rbyte, RAW_RO, raw_is_missing); } static inline void list_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(SEXP, VECTOR_PTR_RO, list_is_missing); } #undef VEC_DETECT_COMPLETE // ----------------------------------------------------------------------------- static inline void df_detect_complete(SEXP x, R_len_t size, int* p_out) { r_ssize n_cols = r_length(x); const SEXP* p_x = VECTOR_PTR_RO(x); for (r_ssize i = 0; i < n_cols; ++i) { vec_detect_complete_switch(p_x[i], size, p_out); } } vctrs/src/proxy.c0000644000176200001440000001657114042540502013535 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "dim.h" #include "utils.h" #include "equal.h" // Initialised at load time SEXP syms_vec_proxy = NULL; SEXP syms_vec_proxy_equal = NULL; SEXP syms_vec_proxy_equal_array = NULL; SEXP syms_vec_proxy_compare = NULL; SEXP syms_vec_proxy_compare_array = NULL; SEXP syms_vec_proxy_order = NULL; SEXP syms_vec_proxy_order_array = NULL; SEXP fns_vec_proxy_equal_array = NULL; SEXP fns_vec_proxy_compare_array = NULL; SEXP fns_vec_proxy_order_array = NULL; static SEXP vec_proxy_unwrap(SEXP x); SEXP vec_proxy_method(SEXP x); SEXP vec_proxy_invoke(SEXP x, SEXP method); // [[ register(); include("vctrs.h") ]] SEXP vec_proxy(SEXP x) { int nprot = 0; struct vctrs_type_info info = vec_type_info(x); PROTECT_TYPE_INFO(&info, &nprot); SEXP out; if (info.type == vctrs_type_s3) { out = vec_proxy_invoke(x, info.proxy_method); } else { out = x; } UNPROTECT(nprot); return out; } static inline SEXP vec_proxy_equal_method(SEXP x); static inline SEXP vec_proxy_equal_invoke(SEXP x, SEXP method); // [[ register(); include("vctrs.h") ]] SEXP vec_proxy_equal(SEXP x) { SEXP method = PROTECT(vec_proxy_equal_method(x)); SEXP out = vec_proxy_equal_invoke(x, method); UNPROTECT(1); return out; } static inline SEXP vec_proxy_compare_method(SEXP x); static inline SEXP vec_proxy_compare_invoke(SEXP x, SEXP method); // [[ register(); include("vctrs.h") ]] SEXP vec_proxy_compare(SEXP x) { SEXP method = PROTECT(vec_proxy_compare_method(x)); SEXP out = vec_proxy_compare_invoke(x, method); UNPROTECT(1); return out; } static inline SEXP vec_proxy_order_method(SEXP x); static inline SEXP vec_proxy_order_invoke(SEXP x, SEXP method); // [[ register(); include("vctrs.h") ]] SEXP vec_proxy_order(SEXP x) { SEXP method = PROTECT(vec_proxy_order_method(x)); SEXP out = vec_proxy_order_invoke(x, method); UNPROTECT(1); return out; } static SEXP df_proxy(SEXP x, enum vctrs_proxy_kind kind); /* * Specialized internal variant of `vec_proxy_equal()` that returns an * alternative proxy for non data frame input that has a data frame proxy. * These are special cased under the heuristic that the entire row has to be * missing to be considered "incomplete". The easiest way to generate a * completeness proxy following this heuristic is to generate a logical vector * marked with `NA` where that row is completely missing. */ // [[ register() ]] SEXP vec_proxy_complete(SEXP x) { if (is_data_frame(x)) { return df_proxy(x, VCTRS_PROXY_KIND_complete); } SEXP proxy = PROTECT(vec_proxy_equal(x)); // Arrays have stopgap data frame proxies, // but their completeness rules match normal data frames if (has_dim(x)) { UNPROTECT(1); return proxy; } if (!is_data_frame(proxy)) { UNPROTECT(1); return proxy; } SEXP out = PROTECT(vec_equal_na(proxy)); int* p_out = LOGICAL(out); r_ssize size = r_length(out); for (r_ssize i = 0; i < size; ++i) { if (p_out[i]) { p_out[i] = NA_LOGICAL; } } UNPROTECT(2); return out; } SEXP vec_proxy_method(SEXP x) { return s3_find_method("vec_proxy", x, vctrs_method_table); } // This should be faster than normal dispatch but also means that // proxy methods can't call `NextMethod()`. This could be changed if // it turns out a problem. SEXP vec_proxy_invoke(SEXP x, SEXP method) { if (method == R_NilValue) { return x; } else { return vctrs_dispatch1(syms_vec_proxy, method, syms_x, x); } } static inline SEXP vec_proxy_method_impl(SEXP x, const char* generic, SEXP fn_proxy_array) { SEXP cls = PROTECT(s3_get_class(x)); SEXP method = s3_class_find_method(generic, cls, vctrs_method_table); if (method != R_NilValue) { UNPROTECT(1); return method; } /* FIXME: Stopgap check for bare arrays */ /* which equality functions don't handle well */ if (vec_dim_n(x) > 1) { UNPROTECT(1); return fn_proxy_array; } UNPROTECT(1); return R_NilValue; } static inline SEXP vec_proxy_equal_method(SEXP x) { return vec_proxy_method_impl(x, "vec_proxy_equal", fns_vec_proxy_equal_array); } static inline SEXP vec_proxy_compare_method(SEXP x) { return vec_proxy_method_impl(x, "vec_proxy_compare", fns_vec_proxy_compare_array); } static inline SEXP vec_proxy_order_method(SEXP x) { return vec_proxy_method_impl(x, "vec_proxy_order", fns_vec_proxy_order_array); } static inline SEXP vec_proxy_invoke_impl(SEXP x, SEXP method, SEXP vec_proxy_sym, SEXP (*vec_proxy_fn)(SEXP)) { if (method != R_NilValue) { return vctrs_dispatch1(vec_proxy_sym, method, syms_x, x); } /* Fallback on S3 objects with no proxy */ if (vec_typeof(x) == vctrs_type_s3) { return vec_proxy_fn(x); } else { return x; } } static inline SEXP vec_proxy_equal_invoke(SEXP x, SEXP method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_equal, vec_proxy); } static inline SEXP vec_proxy_compare_invoke(SEXP x, SEXP method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_compare, &vec_proxy_equal); } static inline SEXP vec_proxy_order_invoke(SEXP x, SEXP method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_order, &vec_proxy_compare); } #define DF_PROXY(PROXY) do { \ R_len_t n_cols = Rf_length(x); \ \ for (R_len_t i = 0; i < n_cols; ++i) { \ SEXP col = VECTOR_ELT(x, i); \ SET_VECTOR_ELT(x, i, PROXY(col)); \ } \ } while (0) static SEXP df_proxy(SEXP x, enum vctrs_proxy_kind kind) { x = PROTECT(r_clone_referenced(x)); switch (kind) { case VCTRS_PROXY_KIND_default: DF_PROXY(vec_proxy); break; case VCTRS_PROXY_KIND_equal: DF_PROXY(vec_proxy_equal); break; case VCTRS_PROXY_KIND_compare: DF_PROXY(vec_proxy_compare); break; case VCTRS_PROXY_KIND_order: DF_PROXY(vec_proxy_order); break; case VCTRS_PROXY_KIND_complete: DF_PROXY(vec_proxy_complete); break; } x = PROTECT(df_flatten(x)); x = vec_proxy_unwrap(x); UNPROTECT(2); return x; } #undef DF_PROXY // [[ register() ]] SEXP vctrs_df_proxy(SEXP x, SEXP kind) { if (!r_is_number(kind)) { stop_internal("vctrs_df_proxy", "`kind` must be a single integer."); } enum vctrs_proxy_kind c_kind = r_int_get(kind, 0); return df_proxy(x, c_kind); } static SEXP vec_proxy_unwrap(SEXP x) { if (TYPEOF(x) == VECSXP && XLENGTH(x) == 1 && is_data_frame(x)) { x = vec_proxy_unwrap(VECTOR_ELT(x, 0)); } return x; } // [[ register() ]] SEXP vctrs_unset_s4(SEXP x) { x = r_clone_referenced(x); r_unmark_s4(x); return x; } void vctrs_init_data(SEXP ns) { syms_vec_proxy = Rf_install("vec_proxy"); syms_vec_proxy_equal = Rf_install("vec_proxy_equal"); syms_vec_proxy_equal_array = Rf_install("vec_proxy_equal.array"); syms_vec_proxy_compare = Rf_install("vec_proxy_compare"); syms_vec_proxy_compare_array = Rf_install("vec_proxy_compare.array"); syms_vec_proxy_order = Rf_install("vec_proxy_order"); syms_vec_proxy_order_array = Rf_install("vec_proxy_order.array"); fns_vec_proxy_equal_array = r_env_get(ns, syms_vec_proxy_equal_array); fns_vec_proxy_compare_array = r_env_get(ns, syms_vec_proxy_compare_array); fns_vec_proxy_order_array = r_env_get(ns, syms_vec_proxy_order_array); } vctrs/src/translate.c0000644000176200001440000001161514042540502014343 0ustar liggesusers#include "translate.h" #include "vctrs.h" #include "utils.h" // For testing // [[ register() ]] SEXP vctrs_normalize_encoding(SEXP x) { return vec_normalize_encoding(x); } static inline SEXP obj_normalize_encoding(SEXP x); /* * Recursively normalize encodings of character vectors. * * A CHARSXP is considered normalized if: * - It is the NA_STRING * - It is ASCII, which means the encoding will be unmarked * - It is marked as UTF-8 * * Attributes are translated as well. * * ASCII strings will never get marked with an encoding when they go * through `Rf_mkCharLenCE()`, but they will get marked as ASCII. Since * UTF-8 is fully compatible with ASCII and ASCII is by far the most common * case, we let ASCII strings through without translating them. * * This converts vectors that are completely marked as Latin-1 to UTF-8. In * theory we could leave these as Latin-1, and comparing within * a single vector would be fine, since the encoding would be consistent. * However, this makes comparing between vectors difficult because we then * have to normalize the vectors relative to each other's encodings. * Consistently converting to UTF-8 avoids this issue altogether. * * Vectors with "bytes" encodings are not supported, as they cannot be * converted to UTF-8 by `Rf_translateCharUTF8()`. * * [[ include("translate.h") ]] */ SEXP vec_normalize_encoding(SEXP x) { return obj_normalize_encoding(x); } // ----------------------------------------------------------------------------- static SEXP chr_normalize_encoding(SEXP x); static SEXP list_normalize_encoding(SEXP x); static SEXP obj_attrib_normalize_encoding(SEXP x, SEXP attrib); static inline SEXP obj_normalize_encoding(SEXP x) { switch (TYPEOF(x)) { case STRSXP: x = chr_normalize_encoding(x); break; case VECSXP: x = list_normalize_encoding(x); break; default: break; } // For performance, avoid `PROTECT()` / `UNPROTECT()` when not needed SEXP attrib = r_attrib(x); if (attrib != r_null) { PROTECT(x); x = obj_attrib_normalize_encoding(x, attrib); UNPROTECT(1); } return x; } // ----------------------------------------------------------------------------- static inline r_ssize chr_find_normalize_start(SEXP x, r_ssize size); static SEXP chr_normalize_encoding(SEXP x) { r_ssize size = r_length(x); r_ssize start = chr_find_normalize_start(x, size); if (size == start) { return x; } x = PROTECT(r_clone_referenced(x)); const SEXP* p_x = STRING_PTR_RO(x); const void* vmax = vmaxget(); for (r_ssize i = start; i < size; ++i) { const SEXP elt = p_x[i]; if (string_is_normalized(elt)) { continue; } SET_STRING_ELT(x, i, string_normalize(elt)); } vmaxset(vmax); UNPROTECT(1); return x; } static inline r_ssize chr_find_normalize_start(SEXP x, r_ssize size) { const SEXP* p_x = STRING_PTR_RO(x); for (r_ssize i = 0; i < size; ++i) { const SEXP elt = p_x[i]; if (string_is_normalized(elt)) { continue; } return i; } return size; } // ----------------------------------------------------------------------------- static SEXP list_normalize_encoding(SEXP x) { PROTECT_INDEX pi; PROTECT_WITH_INDEX(x, &pi); r_ssize size = r_length(x); const SEXP* p_x = VECTOR_PTR_RO(x); for (r_ssize i = 0; i < size; ++i) { SEXP elt_old = p_x[i]; SEXP elt_new = obj_normalize_encoding(elt_old); if (elt_old == elt_new) { continue; } PROTECT(elt_new); // Cloned once, at which point `x` is free of references if (MAYBE_REFERENCED(x)) { x = r_clone(x); REPROTECT(x, pi); p_x = VECTOR_PTR_RO(x); } SET_VECTOR_ELT(x, i, elt_new); UNPROTECT(1); } UNPROTECT(1); return x; } // ----------------------------------------------------------------------------- static SEXP attrib_normalize_encoding(SEXP x); static SEXP obj_attrib_normalize_encoding(SEXP x, SEXP attrib) { SEXP attrib_new = attrib_normalize_encoding(attrib); if (attrib_new == attrib) { return x; } PROTECT(attrib_new); x = PROTECT(r_clone_referenced(x)); r_poke_attrib(x, attrib_new); UNPROTECT(2); return x; } static SEXP attrib_normalize_encoding(SEXP x) { r_ssize loc = 0; bool owned = false; PROTECT_INDEX pi; PROTECT_WITH_INDEX(x, &pi); for (SEXP node = x; node != r_null; node = r_node_cdr(node), ++loc) { SEXP elt_old = r_node_car(node); SEXP elt_new = obj_normalize_encoding(elt_old); if (elt_old == elt_new) { continue; } PROTECT(elt_new); if (!owned) { // Shallow clone entire pairlist if not owned. // Should be fast because these are generally short. x = r_clone(x); REPROTECT(x, pi); owned = true; node = x; // Restore original positioning post-clone for (r_ssize i = 0; i < loc; ++i) { node = r_node_cdr(node); } } r_node_poke_car(node, elt_new); UNPROTECT(1); } UNPROTECT(1); return x; } vctrs/src/shape.c0000644000176200001440000001206714042540502013450 0ustar liggesusers#include "shape.h" #include "dim.h" // [[ register() ]] SEXP vctrs_shaped_ptype(SEXP ptype, SEXP x, SEXP y, SEXP x_arg, SEXP y_arg) { struct vctrs_arg x_arg_ = vec_as_arg(x_arg); struct vctrs_arg y_arg_ = vec_as_arg(y_arg); return vec_shaped_ptype(ptype, x, y, &x_arg_, &y_arg_); } static SEXP vec_shape2(SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); // Computes the common shape of `x` and `y` and attaches it as the // dimensions of `ptype`. If `x` and `y` are both atomic with `NULL` dimensions, // then no dimensions are attached and `ptype` is returned unmodified. // [[ include("shape.h") ]] SEXP vec_shaped_ptype(SEXP ptype, SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { SEXP ptype_dimensions = PROTECT(vec_shape2(x, y, p_x_arg, p_y_arg)); if (ptype_dimensions == R_NilValue) { UNPROTECT(1); return ptype; } ptype = PROTECT(r_clone_referenced(ptype)); r_poke_dim(ptype, ptype_dimensions); UNPROTECT(2); return ptype; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_shape2(SEXP x, SEXP y, SEXP x_arg, SEXP y_arg) { struct vctrs_arg x_arg_ = vec_as_arg(x_arg); struct vctrs_arg y_arg_ = vec_as_arg(y_arg); return vec_shape2(x, y, &x_arg_, &y_arg_); } static SEXP vec_shape2_impl(SEXP x_dimensions, SEXP y_dimensions, SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); static SEXP vec_shape2(SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { SEXP x_dimensions = PROTECT(r_dim(x)); SEXP y_dimensions = PROTECT(r_dim(y)); SEXP out = vec_shape2_impl(x_dimensions, y_dimensions, x, y, p_x_arg, p_y_arg); UNPROTECT(2); return out; } static SEXP vec_shape(SEXP dimensions); static inline int vec_dimension2(int x_dimension, int y_dimension, int axis, SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); /* * Returns `NULL` if `x` and `y` are atomic. * Otherwise returns a dimensions vector where the first dimension length * is forcibly set to 0, and the rest are the common shape of `x` and `y`. */ static SEXP vec_shape2_impl(SEXP x_dimensions, SEXP y_dimensions, SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { if (x_dimensions == R_NilValue) { return vec_shape(y_dimensions); } if (y_dimensions == R_NilValue) { return vec_shape(x_dimensions); } R_len_t x_dimensionality = Rf_length(x_dimensions); R_len_t y_dimensionality = Rf_length(y_dimensions); SEXP max_dimensions; R_len_t max_dimensionality; R_len_t min_dimensionality; if (x_dimensionality >= y_dimensionality) { max_dimensions = x_dimensions; max_dimensionality = x_dimensionality; min_dimensionality = y_dimensionality; } else { max_dimensions = y_dimensions; max_dimensionality = y_dimensionality; min_dimensionality = x_dimensionality; } // Sanity check, should never be true if (max_dimensionality == 0) { stop_internal("vec_shape2_impl", "`max_dimensionality` must have length."); } const int* p_x_dimensions = INTEGER_RO(x_dimensions); const int* p_y_dimensions = INTEGER_RO(y_dimensions); const int* p_max_dimensions = INTEGER_RO(max_dimensions); SEXP out = PROTECT(Rf_allocVector(INTSXP, max_dimensionality)); int* p_out = INTEGER(out); // Set the first axis to zero p_out[0] = 0; // Start loop at the second axis R_len_t i = 1; for (; i < min_dimensionality; ++i) { const int axis = i + 1; const int x_dimension = p_x_dimensions[i]; const int y_dimension = p_y_dimensions[i]; p_out[i] = vec_dimension2(x_dimension, y_dimension, axis, x, y, p_x_arg, p_y_arg); } for (; i < max_dimensionality; ++i) { p_out[i] = p_max_dimensions[i]; } UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // Sets the first axis to zero static SEXP vec_shape(SEXP dimensions) { if (dimensions == R_NilValue) { return R_NilValue; } dimensions = PROTECT(r_clone_referenced(dimensions)); if (Rf_length(dimensions) == 0) { stop_internal("vec_shape", "`dimensions` must have length."); } if (TYPEOF(dimensions) != INTSXP) { stop_internal("vec_shape", "`dimensions` must be an integer vector."); } INTEGER(dimensions)[0] = 0; UNPROTECT(1); return dimensions; } static inline int vec_dimension2(int x_dimension, int y_dimension, int axis, SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { if (x_dimension == y_dimension) { return x_dimension; } else if (x_dimension == 1) { return y_dimension; } else if (y_dimension == 1) { return x_dimension; } else { stop_incompatible_shape(x, y, x_dimension, y_dimension, axis, p_x_arg, p_y_arg); } } vctrs/src/proxy-restore.c0000644000176200001440000001417314042540502015212 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "owned.h" #include "utils.h" // Initialised at load time static SEXP syms_vec_restore_dispatch = NULL; static SEXP fns_vec_restore_dispatch = NULL; // [[ register() ]] SEXP vctrs_restore_default(SEXP x, SEXP to) { return vec_restore_default(x, to, vec_owned(x)); } // Copy attributes except names and dim. This duplicates `x` if needed. // [[ include("vctrs.h") ]] SEXP vec_restore_default(SEXP x, SEXP to, const enum vctrs_owned owned) { SEXP attrib = ATTRIB(to); const bool is_s4 = IS_S4_OBJECT(to); if (attrib == R_NilValue && !is_s4) { return x; } int n_protect = 0; attrib = PROTECT(Rf_shallow_duplicate(attrib)); ++n_protect; x = PROTECT(vec_clone_referenced(x, owned)); ++n_protect; // Remove vectorised attributes which might be incongruent after reshaping. // Shouldn't matter for GNU R but other R implementations might have checks. // Also record class to set it later with `Rf_setAttrib()`. This restores // the OBJECT bit and is likely more compatible with other implementations. SEXP class = R_NilValue; { SEXP node = attrib; SEXP prev = R_NilValue; while (node != R_NilValue) { SEXP tag = TAG(node); // Skip special attributes if (tag == R_NamesSymbol || tag == R_DimSymbol || tag == R_DimNamesSymbol || tag == R_ClassSymbol || tag == R_RowNamesSymbol) { if (tag == R_ClassSymbol) { class = CAR(node); } if (prev == R_NilValue) { attrib = CDR(attrib); } else { SETCDR(prev, CDR(node)); } node = CDR(node); continue; } prev = node; node = CDR(node); } } // Copy attributes but keep names and dims. Don't restore names for // shaped objects since those are generated from dimnames. SEXP dim = PROTECT(Rf_getAttrib(x, R_DimSymbol)); ++n_protect; if (dim == R_NilValue) { SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); // Check if `to` is a data frame early. If `x` and `to` point // to the same reference, then `SET_ATTRIB()` would alter `to`. SEXP rownms = PROTECT(df_rownames(x)); const bool restore_rownms = rownms != R_NilValue && is_data_frame(to); SET_ATTRIB(x, attrib); Rf_setAttrib(x, R_NamesSymbol, nms); // Don't restore row names if `to` isn't a data frame if (restore_rownms) { Rf_setAttrib(x, R_RowNamesSymbol, rownms); } UNPROTECT(2); } else { SEXP dimnames = PROTECT(Rf_getAttrib(x, R_DimNamesSymbol)); SET_ATTRIB(x, attrib); Rf_setAttrib(x, R_DimSymbol, dim); Rf_setAttrib(x, R_DimNamesSymbol, dimnames); UNPROTECT(1); } if (class != R_NilValue) { Rf_setAttrib(x, R_ClassSymbol, class); } if (is_s4) { r_mark_s4(x); } UNPROTECT(n_protect); return x; } static SEXP vec_restore_dispatch(SEXP x, SEXP to, SEXP n) { return vctrs_dispatch3(syms_vec_restore_dispatch, fns_vec_restore_dispatch, syms_x, x, syms_to, to, syms_n, n); } static SEXP vec_bare_df_restore_impl(SEXP x, SEXP to, R_len_t size, const enum vctrs_owned owned) { x = PROTECT(vec_restore_default(x, to, owned)); if (Rf_getAttrib(x, R_NamesSymbol) == R_NilValue) { SEXP names = PROTECT(Rf_allocVector(STRSXP, Rf_length(x))); Rf_setAttrib(x, R_NamesSymbol, names); UNPROTECT(1); } SEXP rownames = PROTECT(df_rownames(x)); if (rownames == R_NilValue) { init_compact_rownames(x, size); } else if (rownames_type(rownames) == ROWNAMES_IDENTIFIERS) { rownames = PROTECT(vec_as_names(rownames, p_unique_repair_silent_opts)); x = vec_proxy_set_names(x, rownames, owned); UNPROTECT(1); } UNPROTECT(2); return x; } // [[ register() ]] SEXP vctrs_bare_df_restore(SEXP x, SEXP to, SEXP n) { return vec_bare_df_restore(x, to, n, vec_owned(x)); } // [[ include("vctrs.h") ]] SEXP vec_bare_df_restore(SEXP x, SEXP to, SEXP n, const enum vctrs_owned owned) { if (TYPEOF(x) != VECSXP) { stop_internal("vec_bare_df_restore", "Attempt to restore data frame from a %s.", Rf_type2char(TYPEOF(x))); } R_len_t size = (n == R_NilValue) ? df_raw_size(x) : r_int_get(n, 0); return vec_bare_df_restore_impl(x, to, size, owned); } // Restore methods are passed the original atomic type back, so we // first restore data frames as such before calling the restore // method, if any // [[ include("vctrs.h") ]] SEXP vec_df_restore(SEXP x, SEXP to, SEXP n, const enum vctrs_owned owned) { SEXP out = PROTECT(vec_bare_df_restore(x, to, n, owned)); out = vec_restore_dispatch(out, to, n); UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_restore(SEXP x, SEXP to, SEXP n) { return vec_restore(x, to, n, vec_owned(x)); } // FIXME: Having `owned` as an argument to `vec_restore()` may be // unnecessary once we have recursive proxy / restore mechanisms. // It currently helps resolve performance issues in `vec_rbind()`'s usage of // `df_assign()`, which repeatedly proxies and restores each column, // causing duplication to occur. Passing `owned` through here allows us to // call `vec_clone_referenced()`, which won't attempt to clone if we know we // own the object. See #1151. // [[ include("vctrs.h") ]] SEXP vec_restore(SEXP x, SEXP to, SEXP n, const enum vctrs_owned owned) { switch (class_type(to)) { default: return vec_restore_dispatch(x, to, n); case vctrs_class_bare_factor: case vctrs_class_bare_ordered: case vctrs_class_none: return vec_restore_default(x, to, owned); case vctrs_class_bare_date: return vec_date_restore(x, to, owned); case vctrs_class_bare_posixct: return vec_posixct_restore(x, to, owned); case vctrs_class_bare_posixlt: return vec_posixlt_restore(x, to, owned); case vctrs_class_bare_data_frame: case vctrs_class_bare_tibble: return vec_bare_df_restore(x, to, n, owned); case vctrs_class_data_frame: return vec_df_restore(x, to, n, owned); } } void vctrs_init_proxy_restore(SEXP ns) { syms_vec_restore_dispatch = Rf_install("vec_restore_dispatch"); fns_vec_restore_dispatch = Rf_findVar(syms_vec_restore_dispatch, ns); } vctrs/src/split.c0000644000176200001440000000102114042540502013467 0ustar liggesusers#include "vctrs.h" #include "utils.h" // [[ register() ]] SEXP vec_split(SEXP x, SEXP by) { if (vec_size(x) != vec_size(by)) { Rf_errorcall(R_NilValue, "`x` and `by` must have the same size."); } SEXP out = PROTECT(vec_group_loc(by)); SEXP indices = VECTOR_ELT(out, 1); SEXP val = vec_chop(x, indices); SET_VECTOR_ELT(out, 1, val); SEXP names = PROTECT(Rf_getAttrib(out, R_NamesSymbol)); SET_STRING_ELT(names, 1, strings_val); Rf_setAttrib(out, R_NamesSymbol, names); UNPROTECT(2); return out; } vctrs/src/ptype-common.h0000644000176200001440000000110213663775021015006 0ustar liggesusers#ifndef VCTRS_PTYPE_COMMON_H #define VCTRS_PTYPE_COMMON_H #include "ptype2.h" #include "utils.h" static inline bool vec_is_common_class_fallback(SEXP ptype) { return Rf_inherits(ptype, c_strs_vctrs_common_class_fallback); } SEXP vec_ptype_common_params(SEXP dots, SEXP ptype, enum df_fallback df_fallback, enum s3_fallback s3_fallback); SEXP vec_ptype_common_opts(SEXP dots, SEXP ptype, const struct fallback_opts* opts); #endif vctrs/vignettes/0000755000176200001440000000000014042546502013425 5ustar liggesusersvctrs/vignettes/type-size.Rmd0000644000176200001440000003155113650511520016023 0ustar liggesusers--- title: "Prototypes and sizes" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Prototypes and sizes} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Rather than using `class()` and `length()`, vctrs has notions of prototype (`vec_ptype_show()`) and size (`vec_size()`). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules. Size and prototype are motivated by thinking about the optimal behaviour for `c()` and `rbind()`, particularly inspired by data frames with columns that are matrices or data frames. ```{r} library(vctrs) ``` ## Prototype The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the `class()` of an object is inadequate for this purpose: * The `class()` doesn't include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a `POSIXct`. You cannot combine two factors or two `POSIXct`s without thinking about the attributes. * The `class()` of a matrix is "matrix" and doesn't include the type of the underlying vector or the dimensionality. Instead, vctrs takes advantage of R's vectorised nature and uses a __prototype__, a 0-observation slice of the vector (this is basically `x[0]` but with some subtleties we'll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data. Conveniently, you can create many prototypes using existing base functions (e.g, `double()` and `factor(levels = c("a", "b"))`). vctrs provides a few helpers (e.g. `new_date()`, `new_datetime()`, and `new_duration()`) where the equivalents in base R are missing. ### Base prototypes `vec_ptype()` creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides `vec_ptype_show()`, which prints the prototype in a friendly way (and returns nothing). Using `vec_ptype_show()` allows us to see the prototypes base R classes: * Atomic vectors have no attributes and just display the underlying `typeof()`: ```{r} vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ``` * The prototype of matrices and arrays include the base type and the dimensions after the first: ```{r} vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ``` * The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it's highly likely that their levels are also equal. ```{r} vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ``` While `vec_ptype_show()` prints only the hash, the prototype object itself does contain all levels: ```{r} vec_ptype(factor("a")) ``` * Base R has three key date time classes: dates, date-times (`POSIXct`), and durations (`difftime)`. Date-times have a timezone, and durations have a unit. ```{r} vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ``` * Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column: ```{r} vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ``` Data frames can have columns that are themselves data frames, making this a "recursive" type: ```{r} df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ``` ### Coercing to common type It's often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via `vec_ptype_common()`. `vec_ptype_common()` possesses the following invariants: * `class(vec_ptype_common(x, y))` equals `class(vec_ptype_common(y, x))`. * `class(vec_ptype_common(x, vec_ptype_common(y, z))` equals `class(vec_ptype_common(vec_ptype_common(x, y), z))`. * `vec_ptype_common(x, NULL) == vec_ptype(x)`. i.e., `vec_ptype_common()` is both commutative and associative (with respect to class) and has an identity element, `NULL`; i.e., it's a __commutative monoid__. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects. Like with `vec_ptype()`, the easiest way to explore `vec_ptype_common()` is with `vec_ptype_show()`: when given multiple inputs, it will print their common prototype. (In other words: program with `vec_ptype_common()` but play with `vec_ptype_show()`.) * The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically: ```{r, error = TRUE} vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ``` * Matrices and arrays are automatically broadcast to higher dimensions: ```{r} vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ``` Provided that the dimensions follow the vctrs recycling rules: ```{r, error = TRUE} vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ``` * Factors combine levels in the order in which they appear. ```{r} fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ``` * Combining a date and date-time yields a date-time: ```{r} vec_ptype_show(new_date(), new_datetime()) ``` When combining two date times, the timezone is taken from the first input: ```{r} vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ``` Unless it's the local timezone, in which case any explicit time zone will win: ```{r} vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ``` * The common type of two data frames is the common type of each column that occurs in both data frames: ```{r} vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ``` And the union of the columns that only occur in one: ```{r} vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ``` Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled. ### Casting to specified type `vec_ptype_common()` finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That's the job of `vec_cast_common()`: ```{r} str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ``` Alternatively, you can cast to a specific prototype using `vec_cast()`: ```{r, error = TRUE} # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ``` If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error. ```{r, error = TRUE} vec_cast(c(1.5, 2), integer()) ``` You can suppress the lossy cast errors with `allow_lossy_cast()`: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ``` This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ``` The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound. ## Size `vec_size()` was motivated by the need to have an invariant that describes the number of "observations" in a data structure. This is particularly important for data frames, as it's useful to have some function such that `f(data.frame(x))` equals `f(x)`. No base function has this property: * `length(data.frame(x))` equals `1` because the length of a data frame is the number of columns. * `nrow(data.frame(x))` does not equal `nrow(x)` because `nrow()` of a vector is `NULL`. * `NROW(data.frame(x))` equals `NROW(x)` for vector `x`, so is almost what we want. But because `NROW()` is defined in terms of `length()`, it returns a value for every object, even types that can't go in a data frame, e.g. `data.frame(mean)` errors even though `NROW(mean)` is `1`. We define `vec_size()` as follows: * It is the length of 1d vectors. * It is the number of rows of data frames, matrices, and arrays. * It throws error for non vectors. Given `vec_size()`, we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns. ### Slicing `vec_slice()` is to `vec_size()` as `[` is to `length()`; i.e., it allows you to select observations regardless of the dimensionality of the underlying object. `vec_slice(x, i)` is equivalent to: * `x[i]` when `x` is a vector. * `x[i, , drop = FALSE]` when `x` is a data frame. * `x[i, , , drop = FALSE]` when `x` is a 3d array. ```{r} x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ``` `vec_slice(data.frame(x), i)` equals `data.frame(vec_slice(x, i))` (modulo variable and row names). Prototypes are generated with `vec_slice(x, 0L)`; given a prototype, you can initialize a vector of given size (filled with `NA`s) with `vec_init()`. ### Common sizes: recycling rules Closely related to the definition of size are the __recycling rules__. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in `vec_size_common()`, which gives the common size of a set of vectors: ```{r} vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ``` vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like `dest == c("IAH", "HOU"))`, at the cost of occasionally requiring an explicit calls to `rep()`. ```{r, echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates n error"} knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ``` You can apply the recycling rules in two ways: * If you have a vector and desired size, use `vec_recycle()`: ```{r} vec_recycle(1:3, 3) vec_recycle(1, 10) ``` * If you have multiple vectors and you want to recycle them to the same size, use `vec_recycle_common()`: ```{r} vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ``` ## Appendix: recycling in base R The recycling rules in base R are described in [The R Language Definition](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Recycling-rules) but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions. Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer: ```{r} rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ``` If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning: ```{r} invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ``` But some functions recycle silently: ```{r} length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ``` And `data.frame()` throws an error: ```{r, error = TRUE} data.frame(1:2, 1:3) ``` The R language definition states that "any arithmetic operation involving a zero-length vector has a zero-length result". But outside of arithmetic, this rule is not consistently followed: ```{r, error = TRUE} # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) ``` vctrs/vignettes/pillar.Rmd0000644000176200001440000002130513753015426015361 0ustar liggesusers--- title: "Printing vectors nicely in tibbles" author: "Kirill Müller, Hadley Wickham" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Printing vectors nicely in tibbles} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = pillar::style_subtle("#>")) ``` You can get basic control over how a vector is printed in a tibble by providing a `format()` method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics: * `vctrs::vec_ptype_abbr()` determines what goes into the column header. * `pillar::pillar_shaft()` determines what goes into the body, or the shaft, of the column. Technically a [*pillar*](https://en.wikipedia.org/wiki/Column#Nomenclature) is composed of a *shaft* (decorated with an *ornament*), with a *capital* above and a *base* below. Multiple pillars form a *colonnade*, which can be stacked in multiple *tiers*. This is the motivation behind the names in our API. This short vignette shows the basics of column styling using a `"latlon"` vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you'll need to create documentation and the `NAMESPACE` file. In this vignette, we'll attach pillar and vctrs: ```{r setup} library(vctrs) library(pillar) ``` You don't need to do this in a package. Instead, you'll need to _import_ the packages by then to the `Imports:` section of your `DESCRIPTION`. The following helper does this for you: ```{r, eval = FALSE} usethis::use_package("vctrs") usethis::use_package("pillar") ``` ## Prerequisites To illustrate the basic ideas we're going to create a `"latlon"` class that encodes geographic coordinates in a record. We'll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using `vctrs_rcrd()`, we already get the infrastructure to make this class fully compatible with data frames for free. See `vignette("s3-vector", package = "vctrs")` for details on the record data type. ```{r} #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ``` ## Using in a tibble Columns of this class can be used in a tibble right away because we've made a class using the vctrs infrastructure and have provided a `format()` method: ```{r} library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ``` This output is ok, but we could improve it by: 1. Using a more description type abbreviation than ``. 1. Using a dash of colour to highlight the most important parts of the value. 1. Providing a narrower view when horizontal space is at a premium. The following sections show how to enhance the rendering. ## Fixing the data type Instead of `` we'd prefer to use ``. We can do that by implementing the `vec_ptype_abbr()` method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that's under 6 characters. ```{r} #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ``` ## Custom rendering The `format()` method is used by default for rendering. For custom formatting you need to implement the `pillar_shaft()` method. This function should always return a pillar shaft object, created by `new_pillar_shaft_simple()` or similar. `new_pillar_shaft_simple()` accepts ANSI escape codes for colouring, and pillar includes some built in styles like `style_subtle()`. We can use subtle style for the degree and minute separators to make the data more obvious. First we define a degree formatter that makes use of `style_subtle()`: ```{r} deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ``` And then we pass that to our `format()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ``` Currently, ANSI escapes are not rendered in vignettes, so this result doesn't look any different, but if you run the code yourself you'll see an improved display. ```{r} data ``` As well as the functions in pillar, the [cli](http://cli.r-lib.org/) package provides a variety of tools for styling text. ## Truncation Tibbles can automatically compacts columns when there's no enough horizontal space to display everything: ```{r} print(data, width = 30) ``` Currently the latlon class isn't ever compacted because we haven't specified a minimum width when constructing the shaft. Let's fix that and re-print the data: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ``` ## Adaptive rendering Truncation may be useful for character data, but for lat-lon data it'd be nicer to show full degrees and remove the minutes. We'll first write a function that does this: ```{r} deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ``` Then use it as part of more sophisticated implementation of the `pillar_shaft()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ``` Now the `pillar_shaft()` method returns an object of class `"pillar_shaft_latlon"` created by `new_pillar_shaft()`. This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (`get_max_extent()` is a helper that computes the maximum display width occupied by the values in a character vector.) All that's left to do is to implement a `format()` method for our new `"pillar_shaft_latlon"` class. This method will be called with a `width` argument, which then determines which of the formats to choose. The formatting of our choice is passed to the `new_ornament()` function: ```{r} #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ``` ## Testing If you want to test the output of your code, you can compare it with a known state recorded in a text file. The `testthat::expect_snapshot()` function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won't make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN. Use this testthat expectation in one of your test files to create a snapshot test: ```{r eval = FALSE} expect_snapshot(pillar_shaft(data$loc)) ``` See for more information. vctrs/vignettes/s3-vector.Rmd0000644000176200001440000011552014027052305015716 0ustar liggesusers--- title: "S3 vectors" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{S3 vectors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ``` This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you'll also need to provide methods that actually make the vector useful. I assume that you're already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of [the S3 chapter](https://adv-r.hadley.nz/s3.html) of _Advanced R_. This article refers to "vectors of numbers" as *double vectors*. Here, "double" stands for ["double precision floating point number"](https://en.wikipedia.org/wiki/Double-precision_floating-point_format), see also `double()`. ```{r setup} library(vctrs) library(zeallot) ``` This vignette works through five big topics: * The basics of creating a new vector class with vctrs. * The coercion and casting system. * The record and list-of types. * Equality and comparison proxies. * Arithmetic operators. They're collectively demonstrated with a number of simple S3 classes: * Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting. * Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions. * Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care. * Rational: a pair of integer vectors that defines a rational number like `2 / 3`. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for `+`, `-`, and friends. * Polynomial: a list of integer vectors that define polynomials like `1 + x - x^3`. Sorting such vectors correctly requires a custom equality method. * Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties. * Period and frequency: a pair of classes represent a period, or it's inverse, frequency. This allows us to explore more arithmetic operators. ## Basics In this section you'll learn how to create a new vctrs class by calling `new_vctr()`. This creates an object with class `vctrs_vctr` which has a number of methods. These are designed to make your life as easy as possible. For example: * The `print()` and `str()` methods are defined in terms of `format()` so you get a pleasant, consistent display as soon as you've made your `format()` method. * You can immediately put your new vector class in a data frame because `as.data.frame.vctrs_vctr()` does the right thing. * Subsetting (`[`, `[[`, and `$`), `length<-`, and `rep()` methods automatically preserve attributes because they use `vec_restore()`. A default `vec_restore()` works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data. * Default subset-assignment methods (`[<-`, `[[<-`, and `$<-`) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages. ### Percent class In this section, I'll show you how to make a `percent` class, i.e., a double vector that is printed as a percentage. We start by defining a low-level [constructor](https://adv-r.hadley.nz/s3.html#s3-constrcutor) that uses `vec_assert()` to checks types and/or sizes then calls `new_vctr()`. `percent` is built on a double vector of any length and doesn't have any attributes. ```{r} new_percent <- function(x = double()) { vec_assert(x, double()) new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ``` Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as [blob](https://blob.tidyverse.org/)), it's fine to use the package name without prefix as the class name. We then follow up with a user friendly [helper](https://adv-r.hadley.nz/s3.html#helpers). Here we'll use `vec_cast()` to allow it to accept anything coercible to a double: ```{r} percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype. ```{r} new_percent() percent() ``` For the convenience of your users, consider implementing an `is_percent()` function: ```{r} is_percent <- function(x) { inherits(x, "vctrs_percent") } ``` ### `format()` method The first method for every class should almost always be a `format()` method. This should return a character vector the same length as `x`. The easiest way to do this is to rely on one of R's low-level formatting functions like `formatC()`: ```{r} format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ``` ```{r, include = FALSE} # As of R 3.5, print.vctr can not find format.percent since it's not in # it's lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ``` ```{r} x ``` (Note the use of `vec_data()` so `format()` doesn't get stuck in an infinite loop, and that I take a little care to not convert `NA` to `"NA"`; this leads to better printing.) The format method is also used by data frames, tibbles, and `str()`: ```{r} data.frame(x) ``` For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in `str()`: ```{r} vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ``` If you need more control over printing in tibbles, implement a method for `pillar::pillar_shaft()`. See `vignette("pillar", package = "vctrs")` for details. ## Casting and coercion The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens _implicitly_ (e.g in `c()`) we call it __coercion__; when the change happens _explicitly_ (e.g. with `as.integer(x)`), we call it __casting__. One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it's possible to make accurate predictions about what (e.g.) `c(x, y)` should do when `x` and `y` have different prototypes. vctrs achieves this goal through two generics: * `vec_ptype2(x, y)` defines possible set of coercions. It returns a prototype if `x` and `y` can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes. * `vec_cast(x, to)` defines the possible sets of casts. It returns `x` translated to have prototype `to`, or throws an error if the conversion isn't possible. The set of possible casts is a superset of possible coercions because they're requested explicitly. ### Double dispatch Both generics use __[double dispatch](https://en.wikipedia.org/wiki/Double_dispatch)__ which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means: - You end up with method names with two classes, like `vec_ptype2.foo.bar()`. - You don't need to implement default methods (they would never be called if you do). - You can't call `NextMethod()`. ### Percent class {#percent} We'll make our percent class coercible back and forth with double vectors. `vec_ptype2()` provides a user friendly error message if the coercion doesn't exist and makes sure `NA` is handled in a standard way. `NA` is technically a logical vector, but we want to stand in for a missing value of any type. ```{r, error = TRUE} vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ``` By default and in simple cases, an object of the same class is compatible with itself: ```{r} vec_ptype2(percent(), percent()) ``` However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we'll start by saying that a `vctrs_percent` combined with a `vctrs_percent` yields a `vctrs_percent`, which we indicate by returning a prototype generated by the constructor. ```{r} vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ``` Next we define methods that say that combining a `percent` and double should yield a `double`. We avoid returning a `percent` here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers. Because double dispatch is a bit of a hack, we need to provide two methods. It's your responsibility to ensure that each member of the pair returns the same result: if they don't you will get weird and unpredictable behaviour. The double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. If we implemented `vec_ptype2.vctrs_percent.numeric()`, it would never be called. ```{r} vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ``` We can check that we've implemented this correctly with `vec_ptype_show()`: ```{r} vec_ptype_show(percent(), double(), percent()) ``` The `vec_ptype2()` methods define which input is the richer type that vctrs should coerce to. However, they don't perform any conversion. This is the job of `vec_cast()`, which we implement next. We'll provide a method to cast a percent to a percent: ```{r} vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ``` And then for converting back and forth between doubles. To convert a double to a percent we use the `percent()` helper (not the constructor; this is unvalidated user input). To convert a `percent` to a double, we strip the attributes. Note that for historical reasons the order of argument in the signature is the opposite as for `vec_ptype2()`. The class for `to` comes first, and the class for `x` comes second. Again, the double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. Implementing `vec_cast.vctrs_percent.numeric()` has no effect. ```{r} vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ``` Then we can check this works with `vec_cast()`: ```{r} vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ``` Once you've implemented `vec_ptype2()` and `vec_cast()`, you get `vec_c()`, `[<-`, and `[[<-` implementations for free. ```{r, error = TRUE} vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ``` You'll also get mostly correct behaviour for `c()`. The exception is when you use `c()` with a base R class: ```{r, error = TRUE} # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ``` Unfortunately there's no way to fix this problem with the current design of `c()`. Again, as a convenience, consider providing an `as_percent()` function that makes use of the casts defined in your `vec_cast.vctrs_percent()` methods: ```{r} as_percent <- function(x) { vec_cast(x, new_percent()) } ``` Occasionally, it is useful to provide conversions that go beyond what's allowed in casting. For example, we could offer a parsing method for character vectors. In this case, `as_percent()` should be generic, the default method should cast, and then additional methods should implement more flexible conversion: ```{r} as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ``` ### Decimal class Now that you've seen the basics with a very simple S3 class, we'll gradually explore more complicated scenarios. This section creates a `decimal` class that prints with the specified number of decimal places. This is very similar to `percent` but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1). We start off as before, defining a low-level constructor, a user-friendly constructor, a `format()` method, and a `vec_ptype_abbr()`. Note that additional object attributes are simply passed along to `new_vctr()`: ```{r} new_decimal <- function(x = double(), digits = 2L) { vec_assert(x, ptype = double()) vec_assert(digits, ptype = integer(), size = 1) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ``` Note that I provide a little helper to extract the `digits` attribute. This makes the code a little easier to read and should not be exported. By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You'll see what to do if the attributes are data dependent in the next section. ```{r} x[1:2] x[[1]] ``` For the sake of exposition, we'll assume that `digits` is an important attribute of the class and should be included in the full type: ```{r} vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ``` Now consider `vec_cast()` and `vec_ptype2()`. Casting and coercing from one decimal to another requires a little thought as the values of the `digits` attribute might be different, and we need some way to reconcile them. Here I've decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error. ```{r} vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ``` Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal). ```{r} vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ``` If type `x` has greater resolution than `y`, there will be some inputs that lose precision. These should generate errors using `stop_lossy_cast()`. You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution. ```{r, error = TRUE} vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ``` ### Cached sum class {#cached-sum} The next level up in complexity is an object that has data-dependent attributes. To explore this idea we'll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors: ```{r} new_cached_sum <- function(x = double(), sum = 0L) { vec_assert(x, ptype = double()) vec_assert(sum, ptype = double(), size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ``` For this class, we can use the default `format()` method, and instead, we'll customise the `obj_print_footer()` method. This is a good place to display user facing attributes. ```{r} obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ``` We'll also override `sum()` and `mean()` to use the attribute. This is easiest to do with `vec_math()`, which you'll learn about later. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ``` As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they'll work, but return the incorrect result: ```{r} x[1:2] ``` To fix this, you need to provide a `vec_restore()` method. Note that this method dispatches on the `to` argument. ```{r} vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ``` This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with `vec_data()` and then reapplying them again with `vec_restore()`. The default `vec_restore()` method copies over all attributes, which is not appropriate when the attributes depend on the data. Note that `vec_restore.class` is subtly different from `vec_cast.class.class()`. `vec_restore()` is used when restoring attributes that have been lost; `vec_cast()` is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with `new_vctr()`. `vec_restore.factor()` would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with `vec_cast()`. ## Record-style objects Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is `POSIXlt`, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override `length()` and subsetting methods to conceal this implementation detail. ```{r} x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ``` vctrs makes it easy to create new record-style classes using `new_rcrd()`, which has a wide selection of default methods. ### Rational class A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short). As usual we start with low-level and user-friendly constructors. The low-level constructor calls `new_rcrd()`, which needs a named list of equal-length vectors. ```{r} new_rational <- function(n = integer(), d = integer()) { vec_assert(n, ptype = integer()) vec_assert(d, ptype = integer()) new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ``` Our user friendly constructor casts `n` and `d` to integers and recycles them to the same length. ```{r} rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ``` Behind the scenes, `x` is a named list with two elements. But those details are hidden so that it behaves like a vector: ```{r} names(x) length(x) ``` To access the underlying fields we need to use `field()` and `fields()`: ```{r} fields(x) field(x, "n") ``` Notice that we can't `print()` or `str()` the new rational vector `x` yet. Printing causes an error: ```{r, error = TRUE} x str(x) ``` This is because we haven't defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call `vec_data(x)`. ```{r} vec_data(x) str(vec_data(x)) ``` It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way: ```{r} format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ``` vctrs uses the `format()` method in `str()`, hiding the underlying implementation details from the user: ```{r} str(x) ``` For `rational`, `vec_ptype2()` and `vec_cast()` follow the same pattern as `percent()`. We allow coercion from integer and to doubles. ```{r} vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ``` ### Decimal2 class The previous implementation of `decimal` was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems. A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a `scale`). The following code is a very quick sketch of how you might start creating such a class: ```{r} new_decimal2 <- function(l, r, scale = 2L) { vec_assert(l, ptype = integer()) vec_assert(r, ptype = integer()) vec_assert(scale, ptype = integer(), size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ``` ## Equality and comparison vctrs provides three "proxy" generics. Two of these let you control how your class determines equality and ordering: * `vec_proxy_equal()` returns a data vector suitable for comparison. It underpins `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()`. * `vec_proxy_compare()` specifies how to compare the elements of your vector. This proxy is used in `<`, `<=`, `>=`, `>`, `min()`, `max()`, `median()`, `quantile()`, and `xtfrm()` (used in `order()` and `sort()`) methods. By default, `vec_proxy_equal()` and `vec_proxy_compare()` just call `vec_proxy()`. * `vec_proxy()` returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn't need to implement `vec_proxy()`. You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work. These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C. ### Rational class Let's explore these ideas by with the rational class we started on above. By default, `vec_proxy()` converts a record to a data frame, and the default comparison works column by column: ```{r} x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ``` This makes sense as a default but isn't correct here because `rational(1, 1)` represents the same number as `rational(2, 2)`, so they should be equal. We can fix that by implementing a `vec_proxy_equal()` method that divides `n` and `d` by their greatest common divisor: ```{r} # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ``` `vec_proxy_equal()` is also used by `unique()`: ```{r} unique(x) ``` We now need to fix `sort()` similarly, since it currently sorts by `n`, then by `d`: ```{r} sort(x) ``` The easiest fix is to convert the fraction to a decimal and then sort that: ```{r} vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } sort(x) ``` (We could have used the same approach in `vec_proxy_equal()`, but when working with floating point numbers it's not necessarily true that `x == y` implies that `d * x == d * y`.) ### Polynomial class A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like `1 + 3x - 2x^2`) using a list of integer vectors (like `c(1, 3, -2)`). Note the use of `new_list_of()` in the constructor. ```{r} new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly") } poly <- function(...) { x <- list(...) x <- lapply(x, vec_cast, integer()) new_poly(x) } vec_ptype_full.vctrs_poly <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly <- function(x, ...) "poly" format.vctrs_poly <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } else if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly <- function(x, ...) { if (length(x) == 0) return() print(format(x), quote = FALSE) } p <- poly(1, c(1, 0, 1), c(1, 0, 0, 0, 2)) p ``` The resulting objects will inherit from the `vctrs_list_of` class, which provides tailored methods for `$`, `[[`, the corresponding assignment operators, and other methods. ```{r} class(p) p[2] p[[2]] ``` Equality works out of the box because we can tell if two integer vectors are equal: ```{r} p == poly(c(1, 0, 1)) ``` But we can't order them because lists are not comparable: ```{r, error = TRUE} sort(p) ``` So we need to define a `vec_proxy_compare()` method: ```{r} vec_proxy_compare.vctrs_poly <- function(x, ...) { x_raw <- vec_data(x) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } sort(poly(3, 2, 1)) sort(poly(1, c(1, 0, 0), c(1, 0))) ``` ## Arithmetic vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once: * `vec_math(fn, x, ...)` specifies the behaviour of mathematical functions like `abs()`, `sum()`, and `mean()`. (Note that `var()` and `sd()` can't be overridden, see `?vec_math()` for the complete list supported by `vec_math()`.) * `vec_arith(op, x, y)` specifies the behaviour of the arithmetic operations like `+`, `-`, and `%%`. (See `?vec_arith()` for the complete list.) Both generics define the behaviour for multiple functions because `sum.vctrs_vctr(x)` calls `vec_math.vctrs_vctr("sum", x)`, and `x + y` calls `vec_math.x_class.y_class("+", x, y)`. They're accompanied by `vec_math_base()` and `vec_arith_base()` which make it easy to call the underlying base R functions. `vec_arith()` uses double dispatch and needs the following standard boilerplate: ```{r} vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` ### Cached sum class I showed an example of `vec_math()` to define `sum()` and `mean()` methods for `cached_sum`. Now let's talk about exactly how it works. Most `vec_math()` functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to `vec_math_base()` for those that you don't care about. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ``` ### Meter class To explore the infix arithmetic operators exposed by `vec_arith()` I'll create a new class that represents a measurement in `meter`s: ```{r} new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ``` Because `meter` is built on top of a double vector, basic mathematic operations work: ```{r} sum(x) mean(x) ``` But we can't do arithmetic: ```{r, error = TRUE} x + 1 meter(10) + meter(1) meter(10) * 3 ``` To allow these infix functions to work, we'll need to provide `vec_arith()` generic. But before we do that, let's think about what combinations of inputs we should support: * It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can't multiply meters (because that would yield an area). * For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don't make much sense as we, strictly speaking, are dealing with objects of different nature. `vec_arith()` is another function that uses double dispatch, so as usual we start with a template. ```{r} vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Then write the method for two meter objects. We use a switch statement to cover the cases we care about and `stop_incompatible_op()` to throw an informative error message for everything else. ```{r, error = TRUE} vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ``` Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while `meter(10) / 2` makes sense, `2 / meter(10)` does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over `numeric` here instead of `double`. ```{r, error = TRUE} vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ``` For completeness, we also need `vec_arith.vctrs_meter.MISSING` for the unary `+` and `-` operators: ```{r} vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ``` ## Implementing a vctrs S3 class in a package Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things: * Register the S3 methods by listing them in the `NAMESPACE` file. * Create documentation around your methods, for the sake of your user and to satisfy `R CMD check`. Let's assume that the `percent` class is implemented in the pizza package in the file `R/percent.R`. Here we walk through the major sections of this hypothetical file. You've seen all of this code before, but now it's augmented by the roxygen2 directives that produce the correct `NAMESPACE` entries and help topics. ### Getting started First, the pizza package needs to include vctrs in the `Imports` section of its `DESCRIPTION` (perhaps by calling `usethis::use_package("vctrs")`. While vctrs is under very active development, it probably makes sense to state a minimum version. ``` Imports: a_package, another_package, ... vctrs (>= x.y.z), ... ``` Then we make all vctrs functions available within the pizza package by including the directive `#' @import vctrs` somewhere. Usually, it's not good practice to `@import` the entire namespace of a package, but vctrs is deliberately designed with this use case in mind. Where should we put `#' @import vctrs`? There are two natural locations: * With package-level docs in `R/pizza-doc.R`. You can use `usethis::use_package_doc()` to initiate this package-level documentation. * In `R/percent.R`. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package. We also must use one of these locations to dump some internal documentation that's needed to avoid `R CMD check` complaints. We don't expect any human to ever read this documentation. Here's how this dummy documentation should look, combined with the `#' @import vctrs` directive described above. ```{r eval = FALSE} #' Internal vctrs methods #' #' @import vctrs #' @keywords internal #' @name pizza-vctrs NULL ``` This should appear in `R/pizza-doc.R` (package-level docs) or in `R/percent.R` (class-focused file). Remember to call `devtools::document()` regularly, as you develop, to regenerate `NAMESPACE` and the `.Rd` files. From this point on, the code shown is expected to appear in `R/percent.R`. ### Low-level and user-friendly constructors Next we add our constructor: ```{r} new_percent <- function(x = double()) { vec_assert(x, double()) new_vctr(x, class = "pizza_percent") } ``` Note that the name of the package must be included in the class name (`pizza_percent`), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class. We can also add a call to `setOldClass()` for compatibility with S4: ```{r} # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ``` Because we've used a function from the methods package, you'll also need to add methods to `Imports`, with (e.g.) `usethis::use_package("methods")`. This is a "free" dependency because methods is bundled with every R install. Next we implement, export, and document a user-friendly helper: `percent()`. ```{r} #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` (Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do `pizza::percent()`; it would be redundant to have `pizza::pizza_percent()`.) ### Other helpers It's a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor `percent()`: ```{r} #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ``` You'll also need to update the `percent()` documentation to reflect that `x` now means two different things: ```{r} #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ``` Next we provide the key methods to make printing work. These are S3 methods, so they don't need to be documented, but they do need to be exported. ```{r eval = FALSE} #' @export format.pizza_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } #' @export vec_ptype_abbr.pizza_percent <- function(x, ...) { "prcnt" } ``` Finally, we implement methods for `vec_ptype2()` and `vec_cast()`. ```{r, eval = FALSE} #' @export vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() #' @export vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() #' @export vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x #' @export vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) #' @export vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ``` ### Testing It's good practice to test your new class. Specific recommendations: * `R/percent.R` is the type of file where you really do want 100% test coverage. You can use `devtools::test_coverage_file()` to check this. * Make sure to test behaviour with zero-length inputs and missing values. * Use `testthat::verify_output()` to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about `verify_output()` in the [testthat v2.3.0 blog post](https://www.tidyverse.org/blog/2019/11/testthat-2-3-0/); it's an example of a so-called [golden test](https://ro-che.info/articles/2017-12-04-golden-tests). * Check for method symmetry; use `expect_s3_class()`, probably with `exact = TRUE`, to ensure that `vec_c(x, y)` and `vec_c(y, x)` return the same type of output for the important `x`s and `y`s in your domain. * Use `testthat::expect_error()` to check that inputs that can't be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include `vctrs_error_assert_ptype`, `vctrs_error_assert_size`, and `vctrs_error_incompatible_type`. ```{r, eval = FALSE} expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") ``` If your tests pass when run by `devtools::test()`, but fail when run in `R CMD check`, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated `NAMESPACE`. vctrs/vignettes/stability.Rmd0000644000176200001440000003132513654033264016105 0ustar liggesusers--- title: "Type and size stability" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Type and size stability} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the "shape" of the output you only need to know the "shape"s of the inputs. This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can't predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases. ```{r setup} library(vctrs) library(zeallot) ``` ## Definitions We say a function is __type-stable__ iff: 1. You can predict the output type knowing only the input types. 1. The order of arguments in ... does not affect the output type. Similarly, a function is __size-stable__ iff: 1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size. Very few base R functions are size-stable, so I'll also define a slightly weaker condition. I'll call a function __length-stable__ iff: 1. You can predict the output _length_ knowing only the input _lengths_, or there is a single numeric input that specifies the output _length_. (But note that length-stable is not a particularly robust definition because `length()` returns a value for things that are not vectors.) We'll call functions that don't obey these principles __type-unstable__ and __size-unstable__ respectively. On top of type- and size-stability it's also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions. The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again. ### Examples To make these ideas concrete, let's apply them to a few base functions: 1. `mean()` is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error). 1. Surprisingly, `median()` is type-unstable: ```{r} vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ``` It is, however, size-stable, since it always returns a vector of length 1. 1. `sapply()` is type-unstable because you can't predict the output type only knowing the input types: ```{r} vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ``` It's not quite size-stable; `vec_size(sapply(x, f))` is `vec_size(x)` for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns). 1. `vapply()` is a type-stable version of `sapply()` because `vec_ptype_show(vapply(x, fn, template))` is always `vec_ptype_show(template)`. It is size-unstable for the same reasons as `sapply()`. 1. `c()` is type-unstable because `c(x, y)` doesn't always output the same type as `c(y, x)`. ```{r} vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ``` `c()` is *almost always* length-stable because `length(c(x, y))` *almost always* equals `length(x) + length(y)`. One common source of instability here is dealing with non-vectors (see the later section "Non-vectors"): ```{r} env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ``` 1. `paste(x1, x2)` is length-stable because `length(paste(x1, x2))` equals `max(length(x1), length(x2))`. However, it doesn't follow the usual arithmetic recycling rules because `paste(1:2, 1:3)` doesn't generate a warning. 1. `ifelse()` is length-stable because `length(ifelse(cond, true, false))` is always `length(cond)`. `ifelse()` is type-unstable because the output type depends on the value of `cond`: ```{r} vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ``` 1. `read.csv(file)` is type-unstable and size-unstable because, while you know it will return a data frame, you don't know what columns it will return or how many rows it will have. Similarly, `df[[i]]` is not type-stable because the result depends on the _value_ of `i`. There are many important functions that can not be made type-stable or size-stable! With this understanding of type- and size-stability in hand, we'll use them to analyse some base R functions in greater depth and then propose alternatives with better properties. ## `c()` and `vctrs::vec_c()` In this section we'll compare and contrast `c()` and `vec_c()`. `vec_c()` is both type- and size-stable because it possesses the following invariants: * `vec_ptype(vec_c(x, y))` equals `vec_ptype_common(x, y)`. * `vec_size(vec_c(x, y))` equals `vec_size(x) + vec_size(y)`. `c()` has another undesirable property in that it's not consistent with `unlist()`; i.e., `unlist(list(x, y))` does not always equal `c(x, y)`; i.e., base R has multiple sets of type-coercion rules. I won't consider this problem further here. I have two goals here: * To fully document the quirks of `c()`, hence motivating the development of an alternative. * To discuss non-obvious consequences of the type- and size-stability above. ### Atomic vectors If we only consider atomic vectors, `c()` is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical. ```{r} c(FALSE, 1L, 2.5) ``` `vec_c()` obeys similar rules: ```{r} vec_c(FALSE, 1L, 2.5) ``` But it does not automatically coerce to character vectors or lists: ```{r, error = TRUE} c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ``` ### Incompatible vectors and non-vectors In general, most base methods do not throw an error: ```{r} c(10.5, factor("x")) ``` If the inputs aren't vectors, `c()` automatically puts them in a list: ```{r} c(mean, globalenv()) ``` For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list: ```{r, error = TRUE} c(getRversion(), "x") c("x", getRversion()) ``` `vec_c()` throws an error if the inputs are not vectors or not automatically coercible: ```{r, error = TRUE} vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ``` ### Factors Combining two factors returns an integer vector: ```{r} fa <- factor("a") fb <- factor("b") c(fa, fb) ``` (This is documented in `c()` but is still undesirable.) `vec_c()` returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with `dplyr::bind_rows()`, which is stricter and is a common source of user difficulty.) ```{r} vec_c(fa, fb) vec_c(fb, fa) ``` ### Date-times `c()` strips the time zone associated with date-times: ```{r} datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ``` This behaviour is documented in `?DateTimeClasses` but is the source of considerable user pain. `vec_c()` preserves time zones: ```{r} vec_c(datetime_nz) ``` What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there's no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone: ```{r} datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ``` ### Dates and date-times Combining dates and date-times with `c()` gives silently incorrect results: ```{r} date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ``` This behaviour arises because neither `c.Date()` nor `c.POSIXct()` check that all inputs are of the same type. `vec_c()` uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time). ```{r} vec_c(date, datetime) vec_c(date, datetime_nz) ``` ### Missing values If a missing value comes at the beginning of the inputs, `c()` falls back to the internal behaviour, which strips all attributes: ```{r} c(NA, fa) c(NA, date) c(NA, datetime) ``` `vec_c()` takes a different approach treating a logical vector consisting only of `NA` as the `unspecified()` class which can be converted to any other 1d type: ```{r} vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ``` ### Data frames Because it is *almost always* length-stable, `c()` combines data frames column wise (into a list): ```{r} df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ``` `vec_c()` is size-stable, which implies it will row-bind data frames: ```{r} vec_c(df1, df2) ``` ### Matrices and arrays The same reasoning applies to matrices: ```{r} m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ``` One difference is that `vec_c()` will "broadcast" a vector to match the dimensions of a matrix: ```{r} c(m, 1) vec_c(m, 1) ``` ### Implementation The basic implementation of `vec_c()` is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with `vec_init()`, and then insert each input into the correct place in the output. ```{r, eval = FALSE} vec_c <- function(...) { args <- compact(list2(...)) ptype <- vec_ptype_common(!!!args) if (is.null(ptype)) return(NULL) ns <- map_int(args, vec_size) out <- vec_init(ptype, sum(ns)) pos <- 1 for (i in seq_along(ns)) { n <- ns[[i]] x <- vec_cast(args[[i]], to = ptype) vec_slice(out, pos:(pos + n - 1)) <- x pos <- pos + n } out } ``` (The real `vec_c()` is a bit more complicated in order to handle inner and outer names). ## `ifelse()` One of the functions that motivate the development of vctrs is `ifelse()`. It has the surprising property that the result value is "A vector of the same length and attributes (including dimensions and class) as `test`". To me, it seems more reasonable for the type of the output to be controlled by the type of the `yes` and `no` arguments. In `dplyr::if_else()` I swung too far towards strictness: it throws an error if `yes` and `no` are not the same type. This is annoying in practice because it requires typed missing values (`NA_character_` etc), and because the checks are only on the class (not the full prototype), it's easy to create invalid output. I found it much easier to understand what `ifelse()` _should_ do once I internalised the ideas of type- and size-stability: * The first argument must be logical. * `vec_ptype(if_else(test, yes, no))` equals `vec_ptype_common(yes, no)`. Unlike `ifelse()` this implies that `if_else()` must always evaluate both `yes` and `no` in order to figure out the correct type. I think this is consistent with `&&` (scalar operation, short circuits) and `&` (vectorised, evaluates both sides). * `vec_size(if_else(test, yes, no))` equals `vec_size_common(test, yes, no)`. I think the output could have the same size as `test` (i.e., the same behaviour as `ifelse`), but I _think_ as a general rule that your inputs should either be mutually recycling or not. This leads to the following implementation: ```{r} if_else <- function(test, yes, no) { vec_assert(test, logical()) c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ``` By using `vec_size()` and `vec_slice()`, this definition of `if_else()` automatically works with data.frames and matrices: ```{r} if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) ``` vctrs/R/0000755000176200001440000000000014042543376011624 5ustar liggesusersvctrs/R/partial-factor.R0000644000176200001440000000552514001521575014656 0ustar liggesusers#' Partially specify a factor #' #' @description #' `r lifecycle::badge("experimental")` #' #' This special class can be passed as a `ptype` in order to specify that the #' result should be a factor that contains at least the specified levels. #' #' @inheritParams new_factor #' @keywords internal #' @export #' @examples #' # Assert that `x` is a factor #' vec_assert(factor("x"), partial_factor()) #' #' # Testing with `factor()` is too strict, #' # because it tries to match the levels exactly #' # rather than learning them from the data. #' try(vec_assert(factor("x"), factor())) #' #' # You can also enforce a minimum set of levels #' try(vec_assert(factor("x"), partial_factor("y"))) #' #' vec_assert(factor(c("x", "y")), partial_factor("y")) #' #' pf <- partial_factor(levels = c("x", "y")) #' pf #' #' vec_ptype_common(factor("v"), factor("w"), .ptype = pf) #' partial_factor <- function(levels = character()) { partial <- new_factor(levels = levels) new_partial_factor(partial) } new_partial_factor <- function(partial = factor(), learned = factor()) { stopifnot( is.factor(partial), is.factor(learned) ) # Fails if `learned` is not compatible with `partial` vec_ptype2(partial, learned) new_partial( partial = partial, learned = learned, class = "vctrs_partial_factor" ) } #' @export vec_ptype_full.vctrs_partial_factor <- function(x, ...) { empty <- "" levels <- map(x, levels) hashes <- map_chr(levels, hash_label) needs_indent <- hashes != empty hashes[needs_indent] <- map_chr(hashes[needs_indent], function(x) paste0(" ", x)) source <- rep_named(names(hashes), empty) if (hashes["partial"] != empty) { source["partial"] <- " {partial}" } details <- paste0(hashes, source) details <- details[details != empty] paste0( "partial_factor<\n", paste0(details, collapse = "\n"), "\n>" ) } #' @export vec_ptype_abbr.vctrs_partial_factor <- function(x, ...) { "prtl_fctr" } #' @method vec_ptype2 vctrs_partial_factor #' @export vec_ptype2.vctrs_partial_factor <- function(x, y, ...) { UseMethod("vec_ptype2.vctrs_partial_factor") } #' @method vec_ptype2.vctrs_partial_factor vctrs_partial_factor #' @export vec_ptype2.vctrs_partial_factor.vctrs_partial_factor <- function(x, y, ...) { partial <- vec_ptype2(x$partial, y$partial) learned <- vec_ptype2(x$learned, y$learned) new_partial_factor(partial, learned) } #' @method vec_ptype2.vctrs_partial_factor factor #' @export vec_ptype2.vctrs_partial_factor.factor <- function(x, y, ...) { new_partial_factor(x$partial, vec_ptype2(x$learned, y)) } #' @method vec_ptype2.factor vctrs_partial_factor #' @export vec_ptype2.factor.vctrs_partial_factor <- function(x, y, ...) { new_partial_factor(y$partial, vec_ptype2(y$learned, x)) } #' @export vec_ptype_finalise.vctrs_partial_factor <- function(x, ...) { vec_ptype2(x$learned, x$partial) } vctrs/R/names.R0000644000176200001440000004405513762412012013050 0ustar liggesusers#' Retrieve and repair names #' #' @description #' #' `vec_as_names()` takes a character vector of names and repairs it #' according to the `repair` argument. It is the r-lib and tidyverse #' equivalent of [base::make.names()]. #' #' vctrs deals with a few levels of name repair: #' #' * `minimal` names exist. The `names` attribute is not `NULL`. The #' name of an unnamed element is `""` and never `NA`. For instance, #' `vec_as_names()` always returns minimal names and data frames #' created by the tibble package have names that are, at least, #' `minimal`. #' #' * `unique` names are `minimal`, have no duplicates, and can be used #' where a variable name is expected. Empty names, `...`, and #' `..` followed by a sequence of digits are banned. #' #' - All columns can be accessed by name via `df[["name"]]` and #' ``df$`name` `` and ``with(df, `name`)``. #' #' * `universal` names are `unique` and syntactic (see Details for #' more). #' #' - Names work everywhere, without quoting: `df$name` and `with(df, #' name)` and `lm(name1 ~ name2, data = df)` and #' `dplyr::select(df, name)` all work. #' #' `universal` implies `unique`, `unique` implies `minimal`. These #' levels are nested. #' #' #' @param names A character vector. #' @param repair Either a string or a function. If a string, it must #' be one of `"check_unique"`, `"minimal"`, `"unique"`, or `"universal"`. #' If a function, it is invoked with a vector of minimal names and must #' return minimal names, otherwise an error is thrown. #' #' * Minimal names are never `NULL` or `NA`. When an element doesn't #' have a name, its minimal name is an empty string. #' #' * Unique names are unique. A suffix is appended to duplicate #' names to make them unique. #' #' * Universal names are unique and syntactic, meaning that you can #' safely use the names as variables without causing a syntax #' error. #' #' The `"check_unique"` option doesn't perform any name repair. #' Instead, an error is raised if the names don't suit the #' `"unique"` criteria. #' @param repair_arg If specified and `repair = "check_unique"`, any errors #' will include a hint to set the `repair_arg`. #' @param quiet By default, the user is informed of any renaming #' caused by repairing the names. This only concerns unique and #' universal repairing. Set `quiet` to `TRUE` to silence the #' messages. #' @inheritParams ellipsis::dots_empty #' #' @section `minimal` names: #' #' `minimal` names exist. The `names` attribute is not `NULL`. The #' name of an unnamed element is `""` and never `NA`. #' #' Examples: #' #' ``` #' Original names of a vector with length 3: NULL #' minimal names: "" "" "" #' #' Original names: "x" NA #' minimal names: "x" "" #' ``` #' #' #' @section `unique` names: #' #' `unique` names are `minimal`, have no duplicates, and can be used #' (possibly with backticks) in contexts where a variable is #' expected. Empty names, `...`, and `..` followed by a sequence of #' digits are banned. If a data frame has `unique` names, you can #' index it by name, and also access the columns by name. In #' particular, `df[["name"]]` and `` df$`name` `` and also ``with(df, #' `name`)`` always work. #' #' There are many ways to make names `unique`. We append a suffix of the form #' `...j` to any name that is `""` or a duplicate, where `j` is the position. #' We also change `..#` and `...` to `...#`. #' #' Example: #' #' ``` #' Original names: "" "x" "" "y" "x" "..2" "..." #' unique names: "...1" "x...2" "...3" "y" "x...5" "...6" "...7" #' ``` #' #' Pre-existing suffixes of the form `...j` are always stripped, prior #' to making names `unique`, i.e. reconstructing the suffixes. If this #' interacts poorly with your names, you should take control of name #' repair. #' #' #' @section `universal` names: #' #' `universal` names are `unique` and syntactic, meaning they: #' #' * Are never empty (inherited from `unique`). #' * Have no duplicates (inherited from `unique`). #' * Are not `...`. Do not have the form `..i`, where `i` is a #' number (inherited from `unique`). #' * Consist of letters, numbers, and the dot `.` or underscore `_` #' characters. #' * Start with a letter or start with the dot `.` not followed by a #' number. #' * Are not a [reserved] word, e.g., `if` or `function` or `TRUE`. #' #' If a vector has `universal` names, variable names can be used #' "as is" in code. They work well with nonstandard evaluation, e.g., #' `df$name` works. #' #' vctrs has a different method of making names syntactic than #' [base::make.names()]. In general, vctrs prepends one or more dots #' `.` until the name is syntactic. #' #' Examples: #' #' ``` #' Original names: "" "x" NA "x" #' universal names: "...1" "x...2" "...3" "x...4" #' #' Original names: "(y)" "_z" ".2fa" "FALSE" #' universal names: ".y." "._z" "..2fa" ".FALSE" #' ``` #' #' @seealso [rlang::names2()] returns the names of an object, after #' making them `minimal`. #' #' The [Names attribute](https://principles.tidyverse.org/names-attribute.html) #' section in the "tidyverse package development principles". #' #' @examples #' # By default, `vec_as_names()` returns minimal names: #' vec_as_names(c(NA, NA, "foo")) #' #' # You can make them unique: #' vec_as_names(c(NA, NA, "foo"), repair = "unique") #' #' # Universal repairing fixes any non-syntactic name: #' vec_as_names(c("_foo", "+"), repair = "universal") #' @export vec_as_names <- function(names, ..., repair = c("minimal", "unique", "universal", "check_unique"), repair_arg = "", quiet = FALSE) { if (!missing(...)) { ellipsis::check_dots_empty() } .Call(vctrs_as_names, names, repair, repair_arg, quiet) } validate_name_repair_arg <- function(repair) { .Call(vctrs_validate_name_repair_arg, repair) } validate_minimal_names <- function(names, n = NULL) { .Call(vctrs_validate_minimal_names, names, n) } validate_unique <- function(names, arg = "", n = NULL) { validate_minimal_names(names, n) empty_names <- detect_empty_names(names) if (has_length(empty_names)) { stop_names_cannot_be_empty(names) } dot_dot_name <- detect_dot_dot(names) if (has_length(dot_dot_name)) { stop_names_cannot_be_dot_dot(names) } if (anyDuplicated(names)) { stop_names_must_be_unique(names, arg) } invisible(names) } detect_empty_names <- function(names) { which(names == "") } detect_dot_dot <- function(names) { grep("^[.][.](?:[.]|[1-9][0-9]*)$", names) } #' Get or set the names of a vector #' #' @description #' These functions work like [rlang::names2()], [names()] and [names<-()], #' except that they return or modify the the rowwise names of the vector. These are: #' * The usual `names()` for atomic vectors and lists #' * The row names for data frames and matrices #' * The names of the first dimension for arrays #' Rowwise names are size consistent: the length of the names always equals #' [vec_size()]. #' #' `vec_names2()` returns the repaired names from a vector, even if it is unnamed. #' See [vec_as_names()] for details on name repair. #' #' `vec_names()` is a bare-bones version that returns `NULL` if the vector is #' unnamed. #' #' `vec_set_names()` sets the names or removes them. #' #' @param x A vector with names #' @param names A character vector, or `NULL`. #' @inheritParams vec_as_names #' #' @return #' `vec_names2()` returns the names of `x`, repaired. #' `vec_names()` returns the names of `x` or `NULL` if unnamed. #' `vec_set_names()` returns `x` with names updated. #' #' @name vec_names #' @export #' @examples #' vec_names2(1:3) #' vec_names2(1:3, repair = "unique") #' vec_names2(c(a = 1, b = 2)) #' #' # `vec_names()` consistently returns the rowwise names of data frames and arrays: #' vec_names(data.frame(a = 1, b = 2)) #' names(data.frame(a = 1, b = 2)) #' vec_names(mtcars) #' names(mtcars) #' vec_names(Titanic) #' names(Titanic) #' #' vec_set_names(1:3, letters[1:3]) #' vec_set_names(data.frame(a = 1:3), letters[1:3]) vec_names2 <- function(x, ..., repair = c("minimal", "unique", "universal", "check_unique"), quiet = FALSE) { if (!missing(...)) { ellipsis::check_dots_empty() } repair <- validate_name_repair_arg(repair) if (is_function(repair)) { names <- minimal_names(x) new_names <- validate_minimal_names(repair(names), n = length(names)) if (!quiet) { describe_repair(names, new_names) } return(new_names) } switch(repair, minimal = minimal_names(x), unique = unique_names(x, quiet = quiet), universal = as_universal_names(minimal_names(x), quiet = quiet), check_unique = validate_unique(minimal_names(x)) ) } vec_repair_names <- function(x, repair = c("minimal", "unique", "universal", "check_unique"), ..., quiet = FALSE) { if (is.data.frame(x)) { x } else { vec_set_names(x, vec_names2(x, ..., repair = repair, quiet = quiet)) } } minimal_names <- function(x) { .Call(vctrs_minimal_names, x) } unique_names <- function(x, quiet = FALSE) { .Call(vctrs_unique_names, x, quiet) } #' @rdname vec_names #' @export vec_names <- function(x) { .Call(vctrs_names, x) } as_minimal_names <- function(names) { .Call(vctrs_as_minimal_names, names) } as_unique_names <- function(names, quiet = FALSE) { .Call(vctrs_as_unique_names, names, quiet) } as_universal_names <- function(names, quiet = FALSE) { new_names <- names new_names[] <- "" naked_names <- strip_pos(two_to_three_dots(names)) empty <- naked_names %in% c("", "...") new_names[!empty] <- make_syntactic(naked_names[!empty]) needs_suffix <- empty | vec_duplicate_detect(new_names) new_names <- append_pos(new_names, needs_suffix = needs_suffix) if (!quiet) { describe_repair(names, new_names) } new_names } two_to_three_dots <- function(names) { sub("(^[.][.][1-9][0-9]*$)", ".\\1", names) } append_pos <- function(names, needs_suffix) { need_append_pos <- which(needs_suffix) names[need_append_pos] <- paste0(names[need_append_pos], "...", need_append_pos) names } strip_pos <- function(names) { rx <- "([.][.][.][1-9][0-9]*)+$" gsub(rx, "", names) %|% "" } # Makes each individual name syntactic but does not enforce unique-ness make_syntactic <- function(names) { names[is.na(names)] <- "" names[names == ""] <- "." names[names == "..."] <- "...." names <- sub("^_", "._", names) new_names <- make.names(names) X_prefix <- grepl("^X", new_names) & !grepl("^X", names) new_names[X_prefix] <- sub("^X", "", new_names[X_prefix]) dot_suffix <- which(new_names == paste0(names, ".")) new_names[dot_suffix] <- sub("^(.*)[.]$", ".\\1", new_names[dot_suffix]) # Illegal characters have been replaced with '.' via make.names() # however, we have: # * Declined its addition of 'X' prefixes. # * Turned its '.' suffixes to '.' prefixes. regex <- paste0( "^(?[.]{0,2})", "(?[0-9]*)", "(?[^0-9]?.*$)" ) re <- re_match(new_names, pattern = regex) needs_dots <- which(re$numbers != "") needs_third_dot <- (re$leftovers[needs_dots] == "") re$leading_dots[needs_dots] <- ifelse(needs_third_dot, "...", "..") new_names <- paste0(re$leading_dots, re$numbers, re$leftovers) new_names } # From rematch2, except we don't add tbl_df or tbl classes to the return value re_match <- function(text, pattern, perl = TRUE, ...) { stopifnot( is.character(pattern), length(pattern) == 1, !is.na(pattern) ) text <- as.character(text) match <- regexpr(pattern, text, perl = perl, ...) start <- as.vector(match) length <- attr(match, "match.length") end <- start + length - 1L matchstr <- substring(text, start, end) matchstr[ start == -1 ] <- NA_character_ res <- data.frame( stringsAsFactors = FALSE, .text = text, .match = matchstr ) if (!is.null(attr(match, "capture.start"))) { gstart <- attr(match, "capture.start") glength <- attr(match, "capture.length") gend <- gstart + glength - 1L groupstr <- substring(text, gstart, gend) groupstr[ gstart == -1 ] <- NA_character_ dim(groupstr) <- dim(gstart) res <- cbind(groupstr, res, stringsAsFactors = FALSE) } names(res) <- c(attr(match, "capture.names"), ".text", ".match") res } describe_repair <- function(orig_names, names) { if (is_null(orig_names)) { orig_names <- rep_along(names, "") } if (length(orig_names) != length(names)) { stop("Internal error: New names and old names don't have same length") } new_names <- names != as_minimal_names(orig_names) if (any(new_names)) { msg <- bullets( header = "New names:", paste0( tick_if_needed(orig_names[new_names]), " -> ", tick_if_needed(names[new_names]) ) ) message(msg) } } bullets <- function(..., header = NULL) { problems <- c(...) MAX_BULLETS <- 6L if (length(problems) >= MAX_BULLETS) { n_more <- length(problems) - MAX_BULLETS + 1L problems[[MAX_BULLETS]] <- "..." length(problems) <- MAX_BULLETS } info <- paste0("* ", problems, collapse = "\n") if (!is.null(header)) { info <- paste0(header, "\n", info) } info } tick <- function(x) { ifelse(is.na(x), "NA", encodeString(x, quote = "`")) } is_syntactic <- function(x) { ret <- (make_syntactic(x) == x) ret[is.na(x)] <- FALSE ret } tick_if_needed <- function(x) { needs_ticks <- !is_syntactic(x) x[needs_ticks] <- tick(x[needs_ticks]) x } # Used in names.c set_rownames_fallback <- function(x, names) { rownames(x) <- names x } # Used in names.c set_names_fallback <- function(x, names) { names(x) <- names x } #' @rdname vec_names #' @export vec_set_names <- function(x, names) { .Call(vctrs_set_names, x, names) } #' Repair names with legacy method #' #' This standardises names with the legacy approach that was used in #' tidyverse packages (such as tibble, tidyr, and readxl) before #' [vec_as_names()] was implemented. This tool is meant to help #' transitioning to the new name repairing standard and will be #' deprecated and removed from the package some time in the future. #' #' @inheritParams vec_as_names #' @param prefix,sep Prefix and separator for repaired names. #' #' @examples #' if (rlang::is_installed("tibble")) { #' #' library(tibble) #' #' # Names repair is turned off by default in tibble: #' try(tibble(a = 1, a = 2)) #' #' # You can turn it on by supplying a repair method: #' tibble(a = 1, a = 2, .name_repair = "universal") #' #' # If you prefer the legacy method, use `vec_as_names_legacy()`: #' tibble(a = 1, a = 2, .name_repair = vec_as_names_legacy) #' #' } #' @keywords internal #' @export vec_as_names_legacy <- function(names, prefix = "V", sep = "") { if (length(names) == 0) { return(character()) } blank <- names == "" names[!blank] <- make.unique(names[!blank], sep = sep) new_nms <- setdiff(paste(prefix, seq_along(names), sep = sep), names) names[blank] <- new_nms[seq_len(sum(blank))] names } #' Name specifications #' #' @description #' #' A name specification describes how to combine an inner and outer #' names. This sort of name combination arises when concatenating #' vectors or flattening lists. There are two possible cases: #' #' * Named vector: #' #' ``` #' vec_c(outer = c(inner1 = 1, inner2 = 2)) #' ``` #' #' * Unnamed vector: #' #' ``` #' vec_c(outer = 1:2) #' ``` #' #' In r-lib and tidyverse packages, these cases are errors by default, #' because there's no behaviour that works well for every case. #' Instead, you can provide a name specification that describes how to #' combine the inner and outer names of inputs. Name specifications #' can refer to: #' #' * `outer`: The external name recycled to the size of the input #' vector. #' #' * `inner`: Either the names of the input vector, or a sequence of #' integer from 1 to the size of the vector if it is unnamed. #' #' @param name_spec,.name_spec A name specification for combining #' inner and outer names. This is relevant for inputs passed with a #' name, when these inputs are themselves named, like `outer = #' c(inner = 1)`, or when they have length greater than 1: `outer = #' 1:2`. By default, these cases trigger an error. You can resolve #' the error by providing a specification that describes how to #' combine the names or the indices of the inner vector with the #' name of the input. This specification can be: #' #' * A function of two arguments. The outer name is passed as a #' string to the first argument, and the inner names or positions #' are passed as second argument. #' #' * An anonymous function as a purrr-style formula. #' #' * A glue specification of the form `"{outer}_{inner}"`. #' #' * An [rlang::zap()] object, in which case both outer and inner #' names are ignored and the result is unnamed. #' #' See the [name specification topic][name_spec]. #' #' @examples #' # By default, named inputs must be length 1: #' vec_c(name = 1) # ok #' try(vec_c(name = 1:3)) # bad #' #' # They also can't have internal names, even if scalar: #' try(vec_c(name = c(internal = 1))) # bad #' #' # Pass a name specification to work around this. A specification #' # can be a glue string referring to `outer` and `inner`: #' vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}") #' vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}_{inner}") #' #' # They can also be functions: #' my_spec <- function(outer, inner) paste(outer, inner, sep = "_") #' vec_c(name = 1:3, other = 4:5, .name_spec = my_spec) #' #' # Or purrr-style formulas for anonymous functions: #' vec_c(name = 1:3, other = 4:5, .name_spec = ~ paste0(.x, .y)) #' @name name_spec NULL apply_name_spec <- function(name_spec, outer, inner, n = length(inner)) { .Call(vctrs_apply_name_spec, name_spec, outer, inner, n) } glue_as_name_spec <- function(`_spec`) { function(inner, outer) { glue::glue(`_spec`) } } # Evaluate glue specs in a child of base for now environment(glue_as_name_spec) <- baseenv() vctrs/R/type-misc.R0000644000176200001440000000640113762412012013650 0ustar liggesusers # `numeric_version` from base ---------------------------------------- #' @export vec_proxy.numeric_version <- function(x, ...) x # `omit` from base --------------------------------------------------- #' @export vec_proxy.omit <- function(x, ...) { x } #' @export vec_restore.omit <- function(x, ...) { structure(x, class = "omit") } #' @export vec_ptype2.omit.omit <- function(x, y, ...) { x } #' @export vec_ptype2.integer.omit <- function(x, y, ...) { x } #' @export vec_ptype2.omit.integer <- function(x, y, ...) { y } #' @export vec_ptype2.double.omit <- function(x, y, ...) { x } #' @export vec_ptype2.omit.double <- function(x, y, ...) { y } #' @export vec_cast.omit.omit <- function(x, to, ...) { x } #' @export vec_cast.integer.omit <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.omit.integer <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } #' @export vec_cast.double.omit <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.omit.double <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } # `exclude` from base ------------------------------------------------ #' @export vec_proxy.exclude <- function(x, ...) { x } #' @export vec_restore.exclude <- function(x, ...) { structure(x, class = "exclude") } #' @export vec_ptype2.exclude.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.integer.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.exclude.integer <- function(x, y, ...) { y } #' @export vec_ptype2.double.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.exclude.double <- function(x, y, ...) { y } #' @export vec_cast.exclude.exclude <- function(x, to, ...) { x } #' @export vec_cast.integer.exclude <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.exclude.integer <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } #' @export vec_cast.double.exclude <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.exclude.double <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } # `data.table` ------------------------------------------------------- delayedAssign("as.data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "as.data.table") } else { function(...) abort("`data.table` must be installed.") } }) dt_ptype2 <- function(x, y, ...) { as.data.table(df_ptype2(x, y, ...)) } dt_cast <- function(x, to, ...) { as.data.table(df_cast(x, to, ...)) } #' @export vec_ptype2.data.table.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_cast.data.table.data.table <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.table.data.frame <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.frame.data.table <- function(x, to, ...) { df_cast(x, to, ...) } vctrs/R/type-list-of.R0000644000176200001440000001174013762412012014274 0ustar liggesusers#' `list_of` S3 class for homogenous lists #' #' A `list_of` object is a list where each element has the same type. #' Modifying the list with `$`, `[`, and `[[` preserves the constraint #' by coercing all input items. #' #' Unlike regular lists, setting a list element to `NULL` using `[[` #' does not remove it. #' #' @inheritParams vec_c #' @param x For `as_list_of()`, a vector to be coerced to list_of. #' @param y,to Arguments to `vec_ptype2()` and `vec_cast()`. #' @export #' @examples #' x <- list_of(1:3, 5:6, 10:15) #' if (requireNamespace("tibble", quietly = TRUE)) { #' tibble::tibble(x = x) #' } #' #' vec_c(list_of(1, 2), list_of(FALSE, TRUE)) list_of <- function(..., .ptype = NULL) { args <- list2(...) ptype <- vec_ptype_common(!!!args, .ptype = .ptype) if (is.null(ptype)) { abort("Could not find common type for elements of `x`.") } x <- map(args, vec_cast, to = ptype) new_list_of(x, ptype) } #' @export #' @rdname list_of as_list_of <- function(x, ...) { UseMethod("as_list_of") } #' @export as_list_of.vctrs_list_of <- function(x, .ptype = NULL, ...) { if (!is.null(.ptype)) { list_of(!!!x, .ptype = .ptype) } else { x } } #' @export as_list_of.list <- function(x, ..., .ptype = NULL) { list_of(!!!x, .ptype = .ptype) } #' Create list_of subclass #' #' @param x A list #' @param ptype The prototype which every element of `x` belongs to #' @param ... Additional attributes used by subclass #' @param class Optional subclass name #' @keywords internal #' @export new_list_of <- function(x = list(), ptype = logical(), ..., class = character()) { if (!vec_is_list(x)) { abort("`x` must be a list.") } if (vec_size(ptype) != 0L) { abort("`ptype` must have size 0.") } new_vctr(x, ..., ptype = ptype, class = c(class, "vctrs_list_of")) } #' @export #' @rdname list_of validate_list_of <- function(x) { if (!vec_is_list(x)) { abort("`x` must be a list.") } ptype <- attr(x, "ptype") if (vec_size(ptype) != 0L) { abort("`ptype` must have size 0.") } walk(x, vec_cast, to = ptype) invisible(x) } #' @export #' @rdname list_of is_list_of <- function(x) { inherits(x, "vctrs_list_of") } #' @export vec_proxy.vctrs_list_of <- function(x, ...) { unclass(x) } # Formatting -------------------------------------------------------------- #' @export obj_print_data.vctrs_list_of <- function(x, ...) { if (length(x) == 0) return() print(vec_data(x)) } #' @export format.vctrs_list_of <- function(x, ...) { format.default(x) } #' @export vec_ptype_full.vctrs_list_of <- function(x, ...) { param <- vec_ptype_full(attr(x, "ptype")) if (grepl("\n", param)) { param <- paste0(indent(paste0("\n", param), 2), "\n") } paste0("list_of<", param, ">") } #' @export vec_ptype_abbr.vctrs_list_of <- function(x, ...) { paste0("list<", vec_ptype_abbr(attr(x, "ptype")), ">") } # vctr methods ------------------------------------------------------------ #' @export as.list.vctrs_list_of <- function(x, ...) { attr(x, "ptype") <- NULL attr(x, "class") <- NULL x } #' @export as.character.vctrs_list_of <- function(x, ...) { map_chr(x, function(elt) paste0("<", vec_ptype_abbr(elt), ">")) } #' @export `[[.vctrs_list_of` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `$.vctrs_list_of` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `[<-.vctrs_list_of` <- function(x, i, value) { wrapped_type <- attr(x, "ptype") value <- map(value, vec_cast, to = wrapped_type) value <- new_list_of(value, ptype = attr(x, "ptype")) NextMethod() } #' @export `[[<-.vctrs_list_of` <- function(x, i, value) { if (is.null(value)) { # Setting to NULL via [[ shortens the list! Example: # `[[<-`(list(1), 1, NULL) x[i] <- list(value) return(x) } value <- vec_cast(value, attr(x, "ptype")) NextMethod() } #' @export `$<-.vctrs_list_of` <- function(x, i, value) { value <- vec_cast(value, attr(x, "ptype")) NextMethod() } # Type system ------------------------------------------------------------- #' @rdname list_of #' @inheritParams vec_ptype2 #' @export vec_ptype2.vctrs_list_of #' @method vec_ptype2 vctrs_list_of #' @export vec_ptype2.vctrs_list_of <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.vctrs_list_of") } #' @method vec_ptype2.vctrs_list_of vctrs_list_of #' @export vec_ptype2.vctrs_list_of.vctrs_list_of <- function(x, y, ...) { type <- vec_ptype2(attr(x, "ptype"), attr(y, "ptype")) new_list_of(list(), type) } #' @rdname list_of #' @export vec_cast.vctrs_list_of #' @method vec_cast vctrs_list_of #' @export vec_cast.vctrs_list_of <- function(x, to, ...) { UseMethod("vec_cast.vctrs_list_of") } #' @export #' @method vec_cast.vctrs_list_of vctrs_list_of vec_cast.vctrs_list_of.vctrs_list_of <-function(x, to, ...) { # Casting list to list_of will warn/err if the cast is lossy, # but the locations refer to the inner vectors, # and the cast fails if all (vector) elements in a single (list) element as_list_of(x, .ptype = attr(to, "ptype")) } vctrs/R/type-date-time.R0000644000176200001440000003733513762412012014600 0ustar liggesusers#' Date, date-time, and duration S3 classes #' #' * A `date` ([Date]) is a double vector. Its value represent the number #' of days since the Unix "epoch", 1970-01-01. It has no attributes. #' * A `datetime` ([POSIXct] is a double vector. Its value represents the #' number of seconds since the Unix "Epoch", 1970-01-01. It has a single #' attribute: the timezone (`tzone`)) #' * A `duration` ([difftime]) #' #' These function help the base `Date`, `POSIXct`, and `difftime` classes fit #' into the vctrs type system by providing constructors, coercion functions, #' and casting functions. #' #' @param x A double vector representing the number of days since UNIX #' epoch for `new_date()`, number of seconds since UNIX epoch for #' `new_datetime()`, and number of `units` for `new_duration()`. #' @param tzone Time zone. A character vector of length 1. Either `""` for #' the local time zone, or a value from [OlsonNames()] #' @param units Units of duration. #' @export #' @keywords internal #' @examples #' new_date(0) #' new_datetime(0, tzone = "UTC") #' new_duration(1, "hours") new_date <- function(x = double()) { .Call(vctrs_new_date, x) } #' @export #' @rdname new_date new_datetime <- function(x = double(), tzone = "") { .Call(vctrs_new_datetime, x, tzone) } #' @export #' @rdname new_date new_duration <- function(x = double(), units = c("secs", "mins", "hours", "days", "weeks")) { stopifnot(is.double(x)) units <- arg_match0(units, c("secs", "mins", "hours", "days", "weeks")) structure( x, units = units, class = "difftime" ) } #' @export vec_proxy.Date <- function(x, ...) { date_validate(x) } #' @export vec_proxy.POSIXct <- function(x, ...) { datetime_validate(x) } #' @export vec_proxy.POSIXlt <- function(x, ...) { new_data_frame(unclass(x)) } #' @export vec_proxy_equal.POSIXlt <- function(x, ...) { x <- vec_cast(x, new_datetime(tzone = tzone(x))) vec_proxy_equal(x, ...) } #' @export vec_proxy_compare.POSIXlt <- function(x, ...) { x <- vec_cast(x, new_datetime(tzone = tzone(x))) vec_proxy_compare(x) } #' @export vec_restore.Date <- function(x, to, ...) { NextMethod() } #' @export vec_restore.POSIXct <- function(x, to, ...) { NextMethod() } #' @export vec_restore.POSIXlt <- function(x, to, ...) { NextMethod() } # Print ------------------------------------------------------------------ #' @export vec_ptype_full.Date <- function(x, ...) { "date" } #' @export vec_ptype_abbr.Date <- function(x, ...) { "date" } #' @export vec_ptype_full.POSIXct <- function(x, ...) { tzone <- if (tzone_is_local(x)) "local" else tzone(x) paste0("datetime<", tzone, ">") } #' @export vec_ptype_full.POSIXlt <- function(x, ...) { tzone <- if (tzone_is_local(x)) "local" else tzone(x) paste0("POSIXlt<", tzone, ">") } #' @export vec_ptype_abbr.POSIXt <- function(x, ...) { "dttm" } #' @export vec_ptype_full.difftime <- function(x, ...) { paste0("duration<", attr(x, "units"), ">") } #' @export vec_ptype_abbr.difftime <- function(x, ...) { "drtn" } # Coerce ------------------------------------------------------------------ #' @rdname new_date #' @export vec_ptype2.Date #' @method vec_ptype2 Date #' @export vec_ptype2.Date <- function(x, y, ...) { UseMethod("vec_ptype2.Date") } #' @method vec_ptype2.Date Date #' @export vec_ptype2.Date.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.Date") } #' @method vec_ptype2.Date POSIXct #' @export vec_ptype2.Date.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.POSIXct") } #' @method vec_ptype2.Date POSIXlt #' @export vec_ptype2.Date.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.POSIXlt") } #' @rdname new_date #' @export vec_ptype2.POSIXct #' @method vec_ptype2 POSIXct #' @export vec_ptype2.POSIXct <- function(x, y, ...) { UseMethod("vec_ptype2.POSIXct") } #' @method vec_ptype2.POSIXct POSIXct #' @export vec_ptype2.POSIXct.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.POSIXct") } #' @method vec_ptype2.POSIXct Date #' @export vec_ptype2.POSIXct.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.Date") } #' @method vec_ptype2.POSIXct POSIXlt #' @export vec_ptype2.POSIXct.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.POSIXlt") } #' @rdname new_date #' @export vec_ptype2.POSIXlt #' @method vec_ptype2 POSIXlt #' @export vec_ptype2.POSIXlt <- function(x, y, ...) { UseMethod("vec_ptype2.POSIXlt") } #' @method vec_ptype2.POSIXlt POSIXlt #' @export vec_ptype2.POSIXlt.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.POSIXlt") } #' @method vec_ptype2.POSIXlt Date #' @export vec_ptype2.POSIXlt.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.Date") } #' @method vec_ptype2.POSIXlt POSIXct #' @export vec_ptype2.POSIXlt.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.POSIXct") } #' @rdname new_date #' @export vec_ptype2.difftime #' @method vec_ptype2 difftime #' @export vec_ptype2.difftime <- function(x, y, ...) UseMethod("vec_ptype2.difftime") #' @method vec_ptype2.difftime difftime #' @export vec_ptype2.difftime.difftime <- function(x, y, ...) new_duration(units = units_union(x, y)) # Cast -------------------------------------------------------------------- #' @rdname new_date #' @export vec_cast.Date #' @method vec_cast Date #' @export vec_cast.Date <- function(x, to, ...) { UseMethod("vec_cast.Date") } #' @export #' @method vec_cast.Date Date vec_cast.Date.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.Date.Date") } #' @export #' @method vec_cast.Date POSIXct vec_cast.Date.POSIXct <- function(x, to, ...) { # TODO: Mark with `stop_native_implementation()` when we use lazy errors date_cast(x, to, ...) } #' @export #' @method vec_cast.Date POSIXlt vec_cast.Date.POSIXlt <- function(x, to, ...) { # TODO: Mark with `stop_native_implementation()` when we use lazy errors date_cast(x, to, ...) } # TODO: Remove when we have lazy errors date_cast <- function(x, to, ..., x_arg = "", to_arg = "") { out <- as.Date(x, tz = tzone(x)) x_ct <- as.POSIXct(x) out_ct <- as.POSIXct(as.character(out), tz = tzone(x)) lossy <- abs(x_ct - out_ct) > 1e-9 & !is.na(x) maybe_lossy_cast(out, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } #' @rdname new_date #' @export vec_cast.POSIXct #' @method vec_cast POSIXct #' @export vec_cast.POSIXct <- function(x, to, ...) { UseMethod("vec_cast.POSIXct") } #' @export #' @method vec_cast.POSIXct Date vec_cast.POSIXct.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.Date") } #' @export #' @method vec_cast.POSIXct POSIXlt vec_cast.POSIXct.POSIXlt <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.POSIXlt") } #' @export #' @method vec_cast.POSIXct POSIXct vec_cast.POSIXct.POSIXct <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.POSIXct") } #' @rdname new_date #' @export vec_cast.POSIXlt #' @method vec_cast POSIXlt #' @export vec_cast.POSIXlt <- function(x, to, ...) { UseMethod("vec_cast.POSIXlt") } #' @export #' @method vec_cast.POSIXlt Date vec_cast.POSIXlt.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.Date") } #' @export #' @method vec_cast.POSIXlt POSIXlt vec_cast.POSIXlt.POSIXlt <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.POSIXlt") } #' @export #' @method vec_cast.POSIXlt POSIXct vec_cast.POSIXlt.POSIXct <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.POSIXct") } #' @rdname new_date #' @export vec_cast.difftime #' @method vec_cast difftime #' @export vec_cast.difftime <- function(x, to, ...) { UseMethod("vec_cast.difftime") } #' @export #' @method vec_cast.difftime difftime vec_cast.difftime.difftime <- function(x, to, ...) { if (identical(units(x), units(to))) { x } else { # Hack: I can't see any obvious way of changing the units origin <- as.POSIXct(0, origin = "1970-01-01") difftime(origin, origin - x, units = units(to)) } } # Arithmetic -------------------------------------------------------------- #' @rdname new_date #' @export vec_arith.Date #' @method vec_arith Date #' @export vec_arith.Date <- function(op, x, y, ...) UseMethod("vec_arith.Date", y) #' @rdname new_date #' @export vec_arith.POSIXct #' @method vec_arith POSIXct #' @export vec_arith.POSIXct <- function(op, x, y, ...) UseMethod("vec_arith.POSIXct", y) #' @rdname new_date #' @export vec_arith.POSIXlt #' @method vec_arith POSIXlt #' @export vec_arith.POSIXlt <- function(op, x, y, ...) UseMethod("vec_arith.POSIXlt", y) #' @rdname new_date #' @export vec_arith.difftime #' @method vec_arith difftime #' @export vec_arith.difftime <- function(op, x, y, ...) UseMethod("vec_arith.difftime", y) #' @method vec_arith.Date default #' @export vec_arith.Date.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.POSIXct default #' @export vec_arith.POSIXct.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.POSIXlt default #' @export vec_arith.POSIXlt.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.difftime default #' @export vec_arith.difftime.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.Date Date #' @export vec_arith.Date.Date <- function(op, x, y, ...) { switch(op, `-` = difftime(x, y, units = "days"), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct POSIXct #' @export vec_arith.POSIXct.POSIXct <- function(op, x, y, ...) { switch(op, `-` = difftime(x, y, units = "secs"), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXlt POSIXlt #' @export vec_arith.POSIXlt.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXct Date #' @export vec_arith.POSIXct.Date <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date POSIXct #' @export vec_arith.Date.POSIXct <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXlt Date #' @export vec_arith.POSIXlt.Date <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date POSIXlt #' @export vec_arith.Date.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXlt POSIXct #' @export vec_arith.POSIXlt.POSIXct <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXct POSIXlt #' @export vec_arith.POSIXct.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date numeric #' @export vec_arith.Date.numeric <- function(op, x, y, ...) { switch(op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.numeric Date #' @export vec_arith.numeric.Date <- function(op, x, y, ...) { switch(op, `+` = vec_restore(vec_arith_base(op, x, y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct numeric #' @export vec_arith.POSIXct.numeric <- vec_arith.Date.numeric #' @method vec_arith.numeric POSIXct #' @export vec_arith.numeric.POSIXct <- vec_arith.numeric.Date #' @method vec_arith.POSIXlt numeric #' @export vec_arith.POSIXlt.numeric <- function(op, x, y, ...) { vec_arith.POSIXct.numeric(op, as.POSIXct(x), y, ...) } #' @method vec_arith.numeric POSIXlt #' @export vec_arith.numeric.POSIXlt <- function(op, x, y, ...) { vec_arith.numeric.POSIXct(op, x, as.POSIXct(y), ...) } #' @method vec_arith.Date difftime #' @export vec_arith.Date.difftime <- function(op, x, y, ...) { y <- vec_cast(y, new_duration(units = "days")) switch(op, `+` = , `-` = vec_restore(vec_arith_base(op, x, lossy_floor(y, x)), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime Date #' @export vec_arith.difftime.Date <- function(op, x, y, ...) { x <- vec_cast(x, new_duration(units = "days")) switch(op, `+` = vec_restore(vec_arith_base(op, lossy_floor(x, y), y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct difftime #' @export vec_arith.POSIXct.difftime <- function(op, x, y, ...) { y <- vec_cast(y, new_duration(units = "secs")) switch(op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime POSIXct #' @export vec_arith.difftime.POSIXct <- function(op, x, y, ...) { x <- vec_cast(x, new_duration(units = "secs")) switch(op, `+` = vec_restore(vec_arith_base(op, x, y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXlt difftime #' @export vec_arith.POSIXlt.difftime <- function(op, x, y, ...) { vec_arith.POSIXct.difftime(op, as.POSIXct(x), y, ...) } #' @method vec_arith.difftime POSIXlt #' @export vec_arith.difftime.POSIXlt <- function(op, x, y, ...) { vec_arith.difftime.POSIXct(op, x, as.POSIXct(y), ...) } #' @method vec_arith.difftime difftime #' @export vec_arith.difftime.difftime <- function(op, x, y, ...) { # Ensure x and y have same units args <- vec_cast_common(x, y) x <- args[[1L]] y <- args[[2L]] switch(op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), `/` = vec_arith_base(op, x, y), `%/%` = vec_arith_base(op, x, y), `%%` = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime MISSING #' @export vec_arith.difftime.MISSING <- function(op, x, y, ...) { switch(op, `-` = vec_restore(-vec_data(x), x), `+` = x, stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime numeric #' @export vec_arith.difftime.numeric <- function(op, x, y, ...) { vec_restore(vec_arith_base(op, x, y), x) } #' @method vec_arith.numeric difftime #' @export vec_arith.numeric.difftime <- function(op, x, y, ...) { switch(op, `/` = stop_incompatible_op(op, x, y), vec_restore(vec_arith_base(op, x, y), y) ) } # Helpers ----------------------------------------------------------------- # The tz attribute for POSIXlt can have 3 components # (time zone name, abbreviated name, abbreviated DST name) tzone <- function(x) { attr(x, "tzone")[[1]] %||% "" } tzone_is_local <- function(x) { identical(tzone(x), "") } tzone_union <- function(x, y) { if (tzone_is_local(x)) { tzone(y) } else { tzone(x) } } units_union <- function(x, y) { if (identical(units(x), units(y))) { units(x) } else { "secs" } } date_validate <- function(x) { .Call(vctrs_date_validate, x) } datetime_validate <- function(x) { .Call(vctrs_datetime_validate, x) } # as.character.Date() calls format() which tries to guess a simplified format. # Supplying a known format is faster and much more memory efficient. date_as_character <- function(x) { format(x, format = "%Y-%m-%d") } # `as.POSIXlt.character()` tries multiple formats. Supplying # a known format is much faster and more memory efficient. chr_date_as_posixlt <- function(x, tzone) { as.POSIXlt.character(x, tz = tzone, format = "%Y-%m-%d") } # `as.POSIXct.default()` for characters goes through `as.POSIXlt.character()` chr_date_as_posixct <- function(x, tzone) { out <- chr_date_as_posixlt(x, tzone) as.POSIXct.POSIXlt(out, tzone) } lossy_floor <- function(x, to, x_arg = "", to_arg = "") { x_floor <- floor(x) lossy <- x != x_floor maybe_lossy_cast(x_floor, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } # Guarantees the presence of a `tzone` attribute # by going through `as.POSIXlt.POSIXct()`. # Useful for testing, since we always try to restore a `tzone`. as_posixlt <- function(x, tz = "") { as.POSIXlt(as.POSIXct(x, tz)) } # Math -------------------------------------------------------------------- #' @export vec_math.Date <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_math.POSIXct <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_math.POSIXlt <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } vctrs/R/compat-linked-version.R0000644000176200001440000000362113762412012016151 0ustar liggesusers# nocov start --- compat-linked-version --- 2020-02-24 Mon 13:05 CET check_linked_version <- local({ # Keep in sync with compat-downstream-deps.R howto_reinstall_msg <- function(pkg) { os <- tolower(Sys.info()[["sysname"]]) if (os == "windows") { url <- "https://github.com/jennybc/what-they-forgot/issues/62" c( i = sprintf("Please update %s to the latest version.", pkg), i = sprintf("Updating packages on Windows requires precautions:\n <%s>", url) ) } else { c( i = sprintf("Please update %s with `install.packages(\"%s\")` and restart R.", pkg, pkg) ) } } function(pkg, with_rlang = requireNamespace("rlang")) { ver <- utils::packageVersion(pkg) ns <- asNamespace(pkg) linked_ver_ptr <- ns[[paste0(pkg, "_linked_version")]] if (is.null(linked_ver_ptr)) { linked_ver <- "" } else { # Construct call to avoid NOTE when argument to `.Call()` is not # statically analysable linked_ver <- do.call(".Call", list(linked_ver_ptr)) } if (nzchar(linked_ver) && ver == linked_ver) { return(invisible(NULL)) } header <- sprintf("The %s package is not properly installed.", pkg) if (nzchar(linked_ver)) { msg <- c(x = sprintf( "The DLL version (%s) does not correspond to the package version (%s).", linked_ver, ver )) } else { # Package does not have a version pointer. This happens when DLL # updating fails for the first version that includes the pointer. msg <- c(x = "The DLL version does not correspond to the package version.") } msg <- c(msg, howto_reinstall_msg(pkg)) if (with_rlang) { msg <- paste(header, rlang::format_error_bullets(msg), sep = "\n") rlang::abort(msg) } else { msg <- paste(c(header, msg), collapse = "\n") stop(msg, call. = FALSE) } } }) # nocov end vctrs/R/cast-list.R0000644000176200001440000000221613762412012013641 0ustar liggesusers#' Cast a list to vector of specific type #' #' This is a function for developers to use when extending vctrs. It casts #' a list to a more specific vectoring type, keeping the length constant. #' It does this by discarding (with a warning), any elements after the 1. #' It is called from `vec_cast.XYZ.list()` methods to preserve symmetry with #' `vec_cast.list.XYZ()`. #' #' See `vignette("s3-vector")` for details. #' #' @param x A list #' @param to Type to coerce to #' @inheritParams ellipsis::dots_empty #' #' @export #' @keywords internal vec_list_cast <- function(x, to, ..., x_arg = "", to_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } ns <- map_int(x, vec_size) n <- vec_size(x) out <- vec_init(to, n) for (i in seq_len(n)) { val <- x[[i]] if (vec_size(val) == 0) { next } val <- vec_slice(val, 1L) vec_slice(out, i) <- vec_cast(val, to, x_arg = x_arg, to_arg = to_arg) } if (!is.object(to)) { out <- shape_broadcast(out, to, x_arg = x_arg, to_arg = to_arg) } maybe_lossy_cast( out, x, to, lossy = !ns %in% c(0L, 1L), x_arg = x_arg, to_arg = to_arg ) } vctrs/R/register-s3.R0000644000176200001440000001163314042542465014120 0ustar liggesusers# This source code file is licensed under the unlicense license # https://unlicense.org #' Register a method for a suggested dependency #' #' Generally, the recommend way to register an S3 method is to use the #' `S3Method()` namespace directive (often generated automatically by the #' `@export` roxygen2 tag). However, this technique requires that the generic #' be in an imported package, and sometimes you want to suggest a package, #' and only provide a method when that package is loaded. `s3_register()` #' can be called from your package's `.onLoad()` to dynamically register #' a method only if the generic's package is loaded. #' #' For R 3.5.0 and later, `s3_register()` is also useful when demonstrating #' class creation in a vignette, since method lookup no longer always involves #' the lexical scope. For R 3.6.0 and later, you can achieve a similar effect #' by using "delayed method registration", i.e. placing the following in your #' `NAMESPACE` file: #' #' ``` #' if (getRversion() >= "3.6.0") { #' S3method(package::generic, class) #' } #' ``` #' #' @section Usage in other packages: #' To avoid taking a dependency on vctrs, you copy the source of #' [`s3_register()`](https://github.com/r-lib/vctrs/blob/master/R/register-s3.R) #' into your own package. It is licensed under the permissive #' [unlicense](https://choosealicense.com/licenses/unlicense/) to make it #' crystal clear that we're happy for you to do this. There's no need to include #' the license or even credit us when using this function. #' #' @usage NULL #' @param generic Name of the generic in the form `pkg::generic`. #' @param class Name of the class #' @param method Optionally, the implementation of the method. By default, #' this will be found by looking for a function called `generic.class` #' in the package environment. #' #' Note that providing `method` can be dangerous if you use #' devtools. When the namespace of the method is reloaded by #' `devtools::load_all()`, the function will keep inheriting from #' the old namespace. This might cause crashes because of dangling #' `.Call()` pointers. #' @export #' @examples #' # A typical use case is to dynamically register tibble/pillar methods #' # for your class. That way you avoid creating a hard dependency on packages #' # that are not essential, while still providing finer control over #' # printing when they are used. #' #' .onLoad <- function(...) { #' s3_register("pillar::pillar_shaft", "vctrs_vctr") #' s3_register("tibble::type_sum", "vctrs_vctr") #' } #' @keywords internal # nocov start s3_register <- function(generic, class, method = NULL) { stopifnot(is.character(generic), length(generic) == 1) stopifnot(is.character(class), length(class) == 1) pieces <- strsplit(generic, "::")[[1]] stopifnot(length(pieces) == 2) package <- pieces[[1]] generic <- pieces[[2]] caller <- parent.frame() get_method_env <- function() { top <- topenv(caller) if (isNamespace(top)) { asNamespace(environmentName(top)) } else { caller } } get_method <- function(method, env) { if (is.null(method)) { get(paste0(generic, ".", class), envir = get_method_env()) } else { method } } register <- function(...) { envir <- asNamespace(package) # Refresh the method each time, it might have been updated by # `devtools::load_all()` method_fn <- get_method(method) stopifnot(is.function(method_fn)) # Only register if generic can be accessed if (exists(generic, envir)) { registerS3method(generic, class, method_fn, envir = envir) } else if (identical(Sys.getenv("NOT_CRAN"), "true")) { warning(sprintf( "Can't find generic `%s` in package %s to register S3 method.", generic, package )) } } # Always register hook in case package is later unloaded & reloaded setHook(packageEvent(package, "onLoad"), register) # Avoid registration failures during loading (pkgload or regular) if (isNamespaceLoaded(package)) { register() } invisible() } on_load({ s3_register <- replace_from("s3_register", "rlang") }) knitr_defer <- function(expr, env = caller_env()) { roxy_caller <- detect(sys.frames(), env_inherits, ns_env("knitr")) if (is_null(roxy_caller)) { abort("Internal error: can't find knitr on the stack.") } blast( withr::defer(!!substitute(expr), !!roxy_caller), env ) } blast <- function(expr, env = caller_env()) { eval_bare(enexpr(expr), env) } knitr_local_registration <- function(generic, class, env = caller_env()) { stopifnot(is.character(generic), length(generic) == 1) stopifnot(is.character(class), length(class) == 1) pieces <- strsplit(generic, "::")[[1]] stopifnot(length(pieces) == 2) package <- pieces[[1]] generic <- pieces[[2]] name <- paste0(generic, ".", class) method <- env_get(env, name) old <- env_bind(global_env(), !!name := method) knitr_defer(env_bind(global_env(), !!!old)) } # nocov end vctrs/R/type-tibble.R0000644000176200001440000000205613762412012014160 0ustar liggesusers#' @rdname df_ptype2 #' @export tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { .Call( vctrs_tib_ptype2, x = x, y = y, x_arg = x_arg, y_arg = y_arg ) } #' @rdname df_ptype2 #' @export tib_cast <- function(x, to, ..., x_arg = "", to_arg = "") { .Call( vctrs_tib_cast, x = x, to = to, x_arg = x_arg, to_arg = to_arg ) } df_as_tibble <- function(df) { class(df) <- c("tbl_df", "tbl", "data.frame") df } # Conditionally registered in .onLoad() vec_ptype2.tbl_df.tbl_df <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_ptype2.tbl_df.data.frame <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_ptype2.data.frame.tbl_df <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_cast.tbl_df.tbl_df <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vec_cast.data.frame.tbl_df <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vec_cast.tbl_df.data.frame <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vctrs/R/utils.R0000644000176200001440000001441213762412012013077 0ustar liggesusersstr_dup <- function(x, times) { paste0(rep(x, times = times), collapse = "") } indent <- function(x, n) { pad <- str_dup(" ", n) map_chr(x, gsub, pattern = "(\n+)", replacement = paste0("\\1", pad)) } ones <- function(...) { array(1, dim = c(...)) } vec_coerce_bare <- function(x, type) { # Unexported wrapper around Rf_coerceVector() coerce <- env_get(ns_env("rlang"), "vec_coerce") coerce(x, type) } # Matches the semantics of c() - based on experimenting with the output # of c(), not reading the source code. outer_names <- function(names, outer, n) { .Call(vctrs_outer_names, names, outer, vec_cast(n, int())) } has_inner_names <- function(x) { !all(map_lgl(map(x, vec_names), is.null)) } cat_line <- function(...) { cat(paste0(..., "\n", collapse = "")) } set_partition <- function(x, y) { list( both = intersect(x, y), only_x = setdiff(x, y), only_y = setdiff(y, x) ) } all_equal <- function(x) all(x == x[[1]]) inline_list <- function(title, x, width = getOption("width"), quote = "") { label_width <- width - nchar(title) x <- glue::glue_collapse( encodeString(x, quote = quote), sep = ", ", width = label_width ) paste0(title, x) } has_unique_names <- function(x) { nms <- names(x) if (length(nms) != length(x)) { return(FALSE) } if (any(is.na(nms) | nms == "")) { return(FALSE) } !anyDuplicated(nms) } compact <- function(x) { is_null <- map_lgl(x, is.null) x[!is_null] } paste_line <- function (...) { paste(chr(...), collapse = "\n") } # Experimental result <- function(ok = NULL, err = NULL) { structure( list(ok = ok, err = err), class = "rlang_result" ) } result_get <- function(x) { if (!is_null(x$err)) { cnd_signal(x$err) } x$ok } obj_type <- function(x) { if (vec_is(x)) { vec_ptype_full(x) } else if (is.object(x)) { paste(class(x), collapse = "/") } else if (is_function(x)) { "function" } else { typeof(x) } } new_opts <- function(x, opts, subclass = NULL, arg = NULL) { if (!all(x %in% opts)) { if (is_null(arg)) { arg <- "Argument" } else { arg <- glue::glue("`{arg}`") } opts <- encodeString(opts, quote = "\"") opts <- glue::glue_collapse(opts, sep = ", ", last = " or ") abort(glue::glue("{arg} must be one of {opts}.")) } structure( set_names(opts %in% x, opts), class = c(subclass, "vctrs_opts") ) } glue_data_bullets <- function(.data, ..., .env = caller_env()) { glue_data <- function(...) glue::glue_data(.data, ..., .envir = .env) format_error_bullets(map_chr(chr(...), glue_data)) } unstructure <- function(x) { attributes(x) <- NULL x } # We almost never want `stringsAsFactors = TRUE`, and `FALSE` became # the default in R 4.0.0. This wrapper ensures that our tests are compliant # with versions of R before and after this change. Keeping it in `utils.R` # rather than as a testthat helper ensures that it is sourced before any other # testthat helpers. data.frame <- function(..., stringsAsFactors = NULL) { stringsAsFactors <- stringsAsFactors %||% FALSE base::data.frame(..., stringsAsFactors = stringsAsFactors) } try_catch_callback <- function(data, cnd) { .Call(vctrs_try_catch_callback, data, cnd) } try_catch_hnd <- function(data) { function(cnd) { try_catch_callback(data, cnd) } } try_catch_impl <- function(data, ...) { tryCatch( try_catch_callback(data, NULL), ... ) } ns_methods <- function(name) { ns_env(name)$.__S3MethodsTable__. } s3_find_method <- function(x, generic, ns = "base") { stopifnot( is_string(generic), is_string(ns) ) table <- ns_methods(ns_env(ns)) .Call(vctrs_s3_find_method, generic, x, table) } df_has_base_subset <- function(x) { method <- s3_find_method(x, "[", ns = "base") is_null(method) || identical(method, `[.data.frame`) } last <- function(x) { x[[length(x)]] } # Find the longest common suffix of two vectors vec_common_suffix <- function(x, y) { common <- vec_cast_common(x = x, y = y) x <- common$x y <- common$y x_size <- vec_size(x) y_size <- vec_size(y) n <- min(x_size, y_size) if (!n) { return(vec_slice(x, int())) } # Truncate the start of the vectors so they have equal size if (x_size < y_size) { y <- vec_slice(y, seq2(y_size - x_size + 1, y_size)) } else if (y_size < x_size) { x <- vec_slice(x, seq2(x_size - y_size + 1, x_size)) } # Find locations of unequal elements. Elements after the last # location are the common suffix. common <- vec_equal(x, y) i <- which(!common) # Slice the suffix after the last unequal element if (length(i)) { vec_slice(x, seq2(max(i) + 1, n)) } else { x } } import_from <- function(ns, names, env = caller_env()) { objs <- env_get_list(ns_env(ns), names) env_bind(env, !!!objs) } fast_c <- function(x, y) { .Call(vctrs_fast_c, x, y) } # Based on r-lib/bench (itself based on gaborcsardi/prettyunits) #' @export format.vctrs_bytes <- function(x, scientific = FALSE, digits = 3, drop0trailing = TRUE, ...) { nms <- names(x) bytes <- unclass(x) unit <- map_chr(x, find_unit, byte_units) res <- round(bytes / byte_units[unit], digits = digits) ## Zero bytes res[bytes == 0] <- 0 unit[bytes == 0] <- "B" ## NA and NaN bytes res[is.na(bytes)] <- NA_real_ res[is.nan(bytes)] <- NaN unit[is.na(bytes)] <- "" # Includes NaN as well # Append an extra B to each unit large_units <- unit %in% names(byte_units)[-1] unit[large_units] <- paste0(unit[large_units], "B") res <- format(res, scientific = scientific, digits = digits, drop0trailing = drop0trailing, ...) stats::setNames(paste0(res, unit), nms) } #' @export print.vctrs_bytes <- function(x, ...) { print(format(x, ...), quote = FALSE) } tolerance <- sqrt(.Machine$double.eps) find_unit <- function(x, units) { if (is.na(x) || is.nan(x) || x <= 0 || is.infinite(x)) { return(NA_character_) } epsilon <- 1 - (x * (1 / units)) names(utils::tail(n = 1, which(epsilon < tolerance))) } byte_units <- c( 'B' = 1, 'K' = 1024, 'M' = 1024 ^ 2, 'G' = 1024 ^ 3, 'T' = 1024 ^ 4, 'P' = 1024 ^ 5, 'E' = 1024 ^ 6, 'Z' = 1024 ^ 7, 'Y' = 1024 ^ 8 ) new_vctrs_bytes <- function(x) { structure(x, class = c("vctrs_bytes", "numeric")) } named <- function(x) { if (is_null(names(x))) { names(x) <- names2(x) } x } vctrs/R/zzz.R0000644000176200001440000001023214042540715012574 0ustar liggesusers# nocov start on_package_load <- function(pkg, expr) { if (isNamespaceLoaded(pkg)) { expr } else { thunk <- function(...) expr setHook(packageEvent(pkg, "onLoad"), thunk) } } .onLoad <- function(libname, pkgname) { check_linked_version(pkgname) ns <- ns_env("vctrs") run_on_load() on_package_load("testthat", { s3_register("testthat::is_informative_error", "vctrs_error_cast_lossy") s3_register("testthat::is_informative_error", "vctrs_error_cast_lossy_dropped") }) s3_register("generics::as.factor", "vctrs_vctr") s3_register("generics::as.ordered", "vctrs_vctr") s3_register("generics::as.difftime", "vctrs_vctr") # Remove once tibble has implemented the methods on_package_load("tibble", { if (!env_has(ns_env("tibble"), "vec_ptype2.tbl_df.tbl_df")) { s3_register("vctrs::vec_ptype2", "tbl_df.tbl_df") s3_register("vctrs::vec_ptype2", "tbl_df.data.frame") s3_register("vctrs::vec_ptype2", "data.frame.tbl_df") } if (!env_has(ns_env("tibble"), "vec_cast.tbl_df.tbl_df")) { s3_register("vctrs::vec_cast", "tbl_df.tbl_df") s3_register("vctrs::vec_cast", "tbl_df.data.frame") s3_register("vctrs::vec_cast", "data.frame.tbl_df") } }) on_package_load("dplyr", { if (!env_has(ns_env("dplyr"), "vec_restore.grouped_df")) { s3_register("vctrs::vec_restore", "grouped_df") } if (!env_has(ns_env("dplyr"), "vec_ptype2.grouped_df.grouped_df")) { s3_register("vctrs::vec_ptype2", "grouped_df.grouped_df") s3_register("vctrs::vec_ptype2", "grouped_df.data.frame") s3_register("vctrs::vec_ptype2", "grouped_df.tbl_df") s3_register("vctrs::vec_ptype2", "data.frame.grouped_df") s3_register("vctrs::vec_ptype2", "tbl_df.grouped_df") } if (!env_has(ns_env("dplyr"), "vec_cast.grouped_df.grouped_df")) { s3_register("vctrs::vec_cast", "grouped_df.grouped_df") s3_register("vctrs::vec_cast", "grouped_df.data.frame") s3_register("vctrs::vec_cast", "grouped_df.tbl_df") s3_register("vctrs::vec_cast", "data.frame.grouped_df") s3_register("vctrs::vec_cast", "tbl_df.grouped_df") } if (!env_has(ns_env("dplyr"), "vec_restore.rowwise_df")) { s3_register("vctrs::vec_restore", "rowwise_df") } if (!env_has(ns_env("dplyr"), "vec_ptype2.rowwise_df.rowwise_df")) { s3_register("vctrs::vec_ptype2", "rowwise_df.rowwise_df") s3_register("vctrs::vec_ptype2", "rowwise_df.data.frame") s3_register("vctrs::vec_ptype2", "rowwise_df.tbl_df") s3_register("vctrs::vec_ptype2", "data.frame.rowwise_df") s3_register("vctrs::vec_ptype2", "tbl_df.rowwise_df") } if (!env_has(ns_env("dplyr"), "vec_cast.rowwise_df.rowwise_df")) { s3_register("vctrs::vec_cast", "rowwise_df.rowwise_df") s3_register("vctrs::vec_cast", "rowwise_df.data.frame") s3_register("vctrs::vec_cast", "rowwise_df.tbl_df") s3_register("vctrs::vec_cast", "data.frame.rowwise_df") s3_register("vctrs::vec_cast", "tbl_df.rowwise_df") } }) on_package_load("sf", { import_from("sf", sf_deps, env = sf_env) if (!env_has(ns_env("sf"), "vec_restore.sf")) { s3_register("vctrs::vec_proxy", "sf") s3_register("vctrs::vec_restore", "sf") } if (!env_has(ns_env("sf"), "vec_ptype2.sf.sf")) { s3_register("vctrs::vec_ptype2", "sf.sf") s3_register("vctrs::vec_ptype2", "sf.data.frame") s3_register("vctrs::vec_ptype2", "data.frame.sf") s3_register("vctrs::vec_ptype2", "sf.tbl_df") s3_register("vctrs::vec_ptype2", "tbl_df.sf") s3_register("vctrs::vec_cast", "sf.sf") s3_register("vctrs::vec_cast", "sf.data.frame") s3_register("vctrs::vec_cast", "data.frame.sf") } }) utils::globalVariables("vec_set_attributes") # Prevent two copies from being made by `attributes(x) <- attrib` on R < 3.6.0 if (getRversion() >= '3.6.0') { vec_set_attributes <- function(x, attrib) { attributes(x) <- attrib x } } else { vec_set_attributes <- function(x, attrib) { .Call(vctrs_set_attributes, x, attrib) } } env_bind(ns, vec_set_attributes = vec_set_attributes) .Call(vctrs_init_library, ns_env()) } # nocov end vctrs/R/type-sclr.R0000644000176200001440000000737413762412012013672 0ustar liggesusersnew_sclr <- function(..., class = character()) { fields <- list(...) stopifnot(has_unique_names(fields)) structure( list(...), class = c(class, "vctrs_sclr") ) } # Subsetting -------------------------------------------------------------- #' @export `[[.vctrs_sclr` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `$.vctrs_sclr` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `[[<-.vctrs_sclr` <- function(x, i, value) { .Call(vctrs_list_set, x, i, value) } #' @export `$<-.vctrs_sclr` <- function(x, i, value) { .Call(vctrs_list_set, x, i, value) } # Shared niceties --------------------------------------------------------- #' @export print.vctrs_sclr <- function(x, ...) { obj_print(x, ...) invisible(x) } #' @export as.list.vctrs_sclr <- function(x, ...) { vec_set_attributes(x, list(names = names(x))) } #' @export as.data.frame.vctrs_sclr <- function(x, row.names = NULL, optional = FALSE, ..., nm = paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ") ) { force(nm) cols <- list(list(x)) if (!optional) { names(cols) <- nm } new_data_frame(cols, n = 1L) } # Vector behaviours ------------------------------------------------------- #' @export `[.vctrs_sclr` <- function(x, ...) { stop_unsupported(x, "[") } #' @export `[<-.vctrs_sclr` <- function(x, ..., value) { stop_unsupported(x, "[<-") } #' @export c.vctrs_sclr <- function(...) { stop_unsupported(..1, "c") } #' @export Math.vctrs_sclr <- function(x, ...) { stop_unsupported(x, .Generic) } #' @export Ops.vctrs_sclr <- function(e1, e2) { stop_unsupported(e1, .Generic) } #' @export Complex.vctrs_sclr <- function(z) { stop_unsupported(z, .Generic) } #' @export Summary.vctrs_sclr <- function(..., na.rm = TRUE) { stop_unsupported(..1, .Generic) } #' @export `names<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "names<-") } #' @export xtfrm.vctrs_sclr <- function(x) { stop_unsupported(x, "xtfrm") } #' @export `dim<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "dim<-") } #' @export `dimnames<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "dimnames<-") } #' @export levels.vctrs_sclr <- function(x) { stop_unsupported(x, "levels") } #' @export `levels<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "levels<-") } #' @export `t.vctrs_sclr` <- function(x) { stop_unsupported(x, "t") } #' @export `is.na<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "is.na<-") } #' @export unique.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export duplicated.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export anyDuplicated.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export as.logical.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.logical") } #' @export as.integer.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.integer") } #' @export as.double.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.double") } #' @export as.character.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.character") } #' @export as.Date.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.Date") } #' @export as.POSIXct.vctrs_sclr <- function(x, tz = "", ...) { stop_unsupported(x, "as.POSIXct") } # Unimplemented ----------------------------------------------------------- #' @export summary.vctrs_sclr <- function(object, ...) { # nocov start stop_unimplemented(object, "summary") # nocov end } vctrs/R/fields.R0000644000176200001440000000164313762412012013207 0ustar liggesusers#' Tools for accessing the fields of a record. #' #' A [rcrd] behaves like a vector, so `length()`, `names()`, and `$` can #' not provide access to the fields of the underlying list. These helpers do: #' `fields()` is equivalent to `names()`; `n_fields()` is equivalent to #' `length()`; `field()` is equivalent to `$`. #' #' @param x A [rcrd], i.e. a list of equal length vectors with unique names. #' @keywords internal #' @export #' @examples #' x <- new_rcrd(list(x = 1:3, y = 3:1, z = letters[1:3])) #' n_fields(x) #' fields(x) #' #' field(x, "y") #' field(x, "y") <- runif(3) #' field(x, "y") fields <- function(x) { .Call(vctrs_fields, x) } #' @export #' @rdname fields n_fields <- function(x) { .Call(vctrs_n_fields, x) } #' @export #' @rdname fields field <- function(x, i) { .Call(vctrs_field_get, x, i) } #' @export #' @rdname fields `field<-` <- function(x, i, value) { .Call(vctrs_field_set, x, i, value) } vctrs/R/type-integer64.R0000644000176200001440000000506113762412012014525 0ustar liggesusers #' @export vec_proxy_compare.integer64 <- function(x, ...) { bit64::rank.integer64(x) } # Print ------------------------------------------------------------------- #' 64 bit integers #' #' A `integer64` is a 64 bits integer vector, implemented in the `bit64` package. #' #' These functions help the `integer64` class from `bit64` in to #' the vctrs type system by providing coercion functions #' and casting functions. #' #' @keywords internal #' @rdname int64 #' @export vec_ptype_full.integer64 <- function(x, ...) { "integer64" } #' @rdname int64 #' @export vec_ptype_abbr.integer64 <- function(x, ...) { "int64" } # Coerce ------------------------------------------------------------------ #' @export #' @rdname int64 #' @export vec_ptype2.integer64 #' @method vec_ptype2 integer64 vec_ptype2.integer64 <- function(x, y, ...) { UseMethod("vec_ptype2.integer64") } #' @method vec_ptype2.integer64 integer64 #' @export vec_ptype2.integer64.integer64 <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer64 integer #' @export vec_ptype2.integer64.integer <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer integer64 #' @export vec_ptype2.integer.integer64 <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer64 logical #' @export vec_ptype2.integer64.logical <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.logical integer64 #' @export vec_ptype2.logical.integer64 <- function(x, y, ...) bit64::integer64() # Cast -------------------------------------------------------------------- #' @export #' @rdname int64 #' @export vec_cast.integer64 #' @method vec_cast integer64 vec_cast.integer64 <- function(x, to, ...) { UseMethod("vec_cast.integer64") } #' @export #' @method vec_cast.integer64 integer64 vec_cast.integer64.integer64 <- function(x, to, ...) { x } #' @export #' @method vec_cast.integer64 integer vec_cast.integer64.integer <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.integer integer64 vec_cast.integer.integer64 <- function(x, to, ...) { as.integer(x) } #' @export #' @method vec_cast.integer64 logical vec_cast.integer64.logical <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.logical integer64 vec_cast.logical.integer64 <- function(x, to, ...) { as.logical(x) } #' @export #' @method vec_cast.integer64 double vec_cast.integer64.double <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.double integer64 vec_cast.double.integer64 <- function(x, to, ...) { as.double(x) } vctrs/R/subscript-loc.R0000644000176200001440000003615613762412012014541 0ustar liggesusers#' Create a vector of locations #' #' @description #' #' These helpers provide a means of standardizing common indexing #' methods such as integer, character or logical indexing. #' #' * `vec_as_location()` accepts integer, character, or logical vectors #' of any size. The output is always an integer vector that is #' suitable for subsetting with `[` or [vec_slice()]. It might be a #' different size than the input because negative selections are #' transformed to positive ones and logical vectors are transformed #' to a vector of indices for the `TRUE` locations. #' #' * `vec_as_location2()` accepts a single number or string. It returns #' a single location as a integer vector of size 1. This is suitable #' for extracting with `[[`. #' #' @inheritParams vec_slice #' @param n A single integer representing the total size of the #' object that `i` is meant to index into. #' @param names If `i` is a character vector, `names` should be a character #' vector that `i` will be matched against to construct the index. Otherwise, #' not used. The default value of `NULL` will result in an error #' if `i` is a character vector. #' @param missing Whether to throw an `"error"` when `i` is a missing #' value, or `"propagate"` it (return it as is). By default, vector #' subscripts can contain missing values and scalar subscripts can't. #' @param arg The argument name to be displayed in error messages when #' `vec_as_location()` and `vec_as_location2()` are used to check the #' type of a function input. #' #' @return `vec_as_location()` returns an integer vector that can be used #' as an index in a subsetting operation. `vec_as_location2()` #' returns an integer of size 1 that can be used a scalar index for #' extracting an element. #' #' @examples #' x <- array(1:6, c(2, 3)) #' dimnames(x) <- list(c("r1", "r2"), c("c1", "c2", "c3")) #' #' # The most common use case validates row indices #' vec_as_location(1, vec_size(x)) #' #' # Negative indices can be used to index from the back #' vec_as_location(-1, vec_size(x)) #' #' # Character vectors can be used if `names` are provided #' vec_as_location("r2", vec_size(x), rownames(x)) #' #' # You can also construct an index for dimensions other than the first #' vec_as_location(c("c2", "c1"), ncol(x), colnames(x)) #' #' @keywords internal #' @export vec_as_location <- function(i, n, names = NULL, ..., missing = c("propagate", "error"), arg = NULL) { if (!missing(...)) ellipsis::check_dots_empty() .Call( vctrs_as_location, i = i, n = n, names = names, loc_negative = "invert", loc_oob = "error", loc_zero = "remove", missing = missing, arg = arg ) } #' @rdname vec_as_location #' @param negative Whether to `"invert"` negative values to positive #' locations, throw an informative `"error"`, or `"ignore"` them. #' @param oob If `"error"`, throws an informative `"error"` if some #' elements are out-of-bounds. If `"extend"`, out-of-bounds #' locations are allowed if they are consecutive after the end. This #' can be used to implement extendable vectors like `letters[1:30]`. #' @param zero Whether to `"remove"` zero values, throw an informative #' `"error"`, or `"ignore"` them. #' @export num_as_location <- function(i, n, ..., missing = c("propagate", "error"), negative = c("invert", "error", "ignore"), oob = c("error", "extend"), zero = c("remove", "error", "ignore"), arg = NULL) { if (!missing(...)) ellipsis::check_dots_empty() if (is.object(i) || !(is_integer(i) || is_double(i))) { abort("`i` must be a numeric vector.") } .Call( vctrs_as_location, i = i, n = n, names = NULL, loc_negative = negative, loc_oob = oob, loc_zero = zero, missing = missing, arg = arg ) } #' @rdname vec_as_location #' @export vec_as_location2 <- function(i, n, names = NULL, ..., missing = c("error", "propagate"), arg = NULL) { if (!missing(...)) ellipsis::check_dots_empty() result_get(vec_as_location2_result( i, n = n, names = names, negative = "error", missing = missing, arg = arg )) } #' @rdname vec_as_location #' @param negative Whether to throw an `"error"` when `i` is a #' negative location value, or `"ignore"` it. #' @export num_as_location2 <- function(i, n, ..., negative = c("error", "ignore"), missing = c("error", "propagate"), arg = NULL) { if (!missing(...)) ellipsis::check_dots_empty() if (!is_integer(i) && !is_double(i)) { abort("`i` must be a numeric vector.") } result_get(vec_as_location2_result( i, n = n, names = NULL, negative = negative, missing = missing, arg = arg )) } vec_as_location2_result <- function(i, n, names, missing, negative, arg) { allow_missing <- arg_match0(missing, c("error", "propagate")) == "propagate" allow_negative <- arg_match0(negative, c("error", "ignore")) == "ignore" result <- vec_as_subscript2_result( i = i, arg = arg, logical = "error" ) if (!is_null(result$err)) { parent <- result$err return(result(err = new_error_location2_type( i = i, subscript_arg = arg, # Should body fields in parents be automatically inherited? body = parent$body, parent = parent ))) } # Locations must be size 1, can't be NA, and must be positive i <- result$ok if (length(i) != 1L) { return(result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_scalar ))) } neg <- typeof(i) == "integer" && !is.na(i) && i < 0L if (allow_negative && neg) { i <- -i } if (is.na(i)) { if (!allow_missing && is.na(i)) { result <- result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_present )) } else { result <- result(i) } return(result) } if (identical(i, 0L)) { return(result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_positive ))) } if (!allow_negative && neg) { return(result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_positive ))) } # FIXME: Use result approach in internal implementation? err <- NULL i <- tryCatch( vec_as_location(i, n, names = names, arg = arg), vctrs_error_subscript_type = function(err) { err <<- err i } ) if (neg) { i <- -i } if (is_null(err)) { result(i) } else { result(err = new_error_location2_type( i = i, parent = err, subscript_arg = arg )) } } stop_location_negative_missing <- function(i, ...) { cnd_signal(new_error_subscript_type( i, ..., body = cnd_body_vctrs_error_location_negative_missing )) } cnd_body_vctrs_error_location_negative_missing <- function(cnd, ...) { missing_loc <- which(is.na(cnd$i)) arg <- append_arg("Subscript", cnd$subscript_arg) if (length(missing_loc) == 1) { loc <- glue::glue("{arg} has a missing value at location {missing_loc}.") } else { n_loc <- length(missing_loc) missing_loc <- ensure_full_stop(enumerate(missing_loc)) loc <- glue::glue( "{arg} has {n_loc} missing values at locations {missing_loc}" ) } format_error_bullets(c( x = "Negative locations can't have missing values.", i = loc )) } stop_location_negative_positive <- function(i, ...) { cnd_signal(new_error_subscript_type( i, ..., body = cnd_body_vctrs_error_location_negative_positive )) } cnd_body_vctrs_error_location_negative_positive <- function(cnd, ...) { positive_loc <- which(cnd$i > 0) arg <- append_arg("Subscript", cnd$subscript_arg) if (length(positive_loc) == 1) { loc <- glue::glue("{arg} has a positive value at location {positive_loc}.") } else { n_loc <- length(positive_loc) positive_loc <- ensure_full_stop(enumerate(positive_loc)) loc <- glue::glue( "{arg} has {n_loc} positive values at locations {positive_loc}" ) } format_error_bullets(c( x = "Negative and positive locations can't be mixed.", i = loc )) } new_error_location2_type <- function(i, ..., class = NULL) { new_error_subscript2_type( class = class, i = i, logical = "error", numeric = "cast", character = "cast", ... ) } cnd_bullets_location2_need_scalar <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} has size {length(i)} but must be size 1.") )) } cnd_bullets_location2_need_present <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't be `NA`.") )) } cnd_bullets_location2_need_positive <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} has value {i} but must be a positive location.") )) } stop_location_negative <- function(i, ...) { cnd_signal(new_error_subscript_type( i, body = cnd_bullets_location_need_non_negative, ... )) } cnd_bullets_location_need_non_negative <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't contain negative locations.") )) } stop_location_zero <- function(i, ...) { cnd_signal(new_error_subscript_type( i, body = cnd_bullets_location_need_non_zero, ... )) } cnd_bullets_location_need_non_zero <- function(cnd, ...) { zero_loc <- which(cnd$i == 0) zero_loc_size <- length(zero_loc) arg <- append_arg("Subscript", cnd$subscript_arg) if (zero_loc_size == 1) { loc <- glue::glue("It has a `0` value at location {zero_loc}.") } else { zero_loc <- ensure_full_stop(enumerate(zero_loc)) loc <- glue::glue( "It has {zero_loc_size} `0` values at locations {zero_loc}" ) } format_error_bullets(c( x = glue::glue("{arg} can't contain `0` values."), i = loc )) } stop_subscript_missing <- function(i, ...) { cnd_signal(new_error_subscript_type( i = i, body = cnd_bullets_subscript_missing, ... )) } cnd_bullets_subscript_missing <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) missing_loc <- which(is.na(cnd$i)) if (length(missing_loc) == 1) { missing_line <- glue::glue("It has a missing value at location {missing_loc}.") } else { missing_enum <- ensure_full_stop(enumerate(missing_loc)) missing_line <- glue::glue("It has missing values at locations {missing_enum}") } format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't contain missing values."), x = missing_line )) } stop_indicator_size <- function(i, n, ...) { cnd_signal(new_error_subscript_size( i, n = n, ..., body = cnd_body_vctrs_error_indicator_size )) } cnd_body_vctrs_error_indicator_size <- function(cnd, ...) { cnd$subscript_arg <- append_arg("subscript", cnd$subscript_arg) glue_data_bullets( cnd, i = "Logical subscripts must match the size of the indexed input.", x = "Input has size {n} but {subscript_arg} has size {vec_size(i)}." ) } stop_subscript_oob <- function(i, subscript_type, ...) { stop_subscript( class = "vctrs_error_subscript_oob", i = i, subscript_type = subscript_type, ... ) } #' @export cnd_header.vctrs_error_subscript_oob <- function(cnd, ...) { if (cnd_subscript_oob_non_consecutive(cnd)) { return(cnd_header_vctrs_error_subscript_oob_non_consecutive(cnd, ...)) } elt <- cnd_subscript_element(cnd) action <- cnd_subscript_action(cnd) glue::glue("Can't {action} {elt[[2]]} that don't exist.") } #' @export cnd_body.vctrs_error_subscript_oob <- function(cnd, ...) { switch(cnd_subscript_type(cnd), numeric = if (cnd_subscript_oob_non_consecutive(cnd)) { cnd_body_vctrs_error_subscript_oob_non_consecutive(cnd, ...) } else { cnd_body_vctrs_error_subscript_oob_location(cnd, ...) }, character = cnd_body_vctrs_error_subscript_oob_name(cnd, ...), abort("Internal error: subscript type can't be `logical` for OOB errors.") ) } cnd_body_vctrs_error_subscript_oob_location <- function(cnd, ...) { i <- cnd$i elt <- cnd_subscript_element(cnd) # In case of negative indexing i <- abs(i) # In case of missing locations i <- i[!is.na(i)] oob <- i[i > cnd$size] oob_enum <- enumerate(oob) format_error_bullets(c( x = glue::glue(ngettext( length(oob), "Location {oob_enum} doesn't exist.", "Locations {oob_enum} don't exist." )), i = glue::glue(ngettext( cnd$size, "There are only {cnd$size} {elt[[1]]}.", "There are only {cnd$size} {elt[[2]]}.", )) )) } cnd_body_vctrs_error_subscript_oob_name <- function(cnd, ...) { elt <- cnd_subscript_element(cnd, capital = TRUE) oob <- cnd$i[!cnd$i %in% cnd$names] oob_enum <- enumerate(glue::backtick(oob)) format_error_bullets(c( x = glue::glue(ngettext( length(oob), "{elt[[1]]} {oob_enum} doesn't exist.", "{elt[[2]]} {oob_enum} don't exist." )) )) } stop_location_oob_non_consecutive <- function(i, size, ...) { stop_subscript_oob( i = i, size = size, subscript_type = "numeric", subscript_oob_non_consecutive = TRUE, ... ) } cnd_header_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) { action <- cnd_subscript_action(cnd) elt <- cnd_subscript_element(cnd) glue::glue("Can't {action} {elt[[2]]} beyond the end with non-consecutive locations.") } cnd_body_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) { i <- sort(cnd$i) i <- i[i > cnd$size] non_consecutive <- i[c(TRUE, diff(i) != 1L)] arg <- append_arg("Subscript", cnd$subscript_arg) if (length(non_consecutive) == 1) { x_line <- glue::glue("{arg} contains non-consecutive location {non_consecutive}.") } else { non_consecutive <- ensure_full_stop(enumerate(non_consecutive)) x_line <- glue::glue("{arg} contains non-consecutive locations {non_consecutive}") } glue_data_bullets( cnd, i = "Input has size {size}.", x = x_line ) } cnd_subscript_oob_non_consecutive <- function(cnd) { out <- cnd$subscript_oob_non_consecutive %||% FALSE stopifnot(is_bool(out)) out } vctrs/R/split.R0000644000176200001440000000216113762412012013070 0ustar liggesusers#' Split a vector into groups #' #' This is a generalisation of [split()] that can split by any type of vector, #' not just factors. Instead of returning the keys in the character names, #' the are returned in a separate parallel vector. #' #' @param x Vector to divide into groups. #' @param by Vector whose unique values defines the groups. #' @return A data frame with two columns and size equal to #' `vec_size(vec_unique(by))`. The `key` column has the same type as #' `by`, and the `val` column is a list containing elements of type #' `vec_ptype(x)`. #' #' Note for complex types, the default `data.frame` print method will be #' suboptimal, and you will want to coerce into a tibble to better #' understand the output. #' @export #' #' @section Dependencies: #' - [vec_group_loc()] #' - [vec_chop()] #' #' @examples #' vec_split(mtcars$cyl, mtcars$vs) #' vec_split(mtcars$cyl, mtcars[c("vs", "am")]) #' #' if (require("tibble")) { #' as_tibble(vec_split(mtcars$cyl, mtcars[c("vs", "am")])) #' as_tibble(vec_split(mtcars, mtcars[c("vs", "am")])) #' } vec_split <- function(x, by) { .Call(vctrs_split, x, by) } vctrs/R/assert.R0000644000176200001440000001374213762412012013245 0ustar liggesusers#' Assert an argument has known prototype and/or size #' #' @description #' #' * `vec_is()` is a predicate that checks if its input is a vector that #' conforms to a prototype and/or a size. #' #' * `vec_assert()` throws an error when the input is not a vector or #' doesn't conform. #' #' @section Scalars and vectors: #' #' Informally, a vector is a collection that makes sense to use as #' column in a data frame. An object is a vector if one of the #' following conditions hold: #' #' - A [vec_proxy()] method is implemented for the class of the #' object. #' #' - The [base type][typeof] of the object is atomic: `"logical"`, #' `"integer"`, `"double"`, `"complex"`, `"character"`, `"raw"` #' #' - The object is a [data.frame]. #' #' - The base type is `"list"`, and one of: #' - The object is a bare `"list"` without a `"class"` attribute. #' - The object explicitly inherits from `"list"`. That is, the #' `"class"` attribute contains `"list"` and `inherits(x, #' "list")` is `TRUE`. #' #' Otherwise an object is treated as scalar and cannot be used as a #' vector. In particular: #' #' - `NULL` is not a vector. #' - S3 lists like `lm` objects are treated as scalars by default. #' - Objects of type [expression] are not treated as vectors. #' - Support for S4 vectors is currently limited to objects that #' inherit from an atomic type. #' - Subclasses of [data.frame] that *append* their class to the `"class"` #' attribute are not treated as vectors. If you inherit from an S3 class, #' always prepend your class to the `"class"` attribute for correct dispatch. #' #' @section Error types: #' #' `vec_is()` never throws. #' `vec_assert()` throws the following errors: #' #' * If the input is not a vector, an error of class #' `"vctrs_error_scalar_type"` is raised. #' #' * If the prototype doesn't match, an error of class #' `"vctrs_error_assert_ptype"` is raised. #' #' * If the size doesn't match, an error of class #' `"vctrs_error_assert_size"` is raised. #' #' Both errors inherit from `"vctrs_error_assert"`. #' #' @param x A vector argument to check. #' @param ptype Prototype to compare against. If the prototype has a #' class, its [vec_ptype()] is compared to that of `x` with #' `identical()`. Otherwise, its [typeof()] is compared to that of #' `x` with `==`. #' @param size Size to compare against #' @param arg Name of argument being checked. This is used in error #' messages. The label of the expression passed as `x` is taken as #' default. #' #' @return `vec_is()` returns `TRUE` or `FALSE`. `vec_assert()` either #' throws a typed error (see section on error types) or returns `x`, #' invisibly. #' @export vec_assert <- function(x, ptype = NULL, size = NULL, arg = as_label(substitute(x))) { if (!vec_is_vector(x)) { stop_scalar_type(x, arg) } if (!is_null(ptype)) { ptype <- vec_ptype(ptype) x_type <- vec_ptype_finalise(vec_ptype(x)) if (!is_same_type(x_type, ptype)) { msg <- vec_assert_type_explain(x_type, ptype, arg) abort( msg, class = c("vctrs_error_assert_ptype", "vctrs_error_assert"), required = ptype, actual = x_type ) } } if (!is_null(size)) { size <- vec_recycle(vec_cast(size, integer()), 1L) x_size <- vec_size(x) if (!identical(x_size, size)) { msg <- paste0("`", arg, "` must have size ", size, ", not size ", x_size, ".") abort( msg, class = c("vctrs_error_assert_size", "vctrs_error_assert"), required = size, actual = x_size ) } } invisible(x) } #' @rdname vec_assert #' @export vec_is <- function(x, ptype = NULL, size = NULL) { if (!vec_is_vector(x)) { return(FALSE) } if (!is_null(ptype)) { ptype <- vec_ptype(ptype) x_type <- vec_ptype_finalise(vec_ptype(x)) if (!is_same_type(x_type, ptype)) { return(FALSE) } } if (!is_null(size)) { size <- vec_recycle(vec_cast(size, integer()), 1L) x_size <- vec_size(x) if (!identical(x_size, size)) { return(FALSE) } } TRUE } #' Is object a vector? #' @noRd #' #' @description #' #' Returns `TRUE` if: #' #' * `x` is an atomic, whether it has a class or not. #' * `x` is a bare list without class. #' * `x` implements [vec_proxy()]. #' #' S3 lists are thus treated as scalars unless they implement a proxy. vec_is_vector <- function(x) { .Call(vctrs_is_vector, x) } #' Is the object a list? #' #' @description #' `vec_is_list()` tests if `x` is considered a list in the vctrs sense. It #' returns `TRUE` if: #' #' * `x` is a bare list with no class. #' * `x` is a list explicitly inheriting from `"list"`. #' #' @param x An object. #' #' @details #' Notably, data frames and S3 record style classes like POSIXlt are not #' considered lists. #' #' @export #' @examples #' vec_is_list(list()) #' vec_is_list(list_of(1)) #' #' vec_is_list(data.frame()) vec_is_list <- function(x) { .Call(vctrs_is_list, x) } is_same_type <- function(x, ptype) { if (is_partial(ptype)) { env <- environment() ptype <- tryCatch( vctrs_error_incompatible_type = function(...) return_from(env, FALSE), vec_ptype_common(x, ptype) ) } x <- vec_slice(x, integer()) ptype <- vec_slice(ptype, integer()) # FIXME: Remove row names for matrices and arrays, and handle empty # but existing dimnames x <- vec_set_names(x, NULL) ptype <- vec_set_names(ptype, NULL) identical(x, ptype) } vec_assert_type_explain <- function(x, type, arg) { arg <- str_backtick(arg) x <- paste0("<", vec_ptype_full(x), ">") type <- paste0("<", vec_ptype_full(type), ">") intro <- paste0(arg, " must be a vector with type") intro <- layout_type(intro, type) outro <- paste0("Instead, it has type") outro <- layout_type(outro, x) paste_line( !!!intro, if (str_is_multiline(intro)) "", !!!outro ) } layout_type <- function(start, type) { if (str_is_multiline(type)) { paste_line( paste0(start, ":"), "", paste0(" ", indent(type, 2)) ) } else { paste0(start, " ", type, ".") } } vctrs/R/order.R0000644000176200001440000000457214042540502013056 0ustar liggesusers#' Order and sort vectors #' #' @param x A vector #' @param direction Direction to sort in. Defaults to `asc`ending. #' @param na_value Should `NA`s be treated as the largest or smallest values? #' @return #' * `vec_order()` an integer vector the same size as `x`. #' * `vec_sort()` a vector with the same size and type as `x`. #' #' @section Dependencies of `vec_order()`: #' * [vec_proxy_order()] #' #' @section Dependencies of `vec_sort()`: #' * [vec_proxy_order()] #' * [vec_order()] #' * [vec_slice()] #' @export #' @examples #' x <- round(c(runif(9), NA), 3) #' vec_order(x) #' vec_sort(x) #' vec_sort(x, "desc") #' #' # Can also handle data frames #' df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) #' vec_order(df) #' vec_sort(df) #' vec_sort(df, "desc") vec_order <- function(x, direction = c("asc", "desc"), na_value = c("largest", "smallest")) { direction <- arg_match0(direction, c("asc", "desc")) na_value <- arg_match0(na_value, c("largest", "smallest")) order_proxy(vec_proxy_order(x), direction = direction, na_value = na_value) } #' @export #' @rdname vec_order vec_sort <- function(x, direction = c("asc", "desc"), na_value = c("largest", "smallest")) { direction <- arg_match0(direction, c("asc", "desc")) na_value <- arg_match0(na_value, c("largest", "smallest")) idx <- vec_order(x, direction = direction, na_value = na_value) vec_slice(x, idx) } order_proxy <- function(proxy, direction = "asc", na_value = "largest") { decreasing <- !identical(direction, "asc") na.last <- identical(na_value, "largest") if (decreasing) { na.last <- !na.last } if (is.data.frame(proxy)) { # Work around type-instability in `base::order()` if (vec_size(proxy) == 0L) { return(integer(0L)) } args <- map(unstructure(proxy), function(.x) { if (is.data.frame(.x)) { .x <- order(vec_order(.x, direction = direction, na_value = na_value)) } .x }) exec("order", !!!args, decreasing = decreasing, na.last = na.last) } else if (is_character(proxy) || is_logical(proxy) || is_integer(proxy) || is_double(proxy) || is.complex(proxy)) { if (is.object(proxy)) { proxy <- unstructure(proxy) } order(proxy, decreasing = decreasing, na.last = na.last) } else { abort("Invalid type returned by `vec_proxy_compare()`.") } } vctrs/R/type-data-frame.R0000644000176200001440000003510014027045462014722 0ustar liggesusers#' Assemble attributes for data frame construction #' #' `new_data_frame()` constructs a new data frame from an existing list. It is #' meant to be performant, and does not check the inputs for correctness in any #' way. It is only safe to use after a call to [df_list()], which collects and #' validates the columns used to construct the data frame. #' #' @seealso #' [df_list()] for a way to safely construct a data frame's underlying #' data structure from individual columns. This can be used to create a #' named list for further use by `new_data_frame()`. #' #' @param x A named list of equal-length vectors. The lengths are not #' checked; it is responsibility of the caller to make sure they are #' equal. #' @param n Number of rows. If `NULL`, will be computed from the length of #' the first element of `x`. #' @param ...,class Additional arguments for creating subclasses. #' The `"names"` and `"row.names"` attributes override input in `x` and `n`, #' respectively: #' #' - `"names"` is used if provided, overriding existing names in `x` #' - `"row.names"` is used if provided, if `n` is provided it must be #' consistent. #' #' @export #' @examples #' new_data_frame(list(x = 1:10, y = 10:1)) new_data_frame <- function(x = list(), n = NULL, ..., class = NULL) { .External(vctrs_new_data_frame, x, n, class, ...) } new_data_frame <- fn_inline_formals(new_data_frame, "x") #' Collect columns for data frame construction #' #' `df_list()` constructs the data structure underlying a data #' frame, a named list of equal-length vectors. It is often used in #' combination with [new_data_frame()] to safely and consistently create #' a helper function for data frame subclasses. #' #' @section Properties: #' #' - Inputs are recycled to a common size with [vec_recycle_common()]. #' #' - With the exception of data frames, inputs are not modified in any way. #' Character vectors are never converted to factors, and lists are stored #' as-is for easy creation of list-columns. #' #' - Unnamed data frame inputs are automatically spliced. Named data frame #' inputs are stored unmodified as data frame columns. #' #' - `NULL` inputs are completely ignored. #' #' - The dots are dynamic, allowing for splicing of lists with `!!!` and #' unquoting. #' #' @seealso #' [new_data_frame()] for constructing data frame subclasses from a validated #' input. [data_frame()] for a fast data frame creation helper. #' #' @param ... Vectors of equal-length. When inputs are named, those names #' are used for names of the resulting list. #' @param .size The common size of vectors supplied in `...`. If `NULL`, this #' will be computed as the common size of the inputs. #' @param .name_repair One of `"check_unique"`, `"unique"`, `"universal"` or #' `"minimal"`. See [vec_as_names()] for the meaning of these options. #' #' @export #' @examples #' # `new_data_frame()` can be used to create custom data frame constructors #' new_fancy_df <- function(x = list(), n = NULL, ..., class = NULL) { #' new_data_frame(x, n = n, ..., class = c(class, "fancy_df")) #' } #' #' # Combine this constructor with `df_list()` to create a safe, #' # consistent helper function for your data frame subclass #' fancy_df <- function(...) { #' data <- df_list(...) #' new_fancy_df(data) #' } #' #' df <- fancy_df(x = 1) #' class(df) df_list <- function(..., .size = NULL, .name_repair = c("check_unique", "unique", "universal", "minimal")) { .Call(vctrs_df_list, list2(...), .size, .name_repair) } df_list <- fn_inline_formals(df_list, ".name_repair") #' Construct a data frame #' #' @description #' `data_frame()` constructs a data frame. It is similar to #' [base::data.frame()], but there are a few notable differences that make it #' more in line with vctrs principles. The Properties section outlines these. #' #' @details #' If no column names are supplied, `""` will be used as a default for all #' columns. This is applied before name repair occurs, so the default #' name repair of `"check_unique"` will error if any unnamed inputs #' are supplied and `"unique"` will repair the empty string column names #' appropriately. If the column names don't matter, use a `"minimal"` name #' repair for convenience and performance. #' #' @inheritSection df_list Properties #' #' @seealso #' [df_list()] for safely creating a data frame's underlying data structure from #' individual columns. [new_data_frame()] for constructing the actual data #' frame from that underlying data structure. Together, these can be useful #' for developers when creating new data frame subclasses supporting #' standard evaluation. #' #' @param ... Vectors to become columns in the data frame. When inputs are #' named, those names are used for column names. #' @param .size The number of rows in the data frame. If `NULL`, this will #' be computed as the common size of the inputs. #' @param .name_repair One of `"check_unique"`, `"unique"`, `"universal"` or #' `"minimal"`. See [vec_as_names()] for the meaning of these options. #' #' @export #' @examples #' data_frame(x = 1, y = 2) #' #' # Inputs are recycled using tidyverse recycling rules #' data_frame(x = 1, y = 1:3) #' #' # Strings are never converted to factors #' class(data_frame(x = "foo")$x) #' #' # List columns can be easily created #' df <- data_frame(x = list(1:2, 2, 3:4), y = 3:1) #' #' # However, the base print method is suboptimal for displaying them, #' # so it is recommended to convert them to tibble #' if (rlang::is_installed("tibble")) { #' tibble::as_tibble(df) #' } #' #' # Named data frame inputs create data frame columns #' df <- data_frame(x = data_frame(y = 1:2, z = "a")) #' #' # The `x` column itself is another data frame #' df$x #' #' # Again, it is recommended to convert these to tibbles for a better #' # print method #' if (rlang::is_installed("tibble")) { #' tibble::as_tibble(df) #' } #' #' # Unnamed data frame input is automatically spliced #' data_frame(x = 1, data_frame(y = 1:2, z = "a")) data_frame <- function(..., .size = NULL, .name_repair = c("check_unique", "unique", "universal", "minimal")) { .Call(vctrs_data_frame, list2(...), .size, .name_repair) } data_frame <- fn_inline_formals(data_frame, ".name_repair") #' @export vec_ptype_full.data.frame <- function(x, ...) { if (length(x) == 0) { return(paste0(class(x)[[1]], "<>")) } else if (length(x) == 1) { return(paste0(class(x)[[1]], "<", names(x), ":", vec_ptype_full(x[[1]]), ">")) } # Needs to handle recursion with indenting types <- map_chr(x, vec_ptype_full) needs_indent <- grepl("\n", types) types[needs_indent] <- map(types[needs_indent], function(x) indent(paste0("\n", x), 4)) names <- paste0(" ", format(names(x))) paste0( class(x)[[1]], "<\n", paste0(names, ": ", types, collapse = "\n"), "\n>" ) } #' @export vec_ptype_abbr.data.frame <- function(x, ...) { if (inherits_only(x, "data.frame")) { "df" } else { class(x)[[1]] } } #' @export vec_proxy_equal.data.frame <- function(x, ...) { df_proxy(x, VCTRS_PROXY_KIND_equal) } #' @export vec_proxy_compare.data.frame <- function(x, ...) { df_proxy(x, VCTRS_PROXY_KIND_compare) } #' @export vec_proxy_order.data.frame <- function(x, ...) { df_proxy(x, VCTRS_PROXY_KIND_order) } # Keep in sync with `enum vctrs_proxy_kind` in `vctrs.h` VCTRS_PROXY_KIND_default <- 0L VCTRS_PROXY_KIND_equal <- 1L VCTRS_PROXY_KIND_compare <- 2L VCTRS_PROXY_KIND_order <- 3L df_proxy <- function(x, kind) { .Call(vctrs_df_proxy, x, kind) } df_is_coercible <- function(x, y, opts) { vec_is_coercible( new_data_frame(x), new_data_frame(y), opts = opts ) } # Coercion ---------------------------------------------------------------- #' Coercion between two data frames #' #' `df_ptype2()` and `df_cast()` are the two functions you need to #' call from `vec_ptype2()` and `vec_cast()` methods for data frame #' subclasses. See [?howto-faq-coercion-data-frame][howto-faq-coercion-data-frame]. #' Their main job is to determine the common type of two data frames, #' adding and coercing columns as needed, or throwing an incompatible #' type error when the columns are not compatible. #' #' @param x,y,to Subclasses of data frame. #' @param ... If you call `df_ptype2()` or `df_cast()` from a #' `vec_ptype2()` or `vec_cast()` method, you must forward the dots #' passed to your method on to `df_ptype2()` or `df_cast()`. #' @inheritParams vec_ptype2 #' @inheritParams vec_cast #' #' @return #' * When `x` and `y` are not compatible, an error of class #' `vctrs_error_incompatible_type` is thrown. #' * When `x` and `y` are compatible, `df_ptype2()` returns the common #' type as a bare data frame. `tib_ptype2()` returns the common type #' as a bare tibble. #' #' @export df_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { .Call(vctrs_df_ptype2_opts, x, y, opts = match_fallback_opts(...), x_arg, y_arg) } #' @rdname df_ptype2 #' @export df_cast <- function(x, to, ..., x_arg = "", to_arg = "") { .Call(vctrs_df_cast_opts, x, to, opts = match_fallback_opts(...), x_arg, to_arg) } df_ptype2_opts <- function(x, y, ..., opts, x_arg = "", y_arg = "") { .Call(vctrs_df_ptype2_opts, x, y, opts = opts, x_arg, y_arg) } df_cast_opts <- function(x, to, ..., opts = fallback_opts(), x_arg = "", to_arg = "") { .Call(vctrs_df_cast_opts, x, to, opts, x_arg, to_arg) } df_cast_params <- function(x, to, ..., x_arg = "", to_arg = "", df_fallback = NULL, s3_fallback = NULL) { opts <- fallback_opts( df_fallback = df_fallback, s3_fallback = s3_fallback ) df_cast_opts(x, to, opts = opts, x_arg = x_arg, to_arg = to_arg) } #' vctrs methods for data frames #' #' These functions help the base data.frame class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name vctrs-data-frame NULL #' @rdname vctrs-data-frame #' @export vec_ptype2.data.frame #' @method vec_ptype2 data.frame #' @export vec_ptype2.data.frame <- function(x, y, ...) { UseMethod("vec_ptype2.data.frame") } #' @method vec_ptype2.data.frame data.frame #' @export vec_ptype2.data.frame.data.frame <- function(x, y, ...) { df_ptype2(x, y, ...) } vec_ptype2_df_fallback_normalise <- function(x, y, opts) { x_orig <- x y_orig <- y ptype <- df_ptype2_opts(x, y, opts = opts) x <- x[0, , drop = FALSE] y <- y[0, , drop = FALSE] x[seq_along(ptype)] <- ptype y[seq_along(ptype)] <- ptype # Names might have been repaired by `[<-` names(x) <- names(ptype) names(y) <- names(ptype) # Restore attributes if no `[` method is implemented if (df_has_base_subset(x)) { x <- vec_restore(x, x_orig) } if (df_has_base_subset(y)) { y <- vec_restore(y, y_orig) } list(x = x, y = y) } vec_cast_df_fallback_normalise <- function(x, to, opts) { orig <- x cast <- df_cast_opts(x, to, opts = opts) # Seq-assign should be more widely implemented than empty-assign? x[seq_along(to)] <- cast # Names might have been repaired by `[<-` names(x) <- names(cast) # Restore attributes if no `[` method is implemented if (df_has_base_subset(x)) { x <- vec_restore(x, orig) } x } df_needs_normalisation <- function(x, y, opts) { is.data.frame(x) && is.data.frame(y) && df_is_coercible(x, y, opts) } # Fallback for data frame subclasses (#981) vec_ptype2_df_fallback <- function(x, y, opts, x_arg = "", y_arg = "") { seen_tibble <- inherits(x, "tbl_df") || inherits(y, "tbl_df") ptype <- vec_ptype2_params( new_data_frame(x), new_data_frame(y), df_fallback = opts$df_fallback, s3_fallback = opts$s3_fallback ) classes <- NULL if (is_df_fallback(x)) { classes <- c(classes, known_classes(x)) x <- df_fallback_as_df(x) } if (is_df_fallback(y)) { classes <- c(classes, known_classes(y)) y <- df_fallback_as_df(y) } x_class <- class(x)[[1]] y_class <- class(y)[[1]] if (needs_fallback_warning(opts$df_fallback) && !all(c(x_class, y_class) %in% c(classes, "tbl_df"))) { fallback_class <- if (seen_tibble) "" else "" msg <- cnd_type_message( x, y, x_arg, y_arg, NULL, "combine", NULL, fallback = fallback_class ) if (identical(x_class, y_class)) { msg <- c( msg, incompatible_attrib_bullets() ) } warn(msg) } # Return a fallback class so we don't warn multiple times. This # fallback class is stripped in `vec_ptype_finalise()`. new_fallback_df( ptype, known_classes = unique(c(classes, x_class, y_class)), seen_tibble = seen_tibble ) } is_df_subclass <- function(x) { inherits(x, "data.frame") && !identical(class(x), "data.frame") } is_df_fallback <- function(x) { inherits(x, "vctrs:::df_fallback") } new_fallback_df <- function(x, known_classes, seen_tibble = FALSE, n = nrow(x)) { class <- "vctrs:::df_fallback" if (seen_tibble) { class <- c(class, "tbl_df", "tbl") } new_data_frame( x, n = n, known_classes = known_classes, seen_tibble = seen_tibble, class = class ) } df_fallback_as_df <- function(x) { if (inherits(x, "tbl_df")) { new_data_frame(x, class = c("tbl_df", "tbl", "data.frame")) } else { new_data_frame(x) } } known_classes <- function(x) { if (is_df_fallback(x)) { attr(x, "known_classes") } } # Cast -------------------------------------------------------------------- #' @rdname vctrs-data-frame #' @export vec_cast.data.frame #' @method vec_cast data.frame #' @export vec_cast.data.frame <- function(x, to, ...) { UseMethod("vec_cast.data.frame") } #' @export #' @method vec_cast.data.frame data.frame vec_cast.data.frame.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { df_cast(x, to, x_arg = x_arg, to_arg = to_arg) } #' @export vec_restore.data.frame <- function(x, to, ..., n = NULL) { .Call(vctrs_bare_df_restore, x, to, n) } # Helpers ----------------------------------------------------------------- df_size <- function(x) { .Call(vctrs_df_size, x) } df_lossy_cast <- function(out, x, to, ..., x_arg = "", to_arg = "") { extra <- setdiff(names(x), names(to)) maybe_lossy_cast( result = out, x = x, to = to, lossy = length(extra) > 0, locations = int(), x_arg = x_arg, to_arg = to_arg, details = inline_list("Dropped variables: ", extra, quote = "`"), class = "vctrs_error_cast_lossy_dropped" ) } is_informative_error.vctrs_error_cast_lossy_dropped <- function(x, ...) { FALSE } vctrs/R/type-unspecified.R0000644000176200001440000000176013762412012015216 0ustar liggesusers#' A 1d vector of unspecified type #' #' This is a [partial type][new_partial] used to represent logical vectors #' that only contain `NA`. These require special handling because we want to #' allow `NA` to specify missingness without requiring a type. #' #' @keywords internal #' @param n Length of vector #' @export #' @examples #' vec_ptype_show() #' vec_ptype_show(NA) #' #' vec_c(NA, factor("x")) #' vec_c(NA, Sys.Date()) #' vec_c(NA, Sys.time()) #' vec_c(NA, list(1:3, 4:5)) unspecified <- function(n = 0) { .Call(vctrs_unspecified, n) } #' @export `[.vctrs_unspecified` <- function(x, i, ...) { unspecified(length(NextMethod())) } #' @export print.vctrs_unspecified <- function(x, ...) { cat(" [", length(x), "]\n", sep = "") } #' @export vec_ptype_abbr.vctrs_unspecified <- function(x, ...) { "???" } is_unspecified <- function(x) { .Call(vctrs_is_unspecified, x) } ununspecify <- function(x) { if (is_unspecified(x)) { new_logical(length(x)) } else { x } } vctrs/R/vctrs-deprecated.R0000644000176200001440000000533513766452345015223 0ustar liggesusers#' Is a vector empty #' #' @description #' #' `r lifecycle::badge("defunct")` #' #' This function is defunct, please use [vec_is_empty()]. #' #' @param x An object. #' #' @keywords internal #' @export vec_empty <- function(x) { stop_defunct(paste_line( "`vec_empty()` is defunct as of vctrs 0.2.0.", "Please use `vec_is_empty()` instead." )) } #' Deprecated type functions #' #' @description #' #' `r lifecycle::badge("deprecated")` #' #' These functions have been renamed: #' #' * `vec_type()` => [vec_ptype()] #' * `vec_type2()` => [vec_ptype2()] #' * `vec_type_common()` => [vec_ptype_common()] #' #' @param x,y,...,.ptype Arguments for deprecated functions. #' #' @keywords internal #' @export vec_type <- function(x) { warn_deprecated(c("`vec_type()` has been renamed to `vec_ptype()`.")) vec_ptype(x) } #' @rdname vec_type #' @export vec_type_common <- function(..., .ptype = NULL) { warn_deprecated(c("`vec_type_common()` has been renamed to `vec_ptype_common()`.")) vec_ptype_common(..., .ptype = .ptype) } #' @rdname vec_type #' @export vec_type2 <- function(x, y, ...) { warn_deprecated(c("`vec_type2()` has been renamed to `vec_ptype2()`.")) vec_ptype2(x, y, ...) } #' Convert to an index vector #' #' @description #' #' `r lifecycle::badge("deprecated")` #' #' `vec_as_index()` has been renamed to [vec_as_location()] and is #' deprecated as of vctrs 0.2.2. #' #' @inheritParams vec_as_location #' #' @keywords internal #' @export vec_as_index <- function(i, n, names = NULL) { signal_soft_deprecated(paste_line( "`vec_as_index()` is deprecated as of vctrs 0.2.2.", "Please use `vec_as_location() instead.`" )) n <- vec_cast(n, integer()) vec_assert(n, integer(), 1L) i <- vec_as_subscript(i) .Call( vctrs_as_location, i = i, n = n, names = names, loc_negative = "invert", loc_oob = "error", loc_zero = "remove", missing = "propagate", arg = NULL ) } #' Expand the length of a vector #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `vec_repeat()` has been replaced with [vec_rep()] and [vec_rep_each()] and is #' deprecated as of vctrs 0.3.0. #' #' @param x A vector. #' @param each Number of times to repeat each element of `x`. #' @param times Number of times to repeat the whole vector of `x`. #' @return A vector the same type as `x` with size `vec_size(x) * times * each`. #' @keywords internal #' @export vec_repeat <- function(x, each = 1L, times = 1L) { signal_soft_deprecated(paste_line( "`vec_repeat()` is deprecated as of vctrs 0.3.0.", "Please use either `vec_rep()` or `vec_rep_each()` instead." )) vec_assert(each, size = 1L) vec_assert(times, size = 1L) idx <- rep(vec_seq_along(x), times = times, each = each) vec_slice(x, idx) } vctrs/R/type-dplyr.R0000644000176200001440000000605013762412012014047 0ustar liggesusers # All methods in this file are conditionally registered in .onLoad() ### `grouped_df` ----------------------------------------------------- group_intersect <- function(x, new) { intersect(dplyr::group_vars(x), names(new)) } vec_restore.grouped_df <- function(x, to, ...) { vars <- group_intersect(to, x) drop <- dplyr::group_by_drop_default(to) dplyr::grouped_df(x, vars, drop = drop) } # `vec_ptype2()` ----------------------------------------------------- vec_ptype2.grouped_df.grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2.grouped_df.data.frame <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2.data.frame.grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2.grouped_df.tbl_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2.tbl_df.grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } gdf_ptype2 <- function(x, y, ...) { common <- df_ptype2(x, y, ...) x_vars <- dplyr::group_vars(x) y_vars <- dplyr::group_vars(y) vars <- union(x_vars, y_vars) drop <- dplyr::group_by_drop_default(x) && dplyr::group_by_drop_default(y) dplyr::grouped_df(common, vars, drop = drop) } # `vec_cast()` ------------------------------------------------------- vec_cast.grouped_df.grouped_df <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast.grouped_df.data.frame <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast.data.frame.grouped_df <- function(x, to, ...) { df_cast(x, to, ...) } vec_cast.grouped_df.tbl_df <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast.tbl_df.grouped_df <- function(x, to, ...) { tib_cast(x, to, ...) } gdf_cast <- function(x, to, ...) { df <- df_cast(x, to, ...) vars <- dplyr::group_vars(to) drop <- dplyr::group_by_drop_default(to) dplyr::grouped_df(df, vars, drop = drop) } ### `rowwise` -------------------------------------------------------- vec_restore.rowwise_df <- function(x, to, ...) { dplyr::rowwise(x) } # `vec_ptype2()` ----------------------------------------------------- vec_ptype2.rowwise_df.rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2.rowwise_df.data.frame <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2.data.frame.rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2.rowwise_df.tbl_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2.tbl_df.rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } rww_ptype2 <- function(x, y, ...) { dplyr::rowwise(df_ptype2(x, y, ...)) } # `vec_cast()` ------------------------------------------------------- vec_cast.rowwise_df.rowwise_df <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast.rowwise_df.data.frame <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast.data.frame.rowwise_df <- function(x, to, ...) { df_cast(x, to, ...) } vec_cast.rowwise_df.tbl_df <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast.tbl_df.rowwise_df <- function(x, to, ...) { tib_cast(x, to, ...) } rww_cast <- function(x, to, ...) { dplyr::rowwise(df_cast(x, to, ...)) } vctrs/R/shape.R0000644000176200001440000000415513762412012013042 0ustar liggesusers# The dimensionality of an matrix/array is partition into two parts: # * the first dimension = the number of observations # * all other dimensions = the shape parameter of the type # These helpers work with the shape parameter new_shape <- function(type, shape = integer()) { structure(type, dim = c(0L, shape)) } vec_shaped_ptype <- function(ptype, x, y, ..., x_arg = "", y_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } .Call(vctrs_shaped_ptype, ptype, x, y, x_arg, y_arg) } vec_shape2 <- function(x, y, ..., x_arg = "", y_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } .Call(vctrs_shape2, x, y, x_arg, y_arg) } # Should take same signature as `vec_cast()` shape_broadcast <- function(x, to, ..., x_arg, to_arg) { if (is.null(x) || is.null(to)) { return(x) } dim_x <- vec_dim(x) dim_to <- vec_dim(to) # Don't set dimensions for vectors if (length(dim_x) == 1L && length(dim_to) == 1L) { return(x) } if (length(dim_x) > length(dim_to)) { stop_incompatible_cast( x, to, details = "Cannot decrease dimensions.", x_arg = x_arg, to_arg = to_arg ) } dim_x <- n_dim2(dim_x, dim_to)$x dim_to[[1]] <- dim_x[[1]] # don't change number of observations ok <- dim_x == dim_to | dim_x == 1 if (any(!ok)) { stop_incompatible_cast( x, to, details = "Non-recyclable dimensions.", x_arg = x_arg, to_arg = to_arg ) } # Increase dimensionality if required if (vec_dim_n(x) != length(dim_x)) { dim(x) <- dim_x } recycle <- dim_x != dim_to # Avoid expensive subset if (all(!recycle)) { return(x) } indices <- rep(list(missing_arg()), length(dim_to)) indices[recycle] <- map(dim_to[recycle], rep_len, x = 1L) eval_bare(expr(x[!!!indices, drop = FALSE])) } # Helpers ----------------------------------------------------------------- n_dim2 <- function(x, y) { nx <- length(x) ny <- length(y) if (nx == ny) { list(x = x, y = y) } else if (nx < ny) { list(x = c(x, rep(1L, ny - nx)), y = y) } else { list(x = x, y = c(y, rep(1L, nx - ny))) } } vctrs/R/type-table.R0000644000176200001440000000323713762412012014010 0ustar liggesusers#' Table S3 class #' #' These functions help the base table class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name table NULL #' @export vec_restore.table <- function(x, to, ...) { new_table(x, dim = dim(x), dimnames = dimnames(x)) } # Print ------------------------------------------------------------------- #' @export vec_ptype_full.table <- function(x, ...) { paste0("table", vec_ptype_shape(x)) } #' @export vec_ptype_abbr.table <- function(x, ...) { "table" } # Coercion ---------------------------------------------------------------- #' @export vec_ptype2.table.table <- function(x, y, ..., x_arg = "", y_arg = "") { ptype <- vec_ptype2(unstructure(x), unstructure(y)) vec_shaped_ptype(new_table(ptype), x, y, x_arg = x_arg, y_arg = y_arg) } #' @export vec_cast.table.table <- function(x, to, ...) { out <- vec_cast(unstructure(x), unstructure(to)) out <- new_table(out, dim = dim(x), dimnames = dimnames(x)) shape_broadcast(out, to, ...) } # ------------------------------------------------------------------------------ new_table <- function(x = integer(), dim = NULL, dimnames = NULL) { if (is_null(dim)) { dim <- length(x) } else if (!is.integer(dim)) { abort("`dim` must be an integer vector.") } dimnames <- dimnames %||% vec_init(list(), length(dim)) n_elements <- prod(dim) n_x <- length(x) if (n_elements != n_x) { abort(glue::glue( "Length implied by `dim`, {n_elements}, must match the length of `x`, {n_x}." )) } structure(x, dim = dim, dimnames = dimnames, class = "table") } is_bare_table <- function(x) { identical(class(x), "table") } vctrs/R/type-bare.R0000644000176200001440000002430713762412012013633 0ustar liggesusers# Type2 ------------------------------------------------------------------- # Left generics ----------------------------------------------------------- #' @rdname vec_ptype2 #' @export vec_ptype2.logical #' @method vec_ptype2 logical #' @export vec_ptype2.logical <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.logical") } #' @rdname vec_ptype2 #' @export vec_ptype2.integer #' @method vec_ptype2 integer #' @export vec_ptype2.integer <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.integer") } #' @rdname vec_ptype2 #' @export vec_ptype2.double #' @method vec_ptype2 double #' @export vec_ptype2.double <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.double") } #' @rdname vec_ptype2 #' @export vec_ptype2.complex #' @method vec_ptype2 complex #' @export vec_ptype2.complex <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.complex") } #' @rdname vec_ptype2 #' @export vec_ptype2.character #' @method vec_ptype2 character #' @export vec_ptype2.character <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.character") } #' @rdname vec_ptype2 #' @export vec_ptype2.raw #' @method vec_ptype2 raw #' @export vec_ptype2.raw <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.raw") } #' @rdname vec_ptype2 #' @export vec_ptype2.list #' @method vec_ptype2 list #' @export vec_ptype2.list <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.list") } # Numeric-ish #' @method vec_ptype2.logical logical #' @export vec_ptype2.logical.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.logical") } #' @export #' @method vec_ptype2.integer integer vec_ptype2.integer.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.integer") } #' @export #' @method vec_ptype2.logical integer vec_ptype2.logical.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.integer") } #' @export #' @method vec_ptype2.integer logical vec_ptype2.integer.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.logical") } #' @export #' @method vec_ptype2.double double vec_ptype2.double.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.double") } #' @export #' @method vec_ptype2.logical double vec_ptype2.logical.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.double") } #' @export #' @method vec_ptype2.double logical vec_ptype2.double.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.logical") } #' @export #' @method vec_ptype2.integer double vec_ptype2.integer.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.double") } #' @export #' @method vec_ptype2.double integer vec_ptype2.double.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.integer") } #' @export #' @method vec_ptype2.complex complex vec_ptype2.complex.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.complex") } #' @export #' @method vec_ptype2.integer complex vec_ptype2.integer.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.complex") } #' @export #' @method vec_ptype2.complex integer vec_ptype2.complex.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.integer") } #' @export #' @method vec_ptype2.double complex vec_ptype2.double.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.complex") } #' @export #' @method vec_ptype2.complex double vec_ptype2.complex.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.double") } # Character #' @method vec_ptype2.character character #' @export vec_ptype2.character.character <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.character.character") } # Raw #' @export #' @method vec_ptype2.raw raw vec_ptype2.raw.raw <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.raw.raw") } # Lists #' @method vec_ptype2.list list #' @export vec_ptype2.list.list <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.list.list") } # Cast -------------------------------------------------------------------- # These methods for base types are handled at the C level unless # inputs have shape or have lossy casts #' @export #' @rdname vec_cast #' @export vec_cast.logical #' @method vec_cast logical vec_cast.logical <- function(x, to, ...) { UseMethod("vec_cast.logical") } #' @export #' @method vec_cast.logical logical vec_cast.logical.logical <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.logical integer vec_cast.logical.integer <- function(x, to, ..., x_arg = "", to_arg = "") { out <- vec_coerce_bare(x, "logical") out <- shape_broadcast(out, to, x_arg = x_arg, to_arg = to_arg) lossy <- !x %in% c(0L, 1L, NA_integer_) maybe_lossy_cast(out, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } #' @export #' @method vec_cast.logical double vec_cast.logical.double <- function(x, to, ..., x_arg = "", to_arg = "") { out <- vec_coerce_bare(x, "logical") out <- shape_broadcast(out, to, x_arg = x_arg, to_arg = to_arg) lossy <- !x %in% c(0, 1, NA_real_) maybe_lossy_cast(out, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } #' @export #' @rdname vec_cast #' @export vec_cast.integer #' @method vec_cast integer vec_cast.integer <- function(x, to, ...) { UseMethod("vec_cast.integer") } #' @export #' @method vec_cast.integer logical vec_cast.integer.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "integer") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.integer integer vec_cast.integer.integer <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.integer double vec_cast.integer.double <- function(x, to, ..., x_arg = "", to_arg = "") { out <- suppressWarnings(vec_coerce_bare(x, "integer")) x_na <- is.na(x) lossy <- (out != x & !x_na) | xor(x_na, is.na(out)) out <- shape_broadcast(out, to, x_arg = x_arg, to_arg = to_arg) maybe_lossy_cast(out, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } #' @export #' @rdname vec_cast #' @export vec_cast.double #' @method vec_cast double vec_cast.double <- function(x, to, ...) { UseMethod("vec_cast.double") } #' @export #' @method vec_cast.double logical vec_cast.double.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "double") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.double integer vec_cast.double.integer <- vec_cast.double.logical #' @export #' @method vec_cast.double double vec_cast.double.double <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.complex #' @method vec_cast complex vec_cast.complex <- function(x, to, ...) { UseMethod("vec_cast.complex") } #' @export #' @method vec_cast.complex logical vec_cast.complex.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "complex") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.complex integer vec_cast.complex.integer <- vec_cast.complex.logical #' @export #' @method vec_cast.complex double vec_cast.complex.double <- vec_cast.complex.logical #' @export #' @method vec_cast.complex complex vec_cast.complex.complex <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.raw #' @method vec_cast raw vec_cast.raw <- function(x, to, ...) { UseMethod("vec_cast.raw") } #' @export #' @method vec_cast.raw raw vec_cast.raw.raw <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.character #' @method vec_cast character vec_cast.character <- function(x, to, ...) { UseMethod("vec_cast.character") } #' @export #' @method vec_cast.character character vec_cast.character.character <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @rdname vec_cast #' @export vec_cast.list #' @method vec_cast list #' @export vec_cast.list <- function(x, to, ...) { UseMethod("vec_cast.list") } #' @export #' @method vec_cast.list list vec_cast.list.list <- function(x, to, ...) { shape_broadcast(x, to, ...) } # equal -------------------------------------------------------------- #' @export vec_proxy_equal.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_equal(x) } # compare ------------------------------------------------------------ #' @export vec_proxy_compare.raw <- function(x, ...) { # because: # order(as.raw(1:3)) # #> Error in order(as.raw(1:3)): unimplemented type 'raw' in 'orderVector1' as.integer(x) } #' @export vec_proxy_compare.list <- function(x, ...) { stop_unsupported(x, "vec_proxy_compare") } #' @export vec_proxy_compare.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_compare(x) } # order ------------------------------------------------------------ #' @export vec_proxy_order.raw <- function(x, ...) { # Can't rely on fallthrough behavior to `vec_proxy_compare()` because this # isn't an S3 object. Have to call it manually. vec_proxy_compare(x) } #' @export vec_proxy_order.list <- function(x, ...) { # Order lists by first appearance. # This allows list elements to be grouped in `vec_order()`. vec_duplicate_id(x) } #' @export vec_proxy_order.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_order(x) } vctrs/R/utils-cli.R0000644000176200001440000000214713762412012013646 0ustar liggesusers parens <- function(x, left = TRUE) { x_lines <- strsplit(x, "\n") x_lines <- map(x_lines, paren, left = left) map_chr(x_lines, paste0, collapse = "\n") } paren <- function(x, left = TRUE) { if (length(x) <= 1) { if (left) { paste0("( ", x) } else { paste0(x, " )") } } else { if (left) { paste0(c("\u250c ", rep("\u2502 ", length(x) - 2), "\u2514 "), x) } else { paste0(format(x), c(" \u2510", rep(" \u2502", length(x) - 2), " \u2518")) } } } pad_height <- function(x) { pad <- function(x, n) c(x, rep("", n - length(x))) lines <- strsplit(x, "\n") height <- max(map_int(lines, length)) lines <- map(lines, pad, height) map_chr(lines, paste0, "\n", collapse = "") } pad_width <- function(x) { lines <- strsplit(x, "\n", fixed = TRUE) # fix up strsplit bug n <- map_int(lines, length) lines[n == 0] <- "" width <- max(unlist(map(lines, nchar))) lines <- map(lines, format, width = width) map_chr(lines, paste, collapse = "\n") } str_backtick <- function(x) { paste0("`", x, "`") } str_is_multiline <- function(x) { grepl("\n", x) } vctrs/R/order-radix.R0000644000176200001440000001320514042540502014154 0ustar liggesusers#' Order and sort vectors #' #' @description #' `vec_order()` computes the order of `x`. For data frames, the order is #' computed along the rows by computing the order of the first column and #' using subsequent columns to break ties. #' #' `vec_sort()` sorts `x` by computing its order and using `vec_slice()` to #' rearrange. #' #' @details #' Character vectors are ordered in the C-locale. This is different from #' `base::order()`, which respects `base::Sys.setlocale()`. Sorting in a #' consistent locale can produce more reproducible results between different #' sessions and platforms, however, the results of sorting in the C-locale #' can be surprising. For example, capital letters sort before lower case #' letters. Sorting `c("b", "C", "a")` with `vec_sort()` will return #' `c("C", "a", "b")`, but with `base::order()` will return `c("a", "b", "C")` #' unless `base::order(method = "radix")` is explicitly set, which also uses #' the C-locale. While sorting with the C-locale can be useful for #' algorithmic efficiency, in many real world uses it can be the cause of #' data analysis mistakes. To balance these trade-offs, you can supply a #' `chr_transform` to transform character vectors into an alternative #' representation that orders in the C-locale in a less surprising way. For #' example, providing [base::tolower()] as a transform will order the original #' vector in a case-insensitive manner. Locale-aware ordering can be achieved #' by providing `stringi::stri_sort_key()` as a transform, setting the #' collation options as appropriate for your locale. #' #' Character vectors are always translated to UTF-8 before ordering, and before #' any transform is applied by `chr_transform`. #' #' @param x A vector #' @param direction Direction to sort in. #' - A single `"asc"` or `"desc"` for ascending or descending order #' respectively. #' - For data frames, a length `1` or `ncol(x)` character vector containing #' only `"asc"` or `"desc"`, specifying the direction for each column. #' @param na_value Treatment of `NA` values. `NaN` values are treated as #' equivalent to `NA` values. #' - A single `"largest"` or `"smallest"` for treating `NA` values as the #' largest or smallest values respectively. #' - For data frames, a length `1` or `ncol(x)` character vector containing #' only `"largest"` or `"smallest"`, specifying how `NA`s should be treated #' in each column. #' @param chr_transform Transformation of character vectors for sorting in #' alternate locales. #' - If `NULL`, no transformation is done. #' - Otherwise, this must be a function of one argument. The function will be #' invoked with `x`, if it is a character vector, after it has been #' translated to UTF-8, and should return a character vector with the same #' length as `x`, also encoded as UTF-8. #' - For data frames, `chr_transform` will be applied to all character #' columns. #' #' Common transformation functions include: `tolower()` for case-insensitive #' ordering and `stringi::str_sort_key()` for locale-aware ordering. See the #' Details section for more information. #' @return #' * `vec_order()` an integer vector the same size as `x`. #' * `vec_sort()` a vector with the same size and type as `x`. #' #' @section Dependencies of `vec_order()`: #' * [vec_proxy_order()] #' #' @section Dependencies of `vec_sort()`: #' * [vec_order()] #' * [vec_slice()] #' @examples #' x <- round(sample(runif(5), 9, replace = TRUE), 3) #' x <- c(x, NA) #' #' vec_order(x) #' vec_sort(x) #' vec_sort(x, "desc") #' #' # Can also handle data frames #' df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) #' vec_order(df) #' vec_sort(df) #' vec_sort(df, "desc") #' #' # For data frames, `direction` and `na_value` are allowed to be vectors #' # with length equal to the number of columns in the data frame #' vec_sort( #' df, #' direction = c("desc", "asc"), #' na_value = c("largest", "smallest") #' ) #' #' # Character vectors are ordered in the C locale, which orders capital letters #' # below lowercase ones #' y <- c("B", "A", "a") #' vec_sort(y) #' #' # To order in a case-insensitive manner, provide a `chr_transform` that #' # transforms the strings to all lowercase #' vec_sort(y, chr_transform = tolower) #' @noRd vec_order_radix <- function(x, direction = "asc", na_value = "largest", chr_transform = NULL) { .Call(vctrs_order, x, direction, na_value, chr_transform) } #' Identify ordered groups #' #' @description #' `r lifecycle::badge("experimental")` #' #' `vec_order_locs()` returns a data frame containing a `key` column with #' sorted unique groups, and a `loc` column with the locations of each #' group in `x`. It is similar to [vec_group_loc()], except the groups are #' returned sorted rather than by first appearance. #' #' @inheritParams vec_order #' #' @return #' A two column data frame with size equal to `vec_size(vec_unique(x))`. #' * A `key` column of type `vec_ptype(x)`. #' * A `loc` column of type list, with elements of type integer. #' #' @section Dependencies of `vec_order_locs()`: #' * [vec_proxy_order()] #' #' @examples #' df <- data.frame( #' g = sample(2, 10, replace = TRUE), #' x = c(NA, sample(5, 9, replace = TRUE)) #' ) #' #' # `vec_order_locs()` is similar to `vec_group_loc()`, except keys are #' # returned ordered rather than by first appearance. #' vec_order_locs(df) #' #' vec_group_loc(df) #' @noRd vec_order_locs <- function(x, direction = "asc", na_value = "largest", chr_transform = NULL) { .Call(vctrs_order_locs, x, direction, na_value, chr_transform) } vctrs/R/compat-friendly-type.R0000644000176200001440000000267114027045462016025 0ustar liggesusers# nocov start --- r-lib/rlang compat-friendly-type --- 2021-03-25 Thu 09:25 friendly_type_of <- function(x, length = FALSE) { if (is.object(x)) { if (inherits(x, "quosure")) { type <- "quosure" } else { type <- paste(class(x), collapse = "/") } return(sprintf("a <%s> object", type)) } friendly <- as_friendly_type(typeof(x)) if (length && rlang::is_vector(x)) { friendly <- paste0(friendly, sprintf(" of length %s", length(x))) } friendly } as_friendly_type <- function(type) { switch(type, logical = "a logical vector", integer = "an integer vector", numeric = , double = "a double vector", complex = "a complex vector", character = "a character vector", raw = "a raw vector", string = "a string", list = "a list", NULL = "NULL", environment = "an environment", externalptr = "a pointer", weakref = "a weak reference", S4 = "an S4 object", name = , symbol = "a symbol", language = "a call", pairlist = "a pairlist node", expression = "an expression vector", quosure = "a quosure", formula = "a formula", char = "an internal string", promise = "an internal promise", ... = "an internal dots object", any = "an internal `any` object", bytecode = "an internal bytecode object", primitive = , builtin = , special = "a primitive function", closure = "a function", type ) } # nocov end vctrs/R/recycle.R0000644000176200001440000000413313762412012013364 0ustar liggesusers#' Vector recycling #' #' `vec_recycle(x, size)` recycles a single vector to given size. #' `vec_recycle_common(...)` recycles multiple vectors to their common size. #' All functions obey the vctrs recycling rules, described below, and will #' throw an error if recycling is not possible. See [vec_size()] for the #' precise definition of size. #' #' @section Recycling rules: #' The common size of two vectors defines the recycling rules, and can be #' summarise with the following table: #' #' \figure{sizes-recycling.png} #' #' (Note `NULL`s are handled specially; they are treated like empty #' arguments and hence don't affect the size) #' #' This is a stricter set of rules than base R, which will usually #' return output of length `max(nx, ny)`, warning if the length of the longer #' vector is not an integer multiple of the length of the shorter. #' #' We say that two vectors have __compatible size__ if they can be #' recycled to be the same length. #' #' @param x A vector to recycle. #' @param ... #' * For `vec_recycle_common()`, vectors to recycle. #' * For `vec_recycle()`, these dots should be empty. #' @param size Desired output size. #' @param .size Desired output size. If omitted, #' will use the common size from [vec_size_common()]. #' @param x_arg Argument name for `x`. These are used in error #' messages to inform the user about which argument has an #' incompatible size. #' #' @section Dependencies: #' - [vec_slice()] #' #' @export #' @examples #' # Inputs with 1 observation are recycled #' vec_recycle_common(1:5, 5) #' vec_recycle_common(integer(), 5) #' \dontrun{ #' vec_recycle_common(1:5, 1:2) #' } #' #' # Data frames and matrices are recycled along their rows #' vec_recycle_common(data.frame(x = 1), 1:5) #' vec_recycle_common(array(1:2, c(1, 2)), 1:5) #' vec_recycle_common(array(1:3, c(1, 3, 1)), 1:5) vec_recycle <- function(x, size, ..., x_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } .Call(vctrs_recycle, x, size, x_arg) } #' @export #' @rdname vec_recycle vec_recycle_common <- function(..., .size = NULL) { .External2(vctrs_recycle_common, .size) } vctrs/R/arith.R0000644000176200001440000000622313762412012013047 0ustar liggesusers#' Arithmetic operations #' #' This generic provides a common double dispatch mechanism for all infix #' operators (`+`, `-`, `/`, `*`, `^`, `%%`, `%/%`, `!`, `&`, `|`). It is used #' to power the default arithmetic and boolean operators for [vctr]s objects, #' overcoming the limitations of the base [Ops] generic. #' #' `vec_arith_base()` is provided as a convenience for writing methods. It #' recycles `x` and `y` to common length then calls the base operator with the #' underlying [vec_data()]. #' #' `vec_arith()` is also used in `diff.vctrs_vctr()` method via `-`. #' #' @param op An arithmetic operator as a string #' @param x,y A pair of vectors. For `!`, unary `+` and unary `-`, `y` will be #' a sentinel object of class `MISSING`, as created by `MISSING()`. #' @inheritParams ellipsis::dots_empty #' #' @seealso [stop_incompatible_op()] for signalling that an arithmetic #' operation is not permitted/supported. #' @seealso See [vec_math()] for the equivalent for the unary mathematical #' functions. #' @export #' @keywords internal #' @examples #' d <- as.Date("2018-01-01") #' dt <- as.POSIXct("2018-01-02 12:00") #' t <- as.difftime(12, unit = "hours") #' #' vec_arith("-", dt, 1) #' vec_arith("-", dt, t) #' vec_arith("-", dt, d) #' #' vec_arith("+", dt, 86400) #' vec_arith("+", dt, t) #' vec_arith("+", t, t) #' #' vec_arith("/", t, t) #' vec_arith("/", t, 2) #' #' vec_arith("*", t, 2) vec_arith <- function(op, x, y, ...) { if (!missing(...)) { ellipsis::check_dots_empty() } UseMethod("vec_arith", x) } #' @export #' @rdname vec_arith vec_arith.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } # Atomic vectors ---------------------------------------------------------- #' @rdname vec_arith #' @export vec_arith.logical #' @method vec_arith logical #' @export vec_arith.logical <- function(op, x, y, ...) UseMethod("vec_arith.logical", y) #' @method vec_arith.logical default #' @export vec_arith.logical.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.logical logical #' @export vec_arith.logical.logical <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @method vec_arith.logical numeric #' @export vec_arith.logical.numeric <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @rdname vec_arith #' @export vec_arith.numeric #' @method vec_arith numeric #' @export vec_arith.numeric <- function(op, x, y, ...) UseMethod("vec_arith.numeric", y) #' @method vec_arith.numeric default #' @export vec_arith.numeric.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.numeric logical #' @export vec_arith.numeric.logical <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @method vec_arith.numeric numeric #' @export vec_arith.numeric.numeric <- function(op, x, y, ...) vec_arith_base(op, x, y) # Helpers ----------------------------------------------------------------- #' @export #' @rdname vec_arith vec_arith_base <- function(op, x, y) { args <- vec_recycle_common(x, y) op_fn <- getExportedValue("base", op) op_fn(vec_data(args[[1L]]), vec_data(args[[2L]])) } #' @export #' @rdname vec_arith MISSING <- function() { structure(list(), class = "MISSING") } vctrs/R/ptype-abbr-full.R0000644000176200001440000000532314027045462014753 0ustar liggesusers#' Vector type as a string #' #' `vec_ptype_full()` displays the full type of the vector. `vec_ptype_abbr()` #' provides an abbreviated summary suitable for use in a column heading. #' #' @section S3 dispatch: #' The default method for `vec_ptype_full()` uses the first element of the #' class vector. Override this method if your class has parameters that should #' be prominently displayed. #' #' The default method for `vec_ptype_abbr()` [abbreviate()]s `vec_ptype_full()` #' to 8 characters. You should almost always override, aiming for 4-6 #' characters where possible. #' #' These arguments are handled by the generic and not passed to methods: #' * `prefix_named` #' * `suffix_shape` #' #' @param x A vector. #' @param prefix_named If `TRUE`, add a prefix for named vectors. #' @param suffix_shape If `TRUE` (the default), append the shape of #' the vector. #' @inheritParams ellipsis::dots_empty #' #' @keywords internal #' @return A string. #' @export #' @examples #' cat(vec_ptype_full(1:10)) #' cat(vec_ptype_full(iris)) #' #' cat(vec_ptype_abbr(1:10)) vec_ptype_full <- function(x, ...) { if (!missing(...)) { ellipsis::check_dots_empty() } UseMethod("vec_ptype_full") } #' @export #' @rdname vec_ptype_full vec_ptype_abbr <- function(x, ..., prefix_named = FALSE, suffix_shape = TRUE) { if (!missing(...)) { ellipsis::check_dots_empty() } abbr <- vec_ptype_abbr_dispatch(x) return(paste0( if ((prefix_named || is_bare_list(x)) && !is.null(vec_names(x))) "named ", abbr, if (suffix_shape) vec_ptype_shape(x) )) UseMethod("vec_ptype_abbr") } vec_ptype_abbr_dispatch <- function(x, ...) { UseMethod("vec_ptype_abbr") } #' @export vec_ptype_full.NULL <- function(x, ...) "NULL" #' @export vec_ptype_abbr.NULL <- function(x, ...) "NULL" # Default: base types and fallback for S3/S4 ------------------------------ #' @export vec_ptype_full.default <- function(x, ...) { if (is.object(x)) { class(x)[[1]] } else if (is_vector(x)) { paste0(typeof(x), vec_ptype_shape(x)) } else { abort("Not a vector.") } } #' @export vec_ptype_abbr.default <- function(x, ...) { if (is.object(x)) { unname(abbreviate(vec_ptype_full(x), 8)) } else if (is_vector(x)) { switch(typeof(x), list = "list", logical = "lgl", integer = "int", double = "dbl", character = "chr", complex = "cpl", list = "list", expression = "expr", raw = "raw", abbreviate(typeof(x)) ) } else { abort("Not a vector.") } } # Helpers ----------------------------------------------------------------- vec_ptype_shape <- function(x) { dim <- dim2(x) if (length(dim) == 1) { "" } else { paste0("[,", paste(dim[-1], collapse = ","), "]") } } vctrs/R/faq-developer.R0000644000176200001440000000151213762412012014466 0ustar liggesusers#' FAQ - Is my class compatible with vctrs? #' #' @includeRmd man/faq/developer/reference-compatibility.Rmd description #' #' @name reference-faq-compatibility NULL #' FAQ - How does coercion work in vctrs? #' #' @includeRmd man/faq/developer/theory-coercion.Rmd description #' #' @name theory-faq-coercion NULL #' FAQ - How to implement ptype2 and cast methods? #' #' @includeRmd man/faq/developer/howto-coercion.Rmd description #' #' @name howto-faq-coercion NULL #' FAQ - How to implement ptype2 and cast methods? (Data frames) #' #' @includeRmd man/faq/developer/howto-coercion-data-frame.Rmd description #' #' @name howto-faq-coercion-data-frame NULL #' FAQ - Why isn't my class treated as a vector? #' #' @includeRmd man/faq/developer/howto-faq-fix-scalar-type-error.Rmd description #' #' @name howto-faq-fix-scalar-type-error NULL vctrs/R/type-asis.R0000644000176200001440000000440213762412012013653 0ustar liggesusers#' AsIs S3 class #' #' These functions help the base AsIs class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name as-is NULL # ------------------------------------------------------------------------------ # Printing #' @export vec_ptype_full.AsIs <- function(x, ...) { x <- asis_strip(x) paste0("I<", vec_ptype_full(x), ">") } #' @export vec_ptype_abbr.AsIs <- function(x, ...) { x <- asis_strip(x) paste0("I<", vec_ptype_abbr(x), ">") } # ------------------------------------------------------------------------------ # Proxy / restore # Arises with base df ctor: `data.frame(x = I(list(1, 2:3)))` #' @export vec_proxy.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy(x) } #' @export vec_restore.AsIs <- function(x, to, ...) { asis_restore(x) } # ------------------------------------------------------------------------------ # Coercion #' @rdname as-is #' @export vec_ptype2.AsIs #' @method vec_ptype2 AsIs #' @export vec_ptype2.AsIs <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.AsIs") } #' @method vec_ptype2.AsIs AsIs #' @export vec_ptype2.AsIs.AsIs <- function(x, y, ..., x_arg = "", y_arg = "") { x <- asis_strip(x) y <- asis_strip(y) vec_ptype2_asis(x, y, ..., x_arg = x_arg, y_arg = y_arg) } vec_ptype2_asis_left <- function(x, y, ...) { x <- asis_strip(x) vec_ptype2_asis(x, y, ...) } vec_ptype2_asis_right <- function(x, y, ...) { y <- asis_strip(y) vec_ptype2_asis(x, y, ...) } vec_ptype2_asis <- function(x, y, ...) { out <- vec_ptype2(x, y, ...) asis_restore(out) } # ------------------------------------------------------------------------------ # Casting vec_cast_from_asis <- function(x, to, ...) { x <- asis_strip(x) vec_cast(x, to, ...) } vec_cast_to_asis <- function(x, to, ...) { to <- asis_strip(to) out <- vec_cast(x, to, ...) asis_restore(out) } # ------------------------------------------------------------------------------ is_asis <- function(x) { inherits(x, "AsIs") } asis_strip <- function(x) { class(x) <- setdiff(class(x), "AsIs") x } asis_restore <- function(x) { # Using `oldClass()` here to return `NULL` for atomics # so that their implicit class isn't added class(x) <- c("AsIs", oldClass(x)) x } vctrs/R/subscript.R0000644000176200001440000002162113766452345013776 0ustar liggesusers#' Convert to a base subscript type #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' Convert `i` to the base type expected by [vec_as_location()] or #' [vec_as_location2()]. The values of the subscript type are #' not checked in any way (length, missingness, negative elements). #' #' @inheritParams vec_as_location #' @param logical,location,character How to handle logical, numeric, #' and character subscripts. #' #' If `"cast"` and the subscript is not one of the three base types #' (logical, integer or character), the subscript is #' [cast][vec_cast] to the relevant base type, e.g. factors are #' coerced to character. `NULL` is treated as an empty integer #' vector, and is thus coercible depending on the setting of #' `numeric`. Symbols are treated as character vectors and thus #' coercible depending on the setting of `character`. #' #' If `"error"`, the subscript type is disallowed and triggers an #' informative error. #' @keywords internal #' @export vec_as_subscript <- function(i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL) { if (!missing(...)) ellipsis::check_dots_empty() .Call( vctrs_as_subscript, i = i, logical = logical, numeric = numeric, character = character, arg = arg ) } vec_as_subscript_result <- function(i, arg, logical, numeric, character) { .Call( vctrs_as_subscript_result, i = i, logical = logical, numeric = numeric, character = character, arg = arg ) } #' @rdname vec_as_subscript #' @export vec_as_subscript2 <- function(i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL) { if (!missing(...)) ellipsis::check_dots_empty() result_get(vec_as_subscript2_result( i, arg, logical = logical, numeric = numeric, character = character )) } vec_as_subscript2_result <- function(i, arg, logical = "cast", numeric = "cast", character = "cast") { logical <- arg_match0(logical, c("cast", "error")) numeric <- arg_match0(numeric, c("cast", "error")) character <- arg_match0(character, c("cast", "error")) result <- vec_as_subscript_result( i, arg = arg, logical = logical, numeric = numeric, character = character ) # Return a child of subscript error. The child error messages refer # to single subscripts instead of subscript vectors. if (!is_null(result$err)) { parent <- result$err$parent if (inherits(parent, "vctrs_error_cast_lossy")) { bullets <- new_cnd_bullets_subscript_lossy_cast(parent) } else { bullets <- cnd_body.vctrs_error_subscript_type } result$err <- new_error_subscript2_type( i = result$err$i, logical = logical, numeric = numeric, character = character, subscript_arg = arg, body = bullets, parent = result$err$parent ) return(result) } i <- result$ok if (typeof(i) == "logical") { return(result(err = new_error_subscript2_type( i = i, logical = logical, numeric = numeric, character = character, subscript_arg = arg, body = cnd_body.vctrs_error_subscript_type ))) } result } subscript_type_opts <- c("logical", "numeric", "character") subscript_type_opts_indefinite_singular <- c("a logical flag", "a location", "a name") subscript_type_opts_indefinite_plural <- c("logical flags", "locations", "names") as_opts_subscript_type <- function(x, arg = NULL) { if (inherits(x, "vctrs_opts_subscript_type")) { return(x) } new_opts( x, subscript_type_opts, subclass = "vctrs_opts_subscript_type", arg = arg ) } as_opts_subscript2_type <- function(x, arg = NULL) { if ("logical" %in% x) { abort("Logical subscripts can't be converted to a single location.") } as_opts_subscript_type(x, arg = arg) } stop_subscript <- function(i, ..., class = NULL) { abort( class = c(class, "vctrs_error_subscript"), i = i, ... ) } new_error_subscript <- function(class = NULL, i, ...) { error_cnd( c(class, "vctrs_error_subscript"), i = i, ... ) } new_error_subscript_type <- function(i, logical = "cast", numeric = "cast", character = "cast", ..., class = NULL) { new_error_subscript( class = c(class, "vctrs_error_subscript_type"), i = i, logical = logical, numeric = numeric, character = character, ... ) } #' @export cnd_header.vctrs_error_subscript_type <- function(cnd) { action <- cnd_subscript_action(cnd) elt <- cnd_subscript_element(cnd) if (cnd_subscript_scalar(cnd)) { glue::glue("Must {action} {elt[[1]]} with a single valid subscript.") } else { glue::glue("Must {action} {elt[[2]]} with a valid subscript vector.") } } #' @export cnd_body.vctrs_error_subscript_type <- function(cnd) { arg <- append_arg("Subscript", cnd$subscript_arg) type <- obj_type(cnd$i) expected_types <- collapse_subscript_type(cnd) format_error_bullets(c( x = glue::glue("{arg} has the wrong type `{type}`."), i = glue::glue("It must be {expected_types}.") )) } new_cnd_bullets_subscript_lossy_cast <- function(lossy_err) { function(cnd, ...) { format_error_bullets(c(x = cnd_header(lossy_err))) } } collapse_subscript_type <- function(cnd) { types <- c("logical", "numeric", "character") allowed <- cnd[types] != "error" types <- types[allowed] if (length(types) == 2) { last <- " or " } else { last <- ", or " } glue::glue_collapse(types, sep = ", ", last = last) } new_error_subscript_size <- function(i, ..., class = NULL) { new_error_subscript( class = c(class, "vctrs_error_subscript_size"), i = i, ... ) } #' @export cnd_header.vctrs_error_subscript_size <- function(cnd, ...) { cnd_header.vctrs_error_subscript_type(cnd, ...) } new_error_subscript2_type <- function(i, logical, numeric, character, ...) { new_error_subscript_type( i = i, logical = logical, numeric = numeric, character = character, subscript_scalar = TRUE, ... ) } cnd_body_subscript_dim <- function(cnd, ...) { arg <- append_arg("Subscript", cnd$subscript_arg) dim <- length(dim(cnd$i)) if (dim < 2) { abort("Internal error: Unexpected dimensionality in `cnd_body_subcript_dim()`.") } if (dim == 2) { shape <- "a matrix" } else { shape <- "an array" } format_error_bullets(c( x = glue::glue("{arg} must be a simple vector, not {shape}.") )) } cnd_subscript_element <- function(cnd, capital = FALSE) { elt <- cnd$subscript_elt %||% "element" if (!is_string(elt, c("element", "row", "column", "table"))) { abort(paste0( "Internal error: `cnd$subscript_elt` must be one of ", "`element`, `row`, `column` or `table`." )) } if (capital) { switch(elt, element = c("Element", "Elements"), row = c("Row", "Rows"), column = c("Column", "Columns"), table = c("Table", "Tables") ) } else { switch(elt, element = c("element", "elements"), row = c("row", "rows"), column = c("column", "columns"), table = c("table", "tables") ) } } subscript_actions <- c( "subset", "extract", "assign", "rename", "remove", "negate" ) cnd_subscript_action <- function(cnd, assign_to = TRUE) { action <- cnd$subscript_action if (is_null(action)) { if (cnd_subscript_scalar(cnd)) { action <- "extract" } else { action <- "subset" } } if (!is_string(action, subscript_actions)) { abort(paste0( "Internal error: `cnd$subscript_action` must be one of ", "`subset`, `extract`, `assign`, `rename`, `remove`, or `negate`." )) } if (assign_to && action == "assign") { "assign to" } else { action } } cnd_subscript_type <- function(cnd) { type <- cnd$subscript_type if (!is_string(type, c("logical", "numeric", "character"))) { abort("Internal error: `cnd$subscript_type` must be `logical`, `numeric`, or `character`.") } type } cnd_subscript_scalar <- function(cnd) { out <- cnd$subscript_scalar %||% FALSE if (!is_bool(out)) { abort("Internal error: `cnd$subscript_scalar` must be a boolean.") } out } vctrs/R/size.R0000644000176200001440000001123013762412012012704 0ustar liggesusers#' Number of observations #' #' @description #' #' `vec_size(x)` returns the size of a vector. `vec_is_empty()` #' returns `TRUE` if the size is zero, `FALSE` otherwise. #' #' The size is distinct from the [length()] of a vector because it #' generalises to the "number of observations" for 2d structures, #' i.e. it's the number of rows in matrix or a data frame. This #' definition has the important property that every column of a data #' frame (even data frame and matrix columns) have the same size. #' `vec_size_common(...)` returns the common size of multiple vectors. #' #' `list_sizes()` returns an integer vector containing the size of each element #' of a list. It is nearly equivalent to, but faster than, #' `map_int(x, vec_size)`, with the exception that `list_sizes()` will #' error on non-list inputs, as defined by [vec_is_list()]. `list_sizes()` is #' to `vec_size()` as [lengths()] is to [length()]. #' #' @seealso [vec_slice()] for a variation of `[` compatible with `vec_size()`, #' and [vec_recycle()] to recycle vectors to common length. #' @section Invariants: #' * `vec_size(dataframe)` == `vec_size(dataframe[[i]])` #' * `vec_size(matrix)` == `vec_size(matrix[, i, drop = FALSE])` #' * `vec_size(vec_c(x, y))` == `vec_size(x)` + `vec_size(y)` #' #' @param x,... Vector inputs or `NULL`. #' @param .size If `NULL`, the default, the output size is determined by #' recycling the lengths of all elements of `...`. Alternatively, you can #' supply `.size` to force a known size; in this case, `x` and `...` are #' ignored. #' @param .absent The size used when no input is provided, or when all input #' is `NULL`. If left as `NULL` when no input is supplied, an error is thrown. #' @return An integer (or double for long vectors). #' #' `vec_size_common()` returns `.absent` if all inputs are `NULL` or #' absent, `0L` by default. #' #' #' @details #' #' There is no vctrs helper that retrieves the number of columns: as this #' is a property of the [type][vec_ptype_show()]. #' #' `vec_size()` is equivalent to `NROW()` but has a name that is easier to #' pronounce, and throws an error when passed non-vector inputs. #' #' #' @section The size of NULL: #' #' The size of `NULL` is hard-coded to `0L` in `vec_size()`. #' `vec_size_common()` returns `.absent` when all inputs are `NULL` #' (if only some inputs are `NULL`, they are simply ignored). #' #' A default size of 0 makes sense because sizes are most often #' queried in order to compute a total size while assembling a #' collection of vectors. Since we treat `NULL` as an absent input by #' principle, we return the identity of sizes under addition to #' reflect that an absent input doesn't take up any size. #' #' Note that other defaults might make sense under different #' circumstances. For instance, a default size of 1 makes sense for #' finding the common size because 1 is the identity of the recycling #' rules. #' #' @section Dependencies: #' - [vec_proxy()] #' #' @export #' @examples #' vec_size(1:100) #' vec_size(mtcars) #' vec_size(array(dim = c(3, 5, 10))) #' #' vec_size_common(1:10, 1:10) #' vec_size_common(1:10, 1) #' vec_size_common(integer(), 1) #' #' list_sizes(list("a", 1:5, letters)) vec_size <- function(x) { .Call(vctrs_size, x) } #' @export #' @rdname vec_size vec_size_common <- function(..., .size = NULL, .absent = 0L) { .External2(vctrs_size_common, .size, .absent) } #' @rdname vec_size #' @export list_sizes <- function(x) { .Call(vctrs_list_sizes, x) } #' @rdname vec_size #' @export vec_is_empty <- function(x) { vec_size(x) == 0L } #' Default value for empty vectors #' #' Use this inline operator when you need to provide a default value for #' empty (as defined by [vec_is_empty()]) vectors. #' #' @param x A vector #' @param y Value to use if `x` is empty. To preserve type-stability, should #' be the same type as `x`. #' @rdname op-empty-default #' @export #' @examples #' 1:10 %0% 5 #' integer() %0% 5 `%0%` <- function(x, y) { if (vec_is_empty(x)) y else x } # sequences ------------------------------------------------------------------- #' Useful sequences #' #' `vec_seq_along()` is equivalent to [seq_along()] but uses size, not length. #' `vec_init_along()` creates a vector of missing values with size matching #' an existing object. #' #' @param x,y Vectors #' @return #' * `vec_seq_along()` an integer vector with the same size as `x`. #' * `vec_init_along()` a vector with the same type as `x` and the same size #' as `y`. #' @export #' @examples #' vec_seq_along(mtcars) #' vec_init_along(head(mtcars)) vec_seq_along <- function(x) { seq_len(vec_size(x)) } #' @export #' @rdname vec_seq_along vec_init_along <- function(x, y = x) { vec_slice(x, rep_len(NA_integer_, vec_size(y))) } vctrs/R/cast.R0000644000176200001440000001211713762412012012671 0ustar liggesusers#' Cast a vector to a specified type #' #' @description #' #' `vec_cast()` provides directional conversions from one type of #' vector to another. Along with [vec_ptype2()], this generic forms #' the foundation of type coercions in vctrs. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @param x Vectors to cast. #' @param ... For `vec_cast_common()`, vectors to cast. For #' `vec_cast()`, `vec_cast_default()`, and `vec_restore()`, these #' dots are only for future extensions and should be empty. #' @param to,.to Type to cast to. If `NULL`, `x` will be returned as is. #' @param x_arg,to_arg Argument names for `x` and `to`. These are used #' in error messages to inform the user about the locations of #' incompatible types (see [stop_incompatible_type()]). #' @return A vector the same length as `x` with the same type as `to`, #' or an error if the cast is not possible. An error is generated if #' information is lost when casting between compatible types (i.e. when #' there is no 1-to-1 mapping for a specific value). #' #' @section Dependencies of `vec_cast_common()`: #' #' ## vctrs dependencies #' #' - [vec_ptype2()] #' - [vec_cast()] #' #' #' ## base dependencies #' #' Some functions enable a base-class fallback for #' `vec_cast_common()`. In that case the inputs are deemed compatible #' when they have the same [base type][base::typeof] and inherit from #' the same base class. #' #' @seealso Call [stop_incompatible_cast()] when you determine from the #' attributes that an input can't be cast to the target type. #' @export #' @examples #' # x is a double, but no information is lost #' vec_cast(1, integer()) #' #' # When information is lost the cast fails #' try(vec_cast(c(1, 1.5), integer())) #' try(vec_cast(c(1, 2), logical())) #' #' # You can suppress this error and get the partial results #' allow_lossy_cast(vec_cast(c(1, 1.5), integer())) #' allow_lossy_cast(vec_cast(c(1, 2), logical())) #' #' # By default this suppress all lossy cast errors without #' # distinction, but you can be specific about what cast is allowed #' # by supplying prototypes #' allow_lossy_cast(vec_cast(c(1, 1.5), integer()), to_ptype = integer()) #' try(allow_lossy_cast(vec_cast(c(1, 2), logical()), to_ptype = integer())) #' #' # No sensible coercion is possible so an error is generated #' try(vec_cast(1.5, factor("a"))) #' #' # Cast to common type #' vec_cast_common(factor("a"), factor(c("a", "b"))) vec_cast <- function(x, to, ..., x_arg = "", to_arg = "") { if (!missing(...)) { check_ptype2_dots_empty(...) } return(.Call(vctrs_cast, x, to, x_arg, to_arg)) UseMethod("vec_cast", to) } vec_cast_dispatch <- function(x, to, ..., x_arg = "", to_arg = "") { UseMethod("vec_cast", to) } vec_cast_no_fallback <- function(x, to) { vec_cast_common_params(x = x, .to = to, .df_fallback = DF_FALLBACK_none)$x } vec_cast_dispatch_native <- function(x, to, ..., x_arg = "", to_arg = "") { fallback_opts <- match_fallback_opts(...) .Call(vctrs_cast_dispatch_native, x, to, fallback_opts, x_arg, to_arg) } #' @export #' @rdname vec_cast vec_cast_common <- function(..., .to = NULL) { .External2(vctrs_cast_common, .to) } vec_cast_common_opts <- function(..., .to = NULL, .opts = fallback_opts()) { .External2(vctrs_cast_common_opts, .to, .opts) } vec_cast_common_params <- function(..., .to = NULL, .df_fallback = NULL, .s3_fallback = NULL) { opts <- fallback_opts( df_fallback = .df_fallback, s3_fallback = .s3_fallback ) vec_cast_common_opts(..., .to = .to, .opts = opts) } vec_cast_common_fallback <- function(..., .to = NULL) { vec_cast_common_opts(..., .to = .to, .opts = full_fallback_opts()) } #' @rdname vec_default_ptype2 #' @inheritParams vec_cast #' @export vec_default_cast <- function(x, to, ..., x_arg = "", to_arg = "") { if (is_asis(x)) { return(vec_cast_from_asis(x, to, x_arg = x_arg, to_arg = to_arg)) } if (is_asis(to)) { return(vec_cast_to_asis(x, to, x_arg = x_arg, to_arg = to_arg)) } if (inherits(to, "vctrs_vctr") && !inherits(to, c("vctrs_rcrd", "vctrs_list_of"))) { return(vctr_cast(x, to, x_arg = x_arg, to_arg = to_arg)) } opts <- match_fallback_opts(...) if (is_common_class_fallback(to) && length(common_class_suffix(x, to))) { return(x) } # If both data frames, first find the `to` type of columns before # the same-type fallback if (df_needs_normalisation(x, to, opts)) { x <- vec_cast_df_fallback_normalise(x, to, opts) } if (is_same_type(x, to)) { return(x) } if (has_df_fallback(opts$df_fallback) && is_df_subclass(x)) { out <- df_cast_opts( x, to, ..., opts = opts, x_arg = x_arg, to_arg = to_arg ) if (inherits(to, "tbl_df")) { out <- df_as_tibble(out) } return(out) } stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, `vctrs:::from_dispatch` = match_from_dispatch(...) ) } is_informative_error.vctrs_error_cast_lossy <- function(x, ...) { FALSE } vctrs/R/aaa.R0000644000176200001440000000171314042543376012473 0ustar liggesuserson_load <- function(expr, env = parent.frame()) { ns <- topenv(env) expr <- substitute(expr) callback <- function() eval_bare(expr, env) ns$.__rlang_hook__. <- c(ns$.__rlang_hook__., list(callback)) } run_on_load <- function(env = parent.frame()) { ns <- topenv(env) hook <- ns$.__rlang_hook__. env_unbind(ns, ".__rlang_hook__.") for (callback in hook) { callback() } ns$.__rlang_hook__. <- NULL } replace_from <- function(what, pkg, to = topenv(caller_env())) { if (what %in% getNamespaceExports(pkg)) { env <- ns_env(pkg) } else { env <- to } env_get(env, what, inherit = TRUE) } # nocov start # Useful for micro-optimising default arguments requiring evaluation, # such as `param = c("foo", "bar")`. Buys about 0.6us on my desktop. fn_inline_formals <- function(fn, names) { stopifnot(typeof(fn) == "closure") fmls <- formals(fn) fmls[names] <- lapply(fmls[names], eval) formals(fn) <- fmls fn } # nocov end vctrs/R/slice.R0000644000176200001440000001506013762412012013036 0ustar liggesusers#' Get or set observations in a vector #' #' This provides a common interface to extracting and modifying observations #' for all vector types, regardless of dimensionality. It is an analog to `[` #' that matches [vec_size()] instead of `length()`. #' #' @param x A vector #' @param i An integer, character or logical vector specifying the #' locations or names of the observations to get/set. Specify #' `TRUE` to index all elements (as in `x[]`), or `NULL`, `FALSE` or #' `integer()` to index none (as in `x[NULL]`). #' @param value Replacement values. `value` is cast to the type of #' `x`, but only if they have a common type. See below for examples #' of this rule. #' @param x_arg,value_arg Argument names for `x` and `value`. These are used #' in error messages to inform the user about the locations of #' incompatible types and sizes (see [stop_incompatible_type()] and #' [stop_incompatible_size()]). #' @param ... These dots are for future extensions and must be empty. #' @return A vector of the same type as `x`. #' #' @section Genericity: #' #' Support for S3 objects depends on whether the object implements a #' [vec_proxy()] method. #' #' * When a `vec_proxy()` method exists, the proxy is sliced and #' `vec_restore()` is called on the result. #' #' * Otherwise `vec_slice()` falls back to the base generic `[`. #' #' Note that S3 lists are treated as scalars by default, and will #' cause an error if they don't implement a [vec_proxy()] method. #' #' @section Differences with base R subsetting: #' #' * `vec_slice()` only slices along one dimension. For #' two-dimensional types, the first dimension is subsetted. #' #' * `vec_slice()` preserves attributes by default. #' #' * `vec_slice<-()` is type-stable and always returns the same type #' as the LHS. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_proxy()] #' - [vec_restore()] #' #' ## base dependencies #' #' - \code{base::`[`} #' #' If a non-data-frame vector class doesn't have a [vec_proxy()] #' method, the vector is sliced with `[` instead. #' #' @export #' @keywords internal #' @examples #' x <- sample(10) #' x #' vec_slice(x, 1:3) #' #' # You can assign with the infix variant: #' vec_slice(x, 2) <- 100 #' x #' #' # Or with the regular variant that doesn't modify the original input: #' y <- vec_assign(x, 3, 500) #' y #' x #' #' #' # Slicing objects of higher dimension: #' vec_slice(mtcars, 1:3) #' #' # Type stability -------------------------------------------------- #' #' # The assign variant is type stable. It always returns the same #' # type as the input. #' x <- 1:5 #' vec_slice(x, 2) <- 20.0 #' #' # `x` is still an integer vector because the RHS was cast to the #' # type of the LHS: #' vec_ptype(x) #' #' # Compare to `[<-`: #' x[2] <- 20.0 #' vec_ptype(x) #' #' #' # Note that the types must be coercible for the cast to happen. #' # For instance, you can cast a double vector of whole numbers to an #' # integer vector: #' vec_cast(1, integer()) #' #' # But not fractional doubles: #' try(vec_cast(1.5, integer())) #' #' # For this reason you can't assign fractional values in an integer #' # vector: #' x <- 1:3 #' try(vec_slice(x, 2) <- 1.5) vec_slice <- function(x, i) { .Call(vctrs_slice, x, i) } # Called when `x` has dimensions vec_slice_fallback <- function(x, i) { out <- unclass(vec_proxy(x)) vec_assert(out) d <- vec_dim_n(out) if (d == 2) { out <- out[i, , drop = FALSE] } else { miss_args <- rep(list(missing_arg()), d - 1) out <- eval_bare(expr(out[i, !!!miss_args, drop = FALSE])) } vec_restore(out, x) } vec_slice_fallback_integer64 <- function(x, i) { d <- vec_dim_n(x) if (d == 2) { out <- x[i, , drop = FALSE] } else { miss_args <- rep(list(missing_arg()), d - 1) out <- eval_bare(expr(x[i, !!!miss_args, drop = FALSE])) } is_na <- is.na(i) if (!any(is_na)) { return(out) } if (d == 2) { out[is_na,] <- bit64::NA_integer64_ } else { eval_bare(expr(out[is_na, !!!miss_args] <- bit64::NA_integer64_)) } out } # bit64::integer64() objects do not have support for `NA_integer_` # slicing. This manually replaces the garbage values that are created # any time a slice with `NA_integer_` is made. vec_slice_dispatch_integer64 <- function(x, i) { out <- x[i] is_na <- is.na(i) if (!any(is_na)) { return(out) } out[is_na] <- bit64::NA_integer64_ out } #' @rdname vec_slice #' @export `vec_slice<-` <- function(x, i, value) { .Call(vctrs_assign, x, i, value, "", "") } #' @rdname vec_slice #' @export vec_assign <- function(x, i, value, ..., x_arg = "", value_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } .Call(vctrs_assign, x, i, value, x_arg, value_arg) } vec_assign_fallback <- function(x, i, value) { # Work around bug in base `[<-` existing <- !is.na(i) i <- vec_slice(i, existing) value <- vec_slice(value, existing) d <- vec_dim_n(x) miss_args <- rep(list(missing_arg()), d - 1) eval_bare(expr(x[i, !!!miss_args] <- value)) x } # `start` is 0-based vec_assign_seq <- function(x, value, start, size, increasing = TRUE) { .Call(vctrs_assign_seq, x, value, start, size, increasing) } vec_assign_params <- function(x, i, value, assign_names = FALSE) { .Call(vctrs_assign_params, x, i, value, assign_names) } vec_remove <- function(x, i) { vec_slice(x, -vec_as_location(i, length(x), names(x))) } vec_index <- function(x, i, ...) { i <- maybe_missing(i, TRUE) out <- vec_slice(x, i) if (!dots_n(...)) { return(out) } # Need to unclass to avoid infinite recursion through `[` proxy <- vec_data(out) out <- proxy[, ..., drop = FALSE] vec_restore(out, x) } #' Initialize a vector #' #' @param x Template of vector to initialize. #' @param n Desired size of result. #' @export #' @section Dependencies: #' * vec_slice() #' @examples #' vec_init(1:10, 3) #' vec_init(Sys.Date(), 5) #' vec_init(mtcars, 2) vec_init <- function(x, n = 1L) { n <- vec_cast(n, integer()) vec_assert(n, size = 1L) .Call(vctrs_init, x, n) } # Exposed for testing (`start` is 0-based) vec_slice_seq <- function(x, start, size, increasing = TRUE) { .Call(vctrs_slice_seq, x, start, size, increasing) } # Exposed for testing (`i` is 1-based) vec_slice_rep <- function(x, i, n) { .Call(vctrs_slice_rep, x, i, n) } # Forwards arguments to `base::rep()` base_vec_rep <- function(x, ...) { i <- rep(seq_len(vec_size(x)), ...) vec_slice(x, i) } # Emulates `length<-` vec_size_assign <- function(x, n) { x_size <- vec_size(x) if (n > x_size) { i <- seq_len(x_size) i <- c(i, vec_init(int(), n - x_size)) } else { i <- seq_len(n) } vec_slice(x, i) } vctrs/R/partial-frame.R0000644000176200001440000000502614001521566014466 0ustar liggesusers#' Partially specify columns of a data frame #' #' @description #' `r lifecycle::badge("experimental")` #' #' This special class can be passed to `.ptype` in order to specify the #' types of only some of the columns in a data frame. #' #' @param ... Attributes of subclass #' @keywords internal #' @export #' @examples #' pf <- partial_frame(x = double()) #' pf #' #' vec_rbind( #' data.frame(x = 1L, y = "a"), #' data.frame(x = FALSE, z = 10), #' .ptype = partial_frame(x = double(), a = character()) #' ) partial_frame <- function(...) { args <- list2(...) args <- lapply(args, vec_ptype) partial <- new_data_frame(args, n = 0L) new_partial_frame(partial) } new_partial_frame <- function(partial = data.frame(), learned = data.frame()) { stopifnot( is.data.frame(partial), is.data.frame(learned) ) # Fails if `learned` is not compatible with `partial` vec_ptype2(partial, learned) new_partial( partial = partial, learned = learned, class = "vctrs_partial_frame" ) } #' @export vec_ptype_full.vctrs_partial_frame <- function(x, ...) { both <- c(as.list(x$partial), as.list(x$learned)) types <- map_chr(both, vec_ptype_full) needs_indent <- grepl("\n", types) types[needs_indent] <- map(types[needs_indent], function(x) indent(paste0("\n", x), 4)) source <- c(rep(" {partial}", length(x$partial)), rep("", length(x$learned))) names <- paste0(" ", format(names(both))) paste0( "partial_frame<\n", paste0(names, ": ", types, source, collapse = "\n"), "\n>" ) } #' @export vec_ptype_abbr.vctrs_partial_frame <- function(x, ...) { "prtl" } #' @method vec_ptype2 vctrs_partial_frame #' @export vec_ptype2.vctrs_partial_frame <- function(x, y, ...) { UseMethod("vec_ptype2.vctrs_partial_frame") } #' @method vec_ptype2.vctrs_partial_frame vctrs_partial_frame #' @export vec_ptype2.vctrs_partial_frame.vctrs_partial_frame <- function(x, y, ...) { partial <- vec_ptype2(x$partial, y$partial) learned <- vec_ptype2(x$learned, y$learned) new_partial_frame(partial, learned) } #' @method vec_ptype2.vctrs_partial_frame data.frame #' @export vec_ptype2.vctrs_partial_frame.data.frame <- function(x, y, ...) { new_partial_frame(x$partial, vec_ptype2(x$learned, y)) } #' @method vec_ptype2.data.frame vctrs_partial_frame #' @export vec_ptype2.data.frame.vctrs_partial_frame <- function(x, y, ...) { new_partial_frame(y$partial, vec_ptype2(y$learned, x)) } #' @export vec_ptype_finalise.vctrs_partial_frame <- function(x, ...) { out <- x$learned out[names(x$partial)] <- x$partial out } vctrs/R/rep.R0000644000176200001440000000527513762412012012534 0ustar liggesusers#' Repeat a vector #' #' @description #' - `vec_rep()` repeats an entire vector a set number of `times`. #' #' - `vec_rep_each()` repeats each element of a vector a set number of `times`. #' #' - `vec_unrep()` compresses a vector with repeated values. The repeated values #' are returned as a `key` alongside the number of `times` each key is #' repeated. #' #' @details #' Using `vec_unrep()` and `vec_rep_each()` together is similar to using #' [base::rle()] and [base::inverse.rle()]. The following invariant shows #' the relationship between the two functions: #' #' ``` #' compressed <- vec_unrep(x) #' identical(x, vec_rep_each(compressed$key, compressed$times)) #' ``` #' #' There are two main differences between `vec_unrep()` and [base::rle()]: #' #' - `vec_unrep()` treats adjacent missing values as equivalent, while `rle()` #' treats them as different values. #' #' - `vec_unrep()` works along the size of `x`, while `rle()` works along its #' length. This means that `vec_unrep()` works on data frames by compressing #' repeated rows. #' #' @param x A vector. #' @param times #' For `vec_rep()`, a single integer for the number of times to repeat #' the entire vector. #' #' For `vec_rep_each()`, an integer vector of the number of times to repeat #' each element of `x`. `times` will be recycled to the size of `x`. #' #' @return #' For `vec_rep()`, a vector the same type as `x` with size #' `vec_size(x) * times`. #' #' For `vec_rep_each()`, a vector the same type as `x` with size #' `sum(vec_recycle(times, vec_size(x)))`. #' #' For `vec_unrep()`, a data frame with two columns, `key` and `times`. `key` #' is a vector with the same type as `x`, and `times` is an integer vector. #' #' @section Dependencies: #' - [vec_slice()] #' #' @name vec-rep #' @examples #' # Repeat the entire vector #' vec_rep(1:2, 3) #' #' # Repeat within each vector #' vec_rep_each(1:2, 3) #' x <- vec_rep_each(1:2, c(3, 4)) #' x #' #' # After using `vec_rep_each()`, you can recover the original vector #' # with `vec_unrep()` #' vec_unrep(x) #' #' df <- data.frame(x = 1:2, y = 3:4) #' #' # `rep()` repeats columns of data frames, and returns lists #' rep(df, each = 2) #' #' # `vec_rep()` and `vec_rep_each()` repeat rows, and return data frames #' vec_rep(df, 2) #' vec_rep_each(df, 2) #' #' # `rle()` treats adjacent missing values as different #' y <- c(1, NA, NA, 2) #' rle(y) #' #' # `vec_unrep()` treats them as equivalent #' vec_unrep(y) NULL #' @rdname vec-rep #' @export vec_rep <- function(x, times) { .Call(vctrs_rep, x, times) } #' @rdname vec-rep #' @export vec_rep_each <- function(x, times) { .Call(vctrs_rep_each, x, times) } #' @rdname vec-rep #' @export vec_unrep <- function(x) { .Call(vctrs_unrep, x) } vctrs/R/dictionary.R0000644000176200001440000002052613762412012014107 0ustar liggesusers#' Count unique values in a vector #' #' Count the number of unique values in a vector. `vec_count()` has two #' important differences to `table()`: it returns a data frame, and when #' given multiple inputs (as a data frame), it only counts combinations that #' appear in the input. #' #' @param x A vector (including a data frame). #' @param sort One of "count", "key", "location", or "none". #' * "count", the default, puts most frequent values at top #' * "key", orders by the output key column (i.e. unique values of `x`) #' * "location", orders by location where key first seen. This is useful #' if you want to match the counts up to other unique/duplicated functions. #' * "none", leaves unordered. #' @return A data frame with columns `key` (same type as `x`) and #' `count` (an integer vector). #' #' @section Dependencies: #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_order()] #' @export #' @examples #' vec_count(mtcars$vs) #' vec_count(iris$Species) #' #' # If you count a data frame you'll get a data frame #' # column in the output #' str(vec_count(mtcars[c("vs", "am")])) #' #' # Sorting --------------------------------------- #' #' x <- letters[rpois(100, 6)] #' # default is to sort by frequency #' vec_count(x) #' #' # by can sort by key #' vec_count(x, sort = "key") #' #' # or location of first value #' vec_count(x, sort = "location") #' head(x) #' #' # or not at all #' vec_count(x, sort = "none") vec_count <- function(x, sort = c("count", "key", "location", "none")) { sort <- arg_match0(sort, c("count", "key", "location", "none")) # Returns key-value pair giving index of first occurrence value and count kv <- vec_count_impl(x) df <- data_frame( key = vec_slice(x, kv$key), count = kv$val ) if (sort == "none") { return(df) } idx <- switch(sort, location = order(kv$key), key = vec_order(df$key), count = order(-kv$val) ) df <- vec_slice(df, idx) reset_rownames(df) } vec_count_impl <- function(x) { .Call(vctrs_count, x) } reset_rownames <- function(x) { rownames(x) <- NULL is_df <- map_lgl(x, is.data.frame) x[is_df] <- lapply(x[is_df], reset_rownames) x } # Duplicates -------------------------------------------------------------- #' Find duplicated values #' #' * `vec_duplicate_any()`: detects the presence of duplicated values, #' similar to [anyDuplicated()]. #' * `vec_duplicate_detect()`: returns a logical vector describing if each #' element of the vector is duplicated elsewhere. Unlike [duplicated()], it #' reports all duplicated values, not just the second and subsequent #' repetitions. #' * `vec_duplicate_id()`: returns an integer vector giving the location of #' the first occurrence of the value. #' #' @section Missing values: #' In most cases, missing values are not considered to be equal, i.e. #' `NA == NA` is not `TRUE`. This behaviour would be unappealing here, #' so these functions consider all `NAs` to be equal. (Similarly, #' all `NaN` are also considered to be equal.) #' #' @param x A vector (including a data frame). #' @return #' * `vec_duplicate_any()`: a logical vector of length 1. #' * `vec_duplicate_detect()`: a logical vector the same length as `x`. #' * `vec_duplicate_id()`: an integer vector the same length as `x`. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @seealso [vec_unique()] for functions that work with the dual of duplicated #' values: unique values. #' @name vec_duplicate #' @examples #' vec_duplicate_any(1:10) #' vec_duplicate_any(c(1, 1:10)) #' #' x <- c(10, 10, 20, 30, 30, 40) #' vec_duplicate_detect(x) #' # Note that `duplicated()` doesn't consider the first instance to #' # be a duplicate #' duplicated(x) #' #' # Identify elements of a vector by the location of the first element that #' # they're equal to: #' vec_duplicate_id(x) #' # Location of the unique values: #' vec_unique_loc(x) #' # Equivalent to `duplicated()`: #' vec_duplicate_id(x) == seq_along(x) NULL #' @rdname vec_duplicate #' @export vec_duplicate_any <- function(x) { .Call(vctrs_duplicated_any, x) } #' @rdname vec_duplicate #' @export vec_duplicate_detect <- function(x) { .Call(vctrs_duplicated, x) } #' @rdname vec_duplicate #' @export vec_duplicate_id <- function(x) { .Call(vctrs_id, x) } # Unique values ----------------------------------------------------------- #' Find and count unique values #' #' * `vec_unique()`: the unique values. Equivalent to [unique()]. #' * `vec_unique_loc()`: the locations of the unique values. #' * `vec_unique_count()`: the number of unique values. #' #' @inherit vec_duplicate sections #' @param x A vector (including a data frame). #' @return #' * `vec_unique()`: a vector the same type as `x` containing only unique #' values. #' * `vec_unique_loc()`: an integer vector, giving locations of unique values. #' * `vec_unique_count()`: an integer vector of length 1, giving the #' number of unique values. #' @seealso [vec_duplicate] for functions that work with the dual of #' unique values: duplicated values. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @export #' @examples #' x <- rpois(100, 8) #' vec_unique(x) #' vec_unique_loc(x) #' vec_unique_count(x) #' #' # `vec_unique()` returns values in the order that encounters them #' # use sort = "location" to match to the result of `vec_count()` #' head(vec_unique(x)) #' head(vec_count(x, sort = "location")) #' #' # Normally missing values are not considered to be equal #' NA == NA #' #' # But they are for the purposes of considering uniqueness #' vec_unique(c(NA, NA, NA, NA, 1, 2, 1)) vec_unique <- function(x) { vec_slice(x, vec_unique_loc(x)) } #' @rdname vec_unique #' @export vec_unique_loc <- function(x) { .Call(vctrs_unique_loc, x) } #' @rdname vec_unique #' @export vec_unique_count <- function(x) { .Call(vctrs_n_distinct, x) } # Matching ---------------------------------------------------------------- #' Find matching observations across vectors #' #' `vec_in()` returns a logical vector based on whether `needle` is found in #' haystack. `vec_match()` returns an integer vector giving location of #' `needle` in `haystack`, or `NA` if it's not found. #' #' `vec_in()` is equivalent to [%in%]; `vec_match()` is equivalent to `match()`. #' #' @section Missing values: #' In most cases places in R, missing values are not considered to be equal, #' i.e. `NA == NA` is not `TRUE`. The exception is in matching functions #' like [match()] and [merge()], where an `NA` will match another `NA`. #' By `vec_match()` and `vec_in()` will match `NA`s; but you can control #' this behaviour with the `na_equal` argument. #' #' @param needles,haystack Vector of `needles` to search for in vector haystack. #' `haystack` should usually be unique; if not `vec_match()` will only #' return the location of the first match. #' #' `needles` and `haystack` are coerced to the same type prior to #' comparison. #' @inheritParams ellipsis::dots_empty #' @param na_equal If `TRUE`, missing values in `needles` can be #' matched to missing values in `haystack`. If `FALSE`, they #' propagate, missing values in `needles` are represented as `NA` in #' the return value. #' @param needles_arg,haystack_arg Argument tags for `needles` and #' `haystack` used in error messages. #' @return A vector the same length as `needles`. `vec_in()` returns a #' logical vector; `vec_match()` returns an integer vector. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_proxy_equal()] #' #' @export #' @examples #' hadley <- strsplit("hadley", "")[[1]] #' vec_match(hadley, letters) #' #' vowels <- c("a", "e", "i", "o", "u") #' vec_match(hadley, vowels) #' vec_in(hadley, vowels) #' #' # Only the first index of duplicates is returned #' vec_match(c("a", "b"), c("a", "b", "a", "b")) vec_match <- function(needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "") { if (!missing(...)) ellipsis::check_dots_empty() .Call(vctrs_match, needles, haystack, na_equal, needles_arg, haystack_arg) } #' @export #' @rdname vec_match vec_in <- function(needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "") { if (!missing(...)) ellipsis::check_dots_empty() .Call(vctrs_in, needles, haystack, na_equal, needles_arg, haystack_arg) } vctrs/R/equal.R0000644000176200001440000000510513762412012013045 0ustar liggesusers#' Equality proxy #' #' Returns a proxy object (i.e. an atomic vector or data frame of atomic #' vectors). For [vctr]s, this determines the behaviour of `==` and #' `!=` (via [vec_equal()]); [unique()], [duplicated()] (via #' [vec_unique()] and [vec_duplicate_detect()]); [is.na()] and [anyNA()] #' (via [vec_equal_na()]). #' #' The default method calls [vec_proxy()], as the default underlying #' vector data should be equal-able in most cases. If your class is #' not equal-able, provide a `vec_proxy_equal()` method that throws an #' error. #' #' If the proxy for `x` is a data frame, `vec_proxy_equal()` is #' recursively applied on all columns as well. #' #' @param x A vector x. #' @inheritParams ellipsis::dots_empty #' #' @return A 1d atomic vector or a data frame. #' @keywords internal #' #' @section Dependencies: #' - [vec_proxy()] called by default #' #' @export vec_proxy_equal <- function(x, ...) { if (!missing(...)) { ellipsis::check_dots_empty() } return(.Call(vctrs_proxy_equal, x)) UseMethod("vec_proxy_equal") } #' @export vec_proxy_equal.default <- function(x, ...) { stop_native_implementation("vec_proxy_equal.default") } #' Test if two vectors are equal #' #' `vec_equal_na()` tests a special case: equality with `NA`. It is similar to #' [is.na] but: #' * Considers the missing element of a list to be `NULL`. #' * Considered data frames and records to be missing if every component #' is missing. #' This preserves the invariant that `vec_equal_na(x)` is equal to #' `vec_equal(x, vec_init(x), na_equal = TRUE)`. #' #' @inheritParams vec_compare #' @return A logical vector the same size as. Will only contain `NA`s if `na_equal` is `FALSE`. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_recycle_common()] #' - [vec_proxy_equal()] #' #' @export #' @examples #' vec_equal(c(TRUE, FALSE, NA), FALSE) #' vec_equal(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) #' vec_equal_na(c(TRUE, FALSE, NA)) #' #' vec_equal(5, 1:10) #' vec_equal("d", letters[1:10]) #' #' df <- data.frame(x = c(1, 1, 2, 1, NA), y = c(1, 2, 1, NA, NA)) #' vec_equal(df, data.frame(x = 1, y = 2)) #' vec_equal_na(df) vec_equal <- function(x, y, na_equal = FALSE, .ptype = NULL) { vec_assert(na_equal, ptype = logical(), size = 1L) args <- vec_recycle_common(x, y) args <- vec_cast_common_params( !!!args, .to = .ptype, .df_fallback = DF_FALLBACK_quiet ) .Call(vctrs_equal, args[[1]], args[[2]], na_equal) } #' @export #' @rdname vec_equal vec_equal_na <- function(x) { .Call(vctrs_equal_na, x) } obj_equal <- function(x, y) { .Call(vctrs_equal_object, x, y) } vctrs/R/dim.R0000644000176200001440000000311313762412012012504 0ustar liggesusers#' Actual vector dimensions #' #' @description #' * `vec_dim_n()` gives the dimensionality (i.e. number of dimensions) #' * `vec_dim()` returns the size of each dimension #' #' These functions access the raw `"dim"` attribute of the object #' and do not dispatch over the [dim()] generic. #' #' @details #' Unlike base R, we treat vectors with `NULL` dimensions as 1d. This #' simplifies the type system by eliding a special case. Compared to the base R #' equivalent, `vec_dim()` returns [length()], not `NULL`, when `x` is 1d. #' #' @seealso #' [dim2()], a variant of [dim()] that returns [length()] if an object #' doesn't have dimensions. #' #' @param x A vector #' @noRd #' @examples #' # Compared to base R #' x <- 1:5 #' dim(x) #' vec_dim(x) NULL # FIXME: Should `vec_dim()` return the size instead of the length? vec_dim <- function(x) { .Call(vctrs_dim, x) } vec_dim_n <- function(x) { .Call(vctrs_dim_n, x) } has_dim <- function(x) { .Call(vctrs_has_dim, x) } #' Perceived vector dimensions #' #' @description #' `dim2()` is a variant of [dim()] that returns [vec_size()] if an object #' doesn't have dimensions. #' #' @details #' Unlike base R, we treat vectors with `NULL` dimensions as 1d. This #' simplifies the type system by eliding a special case. Compared to the base R #' equivalent, `dim2()` returns [length()], not `NULL`, when `x` is 1d. #' #' @seealso #' [vec_dim()], a variant that never dispatches over the [dim()] generic. #' #' @param x A vector #' @noRd #' @examples #' # Compared to base R #' x <- 1:5 #' dim(x) #' vec_dim(x) dim2 <- function(x) { dim(x) %||% length(x) } vctrs/R/vctrs-package.R0000644000176200001440000000071613766452345014514 0ustar liggesusers#' @description #' \if{html}{\figure{logo.png}{options: align='right'}} #' `r lifecycle::badge("maturing")` #' #' Defines new notions of prototype and size that are #' used to provide tools for consistent and well-founded type-coercion #' and size-recycling, and are in turn connected to ideas of type- and #' size-stability useful for analysing function interfaces. #' #' @keywords internal #' @import rlang #' @useDynLib vctrs, .registration = TRUE "_PACKAGE" vctrs/R/type-explore.R0000644000176200001440000000130613762412012014372 0ustar liggesuserscoerces_to <- function(x, y, using = "strict") { type_max <- switch(using, strict = vec_ptype2, base_c = c, base_unlist = function(x, y) unlist(list(x, y)), base_modify = function(x, y) `[<-`(x, 2, value = y) ) tryCatch({ type <- suppressWarnings(type_max(x, y)) vec_ptype_full(type) }, error = function(e) { NA_character_ }) } maxtype_mat <- function(types, using = "strict") { names(types) <- map_chr(types, function(x) vec_ptype_full(vec_ptype(x))) grid <- expand.grid(x = types, y = types) grid$max <- map2_chr(grid$x, grid$y, coerces_to, using = using) matrix( grid$max, nrow = length(types), dimnames = list(names(types), names(types)) ) } vctrs/R/complete.R0000644000176200001440000000277714027045462013570 0ustar liggesusers#' Complete #' #' @description #' `vec_detect_complete()` detects "complete" observations. An observation is #' considered complete if it is non-missing. For most vectors, this implies that #' `vec_detect_complete(x) == !vec_equal_na(x)`. #' #' For data frames and matrices, a row is only considered complete if all #' elements of that row are non-missing. To compare, `!vec_equal_na(x)` detects #' rows that are partially complete (they have at least one non-missing value). #' #' @details #' A [record][new_rcrd] type vector is considered complete if any field is #' non-missing. #' #' @param x A vector #' #' @return #' A logical vector with the same size as `x`. #' #' @seealso [stats::complete.cases()] #' @export #' @examples #' x <- c(1, 2, NA, 4, NA) #' #' # For most vectors, this is identical to `!vec_equal_na(x)` #' vec_detect_complete(x) #' !vec_equal_na(x) #' #' df <- data_frame( #' x = x, #' y = c("a", "b", NA, "d", "e") #' ) #' #' # This returns `TRUE` where all elements of the row are non-missing. #' # Compare that with `!vec_equal_na()`, which detects rows that have at #' # least one non-missing value. #' df2 <- df #' df2$all_non_missing <- vec_detect_complete(df) #' df2$any_non_missing <- !vec_equal_na(df) #' df2 vec_detect_complete <- function(x) { .Call(vctrs_detect_complete, x) } vec_slice_complete <- function(x) { .Call(vctrs_slice_complete, x) } vec_locate_complete <- function(x) { .Call(vctrs_locate_complete, x) } vec_proxy_complete <- function(x) { .Call(vctrs_proxy_complete, x) } vctrs/R/print-str.R0000644000176200001440000000656214027045462013716 0ustar liggesusers# print ------------------------------------------------------------------- #' `print()` and `str()` generics. #' #' These are constructed to be more easily extensible since you can override #' the `_header()`, `_data()` or `_footer()` components individually. The #' default methods are built on top of `format()`. #' #' @param x A vector #' @param ... Additional arguments passed on to methods. See [print()] and #' [str()] for commonly used options #' @keywords internal #' @export obj_print <- function(x, ...) { obj_print_header(x, ...) obj_print_data(x, ...) obj_print_footer(x, ...) invisible(x) } #' @export #' @rdname obj_print obj_print_header <- function(x, ...) { UseMethod("obj_print_header") } #' @export obj_print_header.default <- function(x, ...) { cat_line("<", vec_ptype_full(x), "[", vec_size(x), "]>") invisible(x) } #' @export #' @rdname obj_print obj_print_data <- function(x, ...) { UseMethod("obj_print_data") } #' @export obj_print_data.default <- function(x, ...) { if (length(x) == 0) return(invisible(x)) out <- stats::setNames(format(x), names(x)) print(out, quote = FALSE) invisible(x) } #' @export #' @rdname obj_print obj_print_footer <- function(x, ...) { UseMethod("obj_print_footer") } #' @export obj_print_footer.default <- function(x, ...) { invisible(x) } # str --------------------------------------------------------------------- #' @export #' @rdname obj_print obj_str <- function(x, ...) { obj_str_header(x, ...) obj_str_data(x, ...) obj_str_footer(x, ...) } #' @export #' @rdname obj_print obj_str_header <- function(x, ...) { UseMethod("obj_str_header") } #' @export obj_str_header.default <- function(x, ...) { invisible(x) } #' @export #' @rdname obj_print obj_str_data <- function(x, ...) { UseMethod("obj_str_data") } #' @export obj_str_data.default <- function(x, ...) { if (is.list(x)) { obj_str_recursive(x, ...) } else { obj_str_leaf(x, ...) } } obj_str_recursive <- function(x, ..., indent.str = "", nest.lev = 0) { if (nest.lev != 0L) cat(" ") cat_line(glue::glue("{vec_ptype_abbr(x)} [1:{vec_size(x)}] ")) utils::str( vec_data(x), no.list = TRUE, ..., nest.lev = nest.lev + 1L, indent.str = indent.str ) } obj_str_leaf <- function(x, ..., indent.str = "", width = getOption("width")) { width <- width - nchar(indent.str) - 2 # Avoid spending too much time formatting elements that won't see length <- ceiling(width / 2) if (length(x) > length) { out <- x[seq2(1, length)] } else { out <- x } title <- glue::glue(" {vec_ptype_abbr(x)} [1:{length(x)}] ") cat_line(inline_list(title, format(out), width = width)) invisible(x) } #' @export #' @rdname obj_print obj_str_footer <- function(x, ...) { UseMethod("obj_str_footer") } #' @export obj_str_footer.default <- function(x, ..., indent.str = "", nest.lev = 0) { attr <- attributes(x) attr[["class"]] <- NULL attr[["names"]] <- NULL if (length(attr) == 0) return(invisible(x)) if (!is.list(x)) { indent.str <- paste0(" ", indent.str) } utils::str( attr, no.list = TRUE, ..., comp.str = "@ ", nest.lev = nest.lev + 1L, indent.str = indent.str ) invisible(x) } vctrs/R/type-factor.R0000644000176200001440000001346214027045462014206 0ustar liggesusers#' Factor/ordered factor S3 class #' #' A [factor] is an integer with attribute `levels`, a character vector. There #' should be one level for each integer between 1 and `max(x)`. #' An [ordered] factor has the same properties as a factor, but possesses #' an extra class that marks levels as having a total ordering. #' #' These functions help the base factor and ordered factor classes fit in to #' the vctrs type system by providing constructors, coercion functions, #' and casting functions. `new_factor()` and `new_ordered()` are low-level #' constructors - they only check that types, but not values, are valid, so #' are for expert use only. #' #' @param x Integer values which index in to `levels`. #' @param levels Character vector of labels. #' @param ...,class Used to for subclasses. #' @keywords internal #' @export new_factor <- function(x = integer(), levels = character(), ..., class = character()) { stopifnot(is.integer(x)) stopifnot(is.character(levels)) structure( x, levels = levels, ..., class = c(class, "factor") ) } #' @export #' @rdname new_factor new_ordered <- function(x = integer(), levels = character()) { new_factor(x = x, levels = levels, class = "ordered") } #' @export vec_proxy.factor <- function(x, ...) { x } #' @export vec_proxy.ordered <- function(x, ...) { x } #' @export vec_restore.factor <- function(x, to, ...) { NextMethod() } #' @export vec_restore.ordered <- function(x, to, ...) { NextMethod() } # Print ------------------------------------------------------------------- #' @export vec_ptype_full.factor <- function(x, ...) { paste0("factor<", hash_label(levels(x)), ">") } #' @export vec_ptype_abbr.factor <- function(x, ...) { "fct" } #' @export vec_ptype_full.ordered <- function(x, ...) { paste0("ordered<", hash_label(levels(x)), ">") } #' @export vec_ptype_abbr.ordered <- function(x, ...) { "ord" } # Coerce ------------------------------------------------------------------ #' @rdname new_factor #' @export vec_ptype2.factor #' @method vec_ptype2 factor #' @export vec_ptype2.factor <- function(x, y, ...) { UseMethod("vec_ptype2.factor") } #' @export vec_ptype2.factor.factor <- function(x, y, ...) { stop_native_implementation("vec_ptype2.factor.factor") } #' @export vec_ptype2.character.factor <- function(x, y, ...) { stop_native_implementation("vec_ptype2.character.factor") } #' @export vec_ptype2.factor.character <- function(x, y, ...) { stop_native_implementation("vec_ptype2.factor.character") } #' @rdname new_factor #' @export vec_ptype2.ordered #' @method vec_ptype2 ordered #' @export vec_ptype2.ordered <- function(x, y, ...) { UseMethod("vec_ptype2.ordered") } #' @export vec_ptype2.ordered.ordered <- function(x, y, ...) { stop_native_implementation("vec_ptype2.ordered.ordered") } #' @export vec_ptype2.ordered.character <- function(x, y, ...) { stop_native_implementation("vec_ptype2.ordered.character") } #' @export vec_ptype2.character.ordered <- function(x, y, ...) { stop_native_implementation("vec_ptype2.character.ordered") } #' @export vec_ptype2.ordered.factor <- function(x, y, ..., x_arg = "", y_arg = "") { stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) } #' @export vec_ptype2.factor.ordered <- function(x, y, ..., x_arg = "", y_arg = "") { stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) } # Cast -------------------------------------------------------------------- #' @rdname new_factor #' @export vec_cast.factor #' @method vec_cast factor #' @export vec_cast.factor <- function(x, to, ...) { UseMethod("vec_cast.factor") } fct_cast <- function(x, to, ..., x_arg = "", to_arg = "") { fct_cast_impl(x, to, ..., x_arg = x_arg, to_arg = to_arg, ordered = FALSE) } fct_cast_impl <- function(x, to, ..., x_arg = "", to_arg = "", ordered = FALSE) { if (length(levels(to)) == 0L) { levels <- levels(x) if (is.null(levels)) { exclude <- NA levels <- unique(x) } else { exclude <- NULL } factor(as.character(x), levels = levels, ordered = ordered, exclude = exclude) } else { lossy <- !(x %in% levels(to) | is.na(x)) out <- factor(x, levels = levels(to), ordered = ordered, exclude = NULL) maybe_lossy_cast( out, x, to, lossy, loss_type = "generality", x_arg = x_arg, to_arg = to_arg ) } } #' @export vec_cast.factor.factor <- function(x, to, ...) { fct_cast(x, to, ...) } #' @export vec_cast.factor.character <-function(x, to, ...) { fct_cast(x, to, ...) } #' @export vec_cast.character.factor <- function(x, to, ...) { stop_native_implementation("vec_cast.character.factor") } #' @rdname new_factor #' @export vec_cast.ordered #' @method vec_cast ordered #' @export vec_cast.ordered <- function(x, to, ...) { UseMethod("vec_cast.ordered") } ord_cast <- function(x, to, ..., x_arg = "", to_arg = "") { fct_cast_impl(x, to, ..., x_arg = x_arg, to_arg = to_arg, ordered = TRUE) } #' @export vec_cast.ordered.ordered <- function(x, to, ...) { ord_cast(x, to, ...) } #' @export vec_cast.ordered.character <-function(x, to, ...) { ord_cast(x, to, ...) } #' @export vec_cast.character.ordered <- function(x, to, ...) { stop_native_implementation("vec_cast.character.ordered") } # Math and arithmetic ----------------------------------------------------- #' @export vec_math.factor <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_arith.factor <- function(op, x, y, ...) { stop_unsupported(x, op) } # Helpers ----------------------------------------------------------------- hash_label <- function(x, length = 5) { if (length(x) == 0) { "" } else { # Can't use obj_hash() because it hashes the string pointers # for performance, so the values in the test change each time substr(rlang::hash(x), 1, length) } } levels_union <- function(x, y) { union(levels(x), levels(y)) } vctrs/R/faq-internal.R0000644000176200001440000000026313762412012014317 0ustar liggesusers#' Internal FAQ - `vec_ptype2()`, `NULL`, and unspecified vectors #' #' @includeRmd man/faq/internal/ptype2-identity.Rmd description #' #' @name internal-faq-ptype2-identity NULL vctrs/R/runs.R0000644000176200001440000000206313762412012012725 0ustar liggesusers#' Runs #' #' @description #' `vec_identify_runs()` returns a vector of identifiers for the elements of #' `x` that indicate which run of repeated values they fall in. The number of #' runs is also returned as an attribute, `n`. #' #' @details #' Unlike [base::rle()], adjacent missing values are considered identical when #' constructing runs. For example, `vec_identify_runs(c(NA, NA))` will return #' `c(1, 1)`, not `c(1, 2)`. #' #' @param x A vector. #' #' @return #' An integer vector with the same size as `x`. A scalar integer attribute, #' `n`, is attached. #' #' @export #' @examples #' x <- c("a", "z", "z", "c", "a", "a") #' #' vec_identify_runs(x) #' #' y <- c(1, 1, 1, 2, 2, 3) #' #' # With multiple columns, the runs are constructed rowwise #' df <- data_frame( #' x = x, #' y = y #' ) #' #' vec_identify_runs(df) vec_identify_runs <- function(x) { .Call(vctrs_identify_runs, x) } vec_locate_runs <- function(x, start = TRUE) { .Call(vctrs_locate_runs, x, start) } vec_detect_runs <- function(x, start = TRUE) { .Call(vctrs_detect_runs, x, start) } vctrs/R/bind.R0000644000176200001440000001616513766452345012703 0ustar liggesusers#' Combine many data frames into one data frame #' #' This pair of functions binds together data frames (and vectors), either #' row-wise or column-wise. Row-binding creates a data frame with common type #' across all arguments. Column-binding creates a data frame with common length #' across all arguments. #' #' @section Invariants: #' #' All inputs are first converted to a data frame. The conversion for #' 1d vectors depends on the direction of binding: #' #' * For `vec_rbind()`, each element of the vector becomes a column in #' a single row. #' * For `vec_cbind()`, each element of the vector becomes a row in a #' single column. #' #' Once the inputs have all become data frames, the following #' invariants are observed for row-binding: #' #' * `vec_size(vec_rbind(x, y)) == vec_size(x) + vec_size(y)` #' * `vec_ptype(vec_rbind(x, y)) = vec_ptype_common(x, y)` #' #' Note that if an input is an empty vector, it is first converted to #' a 1-row data frame with 0 columns. Despite being empty, its #' effective size for the total number of rows is 1. #' #' For column-binding, the following invariants apply: #' #' * `vec_size(vec_cbind(x, y)) == vec_size_common(x, y)` #' * `vec_ptype(vec_cbind(x, y)) == vec_cbind(vec_ptype(x), vec_ptype(x))` #' @param ... Data frames or vectors. #' #' When the inputs are named: #' * `vec_rbind()` assigns names to row names unless `.names_to` is #' supplied. In that case the names are assigned in the column #' defined by `.names_to`. #' * `vec_cbind()` creates packed data frame columns with named #' inputs. #' #' `NULL` inputs are silently ignored. Empty (e.g. zero row) inputs #' will not appear in the output, but will affect the derived `.ptype`. #' @param .names_to #' This controls what to do with input names supplied in `...`. #' * By default, input names are [zapped][rlang::zap]. #' #' * If a string, specifies a column where the input names will be #' copied. These names are often useful to identify rows with #' their original input. If a column name is supplied and `...` is #' not named, an integer column is used instead. #' #' * If `NULL`, the input names are used as row names. #' @param .name_repair One of `"unique"`, `"universal"`, or #' `"check_unique"`. See [vec_as_names()] for the meaning of these #' options. #' #' With `vec_rbind()`, the repair function is applied to all inputs #' separately. This is because `vec_rbind()` needs to align their #' columns before binding the rows, and thus needs all inputs to #' have unique names. On the other hand, `vec_cbind()` applies the #' repair function after all inputs have been concatenated together #' in a final data frame. Hence `vec_cbind()` allows the more #' permissive minimal names repair. #' @inheritParams vec_c #' @return A data frame, or subclass of data frame. #' #' If `...` is a mix of different data frame subclasses, `vec_ptype2()` #' will be used to determine the output type. For `vec_rbind()`, this #' will determine the type of the container and the type of each column; #' for `vec_cbind()` it only determines the type of the output container. #' If there are no non-`NULL` inputs, the result will be `data.frame()`. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_cast_common()] #' - [vec_proxy()] #' - [vec_init()] #' - [vec_assign()] #' - [vec_restore()] #' #' #' ## base dependencies of `vec_rbind()` #' #' - [base::c()] #' #' If columns to combine inherit from a common class, #' `vec_rbind()` falls back to `base::c()` if there exists a `c()` #' method implemented for this class hierarchy. #' #' @seealso [vec_c()] for combining 1d vectors. #' @examples #' # row binding ----------------------------------------- #' #' # common columns are coerced to common class #' vec_rbind( #' data.frame(x = 1), #' data.frame(x = FALSE) #' ) #' #' # unique columns are filled with NAs #' vec_rbind( #' data.frame(x = 1), #' data.frame(y = "x") #' ) #' #' # null inputs are ignored #' vec_rbind( #' data.frame(x = 1), #' NULL, #' data.frame(x = 2) #' ) #' #' # bare vectors are treated as rows #' vec_rbind( #' c(x = 1, y = 2), #' c(x = 3) #' ) #' #' # default names will be supplied if arguments are not named #' vec_rbind( #' 1:2, #' 1:3, #' 1:4 #' ) #' #' # column binding -------------------------------------- #' #' # each input is recycled to have common length #' vec_cbind( #' data.frame(x = 1), #' data.frame(y = 1:3) #' ) #' #' # bare vectors are treated as columns #' vec_cbind( #' data.frame(x = 1), #' y = letters[1:3] #' ) #' #' # if you supply a named data frame, it is packed in a single column #' data <- vec_cbind( #' x = data.frame(a = 1, b = 2), #' y = 1 #' ) #' data #' #' # Packed data frames are nested in a single column. This makes it #' # possible to access it through a single name: #' data$x #' #' # since the base print method is suboptimal with packed data #' # frames, it is recommended to use tibble to work with these: #' if (rlang::is_installed("tibble")) { #' vec_cbind(x = tibble::tibble(a = 1, b = 2), y = 1) #' } #' #' # duplicate names are flagged #' vec_cbind(x = 1, x = 2) #' #' @name vec_bind NULL #' @export #' @param .name_spec A name specification (as documented in [vec_c()]) #' for combining the outer inputs names in `...` and the inner row #' names of the inputs. This only has an effect when `.names_to` is #' set to `NULL`, which causes the input names to be assigned as row #' names. #' @rdname vec_bind vec_rbind <- function(..., .ptype = NULL, .names_to = rlang::zap(), .name_repair = c("unique", "universal", "check_unique"), .name_spec = NULL) { .External2(vctrs_rbind, .ptype, .names_to, .name_repair, .name_spec) } vec_rbind <- fn_inline_formals(vec_rbind, ".name_repair") #' @export #' @rdname vec_bind #' @param .size If, `NULL`, the default, will determine the number of #' rows in `vec_cbind()` output by using the standard recycling rules. #' #' Alternatively, specify the desired number of rows, and any inputs #' of length 1 will be recycled appropriately. vec_cbind <- function(..., .ptype = NULL, .size = NULL, .name_repair = c("unique", "universal", "check_unique", "minimal")) { .External2(vctrs_cbind, .ptype, .size, .name_repair) } vec_cbind <- fn_inline_formals(vec_cbind, ".name_repair") as_df_row <- function(x, quiet = FALSE) { .Call(vctrs_as_df_row, x, quiet) } as_df_col <- function(x, outer_name) { .Call(vctrs_as_df_col, x, outer_name) } #' Frame prototype #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' This is an experimental generic that returns zero-columns variants #' of a data frame. It is needed for [vec_cbind()], to work around the #' lack of colwise primitives in vctrs. Expect changes. #' #' @param x A data frame. #' @inheritParams ellipsis::dots_empty #' #' @keywords internal #' @export vec_cbind_frame_ptype <- function(x, ...) { UseMethod("vec_cbind_frame_ptype") } #' @export vec_cbind_frame_ptype.default <- function(x, ...) { x[0] } #' @export vec_cbind_frame_ptype.sf <- function(x, ...) { data.frame() } vctrs/R/slice-chop.R0000644000176200001440000000771713762412012013777 0ustar liggesusers#' Chopping #' #' @description #' - `vec_chop()` provides an efficient method to repeatedly slice a vector. It #' captures the pattern of `map(indices, vec_slice, x = x)`. When no indices #' are supplied, it is generally equivalent to [as.list()]. #' #' - `vec_unchop()` combines a list of vectors into a single vector, placing #' elements in the output according to the locations specified by `indices`. #' It is similar to [vec_c()], but gives greater control over how the elements #' are combined. When no indices are supplied, it is identical to `vec_c()`. #' #' If `indices` selects every value in `x` exactly once, in any order, then #' `vec_unchop()` is the inverse of `vec_chop()` and the following invariant #' holds: #' #' ``` #' vec_unchop(vec_chop(x, indices), indices) == x #' ``` #' #' @inheritParams vec_c #' @param x A vector #' @param indices For `vec_chop()`, a list of positive integer vectors to #' slice `x` with, or `NULL`. If `NULL`, `x` is split into its individual #' elements, equivalent to using an `indices` of `as.list(vec_seq_along(x))`. #' #' For `vec_unchop()`, a list of positive integer vectors specifying the #' locations to place elements of `x` in. Each element of `x` is recycled to #' the size of the corresponding index vector. The size of `indices` must #' match the size of `x`. If `NULL`, `x` is combined in the order it is #' provided in, which is equivalent to using [vec_c()]. #' @param ptype If `NULL`, the default, the output type is determined by #' computing the common type across all elements of `x`. Alternatively, you #' can supply `ptype` to give the output a known type. #' @return #' - `vec_chop()`: A list of size `vec_size(indices)` or, if `indices == NULL`, #' `vec_size(x)`. #' #' - `vec_unchop()`: A vector of type `vec_ptype_common(!!!x)`, or `ptype`, if #' specified. The size is computed as `vec_size_common(!!!indices)` unless #' the indices are `NULL`, in which case the size is `vec_size_common(!!!x)`. #' #' @section Dependencies of `vec_chop()`: #' - [vec_slice()] #' #' @section Dependencies of `vec_unchop()`: #' - [vec_c()] #' #' @export #' @examples #' vec_chop(1:5) #' vec_chop(1:5, list(1, 1:2)) #' vec_chop(mtcars, list(1:3, 4:6)) #' #' # If `indices` selects every value in `x` exactly once, #' # in any order, then `vec_unchop()` inverts `vec_chop()` #' x <- c("a", "b", "c", "d") #' indices <- list(2, c(3, 1), 4) #' vec_chop(x, indices) #' vec_unchop(vec_chop(x, indices), indices) #' #' # When unchopping, size 1 elements of `x` are recycled #' # to the size of the corresponding index #' vec_unchop(list(1, 2:3), list(c(1, 3, 5), c(2, 4))) #' #' # Names are retained, and outer names can be combined with inner #' # names through the use of a `name_spec` #' lst <- list(x = c(a = 1, b = 2), y = 1) #' vec_unchop(lst, list(c(3, 2), c(1, 4)), name_spec = "{outer}_{inner}") #' #' # An alternative implementation of `ave()` can be constructed using #' # `vec_chop()` and `vec_unchop()` in combination with `vec_group_loc()` #' ave2 <- function(.x, .by, .f, ...) { #' indices <- vec_group_loc(.by)$loc #' chopped <- vec_chop(.x, indices) #' out <- lapply(chopped, .f, ...) #' vec_unchop(out, indices) #' } #' #' breaks <- warpbreaks$breaks #' wool <- warpbreaks$wool #' #' ave2(breaks, wool, mean) #' #' identical( #' ave2(breaks, wool, mean), #' ave(breaks, wool, FUN = mean) #' ) vec_chop <- function(x, indices = NULL) { .Call(vctrs_chop, x, indices) } #' @rdname vec_chop #' @export vec_unchop <- function(x, indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal")) { .Call(vctrs_unchop, x, indices, ptype, name_spec, name_repair) } # Exposed for testing (`starts` is 0-based) vec_chop_seq <- function(x, starts, sizes, increasings = TRUE) { args <- vec_recycle_common(starts, sizes, increasings) .Call(vctrs_chop_seq, x, args[[1]], args[[2]], args[[3]]) } vctrs/R/fill.R0000644000176200001440000000257213766452345012712 0ustar liggesusers#' Fill in missing values with the previous or following value #' #' @description #' `r lifecycle::badge("experimental")` #' #' `vec_fill_missing()` fills gaps of missing values with the previous or #' following non-missing value. #' #' @param x A vector #' @param direction Direction in which to fill missing values. Must be either #' `"down"`, `"up"`, `"downup"`, or `"updown"`. #' @param max_fill A single positive integer specifying the maximum number of #' sequential missing values that will be filled. If `NULL`, there is #' no limit. #' #' @export #' @examples #' x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) #' #' # Filling down replaces missing values with the previous non-missing value #' vec_fill_missing(x, direction = "down") #' #' # To also fill leading missing values, use `"downup"` #' vec_fill_missing(x, direction = "downup") #' #' # Limit the number of sequential missing values to fill with `max_fill` #' vec_fill_missing(x, max_fill = 1) #' #' # Data frames are filled rowwise. Rows are only considered missing #' # if all elements of that row are missing. #' y <- c(1, NA, 2, NA, NA, 3, 4, NA, 5) #' df <- data_frame(x = x, y = y) #' df #' #' vec_fill_missing(df) vec_fill_missing <- function(x, direction = c("down", "up", "downup", "updown"), max_fill = NULL) { .Call(vctrs_fill_missing, x, direction, max_fill) } vctrs/R/type.R0000644000176200001440000001501514027045462012726 0ustar liggesusers#' Find the prototype of a set of vectors #' #' `vec_ptype()` returns the unfinalised prototype of a single vector. #' `vec_ptype_common()` finds the common type of multiple vectors. #' `vec_ptype_show()` nicely prints the common type of any number of #' inputs, and is designed for interactive exploration. #' #' @param x A vector #' @param ... For `vec_ptype()`, these dots are for future extensions and must #' be empty. #' #' For `vec_ptype_common()` and `vec_ptype_show()`, vector inputs. #' @param x_arg Argument name for `x`. This is used in error messages to inform #' the user about the locations of incompatible types. #' @param .ptype If `NULL`, the default, the output type is determined by #' computing the common type across all elements of `...`. #' #' Alternatively, you can supply `.ptype` to give the output known type. #' If `getOption("vctrs.no_guessing")` is `TRUE` you must supply this value: #' this is a convenient way to make production code demand fixed types. #' @return `vec_ptype()` and `vec_ptype_common()` return a prototype #' (a size-0 vector) #' #' @section `vec_ptype()`: #' `vec_ptype()` returns [size][vec_size] 0 vectors potentially #' containing attributes but no data. Generally, this is just #' `vec_slice(x, 0L)`, but some inputs require special #' handling. #' #' * While you can't slice `NULL`, the prototype of `NULL` is #' itself. This is because we treat `NULL` as an identity value in #' the `vec_ptype2()` monoid. #' #' * The prototype of logical vectors that only contain missing values #' is the special [unspecified] type, which can be coerced to any #' other 1d type. This allows bare `NA`s to represent missing values #' for any 1d vector type. #' #' See [internal-faq-ptype2-identity] for more information about #' identity values. #' #' `vec_ptype()` is a _performance_ generic. It is not necessary to implement it #' because the default method will work for any vctrs type. However the default #' method builds around other vctrs primitives like `vec_slice()` which incurs #' performance costs. If your class has a static prototype, you might consider #' implementing a custom `vec_ptype()` method that returns a constant. This will #' improve the performance of your class in many cases ([common #' type][vec_ptype2] imputation in particular). #' #' Because it may contain unspecified vectors, the prototype returned #' by `vec_ptype()` is said to be __unfinalised__. Call #' [vec_ptype_finalise()] to finalise it. Commonly you will need the #' finalised prototype as returned by `vec_slice(x, 0L)`. #' #' @section `vec_ptype_common()`: #' `vec_ptype_common()` first finds the prototype of each input, then #' successively calls [vec_ptype2()] to find a common type. It returns #' a [finalised][vec_ptype_finalise] prototype. #' #' @section Dependencies of `vec_ptype()`: #' - [vec_slice()] for returning an empty slice #' #' @section Dependencies of `vec_ptype_common()`: #' - [vec_ptype2()] #' - [vec_ptype_finalise()] #' #' @export #' @examples #' # Unknown types ------------------------------------------ #' vec_ptype_show() #' vec_ptype_show(NA) #' vec_ptype_show(NULL) #' #' # Vectors ------------------------------------------------ #' vec_ptype_show(1:10) #' vec_ptype_show(letters) #' vec_ptype_show(TRUE) #' #' vec_ptype_show(Sys.Date()) #' vec_ptype_show(Sys.time()) #' vec_ptype_show(factor("a")) #' vec_ptype_show(ordered("a")) #' #' # Matrices ----------------------------------------------- #' # The prototype of a matrix includes the number of columns #' vec_ptype_show(array(1, dim = c(1, 2))) #' vec_ptype_show(array("x", dim = c(1, 2))) #' #' # Data frames -------------------------------------------- #' # The prototype of a data frame includes the prototype of #' # every column #' vec_ptype_show(iris) #' #' # The prototype of multiple data frames includes the prototype #' # of every column that in any data frame #' vec_ptype_show( #' data.frame(x = TRUE), #' data.frame(y = 2), #' data.frame(z = "a") #' ) vec_ptype <- function(x, ..., x_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } return(.Call(vctrs_ptype, x, x_arg)) UseMethod("vec_ptype") } #' @export #' @rdname vec_ptype vec_ptype_common <- function(..., .ptype = NULL) { .External2(vctrs_type_common, .ptype) } vec_ptype_common_opts <- function(..., .ptype = NULL, .opts = fallback_opts()) { .External2(vctrs_ptype_common_opts, .ptype, .opts) } vec_ptype_common_params <- function(..., .ptype = NULL, .df_fallback = NULL, .s3_fallback = NULL) { opts <- fallback_opts( df_fallback = .df_fallback, s3_fallback = .s3_fallback ) vec_ptype_common_opts(..., .ptype = .ptype, .opts = opts) } vec_ptype_common_fallback <- function(..., .ptype = NULL) { vec_ptype_common_opts(..., .ptype = .ptype, .opts = full_fallback_opts()) } #' @export #' @rdname vec_ptype vec_ptype_show <- function(...) { args <- compact(list2(...)) n <- length(args) if (n == 0) { cat_line("Prototype: NULL") } else if (n == 1) { cat_line("Prototype: ", vec_ptype_full(args[[1]])) } else { in_types <- map(args, vec_ptype) out_types <- vector("list", length(in_types)) out_types[[1]] <- in_types[[1]] for (i in seq2(2, n)) { out_types[[i]] <- vec_ptype2(out_types[[i - 1]], in_types[[i]]) } in_full <- paste0("<", map_chr(in_types, vec_ptype_full), ">") out_full <- paste0("<", map_chr(out_types, vec_ptype_full), ">") out <- cbind( n = paste0(seq(0, n - 1), ". "), lhs = c("", out_full[-n]), comma = " , ", rhs = in_full, equals = " = ", res = c(in_full[[1]], out_full[-1]) ) out <- t(apply(out, 1, pad_height)) out <- apply(out, 2, pad_width) out[, "lhs"] <- parens(out[, "lhs"]) out[, "rhs"] <- parens(out[, "rhs"], FALSE) lines <- strsplit(out, "\n") dim(lines) <- dim(out) steps <- apply(lines, 1, function(x) do.call(cbind, x)) if (is.list(steps)) { step_lines <- unlist(lapply(steps, function(x) apply(x, 1, paste0, collapse = ""))) } else { step_lines <- apply(steps, 2, paste0, collapse = "") } cat_line("Prototype: ", out_full[[n]]) cat_line(step_lines) } invisible() } vec_typeof <- function(x) { .Call(vctrs_typeof, x, TRUE) } vec_typeof_bare <- function(x) { .Call(vctrs_typeof, x, FALSE) } vec_type_info <- function(x) { .Call(vctrs_type_info, x) } vec_proxy_info <- function(x) { .Call(vctrs_proxy_info, x) } vctrs/R/conditions.R0000644000176200001440000005046613766452345014142 0ustar liggesusers#' Custom conditions for vctrs package #' #' These functions are called for their side effect of raising #' errors and warnings. #' These conditions have custom classes and structures to make #' testing easier. #' #' @param x,y,to Vectors #' @param subclass Use if you want to further customize the class. #' @param ...,class Only use these fields when creating a subclass. #' @param x_arg,y_arg,to_arg Argument names for `x`, `y`, and `to`. Used in #' error messages to inform the user about the locations of incompatible #' types. #' @param action An option to customize the incompatible type message depending #' on the context. Errors thrown from [vec_ptype2()] use `"combine"` and #' those thrown from [vec_cast()] use `"convert"`. #' @param details Any additional human readable details. #' @param message An overriding message for the error. `details` and #' `message` are mutually exclusive, supplying both is an error. #' #' @examples #' #' # Most of the time, `maybe_lossy_cast()` returns its input normally: #' maybe_lossy_cast( #' c("foo", "bar"), #' NULL, #' "", #' lossy = c(FALSE, FALSE), #' x_arg = "", #' to_arg = "" #' ) #' #' # If `lossy` has any `TRUE`, an error is thrown: #' try(maybe_lossy_cast( #' c("foo", "bar"), #' NULL, #' "", #' lossy = c(FALSE, TRUE), #' x_arg = "", #' to_arg = "" #' )) #' #' # Unless lossy casts are allowed: #' allow_lossy_cast( #' maybe_lossy_cast( #' c("foo", "bar"), #' NULL, #' "", #' lossy = c(FALSE, TRUE), #' x_arg = "", #' to_arg = "" #' ) #' ) #' #' @keywords internal #' @name vctrs-conditions NULL stop_vctrs <- function(message = NULL, class = NULL, ...) { abort(message, class = c(class, "vctrs_error"), ...) } stop_incompatible <- function(x, y, ..., details = NULL, message = NULL, class = NULL) { stop_vctrs( message, class = c(class, "vctrs_error_incompatible"), x = x, y = y, details = details, ... ) } #' @return #' `stop_incompatible_*()` unconditionally raise an error of class #' `"vctrs_error_incompatible_*"` and `"vctrs_error_incompatible"`. #' #' @rdname vctrs-conditions #' @export stop_incompatible_type <- function(x, y, ..., x_arg, y_arg, action = c("combine", "convert"), details = NULL, message = NULL, class = NULL) { vec_assert(x, arg = x_arg) vec_assert(y, arg = y_arg) action <- arg_match(action) message <- cnd_type_message( x, y, x_arg, y_arg, details, action, message, from_dispatch = match_from_dispatch(...) ) stop_incompatible( x, y, x_arg = x_arg, y_arg = y_arg, details = details, ..., message = message, class = c(class, "vctrs_error_incompatible_type") ) } #' @rdname vctrs-conditions #' @export stop_incompatible_cast <- function(x, to, ..., x_arg, to_arg, details = NULL, message = NULL, class = NULL) { stop_incompatible_type( x = x, y = to, ..., x_arg = x_arg, y_arg = to_arg, action = "convert", details = details, message = message, class = class ) } stop_incompatible_shape <- function(x, y, x_size, y_size, axis, x_arg, y_arg) { details <- format_error_bullets(c( x = glue::glue("Incompatible sizes {x_size} and {y_size} along axis {axis}.") )) stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg, details = details) } type_actions <- c( "combine", "convert" ) cnd_type_separator <- function(action) { if (identical(action, "combine")) { "and" } else if (identical(action, "convert")) { "to" } else { abort("Internal error: Unknown `action`.") } } cnd_type_message <- function(x, y, x_arg, y_arg, details, action, message, from_dispatch = FALSE, fallback = NULL) { if (!is_null(message)) { if (!is_null(details)) { abort("Can't supply both `message` and `details`.") } return(message) } x_arg <- arg_as_string(x_arg) y_arg <- arg_as_string(y_arg) if (nzchar(x_arg)) { x_name <- paste0(" `", x_arg, "` ") } else { x_name <- " " } if (nzchar(y_arg)) { y_name <- paste0(" `", y_arg, "` ") } else { y_name <- " " } separator <- cnd_type_separator(action) if (is.data.frame(x) && is.data.frame(y)) { if (vec_is_coercible(new_data_frame(x), new_data_frame(y))) { x_type <- cnd_type_message_df_label(x) y_type <- cnd_type_message_df_label(y) } else { x_type <- vec_ptype_full(x) y_type <- vec_ptype_full(y) } } else { x_type <- cnd_type_message_type_label(x) y_type <- cnd_type_message_type_label(y) } # If we are here directly from dispatch, this means there is no # ptype2 method implemented and the is-same-class fallback has # failed because of diverging attributes. The author of the class # should implement a ptype2 method as documented in the FAQ # indicated below. if (from_dispatch && identical(class(x)[[1]], class(y)[[1]])) { details <- c(incompatible_attrib_bullets(), details) details <- format_error_bullets(details) } if (is_null(fallback)) { end <- "." } else { end <- glue::glue("; falling back to {fallback}.") } if (action == "convert" && nzchar(y_arg)) { header <- glue::glue("Can't convert{x_name}<{x_type}> to match type of{y_name}<{y_type}>{end}") } else { header <- glue::glue("Can't {action}{x_name}<{x_type}> {separator}{y_name}<{y_type}>{end}") } paste_line(header, details) } cnd_type_message_type_label <- function(x) { if (is.data.frame(x)) { class(x)[[1]] } else { vec_ptype_full(x) } } incompatible_attrib_bullets <- function() { c( x = "Some attributes are incompatible.", i = "The author of the class should implement vctrs methods.", i = "See ." ) } cnd_type_message_df_label <- function(x) { x <- class(x)[[1]] if (identical(x, "tbl_df")) { "tibble" } else { x } } #' @rdname vctrs-conditions #' @export stop_incompatible_op <- function(op, x, y, details = NULL, ..., message = NULL, class = NULL) { message <- message %||% glue_lines( "<{vec_ptype_full(x)}> {op} <{vec_ptype_full(y)}> is not permitted", details ) stop_incompatible( x, y, op = op, details = details, ..., message = message, class = c(class, "vctrs_error_incompatible_op") ) } #' @rdname vctrs-conditions #' @export stop_incompatible_size <- function(x, y, x_size, y_size, ..., x_arg, y_arg, details = NULL, message = NULL, class = NULL) { stop_incompatible( x, y, x_size = x_size, y_size = y_size, ..., x_arg = x_arg, y_arg = y_arg, details = details, message = message, class = c(class, "vctrs_error_incompatible_size") ) } #' @export cnd_header.vctrs_error_incompatible_size <- function(cnd, ...) { if (is_string(cnd$message) && nzchar(cnd$message)) { return(cnd$message) } x_size <- vec_cast(cnd$x_size, int()) y_size <- vec_cast(cnd$y_size, int()) stopifnot( length(x_size) == 1, length(y_size) == 1 ) x_arg <- arg_as_string(cnd$x_arg) y_arg <- arg_as_string(cnd$y_arg) if (nzchar(x_arg)) { x_tag <- glue::glue("`{x_arg}` (size {x_size})") } else { x_tag <- glue::glue("input of size {x_size}") } if (nzchar(y_arg)) { y_tag <- glue::glue("to match `{y_arg}` (size {y_size})") } else { y_tag <- glue::glue("to size {y_size}") } glue::glue("Can't recycle {x_tag} {y_tag}.") } #' @export cnd_body.vctrs_error_incompatible_size <- function(cnd, ...) { cnd$details } #' Lossy cast error #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' By default, lossy casts are an error. Use `allow_lossy_cast()` to #' silence these errors and continue with the partial results. In this #' case the lost values are typically set to `NA` or to a lower value #' resolution, depending on the type of cast. #' #' Lossy cast errors are thrown by `maybe_lossy_cast()`. Unlike #' functions prefixed with `stop_`, `maybe_lossy_cast()` usually #' returns a result. If a lossy cast is detected, it throws an error, #' unless it's been wrapped in `allow_lossy_cast()`. In that case, it #' returns the result silently. #' #' @inheritParams stop_incompatible_cast #' @inheritParams vec_cast #' @param result The result of a potentially lossy cast. #' @param to Type to cast to. #' @param lossy A logical vector indicating which elements of `result` #' were lossy. #' #' Can also be a single `TRUE`, but note that `locations` picks up #' locations from this vector by default. In this case, supply your #' own location vector, possibly empty. #' @param loss_type The kind of lossy cast to be mentioned in error #' messages. Can be loss of precision (for instance from double to #' integer) or loss of generality (from character to factor). #' @param locations An optional integer vector giving the #' locations where `x` lost information. #' @param .deprecation If `TRUE`, the error is downgraded to a #' deprecation warning. This is useful for transitioning your class #' to a stricter conversion scheme. The warning advises your users #' to wrap their code with `allow_lossy_cast()`. #' @keywords internal #' @export maybe_lossy_cast <- function(result, x, to, lossy = NULL, locations = NULL, ..., loss_type = c("precision", "generality"), x_arg, to_arg, details = NULL, message = NULL, class = NULL, .deprecation = FALSE) { if (!any(lossy)) { return(result) } if (.deprecation) { maybe_warn_deprecated_lossy_cast(x, to, loss_type, x_arg, to_arg) return(result) } locations <- locations %||% which(lossy) withRestarts( vctrs_restart_error_cast_lossy = function() result, stop_lossy_cast( x = x, to = to, result = result, locations = locations, ..., loss_type = loss_type, x_arg = x_arg, to_arg = to_arg, details = details, message = message, class = class ) ) } stop_lossy_cast <- function(x, to, result, locations = NULL, ..., loss_type, x_arg, to_arg, details = NULL, message = NULL, class = NULL) { stop_vctrs( message, x = x, y = to, to = to, result = result, locations = locations, ..., loss_type = loss_type, x_arg = x_arg, to_arg = to_arg, details = details, class = c(class, "vctrs_error_cast_lossy") ) } #' @export conditionMessage.vctrs_error_cast_lossy <- function(c) { # FIXME: Remove `message` argument if (is_string(c$message) && nzchar(c$message)) { return(c$message) } # FIXME: Add `cnd_details()`? glue_lines( cnd_message(c), c$details ) } #' @export cnd_header.vctrs_error_cast_lossy <- function(cnd, ...) { x_label <- format_arg_label(vec_ptype_full(cnd$x), cnd$x_arg) to_label <- format_arg_label(vec_ptype_full(cnd$to), cnd$to_arg) loss_type <- loss_type(cnd$loss_type) glue::glue("Can't convert from {x_label} to {to_label} due to loss of {loss_type}.") } #' @export cnd_body.vctrs_error_cast_lossy <- function(cnd, ...) { if (length(cnd$locations)) { format_error_bullets(inline_list("Locations: ", cnd$locations)) } else { character() } } loss_type <- function(x) { stopifnot( is_character(x), all(x %in% c("precision", "generality")) ) x[[1]] } # Used in maybe_warn_deprecated_lossy_cast() new_error_cast_lossy <- function(x, to, loss_type, x_arg = "", to_arg = "") { error_cnd( "vctrs_error_cast_lossy", x = x, to = to, loss_type = loss_type, x_arg = x_arg, to_arg = to_arg ) } #' @rdname vctrs-conditions #' @param x_ptype,to_ptype Suppress only the casting errors where `x` #' or `to` match these [prototypes][vec_ptype]. #' @export allow_lossy_cast <- function(expr, x_ptype = NULL, to_ptype = NULL) { withCallingHandlers( vctrs_error_cast_lossy = function(err) { if (!is_null(x_ptype) && !vec_is(err$x, x_ptype)) { return() } if (!is_null(to_ptype) && !vec_is(err$to, to_ptype)) { return() } invokeRestart("vctrs_restart_error_cast_lossy") }, expr ) } maybe_warn_deprecated_lossy_cast <- function(x, to, loss_type, x_arg, to_arg) { # Returns `TRUE` if `allow_lossy_cast()` is on the stack and accepts # to handle the condition handled <- withRestarts( vctrs_restart_error_cast_lossy = function() TRUE, { # Signal fully formed condition but strip the error classes in # case someone is catching: This is not an abortive condition. cnd <- new_error_cast_lossy( x, to, loss_type = loss_type, x_arg = x_arg, to_arg = to_arg ) class(cnd) <- setdiff(class(cnd), c("error", "rlang_error")) signalCondition(cnd) FALSE } ) if (handled) { return(invisible()) } from <- format_arg_label(vec_ptype_abbr(x), x_arg) to <- format_arg_label(vec_ptype_abbr(to), to_arg) warn_deprecated(paste_line( glue::glue("We detected a lossy transformation from `{ from }` to `{ to }`."), "The result will contain lower-resolution values or missing values.", "To suppress this warning, wrap your code with `allow_lossy_cast()`:", "", " # Allow all lossy transformations:", " vctrs::allow_lossy_cast(mycode())", "", " # Allow only a specific transformation:", " vctrs::allow_lossy_cast(mycode(), x_ptype = from, to_ptype = to)", "", "Consult `?vctrs::allow_lossy_cast` for more information." )) invisible() } stop_unsupported <- function(x, method) { msg <- glue::glue("`{method}.{class(x)[[1]]}()` not supported.") stop_vctrs( "vctrs_error_unsupported", message = msg, x = x, method = method ) } stop_unimplemented <- function(x, method) { msg <- glue::glue("`{method}.{class(x)[[1]]}()` not implemented.") stop_vctrs( "vctrs_error_unimplemented", message = msg, x = x, method = method ) } stop_scalar_type <- function(x, arg = NULL) { if (is_null(arg) || !nzchar(arg)) { arg <- "Input" } else { arg <- glue::backtick(arg) } msg <- glue::glue("{arg} must be a vector, not {friendly_type_of(x)}.") stop_vctrs(msg, "vctrs_error_scalar_type", actual = x) } stop_corrupt_factor_levels <- function(x, arg = "x") { msg <- glue::glue("`{arg}` is a corrupt factor with non-character levels") abort(msg) } stop_corrupt_ordered_levels <- function(x, arg = "x") { msg <- glue::glue("`{arg}` is a corrupt ordered factor with non-character levels") abort(msg) } stop_recycle_incompatible_size <- function(x_size, size, x_arg = "x") { stop_vctrs( x_size = x_size, y_size = size, x_arg = x_arg, # FIXME: tibble is the only package that uses `vctrs_error_recycle_incompatible_size` class = c("vctrs_error_incompatible_size", "vctrs_error_recycle_incompatible_size") ) } # Names ------------------------------------------------------------------- stop_names <- function(class = NULL, ...) { stop_vctrs( class = c(class, "vctrs_error_names"), ... ) } stop_names_cannot_be_empty <- function(names) { stop_names( class = "vctrs_error_names_cannot_be_empty", names = names ) } #' @export cnd_header.vctrs_error_names_cannot_be_empty <- function(cnd, ...) { "Names can't be empty." } #' @export cnd_body.vctrs_error_names_cannot_be_empty <- function(cnd, ...) { locations <- detect_empty_names(cnd$names) if (length(locations) == 1) { bullet <- glue::glue("Empty name found at location {locations}.") } else { bullet <- glue::glue("Empty names found at locations {ensure_full_stop(enumerate(locations))}") } bullet <- c(x = bullet) format_error_bullets(bullet) } stop_names_cannot_be_dot_dot <- function(names) { stop_names( class = "vctrs_error_names_cannot_be_dot_dot", names = names ) } #' @export cnd_header.vctrs_error_names_cannot_be_dot_dot <- function(cnd, ...) { "Names can't be of the form `...` or `..j`." } #' @export cnd_body.vctrs_error_names_cannot_be_dot_dot <- function(cnd, ...) { names <- cnd$names locations <- detect_dot_dot(names) names <- names[locations] split <- vec_group_loc(names) info <- map2_chr(split$key, split$loc, make_names_loc_bullet) header <- "These names are invalid:" header <- c(x = header) header <- format_error_bullets(header) message <- bullets(info, header = header) message <- indent(message, 2) message } stop_names_must_be_unique <- function(names, arg = "") { stop_names( class = "vctrs_error_names_must_be_unique", arg = arg, names = names ) } #' @export cnd_header.vctrs_error_names_must_be_unique <- function(cnd, ...) { "Names must be unique." } #' @export cnd_body.vctrs_error_names_must_be_unique <- function(cnd, ...) { names <- cnd$names dups <- vec_group_loc(names) dup_indicator <- map_lgl(dups$loc, function(x) length(x) != 1L) dups <- vec_slice(dups, dup_indicator) header <- "These names are duplicated:" header <- c(x = header) header <- format_error_bullets(header) info <- map2_chr(dups$key, dups$loc, make_names_loc_bullet) message <- bullets(info, header = header) message <- indent(message, 2) arg <- arg_as_string(cnd$arg) if (arg != "") { hint <- c(i = glue::glue("Use argument `{cnd$arg}` to specify repair strategy.")) message <- c(message, format_error_bullets(hint)) } message } make_names_loc_bullet <- function(x, loc) { if (length(loc) == 1) { glue::glue("{glue::double_quote(x)} at location {loc}.") } else { glue::glue("{glue::double_quote(x)} at locations {ensure_full_stop(enumerate(loc))}") } } enumerate <- function(x, max = 5L, allow_empty = FALSE) { n <- length(x) if (n == 0L && !allow_empty) { abort("Internal error: Enumeration can't be empty.") } if (n > max) { paste0(glue::glue_collapse(x[seq2(1, max)], ", "), ", etc.") } else { if (n == 2) { last <- " and " } else { last <- ", and " } glue::glue_collapse(x, ", ", last = last) } } ensure_full_stop <- function(x) { n <- nchar(x) if (substr(x, n, n) == ".") { x } else { paste0(x, ".") } } stop_native_implementation <- function(fn) { abort(paste_line( glue::glue("`{fn}()` is implemented at C level."), "This R function is purely indicative and should never be called." )) } # Helpers ----------------------------------------------------------------- glue_lines <- function(..., env = parent.frame()) { out <- map_chr(chr(...), glue::glue, .envir = env) paste(out, collapse = "\n") } format_arg_label <- function(type, arg = "") { type <- paste0("<", type, ">") if (nzchar(arg)) { paste0("`", arg, "` ", type) } else { type } } arg_as_string <- function(arg) { if (is_null(arg)) { "" } else if (is_string(arg)) { arg } else { as_label(arg) } } append_arg <- function(x, arg) { if (is_null(arg)) { return(x) } arg <- arg_as_string(arg) if (nzchar(arg)) { glue::glue("{x} `{arg}`") } else { x } } vctrs/R/proxy.R0000644000176200001440000001747513766452345013155 0ustar liggesusers#' Proxy and restore #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' `vec_proxy()` returns the data structure containing the values of a #' vector. This data structure is usually the vector itself. In this #' case the proxy is the [identity function][base::identity], which is #' the default `vec_proxy()` method. #' #' Only experts should implement special `vec_proxy()` methods, for #' these cases: #' #' - A vector has vectorised attributes, i.e. metadata for #' each element of the vector. These _record types_ are implemented #' in vctrs by returning a data frame in the proxy method. If you're #' starting your class from scratch, consider deriving from the #' [`rcrd`][new_rcrd] class. It implements the appropriate data #' frame proxy and is generally the preferred way to create a record #' class. #' #' - When you're implementing a vector on top of a non-vector type, #' like an environment or an S4 object. This is currently only #' partially supported. #' #' - S3 lists are considered scalars by default. This is the safe #' choice for list objects such as returned by `stats::lm()`. To #' declare that your S3 list class is a vector, you normally add #' `"list"` to the right of your class vector. Explicit inheritance #' from list is generally the preferred way to declare an S3 list in #' R, for instance it makes it possible to dispatch on #' `generic.list` S3 methods. #' #' If you can't modify your class vector, you can implement an #' identity proxy (i.e. a proxy method that just returns its input) #' to let vctrs know this is a vector list and not a scalar. #' #' `vec_restore()` is the inverse operation of `vec_proxy()`. It #' should only be called on vector proxies. #' #' - It undoes the transformations of `vec_proxy()`. #' #' - It restores attributes and classes. These may be lost when the #' memory values are manipulated. For example slicing a subset of a #' vector's proxy causes a new proxy to be allocated. #' #' By default vctrs restores all attributes and classes #' automatically. You only need to implement a `vec_restore()` method #' if your class has attributes that depend on the data. #' #' @param x A vector. #' @inheritParams ellipsis::dots_empty #' #' @section Proxying: #' #' You should only implement `vec_proxy()` when your type is designed #' around a non-vector class. I.e. anything that is not either: #' #' * An atomic vector #' * A bare list #' * A data frame #' #' In this case, implement `vec_proxy()` to return such a vector #' class. The vctrs operations such as [vec_slice()] are applied on #' the proxy and `vec_restore()` is called to restore the original #' representation of your type. #' #' The most common case where you need to implement `vec_proxy()` is #' for S3 lists. In vctrs, S3 lists are treated as scalars by #' default. This way we don't treat objects like model fits as #' vectors. To prevent vctrs from treating your S3 list as a scalar, #' unclass it in the `vec_proxy()` method. For instance, here is the #' definition for `list_of`: #' #' ``` #' vec_proxy.vctrs_list_of <- function(x) { #' unclass(x) #' } #' ``` #' #' Another case where you need to implement a proxy is [record #' types][new_rcrd]. Record types should return a data frame, as in #' the `POSIXlt` method: #' #' ``` #' vec_proxy.POSIXlt <- function(x) { #' new_data_frame(unclass(x)) #' } #' ``` #' #' Note that you don't need to implement `vec_proxy()` when your class #' inherits from `vctrs_vctr` or `vctrs_rcrd`. #' #' #' @section Restoring: #' #' A restore is a specialised type of cast, primarily used in #' conjunction with `NextMethod()` or a C-level function that works on #' the underlying data structure. A `vec_restore()` method can make #' the following assumptions about `x`: #' #' * It has the correct type. #' * It has the correct names. #' * It has the correct `dim` and `dimnames` attributes. #' * It is unclassed. This way you can call vctrs generics with `x` #' without triggering an infinite loop of restoration. #' #' The length may be different (for example after [vec_slice()] has #' been called), and all other attributes may have been lost. The #' method should restore all attributes so that after restoration, #' `vec_restore(vec_data(x), x)` yields `x`. #' #' To understand the difference between `vec_cast()` and `vec_restore()` #' think about factors: it doesn't make sense to cast an integer to a factor, #' but if `NextMethod()` or another low-level function has stripped attributes, #' you still need to be able to restore them. #' #' The default method copies across all attributes so you only need to #' provide your own method if your attributes require special care #' (i.e. they are dependent on the data in some way). When implementing #' your own method, bear in mind that many R users add attributes to track #' additional metadata that is important to them, so you should preserve any #' attributes that don't require special handling for your class. #' #' @section Dependencies: #' - `x` must be a vector in the vctrs sense (see [vec_is()]) #' - By default the underlying data is returned as is (identity proxy) #' #' All vector classes have a proxy, even those who don't implement any #' vctrs methods. The exception is S3 lists that don't inherit from #' `"list"` explicitly. These might have to implement an identity #' proxy for compatibility with vctrs (see discussion above). #' #' @keywords internal #' @export vec_proxy <- function(x, ...) { if (!missing(...)) { ellipsis::check_dots_empty() } return(.Call(vctrs_proxy, x)) UseMethod("vec_proxy") } #' @export vec_proxy.default <- function(x, ...) { x } #' @rdname vec_proxy #' @param to The original vector to restore to. #' @param n `r lifecycle::badge("experimental")` #' The total size to restore to. This is currently passed by #' `vec_slice()` to solve edge cases arising in data frame #' restoration. In most cases you don't need this information and #' can safely ignore that argument. This parameter should be #' considered internal and experimental, it might change in the #' future. #' @export vec_restore <- function(x, to, ..., n = NULL) { if (!missing(...)) { ellipsis::check_dots_empty() } return(.Call(vctrs_restore, x, to, n)) UseMethod("vec_restore", to) } vec_restore_dispatch <- function(x, to, ..., n = NULL) { UseMethod("vec_restore", to) } #' @export vec_restore.default <- function(x, to, ..., n = NULL) { .Call(vctrs_restore_default, x, to) } vec_restore_default <- function(x, to, ...) { .Call(vctrs_restore_default, x, to) } #' Extract underlying data #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' Extract the data underlying an S3 vector object, i.e. the underlying #' (named) atomic vector, data frame, or list. #' #' @param x A vector or object implementing `vec_proxy()`. #' @return The data underlying `x`, free from any attributes except the names. #' #' @section Difference with `vec_proxy()`: #' #' * `vec_data()` returns unstructured data. The only attributes #' preserved are names, dims, and dimnames. #' #' Currently, due to the underlying memory architecture of R, this #' creates a full copy of the data for atomic vectors. #' #' * `vec_proxy()` may return structured data. This generic is the #' main customisation point for accessing memory values in vctrs, #' along with [vec_restore()]. #' #' Methods must return a vector type. Records and data frames will #' be processed rowwise. #' #' @keywords internal #' @export vec_data <- function(x) { vec_assert(x) x <- vec_proxy(x) if (is.data.frame(x)) { return(new_data_frame(x, row.names = .row_names_info(x, 0L))) } if (has_dim(x)) { x <- vec_set_attributes(x, list(dim = dim(x), dimnames = dimnames(x))) } else { x <- vec_set_attributes(x, list(names = names(x))) } # Reset S4 bit in vector-like S4 objects unset_s4(x) } unset_s4 <- function(x) { .Call(vctrs_unset_s4, x) } vctrs/R/group.R0000644000176200001440000000575013766452345013121 0ustar liggesusers#' Identify groups #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' * `vec_group_id()` returns an identifier for the group that each element of #' `x` falls in, constructed in the order that they appear. The number of #' groups is also returned as an attribute, `n`. #' #' * `vec_group_loc()` returns a data frame containing a `key` column with the #' unique groups, and a `loc` column with the locations of each group in `x`. #' #' * `vec_group_rle()` locates groups in `x` and returns them run length #' encoded in the order that they appear. The return value is a rcrd object #' with fields for the `group` identifiers and the run `length` of the #' corresponding group. The number of groups is also returned as an #' attribute, `n`. #' #' @param x A vector #' @return #' * `vec_group_id()`: An integer vector with the same size as `x`. #' * `vec_group_loc()`: A two column data frame with size equal to #' `vec_size(vec_unique(x))`. #' * A `key` column of type `vec_ptype(x)` #' * A `loc` column of type list, with elements of type integer. #' * `vec_group_rle()`: A `vctrs_group_rle` rcrd object with two integer #' vector fields: `group` and `length`. #' #' Note that when using `vec_group_loc()` for complex types, the default #' `data.frame` print method will be suboptimal, and you will want to coerce #' into a tibble to better understand the output. #' @name vec_group #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @keywords internal #' @examples #' purrr <- c("p", "u", "r", "r", "r") #' vec_group_id(purrr) #' vec_group_rle(purrr) #' #' groups <- mtcars[c("vs", "am")] #' vec_group_id(groups) #' #' group_rle <- vec_group_rle(groups) #' group_rle #' #' # Access fields with `field()` #' field(group_rle, "group") #' field(group_rle, "length") #' #' # `vec_group_id()` is equivalent to #' vec_match(groups, vec_unique(groups)) #' #' vec_group_loc(mtcars$vs) #' vec_group_loc(mtcars[c("vs", "am")]) #' #' if (require("tibble")) { #' as_tibble(vec_group_loc(mtcars[c("vs", "am")])) #' } NULL #' @rdname vec_group #' @export vec_group_id <- function(x) { .Call(vctrs_group_id, x) } #' @rdname vec_group #' @export vec_group_loc <- function(x) { .Call(vctrs_group_loc, x) } #' @rdname vec_group #' @export vec_group_rle <- function(x) { .Call(vctrs_group_rle, x) } #' @export format.vctrs_group_rle <- function(x, ...) { group <- field(x, "group") length <- field(x, "length") paste0(group, "x", length) } #' @export obj_print_header.vctrs_group_rle <- function(x, ...) { size <- vec_size(x) n <- attr(x, "n") cat_line("<", vec_ptype_full(x), "[", size, "][n = ", n, "]>") invisible(x) } # For testing new_group_rle <- function(group, length, n) { vec_assert(group, integer()) vec_assert(length, integer()) vec_assert(n, integer(), 1L) if (vec_size(group) != vec_size(length)) { abort("`group` and `length` must have the same size.") } new_rcrd(list(group = group, length = length), n = n, class = "vctrs_group_rle") } vctrs/R/type-vctr.R0000644000176200001440000004006014042540502013670 0ustar liggesusers#' vctr (vector) S3 class #' #' @description #' This abstract class provides a set of useful default methods that makes it #' considerably easier to get started with a new S3 vector class. See #' `vignette("s3-vector")` to learn how to use it to create your own S3 #' vector classes. #' #' @details #' List vctrs are special cases. When created through `new_vctr()`, the #' resulting list vctr should always be recognized as a list by #' `vec_is_list()`. Because of this, if `inherit_base_type` is `FALSE` #' an error is thrown. #' #' @section Base methods: #' The vctr class provides methods for many base generics using a smaller #' set of generics defined by this package. Generally, you should think #' carefully before overriding any of the methods that vctrs implements for #' you as they've been carefully planned to be internally consistent. #' #' * `[[` and `[` use `NextMethod()` dispatch to the underlying base function, #' then restore attributes with `vec_restore()`. #' `rep()` and `length<-` work similarly. #' #' * `[[<-` and `[<-` cast `value` to same type as `x`, then call #' `NextMethod()`. #' #' * `as.logical()`, `as.integer()`, `as.numeric()`, `as.character()`, #' `as.Date()` and `as.POSIXct()` methods call `vec_cast()`. #' The `as.list()` method calls `[[` repeatedly, and the `as.data.frame()` #' method uses a standard technique to wrap a vector in a data frame. #' #' * `as.factor()`, `as.ordered()` and `as.difftime()` are not generic functions #' in base R, but have been reimplemented as generics in the `generics` #' package. `vctrs` extends these and calls `vec_cast()`. To inherit this #' behaviour in a package, import and re-export the generic of interest #' from `generics`. #' #' * `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()` use #' [vec_proxy()]. #' #' * `<`, `<=`, `>=`, `>`, `min()`, `max()`, `range()`, `median()`, #' `quantile()`, and `xtfrm()` methods use [vec_proxy_compare()]. #' #' * `+`, `-`, `/`, `*`, `^`, `%%`, `%/%`, `!`, `&`, and `|` operators #' use [vec_arith()]. #' #' * Mathematical operations including the Summary group generics (`prod()`, #' `sum()`, `any()`, `all()`), the Math group generics (`abs()`, `sign()`, #' etc), `mean()`, `is.nan()`, `is.finite()`, and `is.infinite()` #' use [vec_math()]. #' #' * `dims()`, `dims<-`, `dimnames()`, `dimnames<-`, `levels()`, and #' `levels<-` methods throw errors. #' #' @param .data Foundation of class. Must be a vector #' @param ... Name-value pairs defining attributes #' @param class Name of subclass. #' @param inherit_base_type `r lifecycle::badge("experimental")` #' A single logical, or `NULL`. Does this class extend the base type of #' `.data`? i.e. does the resulting object extend the behaviour of the #' underlying type? Defaults to `FALSE` for all types except lists, which #' are required to inherit from the base type. #' @export #' @keywords internal #' @aliases vctr new_vctr <- function(.data, ..., class = character(), inherit_base_type = NULL) { if (!is_vector(.data)) { abort("`.data` must be a vector type.") } nms <- validate_names(.data) if (is_list(.data)) { if (is.data.frame(.data)) { abort("`.data` can't be a data frame.") } if (is.null(inherit_base_type)) { inherit_base_type <- TRUE } else if (is_false(inherit_base_type)) { abort("List `.data` must inherit from the base type.") } } # Default to `FALSE` in all cases except lists if (is.null(inherit_base_type)) { inherit_base_type <- FALSE } class <- c(class, "vctrs_vctr", if (inherit_base_type) typeof(.data)) attrib <- list(names = nms, ..., class = class) vec_set_attributes(.data, attrib) } validate_names <- function(.data) { nms <- names(.data) if (!names_all_or_nothing(nms)) { stop("If any elements of `.data` are named, all must be named", call. = FALSE) } nms } names_all_or_nothing <- function(names) { if (is.null(names)) { TRUE } else { all(names != "" & !is.na(names)) } } #' @export vec_proxy.vctrs_vctr <- function(x, ...) { if (is_list(x)) { unclass(x) } else { x } } #' @export vec_restore.vctrs_vctr <- function(x, to, ..., i = NULL) { if (typeof(x) != typeof(to)) { stop_incompatible_cast(x, to, x_arg = "", to_arg = "") } NextMethod() } #' @method vec_cast vctrs_vctr #' @export vec_cast.vctrs_vctr <- function(x, to, ...) { UseMethod("vec_cast.vctrs_vctr") } vctr_cast <- function(x, to, ..., x_arg = "", to_arg = "") { # These are not strictly necessary, but make bootstrapping a new class # a bit simpler if (is.object(x)) { if (is_same_type(x, to)) { x } else { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } } else { # FIXME: `vec_restore()` should only be called on proxies vec_restore(x, to) } } #' @export c.vctrs_vctr <- function(..., recursive = FALSE, use.names = TRUE) { if (!is_false(recursive)) { abort("`recursive` must be `FALSE` when concatenating vctrs classes.") } if (!is_true(use.names)) { abort("`use.names` must be `TRUE` when concatenating vctrs classes.") } vec_c(...) } # Printing ---------------------------------------------------------------- #' @export print.vctrs_vctr <- function(x, ...) { obj_print(x, ...) invisible(x) } #' @export str.vctrs_vctr <- function(object, ...) { obj_str(object, ...) } #' @export format.vctrs_vctr <- function(x, ...) { format(vec_data(x), ...) } # Subsetting -------------------------------------------------------------- #' @export `[.vctrs_vctr` <- function(x, i, ...) { vec_index(x, i, ...) } #' @export `[[.vctrs_vctr` <- function(x, i, ...) { if (is.list(x)) { NextMethod() } else { vec_restore(NextMethod(), x) } } #' @export `$.vctrs_vctr` <- function(x, i) { if (is.list(x)) { NextMethod() } else { vec_restore(NextMethod(), x) } } #' @export rep.vctrs_vctr <- function(x, ...) { vec_restore(NextMethod(), x) } #' @export `length<-.vctrs_vctr` <- function(x, value) { vec_restore(NextMethod(), x) } #' @export diff.vctrs_vctr <- function(x, lag = 1L, differences = 1L, ...) { stopifnot(length(lag) == 1L, lag >= 1L) stopifnot(length(differences) == 1L, differences >= 1L) n <- vec_size(x) if (lag * differences >= n) return(vec_slice(x, 0L)) out <- x for (i in seq_len(differences)) { n <- vec_size(out) lhs <- (1L + lag):n rhs <- 1L:(n - lag) out <- vec_slice(out, lhs) - vec_slice(out, rhs) } out } # Modification ------------------------------------------------------------- #' @export `[[<-.vctrs_vctr` <- function(x, ..., value) { if (!is.list(x)) { value <- vec_cast(value, x) } NextMethod() } #' @export `$<-.vctrs_vctr` <- function(x, i, value) { if (is.list(x)) { NextMethod() } else { # Default behaviour is to cast LHS to a list abort("$ operator is invalid for atomic vectors.") } } #' @export `[<-.vctrs_vctr` <- function(x, i, value) { value <- vec_cast(value, x) NextMethod() } #' @export `names<-.vctrs_vctr` <- function(x, value) { if (length(value) != 0 && length(value) != length(x)) { abort("`names()` must be the same length as x.") } if (!names_all_or_nothing(value)) { abort("If any elements are named, all elements must be named.") } NextMethod() } # Coercion ---------------------------------------------------------------- #' @export as.logical.vctrs_vctr <- function(x, ...) { vec_cast(x, logical()) } #' @export as.integer.vctrs_vctr <- function(x, ...) { vec_cast(x, integer()) } #' @export as.double.vctrs_vctr <- function(x, ...) { vec_cast(x, double()) } #' @export as.character.vctrs_vctr <- function(x, ...) { vec_cast(x, character()) } #' @export as.list.vctrs_vctr <- function(x, ...) { out <- vec_chop(x) if (vec_is_list(x)) { out <- lapply(out, `[[`, 1) } out } #' @export as.Date.vctrs_vctr <- function(x, ...) { vec_cast(x, new_date()) } #' @export as.POSIXct.vctrs_vctr <- function(x, tz = "", ...) { vec_cast(x, new_datetime(tzone = tz)) } #' @export as.POSIXlt.vctrs_vctr <- function(x, tz = "", ...) { to <- as.POSIXlt(new_datetime(), tz = tz) vec_cast(x, to) } # Work around inconsistencies in as.data.frame() as.data.frame2 <- function(x) { # Unclass to avoid dispatching on `as.data.frame()` methods that break size # invariants, like `as.data.frame.table()` (#913). This also prevents infinite # recursion with shaped vctrs in `as.data.frame.vctrs_vctr()`. x <- unclass(x) out <- as.data.frame(x) if (vec_dim_n(x) == 1) { # 1D arrays are not stripped from their dimensions out[[1]] <- as.vector(out[[1]]) # 1D arrays are auto-labelled with substitute() names(out) <- "V1" } out } #' @export as.data.frame.vctrs_vctr <- function(x, row.names = NULL, optional = FALSE, ..., nm = paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ")) { force(nm) if (has_dim(x)) { return(as.data.frame2(x)) } cols <- list(x) if (!optional) { names(cols) <- nm } new_data_frame(cols, n = vec_size(x)) } # Dynamically registered in .onLoad() as.factor.vctrs_vctr <- function(x, levels = character(), ...) { vec_cast(x, new_factor(levels = levels)) } # Dynamically registered in .onLoad() as.ordered.vctrs_vctr <- function(x, levels = character(), ...) { vec_cast(x, new_ordered(levels = levels)) } # Dynamically registered in .onLoad() as.difftime.vctrs_vctr <- function(x, units = "secs", ...) { vec_cast(x, new_duration(units = units)) } # Equality ---------------------------------------------------------------- #' @export `==.vctrs_vctr` <- function(e1, e2) { vec_equal(e1, e2) } #' @export `!=.vctrs_vctr` <- function(e1, e2) { !vec_equal(e1, e2) } #' @export is.na.vctrs_vctr <- function(x) { vec_equal_na(x) } #' @export anyNA.vctrs_vctr <- if (getRversion() >= "3.2") { function(x, recursive = FALSE) { any(is.na(x)) } } else { function(x) { any(is.na(x)) } } #' @export unique.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_unique(x) } #' @export duplicated.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_duplicate_id(x) != seq_along(x) } #' @export anyDuplicated.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_duplicate_any(x) } # Comparison ---------------------------------------------------------------- #' @export `<=.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) <= 0 } #' @export `<.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) < 0 } #' @export `>=.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) >= 0 } #' @export `>.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) > 0 } #' @export xtfrm.vctrs_vctr <- function(x) { proxy <- vec_proxy_order(x) if (is.object(proxy) && typeof(proxy) %in% c("integer", "double", "character")) { proxy <- unstructure(proxy) } # order(order(x)) ~= rank(x) if (typeof(proxy) %in% c("integer", "double")) { proxy } else { vec_order(vec_order(proxy)) } } #' @importFrom stats median #' @export median.vctrs_vctr <- function(x, ..., na.rm = FALSE) { # nocov start stop_unimplemented(x, "median") # nocov end } #' @importFrom stats quantile #' @export quantile.vctrs_vctr <- function(x, ..., type = 1, na.rm = FALSE) { # nocov start stop_unimplemented(x, "quantile") # nocov end } #' @export min.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast(Inf, x)) } # TODO: implement to do vec_arg_min() rank <- xtfrm(x) if (isTRUE(na.rm)) { idx <- which.min(rank) } else { idx <- which(vec_equal(rank, min(rank), na_equal = TRUE)) } x[[idx[[1]]]] } #' @export max.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast(-Inf, x)) } # TODO: implement to do vec_arg_max() rank <- xtfrm(x) if (isTRUE(na.rm)) { idx <- which.max(rank) } else { idx <- which(vec_equal(rank, max(rank), na_equal = TRUE)) } x[[idx[[1]]]] } #' @export range.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast(c(Inf, -Inf), x)) } # Inline `min()` / `max()` to only call `xtfrm()` once rank <- xtfrm(x) if (isTRUE(na.rm)) { idx_min <- which.min(rank) idx_max <- which.max(rank) } else { idx_min <- which(vec_equal(rank, min(rank), na_equal = TRUE)) idx_max <- which(vec_equal(rank, max(rank), na_equal = TRUE)) } c(x[[idx_min[[1]]]], x[[idx_max[[1]]]]) } # Numeric ----------------------------------------------------------------- #' @export Math.vctrs_vctr <- function(x, ...) { vec_math(.Generic, x, ...) } #' @export Summary.vctrs_vctr <- function(..., na.rm = FALSE) { vec_math(.Generic, vec_c(...), na.rm = na.rm) } #' @export mean.vctrs_vctr <- function(x, ..., na.rm = FALSE) { vec_math("mean", x, na.rm = na.rm) } #' @export is.finite.vctrs_vctr <- function(x) { vec_math("is.finite", x) } #' @export is.infinite.vctrs_vctr <- function(x) { vec_math("is.infinite", x) } #' @export is.nan.vctrs_vctr <- function(x) { vec_math("is.nan", x) } # Arithmetic -------------------------------------------------------------- #' @export `+.vctrs_vctr` <- function(e1, e2) { if (missing(e2)) { vec_arith("+", e1, MISSING()) } else { vec_arith("+", e1, e2) } } #' @export `-.vctrs_vctr` <- function(e1, e2) { if (missing(e2)) { vec_arith("-", e1, MISSING()) } else { vec_arith("-", e1, e2) } } #' @export `*.vctrs_vctr` <- function(e1, e2) { vec_arith("*", e1, e2) } #' @export `/.vctrs_vctr` <- function(e1, e2) { vec_arith("/", e1, e2) } #' @export `^.vctrs_vctr` <- function(e1, e2) { vec_arith("^", e1, e2) } #' @export `%%.vctrs_vctr` <- function(e1, e2) { vec_arith("%%", e1, e2) } #' @export `%/%.vctrs_vctr` <- function(e1, e2) { vec_arith("%/%", e1, e2) } #' @export `!.vctrs_vctr` <- function(x) { vec_arith("!", x, MISSING()) } #' @export `&.vctrs_vctr` <- function(e1, e2) { vec_arith("&", e1, e2) } #' @export `|.vctrs_vctr` <- function(e1, e2) { vec_arith("|", e1, e2) } # Unimplemented ------------------------------------------------------------ #' @export summary.vctrs_vctr <- function(object, ...) { # nocov start stop_unimplemented(object, "summary") # nocov end } # Unsupported -------------------------------------------------------------- #' @export `dim<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "dim<-") } #' @export `dimnames<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "dimnames<-") } #' @export levels.vctrs_vctr <- function(x) { stop_unsupported(x, "levels") } #' @export `levels<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "levels<-") } #' @export `t.vctrs_vctr` <- function(x) { stop_unsupported(x, "t") } #' @export `is.na<-.vctrs_vctr` <- function(x, value) { vec_assign(x, value, vec_init(x)) } # Helpers ----------------------------------------------------------------- # This simple class is used for testing as defining methods inside # a test does not work (because the lexical scope is lost) # nocov start new_hidden <- function(x = double()) { stopifnot(is.numeric(x)) new_vctr(vec_cast(x, double()), class = "hidden", inherit_base_type = FALSE) } format.hidden <- function(x, ...) rep("xxx", length(x)) local_hidden <- function(frame = caller_env()) { local_bindings(.env = global_env(), .frame = frame, vec_ptype2.hidden.hidden = function(x, y, ...) new_hidden(), vec_ptype2.hidden.double = function(x, y, ...) new_hidden(), vec_ptype2.double.hidden = function(x, y, ...) new_hidden(), vec_ptype2.hidden.logical = function(x, y, ...) new_hidden(), vec_ptype2.logical.hidden = function(x, y, ...) new_hidden(), vec_cast.hidden.hidden = function(x, to, ...) x, vec_cast.hidden.double = function(x, to, ...) new_hidden(vec_data(x)), vec_cast.double.hidden = function(x, to, ...) vec_data(x), vec_cast.hidden.logical = function(x, to, ...) new_hidden(as.double(x)), vec_cast.logical.hidden = function(x, to, ...) as.logical(vec_data(x)) ) } # nocov end vctrs/R/type-rcrd.R0000644000176200001440000000660013762412012013650 0ustar liggesusers# Constructor and basic methods --------------------------------------------- #' rcrd (record) S3 class #' #' The rcrd class extends [vctr]. A rcrd is composed of 1 or more [field]s, #' which must be vectors of the same length. Is designed specifically for #' classes that can naturally be decomposed into multiple vectors of the same #' length, like [POSIXlt], but where the organisation should be considered #' an implementation detail invisible to the user (unlike a [data.frame]). #' #' @param fields A list or a data frame. Lists must be rectangular #' (same sizes), and contain uniquely named vectors (at least #' one). `fields` is validated with [df_list()] to ensure uniquely #' named vectors. #' @param ... Additional attributes #' @param class Name of subclass. #' @export #' @aliases ses rcrd #' @keywords internal new_rcrd <- function(fields, ..., class = character()) { if (vec_is_list(fields) && length(vec_unique(list_sizes(fields))) > 1L) { abort("All fields must be the same size.") } fields <- df_list(!!!fields) if (!length(fields)) { abort("`fields` must be a list of length 1 or greater.") } structure(fields, ..., class = c(class, "vctrs_rcrd", "vctrs_vctr")) } #' @export vec_proxy.vctrs_rcrd <- function(x, ...) { new_data_frame(unclass(x)) } #' @export vec_restore.vctrs_rcrd <- function(x, to, ...) { x <- NextMethod() attr(x, "row.names") <- NULL x } #' @export length.vctrs_rcrd <- function(x) { vec_size(x) } #' @export names.vctrs_rcrd <- function(x) { NULL } #' @export format.vctrs_rcrd <- function(x, ...) { if (inherits(x, "vctrs_foobar")) { # For unit tests exec("paste", !!!vec_data(x), sep = ":") } else { stop_unimplemented(x, "format") } } #' @export obj_str_data.vctrs_rcrd <- function(x, ...) { obj_str_leaf(x, ...) } #' @method vec_cast vctrs_rcrd #' @export vec_cast.vctrs_rcrd <- function(x, to, ...) UseMethod("vec_cast.vctrs_rcrd") #' @export vec_cast.vctrs_rcrd.vctrs_rcrd <- function(x, to, ...) { out <- vec_cast(vec_data(x), vec_data(to), ...) new_rcrd(out) } # Subsetting -------------------------------------------------------------- #' @export `[.vctrs_rcrd` <- function(x, i, ...) { vec_index(x, i, ...) } #' @export `[[.vctrs_rcrd` <- function(x, i, ...) { out <- vec_slice(vec_data(x), i) vec_restore(out, x) } #' @export `$.vctrs_rcrd` <- function(x, i, ...) { stop_unsupported(x, "subsetting with $") } #' @export rep.vctrs_rcrd <- function(x, ...) { out <- lapply(vec_data(x), base_vec_rep, ...) vec_restore(out, x) } #' @export `length<-.vctrs_rcrd` <- function(x, value) { out <- vec_size_assign(vec_data(x), value) vec_restore(out, x) } # Replacement ------------------------------------------------------------- #' @export `[[<-.vctrs_rcrd` <- function(x, i, value) { force(i) x[i] <- value x } #' @export `$<-.vctrs_rcrd` <- function(x, i, value) { stop_unsupported(x, "subset assignment with $") } #' @export `[<-.vctrs_rcrd` <- function(x, i, value) { i <- maybe_missing(i, TRUE) value <- vec_cast(value, x) out <- vec_assign(vec_data(x), i, vec_data(value)) vec_restore(out, x) } # Equality and ordering --------------------------------------------------- # FIXME #' @export vec_proxy_compare.vctrs_rcrd <- function(x, ...) { new_data_frame(vec_data(x), n = length(x)) } #' @export vec_math.vctrs_rcrd <- function(.fn, .x, ...) { stop_unsupported(.x, "vec_math") } vctrs/R/type2.R0000644000176200001440000002121413762412012013000 0ustar liggesusers#' Find the common type for a pair of vectors #' #' @description #' #' `vec_ptype2()` defines the coercion hierarchy for a set of related #' vector types. Along with [vec_cast()], this generic forms the #' foundation of type coercions in vctrs. #' #' `vec_ptype2()` is relevant when you are implementing vctrs methods #' for your class, but it should not usually be called directly. If #' you need to find the common type of a set of inputs, call #' [vec_ptype_common()] instead. This function supports multiple #' inputs and [finalises][vec_ptype_finalise] the common type. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @inheritParams ellipsis::dots_empty #' @param x,y Vector types. #' @param x_arg,y_arg Argument names for `x` and `y`. These are used #' in error messages to inform the user about the locations of #' incompatible types (see [stop_incompatible_type()]). #' #' @seealso [stop_incompatible_type()] when you determine from the #' attributes that an input can't be cast to the target type. #' #' @section Dependencies: #' - [vec_ptype()] is applied to `x` and `y` #' #' @export vec_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { if (!missing(...)) { check_ptype2_dots_empty(...) } return(.Call(vctrs_ptype2, x, y, x_arg, y_arg)) UseMethod("vec_ptype2") } vec_ptype2_dispatch_s3 <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2") } vec_ptype2_dispatch_native <- function(x, y, ..., x_arg = "", y_arg = "") { fallback_opts <- match_fallback_opts(...) .Call(vctrs_ptype2_dispatch_native, x, y, fallback_opts, x_arg, y_arg) } #' Default cast and ptype2 methods #' #' @description #' #' These functions are automatically called when no [vec_ptype2()] or #' [vec_cast()] method is implemented for a pair of types. #' #' * They apply special handling if one of the inputs is of type #' `AsIs` or `sfc`. #' #' * They attempt a number of fallbacks in cases where it would be too #' inconvenient to be strict: #' #' - If the class and attributes are the same they are considered #' compatible. `vec_default_cast()` returns `x` in this case. #' #' - In case of incompatible data frame classes, they fall back to #' `data.frame`. If an incompatible subclass of tibble is #' involved, they fall back to `tbl_df`. #' #' * Otherwise, an error is thrown with [stop_incompatible_type()] or #' [stop_incompatible_cast()]. #' #' @keywords internal #' @export vec_default_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { if (is_asis(x)) { return(vec_ptype2_asis_left(x, y, x_arg = x_arg, y_arg = y_arg)) } if (is_asis(y)) { return(vec_ptype2_asis_right(x, y, x_arg = x_arg, y_arg = y_arg)) } opts <- match_fallback_opts(...) # If both data frames, first find common type of columns before the # same-type fallback if (df_needs_normalisation(x, y, opts)) { out <- vec_ptype2_df_fallback_normalise(x, y, opts) x <- out$x y <- out$y } if (opts$s3_fallback && can_fall_back(x, y)) { common <- common_class_suffix(x, y) if (length(common)) { return(new_common_class_fallback(x, common)) } } if (is_same_type(x, y)) { return(vec_ptype(x, x_arg = x_arg)) } if (has_df_fallback(opts$df_fallback)) { if (is_df_subclass(x) && is.data.frame(y)) { return(vec_ptype2_df_fallback(x, y, opts)) } if (is_df_subclass(y) && is.data.frame(x)) { return(vec_ptype2_df_fallback(x, y, opts)) } } # The from-dispatch parameter is set only when called from our S3 # dispatch mechanism, when no method is found to dispatch to. It # indicates whether the error message should provide advice about # diverging attributes. stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, `vctrs:::from_dispatch` = match_from_dispatch(...) ) } # We can't check for a proxy or ptype2 method to determine whether a # class is foreign, because we implement these generics for many base # classes and we still need to allow base fallbacks with subclasses. can_fall_back <- function(x, y) { # Work around bug with hard-coded `tsp` attribute in Rf_setAttrib() if (inherits(x, "ts") || inherits(y, "ts")) { return(FALSE) } if (is.data.frame(x) || is.data.frame(y)) { return(FALSE) } if (!identical(typeof(x), typeof(y))) { return(FALSE) } # Suboptimal: Prevent bad interaction with proxy-assign has_no_proxy(x) && has_no_proxy(y) } has_no_proxy <- function(x) { proxy <- vec_proxy(x) # Don't compare data for performance identical(typeof(x), typeof(proxy)) && identical(attributes(x), attributes(proxy)) } new_common_class_fallback <- function(x, fallback_class) { structure( vec_ptype(x), class = "vctrs:::common_class_fallback", fallback_class = fallback_class ) } #' @export `vec_proxy.vctrs:::common_class_fallback` <- function(x, ...) { x } is_common_class_fallback <- function(x) { inherits(x, "vctrs:::common_class_fallback") } common_class_suffix <- function(x, y) { vec_common_suffix(fallback_class(x), fallback_class(y)) } fallback_class <- function(x) { if (is_common_class_fallback(x)) { attr(x, "fallback_class") } else { class(x) } } check_ptype2_dots_empty <- function(..., `vctrs:::from_dispatch`, `vctrs:::df_fallback`, `vctrs:::s3_fallback`) { ellipsis::check_dots_empty() } match_fallback_opts <- function(..., `vctrs:::df_fallback` = NULL, `vctrs:::s3_fallback` = NULL) { fallback_opts( df_fallback = `vctrs:::df_fallback`, s3_fallback = `vctrs:::s3_fallback` ) } match_from_dispatch <- function(..., `vctrs:::from_dispatch` = FALSE) { `vctrs:::from_dispatch` } fallback_opts <- function(df_fallback = NULL, s3_fallback = NULL) { # Order is important for the C side list( df_fallback = df_fallback %||% df_fallback_default(), s3_fallback = s3_fallback %||% s3_fallback_default() ) } full_fallback_opts <- function() { fallback_opts( df_fallback = DF_FALLBACK_quiet, s3_fallback = S3_FALLBACK_true ) } vec_ptype2_opts <- function(x, y, ..., opts, x_arg = "", y_arg = "") { .Call(vctrs_ptype2_opts, x, y, opts, x_arg, y_arg) } vec_ptype2_params <- function(x, y, ..., df_fallback = NULL, s3_fallback = NULL, x_arg = "", y_arg = "") { opts <- fallback_opts( df_fallback = df_fallback, s3_fallback = s3_fallback ) vec_ptype2_opts(x, y, opts = opts, x_arg = x_arg, y_arg = y_arg) } vec_ptype2_no_fallback <- function(x, y, ..., x_arg = "", y_arg = "") { opts <- fallback_opts( df_fallback = DF_FALLBACK_none, s3_fallback = S3_FALLBACK_false ) vec_ptype2_opts(x, y, ..., , opts = opts, x_arg = x_arg, y_arg = y_arg) } # Kept in sync with ptype2.h df_fallback_default <- function() 0L DF_FALLBACK_warn_maybe <- 0L DF_FALLBACK_warn <- 1L DF_FALLBACK_none <- 2L DF_FALLBACK_quiet <- 3L s3_fallback_default <- function() 0L S3_FALLBACK_false <- 0L S3_FALLBACK_true <- 1L has_df_fallback <- function(df_fallback) { df_fallback != DF_FALLBACK_none } needs_fallback_warning <- function(df_fallback) { if (df_fallback == DF_FALLBACK_warn_maybe) { is_true(peek_option("vctrs:::warn_on_fallback")) } else { df_fallback == DF_FALLBACK_warn } } with_fallback_warning <- function(expr) { with_options(expr, `vctrs:::warn_on_fallback` = TRUE) } with_fallback_quiet <- function(expr) { with_options(expr, `vctrs:::warn_on_fallback` = FALSE) } vec_typeof2 <- function(x, y) { .Call(vctrs_typeof2, x, y) } vec_typeof2_s3 <- function(x, y) { .Call(vctrs_typeof2_s3, x, y) } # https://github.com/r-lib/vctrs/issues/571 vec_is_coercible <- function(x, y, ..., opts = fallback_opts(), x_arg = "", y_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } .Call( vctrs_is_coercible, x, y, opts, x_arg, y_arg ) } vec_is_subtype <- function(x, super, ..., x_arg = "", super_arg = "") { tryCatch( vctrs_error_incompatible_type = function(...) FALSE, { common <- vctrs::vec_ptype2(x, super, ..., x_arg = x_arg, y_arg = super_arg) vec_is(common, super) } ) } vec_implements_ptype2 <- function(x) { .Call(vctrs_implements_ptype2, x) } vctrs/R/hash.R0000644000176200001440000000042513762412012012661 0ustar liggesusers # These return raw vectors of hashes. Vector elements are coded with # 32 bit hashes. Thus, the size of the raw vector of hashes is 4 times # the size of the input. vec_hash <- function(x) { .Call(vctrs_hash, x) } obj_hash <- function(x) { .Call(vctrs_hash_object, x) } vctrs/R/translate.R0000644000176200001440000000011713762412012013731 0ustar liggesusersvec_normalize_encoding <- function(x) { .Call(vctrs_normalize_encoding, x) } vctrs/R/compat-lifecycle.R0000644000176200001440000001634013762412012015161 0ustar liggesusers# nocov start --- compat-lifecycle --- 2019-11-15 Fri 15:55 # This file serves as a reference for currently unexported rlang # lifecycle functions. Please find the most recent version in rlang's # repository. These functions require rlang in your `Imports` # DESCRIPTION field but you don't need to import rlang in your # namespace. #' Signal deprecation #' #' @description #' #' These functions provide two levels of verbosity for deprecation #' warnings. #' #' * `signal_soft_deprecated()` warns only if called from the global #' environment (so the user can change their script) or from the #' package currently being tested (so the package developer can fix #' the package). #' #' * `warn_deprecated()` warns unconditionally. #' #' * `stop_defunct()` fails unconditionally. #' #' Both functions warn only once per session by default to avoid #' overwhelming the user with repeated warnings. #' #' @param msg The deprecation message. #' @param id The id of the deprecation. A warning is issued only once #' for each `id`. Defaults to `msg`, but you should give a unique ID #' when the message is built programmatically and depends on inputs. #' @param env The environment in which the soft-deprecated function #' was called. A warning is issued if called from the global #' environment. If testthat is running, a warning is also called if #' the retired function was called from the package being tested. #' #' @section Controlling verbosity: #' #' The verbosity of retirement warnings can be controlled with global #' options. You'll generally want to set these options locally with #' one of these helpers: #' #' * `with_lifecycle_silence()` disables all soft-deprecation and #' deprecation warnings. #' #' * `with_lifecycle_warnings()` enforces warnings for both #' soft-deprecated and deprecated functions. The warnings are #' repeated rather than signalled once per session. #' #' * `with_lifecycle_errors()` enforces errors for both #' soft-deprecated and deprecated functions. #' #' All the `with_` helpers have `scoped_` variants that are #' particularly useful in testthat blocks. #' #' @noRd #' @seealso [lifecycle()] NULL signal_soft_deprecated <- function(msg, id = msg, env = caller_env(2)) { msg <- lifecycle_validate_message(msg) stopifnot( rlang::is_string(id), rlang::is_environment(env) ) if (rlang::is_true(rlang::peek_option("lifecycle_disable_warnings"))) { return(invisible(NULL)) } env_inherits_global <- function(env) { # `topenv(emptyenv())` returns the global env. Return `FALSE` in # that case to allow passing the empty env when the # soft-deprecation should not be promoted to deprecation based on # the caller environment. if (rlang::is_reference(env, emptyenv())) { return(FALSE) } rlang::is_reference(topenv(env), rlang::global_env()) } if (rlang::is_true(rlang::peek_option("lifecycle_verbose_soft_deprecation")) || env_inherits_global(env)) { warn_deprecated(msg, id) return(invisible(NULL)) } # Test for environment names rather than reference/contents because # testthat clones the namespace tested_package <- Sys.getenv("TESTTHAT_PKG") if (nzchar(tested_package) && identical(Sys.getenv("NOT_CRAN"), "true") && rlang::env_name(topenv(env)) == rlang::env_name(ns_env(tested_package))) { warn_deprecated(msg, id) return(invisible(NULL)) } rlang::signal(msg, "lifecycle_soft_deprecated") } warn_deprecated <- function(msg, id = msg) { msg <- lifecycle_validate_message(msg) stopifnot(rlang::is_string(id)) if (rlang::is_true(rlang::peek_option("lifecycle_disable_warnings"))) { return(invisible(NULL)) } if (!rlang::is_true(rlang::peek_option("lifecycle_repeat_warnings")) && rlang::env_has(deprecation_env, id)) { return(invisible(NULL)) } rlang::env_poke(deprecation_env, id, TRUE); has_colour <- function() rlang::is_installed("crayon") && crayon::has_color() silver <- function(x) if (has_colour()) crayon::silver(x) else x if (rlang::is_true(rlang::peek_option("lifecycle_warnings_as_errors"))) { .Signal <- stop_defunct } else { .Signal <- .Deprecated } if (!rlang::is_true(rlang::peek_option("lifecycle_repeat_warnings"))) { msg <- paste0(msg, "\n", silver("This warning is displayed once per session.")) } .Signal(msg = msg) } deprecation_env <- new.env(parent = emptyenv()) stop_defunct <- function(msg) { msg <- lifecycle_validate_message(msg) err <- cnd( c("defunctError", "error", "condition"), old = NULL, new = NULL, package = NULL, message = msg ) stop(err) } local_lifecycle_silence <- function(frame = rlang::caller_env()) { rlang::local_options(.frame = frame, lifecycle_disable_warnings = TRUE ) } with_lifecycle_silence <- function(expr) { local_lifecycle_silence() expr } local_lifecycle_warnings <- function(frame = rlang::caller_env()) { rlang::local_options(.frame = frame, lifecycle_disable_warnings = FALSE, lifecycle_verbose_soft_deprecation = TRUE, lifecycle_repeat_warnings = TRUE ) } with_lifecycle_warnings <- function(expr) { local_lifecycle_warnings() expr } local_lifecycle_errors <- function(frame = rlang::caller_env()) { local_lifecycle_warnings(frame = frame) rlang::local_options(.frame = frame, lifecycle_warnings_as_errors = TRUE ) } with_lifecycle_errors <- function(expr) { local_lifecycle_errors() expr } #' Embed a lifecycle badge in documentation #' #' @description #' #' Use `lifecycle()` within a `Sexpr` macro to embed a #' [lifecycle](https://www.tidyverse.org/lifecycle/) badge in your #' documentation. The badge should appear first in the description: #' #' ``` #' \Sexpr[results=rd, stage=render]{mypkg:::lifecycle("questioning")} #' ``` #' #' The badge appears as an image in the HTML version of the #' documentation. To make them available in your package, visit #' and copy #' all the files starting with `lifecycle-` in your `man/figures/` #' folder. #' #' @param stage A lifecycle stage as a string, one of: #' `"experimental"`, `"maturing"`, `"stable"`, `"questioning"`, #' `"archived"`, `"soft-deprecated"`, `"deprecated"`, `"defunct"`. #' #' @keywords internal #' @noRd NULL lifecycle <- function(stage) { url <- paste0("https://www.tidyverse.org/lifecycle/#", stage) img <- lifecycle_img(stage, url) sprintf( "\\ifelse{html}{%s}{\\strong{%s}}", img, upcase1(stage) ) } lifecycle_img <- function(stage, url) { file <- sprintf("lifecycle-%s.svg", stage) stage_alt <- upcase1(stage) switch(stage, experimental = , maturing = , stable = , questioning = , retired = , archived = sprintf( "\\out{%s lifecycle}", url, file.path("figures", file), stage_alt ) , `soft-deprecated` = , deprecated = , defunct = sprintf( "\\figure{%s}{options: alt='%s lifecycle'}", file, stage_alt ), rlang::abort(sprintf("Unknown lifecycle stage `%s`", stage)) ) } upcase1 <- function(x) { substr(x, 1, 1) <- toupper(substr(x, 1, 1)) x } lifecycle_validate_message <- function(msg) { stopifnot(is_character(msg)) paste0(msg, collapse = "\n") } # nocov end vctrs/R/compat-purrr.R0000644000176200001440000001015413762412012014371 0ustar liggesusers# nocov start - compat-purrr (last updated: rlang 0.2.0) # This file serves as a reference for compatibility functions for # purrr. They are not drop-in replacements but allow a similar style # of programming. This is useful in cases where purrr is too heavy a # package to depend on. Please find the most recent version in rlang's # repository. map <- function(.x, .f, ...) { lapply(.x, .f, ...) } map_mold <- function(.x, .f, .mold, ...) { out <- vapply(.x, .f, .mold, ..., USE.NAMES = FALSE) names(out) <- names(.x) out } map_lgl <- function(.x, .f, ...) { map_mold(.x, .f, logical(1), ...) } map_int <- function(.x, .f, ...) { map_mold(.x, .f, integer(1), ...) } map_dbl <- function(.x, .f, ...) { map_mold(.x, .f, double(1), ...) } map_chr <- function(.x, .f, ...) { map_mold(.x, .f, character(1), ...) } map_cpl <- function(.x, .f, ...) { map_mold(.x, .f, complex(1), ...) } walk <- function(.x, .f, ...) { map(.x, .f, ...) invisible(.x) } pluck <- function(.x, .f) { map(.x, `[[`, .f) } pluck_lgl <- function(.x, .f) { map_lgl(.x, `[[`, .f) } pluck_int <- function(.x, .f) { map_int(.x, `[[`, .f) } pluck_dbl <- function(.x, .f) { map_dbl(.x, `[[`, .f) } pluck_chr <- function(.x, .f) { map_chr(.x, `[[`, .f) } pluck_cpl <- function(.x, .f) { map_cpl(.x, `[[`, .f) } map2 <- function(.x, .y, .f, ...) { Map(.f, .x, .y, ...) } map2_lgl <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "logical") } map2_int <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "integer") } map2_dbl <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "double") } map2_chr <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "character") } map2_cpl <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "complex") } args_recycle <- function(args) { lengths <- map_int(args, length) n <- max(lengths) stopifnot(all(lengths == 1L | lengths == n)) to_recycle <- lengths == 1L args[to_recycle] <- map(args[to_recycle], function(x) rep.int(x, n)) args } pmap <- function(.l, .f, ...) { args <- args_recycle(.l) do.call("mapply", c( FUN = list(quote(.f)), args, MoreArgs = quote(list(...)), SIMPLIFY = FALSE, USE.NAMES = FALSE )) } probe <- function(.x, .p, ...) { if (is_logical(.p)) { stopifnot(length(.p) == length(.x)) .p } else { map_lgl(.x, .p, ...) } } keep <- function(.x, .f, ...) { .x[probe(.x, .f, ...)] } discard <- function(.x, .p, ...) { sel <- probe(.x, .p, ...) .x[is.na(sel) | !sel] } map_if <- function(.x, .p, .f, ...) { matches <- probe(.x, .p) .x[matches] <- map(.x[matches], .f, ...) .x } transpose <- function(.l) { inner_names <- names(.l[[1]]) if (is.null(inner_names)) { fields <- seq_along(.l[[1]]) } else { fields <- set_names(inner_names) } map(fields, function(i) { map(.l, .subset2, i) }) } every <- function(.x, .p, ...) { for (i in seq_along(.x)) { if (!rlang::is_true(.p(.x[[i]], ...))) return(FALSE) } TRUE } some <- function(.x, .p, ...) { for (i in seq_along(.x)) { if (rlang::is_true(.p(.x[[i]], ...))) return(TRUE) } FALSE } negate <- function(.p) { function(...) !.p(...) } reduce <- function(.x, .f, ..., .init) { f <- function(x, y) .f(x, y, ...) Reduce(f, .x, init = .init) } reduce_right <- function(.x, .f, ..., .init) { f <- function(x, y) .f(y, x, ...) Reduce(f, .x, init = .init, right = TRUE) } accumulate <- function(.x, .f, ..., .init) { f <- function(x, y) .f(x, y, ...) Reduce(f, .x, init = .init, accumulate = TRUE) } accumulate_right <- function(.x, .f, ..., .init) { f <- function(x, y) .f(y, x, ...) Reduce(f, .x, init = .init, right = TRUE, accumulate = TRUE) } detect <- function(.x, .f, ..., .right = FALSE, .p = is_true) { for (i in index(.x, .right)) { if (.p(.f(.x[[i]], ...))) { return(.x[[i]]) } } NULL } detect_index <- function(.x, .f, ..., .right = FALSE, .p = is_true) { for (i in index(.x, .right)) { if (.p(.f(.x[[i]], ...))) { return(i) } } 0L } index <- function(x, right = FALSE) { idx <- seq_along(x) if (right) { idx <- rev(idx) } idx } # nocov end vctrs/R/compare.R0000644000176200001440000001016013762412012013361 0ustar liggesusers# proxies ----------------------------------------------------------------- #' Comparison and order proxy #' #' @description #' `vec_proxy_compare()` and `vec_proxy_order()` return proxy objects, i.e. #' an atomic vector or data frame of atomic vectors. #' #' For [`vctrs_vctr`][vctr] objects: #' #' - `vec_proxy_compare()` determines the behavior of `<`, `>`, `>=` #' and `<=` (via [vec_compare()]); and [min()], [max()], [median()], and #' [quantile()]. #' #' - `vec_proxy_order()` determines the behavior of `order()` and `sort()` #' (via `xtfrm()`). #' #' @details #' The default method of `vec_proxy_compare()` assumes that all classes built #' on top of atomic vectors or records are comparable. Internally the default #' calls [vec_proxy_equal()]. If your class is not comparable, you will need #' to provide a `vec_proxy_compare()` method that throws an error. #' #' The behavior of `vec_proxy_order()` is identical to `vec_proxy_compare()`, #' with the exception of lists. Lists are not comparable, as comparing #' elements of different types is undefined. However, to allow ordering of #' data frames containing list-columns, the ordering proxy of a list is #' generated as an integer vector that can be used to order list elements #' by first appearance. #' #' @param x A vector x. #' @inheritParams ellipsis::dots_empty #' @return A 1d atomic vector or a data frame. #' #' @section Dependencies: #' - [vec_proxy_equal()] called by default in `vec_proxy_compare()` #' - [vec_proxy_compare()] called by default in `vec_proxy_order()` #' #' @keywords internal #' @export #' @examples #' # Lists are not comparable #' x <- list(1:2, 1, 1:2, 3) #' try(vec_compare(x, x)) #' #' # But lists are orderable by first appearance to allow for #' # ordering data frames with list-cols #' df <- new_data_frame(list(x = x)) #' vec_sort(df) vec_proxy_compare <- function(x, ...) { if (!missing(...)) { # For backward compatibility with older dplyr versions if (match_relax(...)) { return(vec_proxy_order(x)) } ellipsis::check_dots_empty() } return(.Call(vctrs_proxy_compare, x)) UseMethod("vec_proxy_compare") } #' @export vec_proxy_compare.default <- function(x, ...) { stop_native_implementation("vec_proxy_compare.default") } match_relax <- function(..., relax = FALSE) { relax } #' @rdname vec_proxy_compare #' @export vec_proxy_order <- function(x, ...) { if (!missing(...)) { ellipsis::check_dots_empty() } return(.Call(vctrs_proxy_order, x)) UseMethod("vec_proxy_order") } #' @export vec_proxy_order.default <- function(x, ...) { stop_native_implementation("vec_proxy_order.default") } # compare ----------------------------------------------------------------- #' Compare two vectors #' #' @section S3 dispatch: #' `vec_compare()` is not generic for performance; instead it uses #' [vec_proxy_compare()] to #' #' @param x,y Vectors with compatible types and lengths. #' @param na_equal Should `NA` values be considered equal? #' @param .ptype Override to optionally specify common type #' @return An integer vector with values -1 for `x < y`, 0 if `x == y`, #' and 1 if `x > y`. If `na_equal` is `FALSE`, the result will be `NA` #' if either `x` or `y` is `NA`. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_recycle_common()] #' - [vec_proxy_compare()] #' #' @export #' @examples #' vec_compare(c(TRUE, FALSE, NA), FALSE) #' vec_compare(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) #' #' vec_compare(1:10, 5) #' vec_compare(runif(10), 0.5) #' vec_compare(letters[1:10], "d") #' #' df <- data.frame(x = c(1, 1, 1, 2), y = c(0, 1, 2, 1)) #' vec_compare(df, data.frame(x = 1, y = 1)) vec_compare <- function(x, y, na_equal = FALSE, .ptype = NULL) { vec_assert(x) vec_assert(y) vec_assert(na_equal, ptype = logical(), size = 1L) args <- vec_recycle_common(x, y) args <- vec_cast_common_params( !!!args, .to = .ptype, .df_fallback = DF_FALLBACK_quiet ) .Call(vctrs_compare, vec_proxy_compare(args[[1]]), vec_proxy_compare(args[[2]]), na_equal) } # Helpers ----------------------------------------------------------------- # Used for testing cmp <- function(x, y) (x > y) - (x < y) vctrs/R/type-sf.R0000644000176200001440000000624413762412012013332 0ustar liggesusers # Imported at load-time in `sf_env` st_crs = function(...) stop_sf() st_precision = function(...) stop_sf() st_as_sf = function(...) stop_sf() stop_sf = function() abort("Internal error: Failed sf import.") sf_deps = c( "st_crs", "st_precision", "st_as_sf" ) sf_env = env() # sf namespace local(envir = sf_env, { # Registered at load-time (same for all other methods) vec_proxy.sf = function(x, ...) { x } vec_restore.sf = function(x, to, ...) { sfc_name = attr(to, "sf_column") crs = st_crs(to) prec = st_precision(to) st_as_sf( x, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } sf_ptype2 = function(x, y, ...) { data = vctrs::df_ptype2(x, y, ...) # Workaround for `c()` fallback sentinels. Must be fixed before # moving the methods downstream. opts <- match_fallback_opts(...) if (identical(opts$s3_fallback, S3_FALLBACK_true)) { return(data) } x_sf <- inherits(x, "sf") y_sf <- inherits(y, "sf") if (x_sf && y_sf) { # Take active geometry from left-hand side sfc_name = attr(x, "sf_column") # CRS and precision must match crs = common_crs(x, y) prec = common_prec(x, y) } else if (x_sf) { sfc_name = attr(x, "sf_column") crs = st_crs(x) prec = st_precision(x) } else if (y_sf) { sfc_name = attr(y, "sf_column") crs = st_crs(y) prec = st_precision(y) } else { stop("Internal error: Expected at least one `sf` input.") } st_as_sf( data, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } vec_ptype2.sf.sf = function(x, y, ...) { sf_ptype2(x, y, ...) } vec_ptype2.sf.data.frame = function(x, y, ...) { sf_ptype2(x, y, ...) } vec_ptype2.data.frame.sf = function(x, y, ...) { sf_ptype2(x, y, ...) } # Maybe we should not have these methods, but they are currently # required to avoid the base-df fallback vec_ptype2.sf.tbl_df = function(x, y, ...) { new_data_frame(sf_ptype2(x, y, ...)) } vec_ptype2.tbl_df.sf = function(x, y, ...) { new_data_frame(sf_ptype2(x, y, ...)) } sf_cast = function(x, to, ...) { data = vctrs::df_cast(x, to, ...) # Workaround for `c()` fallback sentinels. Must be fixed before # moving the methods downstream. opts <- match_fallback_opts(...) if (identical(opts$s3_fallback, S3_FALLBACK_true)) { return(data) } sfc_name = attr(to, "sf_column") crs = st_crs(to) prec = st_precision(to) st_as_sf( data, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } vec_cast.sf.sf = function(x, to, ...) { sf_cast(x, to, ...) } vec_cast.sf.data.frame = function(x, to, ...) { sf_cast(x, to, ...) } vec_cast.data.frame.sf = function(x, to, ...) { df_cast(x, to, ...) } # take conservative approach of requiring equal CRS and precision common_crs = function(x, y) { lhs = st_crs(x) rhs = st_crs(y) if (lhs != rhs) stop("coordinate reference systems not equal: use st_transform() first?") lhs } common_prec = function(x, y) { lhs = st_precision(x) rhs = st_precision(y) if (lhs != rhs) stop("precisions not equal") lhs } }) # local(envir = sf_env) env_bind(ns_env("vctrs"), !!!as.list(sf_env)) # Local Variables: # indent-tabs-mode: t # ess-indent-offset: 4 # tab-width: 4 # End: vctrs/R/partial.R0000644000176200001440000000322714001521643013373 0ustar liggesusers#' Partial type #' #' @description #' `r lifecycle::badge("experimental")` #' #' Use `new_partial()` when constructing a new partial type subclass; #' and use `is_partial()` to test if a type is partial. All subclasses #' need to provide a `vec_ptype_finalise()` method. #' #' @details #' As the name suggests, a partial type _partially_ specifies a type, and #' it must be combined with data to yield a full type. A useful example #' of a partial type is [partial_frame()], which makes it possible to #' specify the type of just a few columns in a data frame. Use this constructor #' if you're making your own partial type. #' #' @param ... Attributes of the partial type #' @param class Name of subclass. #' @export #' @keywords internal new_partial <- function(..., class = character()) { new_sclr(..., class = c(class, "vctrs_partial")) } #' @export obj_print_header.vctrs_partial <- function(x, ...) { NULL invisible(x) } #' @export obj_print_data.vctrs_partial <- function(x, ...) { cat_line(vec_ptype_full(x)) invisible(x) } #' @rdname new_partial #' @export is_partial <- function(x) { .Call(vctrs_is_partial, x) } #' @rdname new_partial #' @inheritParams ellipsis::dots_empty #' @export vec_ptype_finalise <- function(x, ...) { if (!missing(...)) { ellipsis::check_dots_empty() } return(.Call(vctrs_ptype_finalise, x)) UseMethod("vec_ptype_finalise") } vec_ptype_finalise_dispatch <- function(x, ...) { UseMethod("vec_ptype_finalise") } #' @export vec_ptype_finalise.vctrs_partial <- function(x, ...) { # nocov start stop_unimplemented(x, "vec_ptype_finalise") # nocov end } #' @export vec_ptype_finalise.default <- function(x, ...) { x } vctrs/R/numeric.R0000644000176200001440000000432513762412012013403 0ustar liggesusers#' Mathematical operations #' #' This generic provides a common dispatch mechanism for all regular unary #' mathematical functions. It is used as a common wrapper around many of the #' Summary group generics, the Math group generics, and a handful of other #' mathematical functions like `mean()` (but not `var()` or `sd()`). #' #' `vec_math_base()` is provided as a convenience for writing methods. It #' calls the base `.fn` on the underlying [vec_data()]. #' #' @section Included functions: #' #' * From the [Summary] group generic: #' `prod()`, `sum()`, `any()`, `all()`. #' #' * From the [Math] group generic: #' `abs()`, `sign()`, `sqrt()`, `ceiling()`, `floor()`, `trunc()`, `cummax()`, #' `cummin()`, `cumprod()`, `cumsum()`, `log()`, `log10()`, `log2()`, #' `log1p()`, `acos()`, `acosh()`, `asin()`, `asinh()`, `atan()`, `atanh()`, #' `exp()`, `expm1()`, `cos()`, `cosh()`, `cospi()`, `sin()`, `sinh()`, #' `sinpi()`, `tan()`, `tanh()`, `tanpi()`, `gamma()`, `lgamma()`, #' `digamma()`, `trigamma()`. #' #' * Additional generics: `mean()`, `is.nan()`, `is.finite()`, `is.infinite()`. #' #' Note that `median()` is currently not implemented, and `sd()` and #' `var()` are currently not generic and so do not support custom #' classes. #' #' @seealso [vec_arith()] for the equivalent for the arithmetic infix operators. #' @param .fn A mathematical function from the base package, as a string. #' @param .x A vector. #' @param ... Additional arguments passed to `.fn`. #' @keywords internal #' @export #' @examples #' x <- new_vctr(c(1, 2.5, 10)) #' x #' #' abs(x) #' sum(x) #' cumsum(x) vec_math <- function(.fn, .x, ...) { UseMethod("vec_math", .x) } #' @export vec_math.default <- function(.fn, .x, ...) { if (!is_double(.x) && !is_logical_dispatch(.fn, .x)) { stop_unimplemented(.x, "vec_math") } out <- vec_math_base(.fn, .x, ...) # Don't restore output of logical predicates like `any()`, # `is.finite()`, or `is.nan()` if (is_double(out)) { out <- vec_restore(out, .x) } out } is_logical_dispatch <- function(fn, x) { is_logical(x) && fn %in% c("any", "all") } #' @export #' @rdname vec_math vec_math_base <- function(.fn, .x, ...) { .fn <- getExportedValue("base", .fn) .fn(vec_data(.x), ...) } vctrs/R/faq.R0000644000176200001440000000150613762412012012506 0ustar liggesusers#' FAQ - How is the compatibility of vector types decided? #' #' @includeRmd man/faq/user/faq-compatibility-types.Rmd description #' #' @name faq-compatibility-types NULL #' FAQ - Error/Warning: Some attributes are incompatible #' #' @description #' #' This error occurs when [vec_ptype2()] or [vec_cast()] are supplied #' vectors of the same classes with different attributes. In this #' case, vctrs doesn't know how to combine the inputs. #' #' To fix this error, the maintainer of the class should implement #' self-to-self coercion methods for [vec_ptype2()] and [vec_cast()]. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @name faq-error-incompatible-attributes NULL #' FAQ - Error: Input must be a vector #' #' @includeRmd man/faq/user/faq-error-scalar-type.Rmd description #' #' @name faq-error-scalar-type NULL vctrs/R/c.R0000644000176200001440000000730513762412012012164 0ustar liggesusers#' Combine many vectors into one vector #' #' Combine all arguments into a new vector of common type. #' #' @section Invariants: #' * `vec_size(vec_c(x, y)) == vec_size(x) + vec_size(y)` #' * `vec_ptype(vec_c(x, y)) == vec_ptype_common(x, y)`. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_cast_common()] with fallback #' - [vec_proxy()] #' - [vec_restore()] #' #' #' ## base dependencies #' #' - [base::c()] #' #' If inputs inherit from a common class hierarchy, `vec_c()` falls #' back to `base::c()` if there exists a `c()` method implemented for #' this class hierarchy. #' #' @param ... Vectors to coerce. #' @param .name_repair How to repair names, see `repair` options in #' [vec_as_names()]. #' @return A vector with class given by `.ptype`, and length equal to the #' sum of the `vec_size()` of the contents of `...`. #' #' The vector will have names if the individual components have names #' (inner names) or if the arguments are named (outer names). If both #' inner and outer names are present, an error is thrown unless a #' `.name_spec` is provided. #' @inheritParams vec_ptype_show #' @inheritParams name_spec #' @inheritParams vec_as_names #' @seealso [vec_cbind()]/[vec_rbind()] for combining data frames by rows #' or columns. #' @export #' @examples #' vec_c(FALSE, 1L, 1.5) #' #' # Date/times -------------------------- #' c(Sys.Date(), Sys.time()) #' c(Sys.time(), Sys.Date()) #' #' vec_c(Sys.Date(), Sys.time()) #' vec_c(Sys.time(), Sys.Date()) #' #' # Factors ----------------------------- #' c(factor("a"), factor("b")) #' vec_c(factor("a"), factor("b")) #' #' #' # By default, named inputs must be length 1: #' vec_c(name = 1) #' try(vec_c(name = 1:3)) #' #' # Pass a name specification to work around this: #' vec_c(name = 1:3, .name_spec = "{outer}_{inner}") #' #' # See `?name_spec` for more examples of name specifications. vec_c <- function(..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal")) { .External2(vctrs_c, .ptype, .name_spec, .name_repair) } vec_c <- fn_inline_formals(vec_c, ".name_repair") base_c <- function(xs) { # Dispatch in the base namespace which inherits from the global env exec("c", !!!xs, .env = ns_env("base")) } base_c_invoke <- function(xs) { # Remove all `NULL` arguments which prevent dispatch if in first # position and might not be handled correctly by methods xs <- compact(xs) unspecified <- map_lgl(xs, fallback_is_unspecified) if (!any(unspecified)) { return(base_c(xs)) } # First concatenate without the unspecified chunks. This way the # `c()` method doesn't need to handle unspecified inputs correctly, # and we're guaranteed to dispatch on the correct class even if the # first input is unspecified. out <- base_c(xs[!unspecified]) # Create index vector with `NA` locations for unspecified chunks locs <- c_locs(xs) locs[unspecified] <- map(locs[unspecified], rep_along, na_int) locs[!unspecified] <- c_locs(xs[!unspecified]) locs <- vec_c(!!!locs, .ptype = int()) # Expand the concatenated vector with unspecified chunks out[locs] } # FIXME: Should be unnecessary in the future. We currently attach an # attribute to unspecified columns initialised in `df_cast()`. We # can't use an unspecified vector because we (unnecessarily but for # convenience) go through `vec_assign()` before falling back in # `vec_rbind()`. fallback_is_unspecified <- function(x) { is_unspecified(x) || is_true(attr(x, "vctrs:::unspecified")) } c_locs <- function(xs) { locs <- reduce(lengths(xs), .init = list(0), function(output, input) { n <- last(last(output)) c(output, list(seq(n + 1, n + input))) }) locs[-1] } vctrs/NEWS.md0000644000176200001440000010425514042546166012530 0ustar liggesusers# vctrs 0.3.8 * Compatibility with next version of rlang. # vctrs 0.3.7 * `vec_ptype_abbr()` gains arguments to control whether to indicate named vectors with a prefix (`prefix_named`) and indicate shaped vectors with a suffix (`suffix_shape`) (#781, @krlmlr). * `vec_ptype()` is now an optional _performance_ generic. It is not necessary to implement, but if your class has a static prototype, you might consider implementing a custom `vec_ptype()` method that returns a constant to improve performance in some cases (such as common type imputation). * New `vec_detect_complete()`, inspired by `stats::complete.cases()`. For most vectors, this is identical to `!vec_equal_na()`. For data frames and matrices, this detects rows that only contain non-missing values. * `vec_order()` can now order complex vectors (#1330). * Removed dependency on digest in favor of `rlang::hash()`. * Fixed an issue where `vctrs_rcrd` objects were not being proxied correctly when used as a data frame column (#1318). * `register_s3()` is now licensed with the "unlicense" which makes it very clear that it's fine to copy and paste into your own package (@maxheld83, #1254). # vctrs 0.3.6 * Fixed an issue with tibble 3.0.0 where removing column names with `names(x) <- NULL` is now deprecated (#1298). * Fixed a GCC 11 issue revealed by CRAN checks. # vctrs 0.3.5 * New experimental `vec_fill_missing()` for filling in missing values with the previous or following value. It is similar to `tidyr::fill()`, but also works with data frames and has an additional `max_fill` argument to limit the number of sequential missing values to fill. * New `vec_unrep()` to compress a vector with repeated values. It is very similar to run length encoding, and works nicely alongside `vec_rep_each()` as a way to invert the compression. * `vec_cbind()` with only empty data frames now preserves the common size of the inputs in the result (#1281). * `vec_c()` now correctly returns a named result with named empty inputs (#1263). * vctrs has been relicensed as MIT (#1259). * Functions that make comparisons within a single vector, such as `vec_unique()`, or between two vectors, such as `vec_match()`, now convert all character input to UTF-8 before making comparisons (#1246). * New `vec_identify_runs()` which returns a vector of identifiers for the elements of `x` that indicate which run of repeated values they fall in (#1081). * Fixed an encoding translation bug with lists containing data frames which have columns where `vec_size()` is different from the low level `Rf_length()` (#1233). # vctrs 0.3.4 * Fixed a GCC sanitiser error revealed by CRAN checks. # vctrs 0.3.3 * The `table` class is now implemented as a wrapper type that delegates its coercion methods. It used to be restricted to integer tables (#1190). * Named one-dimensional arrays now behave consistently with simple vectors in `vec_names()` and `vec_rbind()`. * `new_rcrd()` now uses `df_list()` to validate the fields. This makes it more flexible as the fields can now be of any type supported by vctrs, including data frames. * Thanks to the previous change the `[[` method of records now preserves list fields (#1205). * `vec_data()` now preserves data frames. This is consistent with the notion that data frames are a primitive vector type in vctrs. This shouldn't affect code that uses `[[` and `length()` to manipulate the data. On the other hand, the vctrs primitives like `vec_slice()` will now operate rowwise when `vec_data()` returns a data frame. * `outer` is now passed unrecycled to name specifications. Instead, the return value is recycled (#1099). * Name specifications can now return `NULL`. The names vector will only be allocated if the spec function returns non-`NULL` during the concatenation. This makes it possible to ignore outer names without having to create an empty names vector when there are no inner names: ``` zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner # `NULL` names rather than a vector of "" names(vec_c(a = 1:2, .name_spec = zap_outer_spec)) #> NULL # Names are allocated when inner names exist names(vec_c(a = 1:2, c(b = 3L), .name_spec = zap_outer_spec)) #> [1] "" "" "b" ``` * Fixed several performance issues in `vec_c()` and `vec_unchop()` with named vectors. * The restriction that S3 lists must have a list-based proxy to be considered lists by `vec_is_list()` has been removed (#1208). * New performant `data_frame()` constructor for creating data frames in a way that follows tidyverse semantics. Among other things, inputs are recycled using tidyverse recycling rules, strings are never converted to factors, list-columns are easier to create, and unnamed data frame input is automatically spliced. * New `df_list()` for safely and consistently constructing the data structure underlying a data frame, a named list of equal-length vectors. It is useful in combination with `new_data_frame()` for creating user-friendly constructors for data frame subclasses that use the tidyverse rules for recycling and determining types. * Fixed performance issue with `vec_order()` on classed vectors which affected `dplyr::group_by()` (tidyverse/dplyr#5423). * `vec_set_names()` no longer alters the input in-place (#1194). * New `vec_proxy_order()` that provides an ordering proxy for use in `vec_order()` and `vec_sort()`. The default method falls through to `vec_proxy_compare()`. Lists are special cased, and return an integer vector proxy that orders by first appearance. * List columns in data frames are no longer comparable through `vec_compare()`. * The experimental `relax` argument has been removed from `vec_proxy_compare()`. # vctrs 0.3.2 * Fixed a performance issue in `bind_rows()` with S3 columns (#1122, #1124, #1151, tidyverse/dplyr#5327). * `vec_slice()` now checks sizes of data frame columns in case the data structure is corrupt (#552). * The native routines in vctrs now dispatch and evaluate in the vctrs namespace. This improves the continuity of evaluation in backtraces. * `new_data_frame()` is now twice as fast when `class` is supplied. * New `vec_names2()`, `vec_names()` and `vec_set_names()` (#1173). # vctrs 0.3.1 * `vec_slice()` no longer restores attributes of foreign objects for which a `[` method exist. This fixes an issue with `ts` objects which were previously incorrectly restored. * The `as.list()` method for `vctrs_rcrd` objects has been removed in favor of directly using the method for `vctrs_vctr`, which calls `vec_chop()`. * `vec_c()` and `vec_rbind()` now fall back to `base::c()` if the inputs have a common class hierarchy for which a `c()` method is implemented but no self-to-self `vec_ptype2()` method is implemented. * `vec_rbind()` now internally calls `vec_proxy()` and `vec_restore()` on the data frame common type that is used to create the output (#1109). * `vec_as_location2("0")` now works correctly (#1131). * `?reference-faq-compatibility` is a new reference guide on vctrs primitives. It includes an overview of the fallbacks to base R generics implemented in vctrs for compatibility with existing classes. * The documentation of vctrs functions now includes a Dependencies section to reference which other vctrs operations are called from that function. By following the dependencies links recursively, you will find the vctrs primitives on which an operation relies. ## CRAN results * Fixed type declaration mismatches revealed by LTO build. * Fixed r-devel issue with new `c.factor()` method. # vctrs 0.3.0 This version features an overhaul of the coercion system to make it more consistent and easier to implement. See the _Breaking changes_ and _Type system_ sections for details. There are three new documentation topics if you'd like to learn how to implement coercion methods to make your class compatible with tidyverse packages like dplyr: * https://vctrs.r-lib.org/reference/theory-faq-coercion.html for an overview of the coercion mechanism in vctrs. * https://vctrs.r-lib.org/reference/howto-faq-coercion.html for a practical guide about implementing methods for vectors. * https://vctrs.r-lib.org/reference/howto-faq-coercion-data-frame.html for a practical guide about implementing methods for data frames. ## Reverse dependencies troubleshooting The following errors are caused by breaking changes. * `"Can't convert to ."` `vec_cast()` no longer converts to list. Use `vec_chop()` or `as.list()` instead. * `"Can't convert to ."` `vec_cast()` no longer converts to character. Use `as.character()`to deparse objects. * `"names for target but not for current"` Names of list-columns are now preserved by `vec_rbind()`. Adjust tests accordingly. ## Breaking changes * Double-dispatch methods for `vec_ptype2()` and `vec_cast()` are no longer inherited (#710). Class implementers must implement one set of methods for each compatible class. For example, a tibble subclass no longer inherits from the `vec_ptype2()` methods between `tbl_df` and `data.frame`. This means that you explicitly need to implement `vec_ptype2()` methods with `tbl_df` and `data.frame`. This change requires a bit more work from class maintainers but is safer because the coercion hierarchies are generally different from class hierarchies. See the S3 dispatch section of `?vec_ptype2` for more information. * `vec_cast()` is now restricted to the same conversions as `vec_ptype2()` methods (#606, #741). This change is motivated by safety and performance: - It is generally sloppy to generically convert arbitrary inputs to one type. Restricted coercions are more predictable and allow your code to fail earlier when there is a type issue. - When unrestricted conversions are useful, this is generally towards a known type. For example, `glue::glue()` needs to convert arbitrary inputs to the known character type. In this case, using double dispatch instead of a single dispatch generic like `as.character()` is wasteful. - To implement the useful semantics of coercible casts (already used in `vec_assign()`), two double dispatch were needed. Now it can be done with one double dispatch by calling `vec_cast()` directly. * `stop_incompatible_cast()` now throws an error of class `vctrs_error_incompatible_type` rather than `vctrs_error_incompatible_cast`. This means that `vec_cast()` also throws errors of this class, which better aligns it with `vec_ptype2()` now that they are restricted to the same conversions. * The `y` argument of `stop_incompatible_cast()` has been renamed to `to` to better match `to_arg`. ## Type system * Double-dispatch methods for `vec_ptype2()` and `vec_cast()` are now easier to implement. They no longer need any the boiler plate. Implementing a method for classes `foo` and `bar` is now as simple as: ``` #' @export vec_ptype2.foo.bar <- function(x, y, ...) new_foo() ``` vctrs also takes care of implementing the default and unspecified methods. If you have implemented these methods, they are no longer called and can now be removed. One consequence of the new dispatch mechanism is that `NextMethod()` is now completely unsupported. This is for the best as it never worked correctly in a double-dispatch setting. Parent methods must now be called manually. * `vec_ptype2()` methods now get zero-size prototypes as inputs. This guarantees that methods do not peek at the data to determine the richer type. * `vec_is_list()` no longer allows S3 lists that implement a `vec_proxy()` method to automatically be considered lists. A S3 list must explicitly inherit from `"list"` in the base class to be considered a list. * `vec_restore()` no longer restores row names if the target is not a data frame. This fixes an issue where `POSIXlt` objects would carry a `row.names` attribute after a proxy/restore roundtrip. * `vec_cast()` to and from data frames preserves the row names of inputs. * The internal function `vec_names()` now returns row names if the input is a data frame. Similarly, `vec_set_names()` sets row names on data frames. This is part of a general effort at making row names the vector names of data frames in vctrs. If necessary, the row names are repaired verbosely but without error to make them unique. This should be a mostly harmless change for users, but it could break unit tests in packages if they make assumptions about the row names. ## Compatibility and fallbacks * With the double dispatch changes, the coercion methods are no longer inherited from parent classes. This is because the coercion hierarchy is in principle different from the S3 hierarchy. A consequence of this change is that subclasses that don't implement coercion methods are now in principle incompatible. This is particularly problematic with subclasses of data frames for which throwing incompatible errors would be too incovenient for users. To work around this, we have implemented a fallback to the relevant base data frame class (either `data.frame` or `tbl_df`) in coercion methods (#981). This fallback is silent unless you set the `vctrs:::warn_on_fallback` option to `TRUE`. In the future we may extend this fallback principle to other base types when they are explicitly included in the class vector (such as `"list"`). * Improved support for foreign classes in the combining operations `vec_c()`, `vec_rbind()`, and `vec_unchop()`. A foreign class is a class that doesn't implement `vec_ptype2()`. When all the objects to combine have the same foreign class, one of these fallbacks is invoked: - If the class implements a `base::c()` method, the method is used for the combination. (FIXME: `vec_rbind()` currently doesn't use this fallback.) - Otherwise if the objects have identical attributes and the same base type, we consider them to be compatible. The vectors are concatenated and the attributes are restored (#776). These fallbacks do not make your class completely compatible with vctrs-powered packages, but they should help in many simple cases. * `vec_c()` and `vec_unchop()` now fall back to `base::c()` for S4 objects if the object doesn't implement `vec_ptype2()` but sets an S4 `c()` method (#919). ## Vector operations * `vec_rbind()` and `vec_c()` with data frame inputs now consistently preserve the names of list-columns, df-columns, and matrix-columns (#689). This can cause some false positives in unit tests, if they are sensitive to internal names (#1007). * `vec_rbind()` now repairs row names silently to avoid confusing messages when the row names are not informative and were not created on purpose. * `vec_rbind()` gains option to treat input names as row names. This is disabled by default (#966). * New `vec_rep()` and `vec_rep_each()` for repeating an entire vector and elements of a vector, respectively. These two functions provide a clearer interface for the functionality of `vec_repeat()`, which is now deprecated. * `vec_cbind()` now calls `vec_restore()` on inputs emptied of their columns before computing the common type. This has consequences for data frame classes with special columns that devolve into simpler classes when the columns are subsetted out. These classes are now always simplified by `vec_cbind()`. For instance, column-binding a grouped data frame with a data frame now produces a tibble (the simplified class of a grouped data frame). * `vec_match()` and `vec_in()` gain parameters for argument tags (#944). * The internal version of `vec_assign()` now has support for assigning names and inner names. For data frames, the names are assigned recursively. * `vec_assign()` gains `x_arg` and `value_arg` parameters (#918). * `vec_group_loc()`, which powers `dplyr::group_by()`, now has more efficient vector access (#911). * `vec_ptype()` gained an `x_arg` argument. * New `list_sizes()` for computing the size of every element in a list. `list_sizes()` is to `vec_size()` as `lengths()` is to `length()`, except that it only supports lists. Atomic vectors and data frames result in an error. * `new_data_frame()` infers size from row names when `n = NULL` (#894). * `vec_c()` now accepts `rlang::zap()` as `.name_spec` input. The returned vector is then always unnamed, and the names do not cause errors when they can't be combined. They are still used to create more informative messages when the inputs have incompatible types (#232). ## Classes * vctrs now supports the `data.table` class. The common type of a data frame and a data table is a data table. * `new_vctr()` now always appends a base `"list"` class to list `.data` to be compatible with changes to `vec_is_list()`. This affects `new_list_of()`, which now returns an object with a base class of `"list"`. * dplyr methods are now implemented for `vec_restore()`, `vec_ptype2()`, and `vec_cast()`. The user-visible consequence (and breaking change) is that row-binding a grouped data frame and a data frame or tibble now returns a grouped data frame. It would previously return a tibble. * The `is.na<-()` method for `vctrs_vctr` now supports numeric and character subscripts to indicate where to insert missing values (#947). * Improved support for vector-like S4 objects (#550, #551). * The base classes `AsIs` and `table` have vctrs methods (#904, #906). * `POSIXlt` and `POSIXct` vectors are handled more consistently (#901). * Ordered factors that do not have identical levels are now incompatible. They are now incompatible with all factors. ## Indexing and names * `vec_as_subscript()` now fails when the subscript is a matrix or an array, consistently with `vec_as_location()`. * Improved error messages in `vec_as_location()` when subscript is a matrix or array (#936). * `vec_as_location2()` properly picks up `subscript_arg` (tidyverse/tibble#735). * `vec_as_names()` now has more informative error messages when names are not unique (#882). * `vec_as_names()` gains a `repair_arg` argument that when set will cause `repair = "check_unique"` to generate an informative hint (#692). ## Conditions * `stop_incompatible_type()` now has an `action` argument for customizing whether the coercion error came from `vec_ptype2()` or `vec_cast()`. `stop_incompatible_cast()` is now a thin wrapper around `stop_incompatible_type(action = "convert")`. * `stop_` functions now take `details` after the dots. This argument can no longer be passed by position. * Supplying both `details` and `message` to the `stop_` functions is now an internal error. * `x_arg`, `y_arg`, and `to_arg` are now compulsory arguments in `stop_` functions like `stop_incompatible_type()`. * Lossy cast errors are now considered internal. Please don't test for the class or explicitly handle them. * New argument `loss_type` for the experimental function `maybe_lossy_cast()`. It can take the values "precision" or "generality" to indicate in the error message which kind of loss is the error about (double to integer loses precision, character to factor loses generality). * Coercion and recycling errors are now more consistent. ## CRAN results * Fixed clang-UBSAN error "nan is outside the range of representable values of type 'int'" (#902). * Fixed compilation of stability vignette following the date conversion changes on R-devel. # vctrs 0.2.4 * Factors and dates methods are now implemented in C for efficiency. * `new_data_frame()` now correctly updates attributes and supports merging of the `"names"` and `"row.names"` arguments (#883). * `vec_match()` gains an `na_equal` argument (#718). * `vec_chop()`'s `indices` argument has been restricted to positive integer vectors. Character and logical subscripts haven't proven useful, and this aligns `vec_chop()` with `vec_unchop()`, for which only positive integer vectors make sense. * New `vec_unchop()` for combining a list of vectors into a single vector. It is similar to `vec_c()`, but gives greater control over how the elements are placed in the output through the use of a secondary `indices` argument. * Breaking change: When `.id` is supplied, `vec_rbind()` now creates the identifier column at the start of the data frame rather than at the end. * `numeric_version` and `package_version` lists are now treated as vectors (#723). * `vec_slice()` now properly handles symbols and S3 subscripts. * `vec_as_location()` and `vec_as_subscript()` are now fully implemented in C for efficiency. * `num_as_location()` gains a new argument, `zero`, for controlling whether to `"remove"`, `"ignore"`, or `"error"` on zero values (#852). # vctrs 0.2.3 * The main feature of this release is considerable performance improvements with factors and dates. * `vec_c()` now falls back to `base::c()` if the vector doesn't implement `vec_ptype2()` but implements `c()`. This should improve the compatibility of vctrs-based functions with foreign classes (#801). * `new_data_frame()` is now faster. * New `vec_is_list()` for detecting if a vector is a list in the vctrs sense. For instance, objects of class `lm` are not lists. In general, classes need to explicitly inherit from `"list"` to be considered as lists by vctrs. * Unspecified vectors of `NA` can now be assigned into a list (#819). ``` x <- list(1, 2) vec_slice(x, 1) <- NA x #> [[1]] #> NULL #> #> [[2]] #> 2 ``` * `vec_ptype()` now errors on scalar inputs (#807). * `vec_ptype_finalise()` is now recursive over all data frame types, ensuring that unspecified columns are correctly finalised to logical (#800). * `vec_ptype()` now correctly handles unspecified columns in data frames, and will always return an unspecified column type (#800). * `vec_slice()` and `vec_chop()` now work correctly with `bit64::integer64()` objects when an `NA` subscript is supplied. By extension, this means that `vec_init()` now works with these objects as well (#813). * `vec_rbind()` now binds row names. When named inputs are supplied and `names_to` is `NULL`, the names define row names. If `names_to` is supplied, they are assigned in the column name as before. * `vec_cbind()` now uses the row names of the first named input. * The `c()` method for `vctrs_vctr` now throws an error when `recursive` or `use.names` is supplied (#791). # vctrs 0.2.2 * New `vec_as_subscript()` function to cast inputs to the base type of a subscript (logical, numeric, or character). `vec_as_index()` has been renamed to `vec_as_location()`. Use `num_as_location()` if you need more options to control how numeric subscripts are converted to a vector of locations. * New `vec_as_subscript2()`, `vec_as_location2()`, and `num_as_location2()` variants for validating scalar subscripts and locations (e.g. for indexing with `[[`). * `vec_as_location()` now preserves names of its inputs if possible. * `vec_ptype2()` methods for base classes now prevent inheritance. This makes sense because the subtyping graph created by `vec_ptype2()` methods is generally not the same as the inheritance relationships defined by S3 classes. For instance, subclasses are often a richer type than their superclasses, and should often be declared as supertypes (e.g. `vec_ptype2()` should return the subclass). We introduced this breaking change in a patch release because `new_vctr()` now adds the base type to the class vector by default, which caused `vec_ptype2()` to dispatch erroneously to the methods for base types. We'll finish switching to this approach in vctrs 0.3.0 for the rest of the base S3 classes (dates, data frames, ...). * `vec_equal_na()` now works with complex vectors. * `vctrs_vctr` class gains an `as.POSIXlt()` method (#717). * `vec_is()` now ignores names and row names (#707). * `vec_slice()` now support Altvec vectors (@jimhester, #696). * `vec_proxy_equal()` is now applied recursively across the columns of data frames (#641). * `vec_split()` no longer returns the `val` column as a `list_of`. It is now returned as a bare list (#660). * Complex numbers are now coercible with integer and double (#564). * zeallot has been moved from Imports to Suggests, meaning that `%<-%` is no longer re-exported from vctrs. * `vec_equal()` no longer propagates missing values when comparing list elements. This means that `vec_equal(list(NULL), list(NULL))` will continue to return `NA` because `NULL` is the missing element for a list, but now `vec_equal(list(NA), list(NA))` returns `TRUE` because the `NA` values are compared directly without checking for missingness. * Lists of expressions are now supported in `vec_equal()` and functions that compare elements, such as `vec_unique()` and `vec_match()`. This ensures that they work with the result of modeling functions like `glm()` and `mgcv::gam()` which store "family" objects containing expressions (#643). * `new_vctr()` gains an experimental `inherit_base_type` argument which determines whether or not the class of the underlying type will be included in the class. * `list_of()` now inherits explicitly from "list" (#593). * `vec_ptype()` has relaxed default behaviour for base types; now if two vectors both inherit from (e.g.) "character", the common type is also "character" (#497). * `vec_equal()` now correctly treats `NULL` as the missing value element for lists (#653). * `vec_cast()` now casts data frames to lists rowwise, i.e. to a list of data frames of size 1. This preserves the invariant of `vec_size(vec_cast(x, to)) == vec_size(x)` (#639). * Positive and negative 0 are now considered equivalent by all functions that check for equality or uniqueness (#637). * New experimental functions `vec_group_rle()` for returning run length encoded groups; `vec_group_id()` for constructing group identifiers from a vector; `vec_group_loc()` for computing the locations of unique groups in a vector (#514). * New `vec_chop()` for repeatedly slicing a vector. It efficiently captures the pattern of `map(indices, vec_slice, x = x)`. * Support for multiple character encodings has been added to functions that compare elements within a single vector, such as `vec_unique()`, and across multiple vectors, such as `vec_match()`. When multiple encodings are encountered, a translation to UTF-8 is performed before any comparisons are made (#600, #553). * Equality and ordering methods are now implemented for raw and complex vectors (@romainfrancois). # vctrs 0.2.1 Maintenance release for CRAN checks. # vctrs 0.2.0 With the 0.2.0 release, many vctrs functions have been rewritten with native C code to improve performance. Functions like `vec_c()` and `vec_rbind()` should now be fast enough to be used in packages. This is an ongoing effort, for instance the handling of factors and dates has not been rewritten yet. These classes still slow down vctrs primitives. The API in 0.2.0 has been updated, please see a list of breaking changes below. vctrs has now graduated from experimental to a maturing package. Please note that API changes are still planned for future releases, for instance `vec_ptype2()` and `vec_cast()` might need to return a sentinel instead of failing with an error when there is no common type or possible cast. ## Breaking changes * Lossy casts now throw errors of type `vctrs_error_cast_lossy`. Previously these were warnings. You can suppress these errors selectively with `allow_lossy_cast()` to get the partial cast results. To implement your own lossy cast operation, call the new exported function `maybe_lossy_cast()`. * `vec_c()` now fails when an input is supplied with a name but has internal names or is length > 1: ``` vec_c(foo = c(a = 1)) #> Error: Can't merge the outer name `foo` with a named vector. #> Please supply a `.name_spec` specification. vec_c(foo = 1:3) #> Error: Can't merge the outer name `foo` with a vector of length > 1. #> Please supply a `.name_spec` specification. ``` You can supply a name specification that describes how to combine the external name of the input with its internal names or positions: ``` # Name spec as glue string: vec_c(foo = c(a = 1), .name_spec = "{outer}_{inner}") # Name spec as a function: vec_c(foo = c(a = 1), .name_spec = function(outer, inner) paste(outer, inner, sep = "_")) vec_c(foo = c(a = 1), .name_spec = ~ paste(.x, .y, sep = "_")) ``` * `vec_empty()` has been renamed to `vec_is_empty()`. * `vec_dim()` and `vec_dims()` are no longer exported. * `vec_na()` has been renamed to `vec_init()`, as the primary use case is to initialize an output container. * `vec_slice<-` is now type stable (#140). It always returns the same type as the LHS. If needed, the RHS is cast to the correct type, but only if both inputs are coercible. See examples in `?vec_slice`. * We have renamed the `type` particle to `ptype`: - `vec_type()` => `vec_ptype()` - `vec_type2()` => `vec_ptype2()` - `vec_type_common()` => `vec_ptype_common()` Consequently, `vec_ptype()` was renamed to `vec_ptype_show()`. ## New features * New `vec_proxy()` generic. This is the main customisation point in vctrs along with `vec_restore()`. You should only implement it when your type is designed around a non-vector class (atomic vectors, bare lists, data frames). In this case, `vec_proxy()` should return such a vector class. The vctrs operations will be applied on the proxy and `vec_restore()` is called to restore the original representation of your type. The most common case where you need to implement `vec_proxy()` is for S3 lists. In vctrs, S3 lists are treated as scalars by default. This way we don't treat objects like model fits as vectors. To prevent vctrs from treating your S3 list as a scalar, unclass it from the `vec_proxy()` method. For instance here is the definition for `list_of`: ``` #' @export vec_proxy.vctrs_list_of <- function(x) { unclass(x) } ``` If you inherit from `vctrs_vctr` or `vctrs_rcrd` you don't need to implement `vec_proxy()`. * `vec_c()`, `vec_rbind()`, and `vec_cbind()` gain a `.name_repair` argument (#227, #229). * `vec_c()`, `vec_rbind()`, `vec_cbind()`, and all functions relying on `vec_ptype_common()` now have more informative error messages when some of the inputs have nested data frames that are not convergent: ``` df1 <- tibble(foo = tibble(bar = tibble(x = 1:3, y = letters[1:3]))) df2 <- tibble(foo = tibble(bar = tibble(x = 1:3, y = 4:6))) vec_rbind(df1, df2) #> Error: No common type for `..1$foo$bar$y` and `..2$foo$bar$y` . ``` * `vec_cbind()` now turns named data frames to packed columns. ```r data <- tibble::tibble(x = 1:3, y = letters[1:3]) data <- vec_cbind(data, packed = data) data # A tibble: 3 x 3 x y packed$x $y 1 1 a 1 a 2 2 b 2 b 3 3 c 3 c ``` Packed data frames are nested in a single column. This makes it possible to access it through a single name: ```r data$packed # A tibble: 3 x 2 x y 1 1 a 2 2 b 3 3 c ``` We are planning to use this syntax more widely in the tidyverse. * New `vec_is()` function to check whether a vector conforms to a prototype and/or a size. Unlike `vec_assert()`, it doesn't throw errors but returns `TRUE` or `FALSE` (#79). Called without a specific type or size, `vec_assert()` tests whether an object is a data vector or a scalar. S3 lists are treated as scalars by default. Implement a `vec_is_vector()` for your class to override this property (or derive from `vctrs_vctr`). * New `vec_order()` and `vec_sort()` for ordering and sorting generalised vectors. * New `.names_to` parameter for `vec_rbind()`. If supplied, this should be the name of a column where the names of the inputs are copied. This is similar to the `.id` parameter of `dplyr::bind_rows()`. * New `vec_seq_along()` and `vec_init_along()` create useful sequences (#189). * `vec_slice()` now preserves character row names, if present. * New `vec_split(x, by)` is a generalisation of `split()` that can divide a vector into groups formed by the unique values of another vector. Returns a two-column data frame containing unique values of `by` aligned with matching `x` values (#196). ## Other features and bug fixes * Using classed errors of class `"vctrs_error_assert"` for failed assertions, and of class `"vctrs_error_incompatible"` (with subclasses `_type`, `_cast` and `_op`) for errors on incompatible types (#184). * Character indexing is now only supported for named objects, an error is raised for unnamed objects (#171). * Predicate generics now consistently return logical vectors when passed a `vctrs_vctr` class. They used to restore the output to their input type (#251). * `list_of()` now has an `as.character()` method. It uses `vec_ptype_abbr()` to collapse complex objects into their type representation (tidyverse/tidyr#654). * New `stop_incompatible_size()` to signal a failure due to mismatched sizes. * New `validate_list_of()` (#193). * `vec_arith()` is consistent with base R when combining `difftime` and `date`, with a warning if casts are lossy (#192). * `vec_c()` and `vec_rbind()` now handle data.frame columns properly (@yutannihilation, #182). * `vec_cast(x, data.frame())` preserves the number of rows in `x`. * `vec_equal()` now handles missing values symmetrically (#204). * `vec_equal_na()` now returns `TRUE` for data frames and records when every component is missing, not when _any_ component is missing (#201). * `vec_init()` checks input is a vector. * `vec_proxy_compare()` gains an experimental `relax` argument, which allows data frames to be orderable even if all their columns are not (#210). * `vec_size()` now works with positive short row names. This fixes issues with data frames created with jsonlite (#220). * `vec_slice<-` now has a `vec_assign()` alias. Use `vec_assign()` when you don't want to modify the original input. * `vec_slice()` now calls `vec_restore()` automatically. Unlike the default `[` method from base R, attributes are preserved by default. * `vec_slice()` can correct slice 0-row data frames (#179). * New `vec_repeat()` for repeating each element of a vector the same number of times. * `vec_type2(x, data.frame())` ensures that the returned object has names that are a length-0 character vector. vctrs/MD50000644000176200001440000006060414042554003011726 0ustar liggesusers3848ea61ddbdaed2a53fa27bdb82e312 *DESCRIPTION efb5a9df619b572f3449c7813bfdbac8 *LICENSE 26e8ea3522f7f470d41408ce931449af *LICENSE.note 0420f0ab9c1b93a97657274eab1bbc06 *NAMESPACE d78565a699dbf2e37b6139675f6777f7 *NEWS.md 196356bc4e0a9a6bf85bde3ca1eaba7d *R/aaa.R f24fdf599a553075c2161da61e7b21f9 *R/arith.R 0bb34f009c0bdbc5777b6721690225cd *R/assert.R 54eee28208f7d3c435392cb8c9fc8b9f *R/bind.R 1b5bb91eecfaa488417bf09dd4b750c8 *R/c.R 0f0b90af87fa2b604add52c63bab55ec *R/cast-list.R 58e2d3ba977d18fb732f56296205509c *R/cast.R 654acdbfd2e79282bb8ab2f02f6bcea8 *R/compare.R eab58ca65ed3ea2e7d96f223606d4776 *R/compat-friendly-type.R bf3266e842b5b0d6684b38f7ba65a8a4 *R/compat-lifecycle.R c2454266d642fd2f2eb2c582662d5531 *R/compat-linked-version.R de569e3f582fc5f58bc85d8431862990 *R/compat-purrr.R 91597edfe6b5eabf9e42c8b0b558bca3 *R/complete.R 0d6be577449f2be0dca3aa8f163dd27e *R/conditions.R abc66ef1ee316c56e5355f934b7109ce *R/dictionary.R ecc8ced30047498f466cc3777bce6b2d *R/dim.R 0f3c2232999dfc990d8878ef255904c3 *R/equal.R f9ad69bae40b93ad914d1ee4d516ba3a *R/faq-developer.R 23e1082e791534d5f3d88a606451fb86 *R/faq-internal.R 35aaea2ead8b231a5122bf8441cbaca7 *R/faq.R 4086555f3ef197850db758371ae41269 *R/fields.R a769a65ebacdd8fb319b46649522e274 *R/fill.R efc60fc729132b5d9732d9faff6deed2 *R/group.R a87a8c656a676bfb84d33e1a0f5e824e *R/hash.R 188b289c97bce739003aaf17bdafbe64 *R/names.R 3a821b1c8551b93b103d03e3c472128e *R/numeric.R 343762d5d172ed4d17cfe3623ae17990 *R/order-radix.R bdb9c479f1393e67b86dcd54ebaae880 *R/order.R 2d5d29b81efedf7d8a5af2e906ee7435 *R/partial-factor.R 193f0f1abca2342cb18f524b4b810b34 *R/partial-frame.R d8ac9cf7109b5d4a9a4edfe9fb26c511 *R/partial.R f505e15a583469413be4b578bd67cf3f *R/print-str.R 252060600f95761c06c7164905715358 *R/proxy.R b4862b1b08e73e772f509f8e2b74ed6f *R/ptype-abbr-full.R e8cdc534f227acc05179e5db27988965 *R/recycle.R 0c75a48abbe2319f57e1776721aafb19 *R/register-s3.R ef95fac91e65f47a8c415df8b63946dd *R/rep.R 047f7bd6bf2d65c7f40c3dd442b74151 *R/runs.R b23fc3cfb673f0ad6fd06388a7e5247c *R/shape.R 9a4ccd53e3434b6f9d20443e443d54e7 *R/size.R 48ebddcfc035c0d36e569060f9a07a1b *R/slice-chop.R cb79bf9c9e973b95f64c53df25ec866b *R/slice.R 8b848a6338284986af45a15584abc639 *R/split.R 7dd67105455c54e5fcd27ec9639c6f97 *R/subscript-loc.R 1a4d77051b6ea2b57e8d605832ec73ff *R/subscript.R 8df766ac76d37fb6d47979756441c6de *R/translate.R 2edaff488bdad489d3b410ab2b28544c *R/type-asis.R 901a69ff42e4b6c05d55b526877802f9 *R/type-bare.R 95dc810b51b7fbcc5119031e947d5c16 *R/type-data-frame.R d07bfea50eba71085489e46f391295f4 *R/type-date-time.R a7b7cf1e4891de7de2105e9456298636 *R/type-dplyr.R 1bc366ed2f22e61411c9023903952a4f *R/type-explore.R 969de5f38d121c3e2c6f6717dd51a73c *R/type-factor.R 4a7c46c08f8f1fc0a82afb70a50e441f *R/type-integer64.R 9c6357224f7f554d4216c69cc7bda85a *R/type-list-of.R 1f6c8603dcb6bd68acf72dd47f8c0aa7 *R/type-misc.R ea2f41ddcd6b99639c02e54a92332deb *R/type-rcrd.R 5f3abd93050255499756203ed4b8a068 *R/type-sclr.R 1b7f1c95f6c6e5aa09761adb33ad1baf *R/type-sf.R 63877851f7e324f61bb6fd38a9b66610 *R/type-table.R 45eb5ea0d076235ef3106f1094c30d9e *R/type-tibble.R f436fec5de5f7708e5672ce7accc05a8 *R/type-unspecified.R f7c73f09f57f2d6e3d35788784abddf0 *R/type-vctr.R d70b27c33e371915eb0fbb373638aea7 *R/type.R 3ab5056585b6ab54e4fc6c7f10c0c87c *R/type2.R 6137f151be3f347f7585f08fe722db83 *R/utils-cli.R b19af3b1e1f5ca45efa105c97beac461 *R/utils.R 1953849377a5a27baca8b56a1e030341 *R/vctrs-deprecated.R cf1d56499eae89996f3f357b5071423c *R/vctrs-package.R d6975d26be4a786d8dca2dd949d73995 *R/zzz.R f2cf682fd34dd4c9cac43b40e63d285f *README.md dc4aacab93aa12cbe371fc9bca101a99 *build/vignette.rds 43cc81e569685f0ede8af83150d4f016 *inst/WORDLIST b1ae054dde163543fef592cd0d0bbc10 *inst/doc/pillar.R 37de0c2501ece054db90cdd36536cfbb *inst/doc/pillar.Rmd d6757b9f00a8f0a0877a4433fcee84b1 *inst/doc/pillar.html ae9e1fe9ceef6febb0d2fcc340b41f95 *inst/doc/s3-vector.R 7aae14131fd15f5c67106e33f8a3b263 *inst/doc/s3-vector.Rmd 6aea9deeba885fedb9468e90e7acdd17 *inst/doc/s3-vector.html eb2ce80049219a0c9e12195fb15ee16e *inst/doc/stability.R a16ae23dfc9ed0d60015bcfa8e899618 *inst/doc/stability.Rmd d1aa783a4288d8b39a01e71de7201830 *inst/doc/stability.html 0352c1b2b6bfe4ec421cc38492b8ec6b *inst/doc/type-size.R 72c63cb0dd9c179724530995e4cdf7f1 *inst/doc/type-size.Rmd b97c927b21f85d0d8b4b9e40caf42177 *inst/doc/type-size.html 96207f2b169b78506ee71004ae085ae9 *inst/include/vctrs.c 37e713e13519edfe7a44eb1bcbdb6e35 *inst/include/vctrs.h 6b865e241282842dfe6ebf1861cf5d5b *man/as-is.Rd 1b30b59d7cd0570ff5ee2cca4a63b7c5 *man/data_frame.Rd 45414c31bb338ab549bce1a0a04e7f18 *man/df_list.Rd c57201e923b98f9421bc0223114c5060 *man/df_ptype2.Rd eaae4a865f37107317c625189bb1db97 *man/faq-compatibility-types.Rd c8f030b331b929901ad27ef08a3a953f *man/faq-error-incompatible-attributes.Rd 11b09ae046824fdee81326dc1543835a *man/faq-error-scalar-type.Rd 853adb55a5e326ffce89148289ed888c *man/faq/developer/howto-coercion-data-frame.Rmd 58044acb430cf975face52241f2eff95 *man/faq/developer/howto-coercion.Rmd c7ed48d17afbad700d5b4b64765ad8ad *man/faq/developer/howto-faq-fix-scalar-type-error.Rmd ae1e2d8343daa530961f5fff4e31d454 *man/faq/developer/links-coercion.Rmd 0af8c2fcec3117584270e0f5d91b0611 *man/faq/developer/reference-compatibility.Rmd 7e11e2d99450d1d6c4a8677c12b6d43c *man/faq/developer/snippet-roxy-workflow.Rmd 2cd021d41c4a4da2d44c2ee9390afd67 *man/faq/developer/theory-coercion.Rmd 4b524b24fa740228c5aec8fdcf1c0917 *man/faq/internal/ptype2-identity.Rmd 3b15b08b414102dd5e55333e10d68955 *man/faq/setup.Rmd b061365fbea176c543525a5f395653a5 *man/faq/user/faq-compatibility-types.Rmd c9cc0d04d448f628b050de2a85b237b0 *man/faq/user/faq-error-scalar-type.Rmd 856c670ccf80ab0a433a5a1a172b313d *man/fields.Rd 69eb638d262c60d5bd5ed2aa4717190c *man/figures/cast.png f5627d4a9d3071aad2dec7dbd3d33eca *man/figures/coerce.png e01d20997a9c8d069ef28e3c6013bd8c *man/figures/combined.png cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg e544367a961afd79dd37cfb81e5c1e96 *man/figures/logo.png 25e2f330cc7176f22ad3cc5a3ba4dc22 *man/figures/sizes-recycling.png ff7dec2d166e669c6b6abf085030f369 *man/figures/sizes.graffle 793b22af8f7b1648699748172cab8af7 *man/figures/types.graffle ecb1898f0d3232051d9a1969c0bc8570 *man/figures/vec-count-deps.graffle 0c3de09d34b6b7e94e7671c08e9df3f8 *man/figures/vec-count-deps.png 2995e7508f8f053600f8e21cec6b60c3 *man/figures/vec-count-deps.svg e3e846206be8ff7b3628b55ec935735c *man/howto-faq-coercion-data-frame.Rd f1b1827621253d16a9121c2ef67eb32c *man/howto-faq-coercion.Rd c9e88c9adef860f593f9bd8f256f67d9 *man/howto-faq-fix-scalar-type-error.Rd adb03fe5026bdbf0027002f12f8792d5 *man/int64.Rd 6120ad57460012aad2d26fb5685f8c8e *man/internal-faq-ptype2-identity.Rd f8eb1a5e3c5158bdffd0f6d455230e08 *man/list_of.Rd 9ef18ea44769d9a9d69deffed77c3c85 *man/maybe_lossy_cast.Rd c27f4c03fec7bc8d9c384dec298dc6e5 *man/name_spec.Rd 8fad77609cc985ea0f0502759e5001b8 *man/new_data_frame.Rd 96a6f14e3014f3b7e3e9db90d0516fc4 *man/new_date.Rd 75dcc37c27e47ab214a3fb275e49365a *man/new_factor.Rd 4b83a08a6ed1055066d2860e552b8b46 *man/new_list_of.Rd 075149d44a907c86d691f73d36fd4008 *man/new_partial.Rd 55d7a6b660343c6c2ed6785475b5b433 *man/new_rcrd.Rd 86a13abac5a90fedb0c05f29eb5283aa *man/new_vctr.Rd 0432b9bc77e37a36a76ba1f65697ea6e *man/obj_print.Rd 4bd8924e7585671c5f2995ca1fcb8578 *man/op-empty-default.Rd 2a36787ed9c7d3bdaf57ba8c7d03ff7c *man/partial_factor.Rd 9aab73d2dab1a252ddc23a457dfe798e *man/partial_frame.Rd 3b7562d5b5d13c4a724341bedbf65c72 *man/reference-faq-compatibility.Rd ca97743e34240e81c730e3e33d76656f *man/s3_register.Rd febea111fcbfa3fadeb25ea4a5239008 *man/table.Rd 7394ba0933b57f2039c0d19a0df54cfc *man/theory-faq-coercion.Rd 679d46a041225b72bf80ad526513cc47 *man/unspecified.Rd 9bbe1876f7ea9c0c17040da8cb0a2de4 *man/vctrs-conditions.Rd 00d4712f39f895035e899b48827887d0 *man/vctrs-data-frame.Rd 2151d0d803834c5f9e7af825fc4ab3e0 *man/vctrs-package.Rd 17e1fb1305f943ee84e226386a4292fb *man/vec-rep.Rd 41aec7be7b1c7722937983d2fdf1b8e3 *man/vec_arith.Rd be5ef09fa278a0d5e5818d1e466abf31 *man/vec_as_index.Rd e669fdeed29109ebf62f0e68aeaf7d65 *man/vec_as_location.Rd 200e1ec37f19a28595990e6e60fbed70 *man/vec_as_names.Rd 4751d9e1734abce229836322b3cd18ec *man/vec_as_names_legacy.Rd 4dbfc890f70f980b9804abf451050ecb *man/vec_as_subscript.Rd b96f80437b9ac0b257fb6ce9db28cd4e *man/vec_assert.Rd 513073e0b189c8f7397488ce6c1f5052 *man/vec_bind.Rd 4bdfa1009a8c285e069b188db459ba0d *man/vec_c.Rd f4070970f709a6b8b0f3e2c494cdd79f *man/vec_cast.Rd 6497ea65f7a25c6b57f7da8868a969b7 *man/vec_cbind_frame_ptype.Rd 8f88ed65efbddfb052819eb1e43bca23 *man/vec_chop.Rd 522e1b8abb03db8ee65ebcb80c0cc93d *man/vec_compare.Rd 72f87a47843f7b9fc36aeeae5ac65eca *man/vec_count.Rd ed716990b6a50620562e744ee927ab75 *man/vec_data.Rd 38435ed2f8b6f5e67c5891fee9ab6f1b *man/vec_default_ptype2.Rd a61ce2dccbfc33c2967e225baa923756 *man/vec_detect_complete.Rd e1817b7f472d5eb862fae6e02081941c *man/vec_duplicate.Rd cf78fee622d53673b7e08d5c4406c222 *man/vec_empty.Rd db8669dce5b009de6b113c9c1f312072 *man/vec_equal.Rd ab5db56ce9cf88567ef1544dc58a0fb4 *man/vec_fill_missing.Rd 8ba05e321b26a23b2e52d76a07dc625f *man/vec_group.Rd 87086ee6e3f66843d826fc61b7f2417b *man/vec_identify_runs.Rd 6f8697155e5275ab798e6a430f020197 *man/vec_init.Rd 1910f3c0644b33010b7dcb0a574e8c1c *man/vec_is_list.Rd 63b8dd27a1038d248082a77cffcd25e8 *man/vec_list_cast.Rd b4a957dc485dcfa8a4edd03827f3e85e *man/vec_match.Rd 6a0ece01d92f2d27a52fa1fc6ec99d36 *man/vec_math.Rd 6dc1d5785635777f192fe68ee52734d9 *man/vec_names.Rd 0bb1b4f1e154921097132d126ae0de83 *man/vec_order.Rd 3a638ac6150470acfa02c6e160d371fd *man/vec_proxy.Rd d425b3ea537a9a5c707232acb05684e8 *man/vec_proxy_compare.Rd dc84fd074677e150a97b05c08e867ee9 *man/vec_proxy_equal.Rd d64d91679ca28f6b35c195bc335c1cb9 *man/vec_ptype.Rd fa7f90b4736d2391a5358088e796316f *man/vec_ptype2.Rd f2e5b287c46b120025e0974e5e136089 *man/vec_ptype_full.Rd 850c3564ab3cd63e8eb9ecc06a0cc419 *man/vec_recycle.Rd c3c863bc5fe6d0bb6466b176a66c3548 *man/vec_repeat.Rd 998136cc73dfd5148c60699abd8424f0 *man/vec_seq_along.Rd c9d7c14ec952e33716a927b75f47ac54 *man/vec_size.Rd 2dcd5109f14c1ffed339e92d0438df68 *man/vec_slice.Rd 45bfe942611373e1b51f27bd83a535c9 *man/vec_split.Rd a1c483af2aa4664b8757c8447bf03c59 *man/vec_type.Rd 2b6faf07f1ecd9709c37d9ac6c14d6a0 *man/vec_unique.Rd d25d5af44b59cd40d3830b54f7a7cf7f *src/Makevars a7bb33b58c58ccb5f2f89653569b2420 *src/altrep-rle.c c5b1a3e04c67fd31fdfd169c66b17047 *src/altrep-rle.h c8be48dec7f9bef5439d405aa591ad8b *src/altrep.h 5951770a03698aba1253032c5765abc7 *src/arg-counter.c 1f92d79f7917aa2644f648c85775f449 *src/arg-counter.h b79d7bf916cc57698ce3085787472e62 *src/arg.c dce0a2ebf0a98d291c92b80664a76a2a *src/arg.h 16e1a03d8d8d4e157b2d658673aa32f7 *src/bind.c e1787d0301a8bc1190c160f4dcf72d50 *src/c-unchop.c 98dac2d8698088fcac9feb85b2fdaf99 *src/c.c c0382df93e0dfdd2f219baaad2aeb750 *src/c.h 5cdc4038ef8f269274f10df41a8235a1 *src/callables.c 83eae5a62befd7764c85926e14fbaa7b *src/cast-bare.c d00aeeee102ac99949fd5a444e2a5f75 *src/cast-dispatch.c 4b06bfdf3154a2be31d948ec7f84ecc6 *src/cast.c 8cd6833d2c3b04ed7010a51a92732f62 *src/cast.h ccf333b13ff1957602e1ade3d10dd983 *src/compare.c c1bf3db7dd18403ecc71033e7b3da5f5 *src/complete.c 64b67a6b1f0c8bbb7b18f8cf974ffcc3 *src/conditions.c c24dd6d22a8f391d90bd5d03c7099f59 *src/decl/ptype-decl.h 9f0f486a672a6992f7bc25f13c2ff432 *src/dictionary.c d386e244318b5500da8b91c0cd2b42d7 *src/dictionary.h d8fcaebfc5c7324ba107aca5601330c3 *src/dim.c 7086cd74c2917134d88f340648629624 *src/dim.h 18a9f53259b841064da2c516d3e201ce *src/equal.c 2280ff5dd05394078331d70d425a15eb *src/equal.h aa7b46e86d9f60232d6e811237e68dde *src/fields.c 337edcdb69b8dae9a687be96d2973752 *src/fill.c 77cedaa940a34cfbe9d14b1f41d3db0b *src/group.c 12dd3a8d05caf37435ed791482eb9588 *src/growable.c b24c200d83ac1a234b9aad74950d7a31 *src/hash.c 4604e7db87456d74586450eef43c6ff6 *src/hash.h e4a7d731f3347af4b369a906cb33c04c *src/init.c bc0b5ab2753c46da091aa25aa541e4dd *src/lazy.h a87d2d4b3cdeffe1354dcd8d118e49bf *src/names.c bff0a51e6d84623453700b3fe28dc135 *src/names.h bea66b0b026637670cefcaed3de1e238 *src/order-groups.c cb33736e8f0007c2cf8df2f477782b54 *src/order-groups.h 3ceb31a2adb98d5c9943dbe1077d1530 *src/order-radix.c feefe35d487a20748ab83c46b3ffe8ee *src/order-radix.h bbc38dd68f57a85b4c39b5928105a689 *src/order-sortedness.c caadc1bbf06454ecc58e337b87174b22 *src/order-sortedness.h a7ac1075e92e1f97f3681ec81b893b8d *src/order-transform.c 44f37fe95019132be37afcdc1d335b91 *src/order-transform.h 02c89bf88b1c3049268324ab0155d310 *src/order-truelength.c 26290c37e7b92f5a8929fff8f228e4d3 *src/order-truelength.h e4ae16436962964550b47329c78f8eb1 *src/owned.h 7141a86749cbd565a9e34020d988e493 *src/poly-op.c 6de3ca5561bcd07d89fa6dcc375cf03e *src/poly-op.h dd4e7681922362aba78c0521406cc780 *src/proxy-restore.c 488e65ea9e10c8b9238da1ad4f15a917 *src/proxy.c 4dbfeaf6dc1eb47c1825e61a256e8496 *src/ptype-common.h 3bf0632c2c210e21e1a4046250ae94fa *src/ptype2-dispatch.c 660a5f65a0293e11fe5b5d9ae1195df3 *src/ptype2.h 82a4089b6e68204f85193ee5b8e87330 *src/rep.c 8d24e05331fea8a80e599009087fbaae *src/runs.c 2ea211b484722cc2b42ded8fa60339cd *src/shape.c 3a7614fba274c28e7c5472906c92e056 *src/shape.h b651559832024ca7c4063bda11527ae1 *src/size-common.c a3b42b7cce10c73dc19ad73d75056e43 *src/size.c e04c433e35f3836951e384d4412116be *src/slice-array.c 09e5a5b09a2ac115b3d886a9f6f9ead0 *src/slice-assign-array.c ec16360ff0760ecf34330dc775faf0ef *src/slice-assign.c 166d85c39a74ff613d2b4e894995dcd8 *src/slice-assign.h 0be23426beb7f9749eb8cf31d4d4254a *src/slice-chop.c a3bfaf7481d859ccc657ab705157c118 *src/slice.c 6f51d15a64073573fb837d82e4eb45a8 *src/slice.h a9b375713d95fa32bb1819c45ab9966c *src/split.c aea1306cc30ac00a04f29421a47abe5b *src/strides.h a0a2acb694b040128b0a814b564abe3d *src/subscript-loc.c fa806b85231030ef6427980de1b2b349 *src/subscript-loc.h 0c14b30a477c050bdc3ad63698e25046 *src/subscript.c f3e76863b19103b4d2a0ef1bbf4fef48 *src/subscript.h 7f0a60d5c5aec7c256f7dcfe9407c17f *src/translate.c 1edc925a45eac01bfeb22211a7c357e0 *src/translate.h 7f6d41aa909a5474fd30bf51b17c9c1b *src/type-data-frame.c 3c33aacd24c81ca1bd20c0bc68af3ed0 *src/type-data-frame.h 2db540495921452b07c3db8d9f9592ba *src/type-date-time.c ebcac966fca56d915742fd0522541081 *src/type-factor.c d2358a64fce91c032292d1af8e169ba2 *src/type-factor.h 101aae35820185b5731e4652502af09d *src/type-info.c 2f99f7aa71032dfb4df806564d5dbca0 *src/type-tibble.c bd4f06b39d5fc978f0197b33ec124d0d *src/type-tibble.h dea89c30900519091e848e730998da3b *src/type.c 0cffdf13b89ee2a5a91f1b908dc3a725 *src/type2.c 04b762ce0872df8284e7692708d4b933 *src/typeof2-s3.c c8430c6b42c571e81bc588ae84762d4e *src/typeof2.c 9b291c614a2ba9ed615570f51128864c *src/unspecified.c 7d930f1fd18788161a29207f0955fdb1 *src/utils-dispatch.c d068f6be58e7a041788dcbe423fd22d2 *src/utils-rlang.c f0ea654c49238baca6f6e5cec0481d8c *src/utils-rlang.h d1359dfa870477f6503d6cf6570e94d3 *src/utils.c a24ba1f8d221129458f173c3212f8097 *src/utils.h c462a97b920a7c1248e7465969094cf4 *src/vctrs.h 6e723c55b4cdcbaefb71e7c0a2880366 *src/version.c 36298abc8bb1476f930ae3c5cfe46ffe *tests/testthat.R f392d42aabf155f2e7f0f0f39e590494 *tests/testthat/error/test-bind.txt 6db85a51557210ffc0b33361a6dceba3 *tests/testthat/error/test-c.txt 9ca7ea2174de9d4dcefdedb6c3cacef3 *tests/testthat/error/test-cast.txt 1d4c1a2a141769a86e9a43f50fbd9f9c *tests/testthat/error/test-conditions.txt 9fba632566530a2df79a386155f88b27 *tests/testthat/error/test-dictionary.txt 643950770586213fbb1bd1b6b5241e42 *tests/testthat/error/test-recycle.txt d21a43884efa31466e928bc142d5d8d4 *tests/testthat/error/test-rep.txt 3fabe82f707cf48770c3be390c57e269 *tests/testthat/error/test-shape.txt 7ff951ae4c7c119fa5961bffd51cc436 *tests/testthat/error/test-slice-assign.txt d967909d65d75baa48fd6c71f4b7b96f *tests/testthat/error/test-slice.txt 8f3eb8c0bf02aa57c8beafd358a57a16 *tests/testthat/error/test-subscript-loc.txt 9180c00feee6e6a0445d0af532ff7976 *tests/testthat/error/test-subscript.txt c1e1978c57745557b166d6df40930c65 *tests/testthat/error/test-type-asis.txt c6c381183278a88232f30de44eb30bab *tests/testthat/error/test-type-data-frame.txt 6a2db50a51064b6a8ffc926ce5df6abc *tests/testthat/error/test-type2.txt 6610429bf42328fa4b94c9cd8b3e3c61 *tests/testthat/error/test-unchop.txt dbaddd2cc47dd9d09dc1cd128d38ca15 *tests/testthat/helper-c.R cab609efdc70e227e331646c2b3b99e5 *tests/testthat/helper-cast.R 02669d9d7da34dab98f8fd1af74173c2 *tests/testthat/helper-conditions.R 76c635db233174b0c228e30b12b09ce3 *tests/testthat/helper-encoding.R a3b8abb53e27250247b0cb95b932239e *tests/testthat/helper-expectations.R ead65126fe8184d9cddc7f63cff8e786 *tests/testthat/helper-memory.R 01768424b3f2525728e0a2d92b52c5a0 *tests/testthat/helper-order.R b0c833310272df9b6b9cf757e9caa8b9 *tests/testthat/helper-output.R 1ad042c8b3fe0df05a2e0e62ee1e3896 *tests/testthat/helper-performance.R 4ad6b8ddf952740269ab6a4338392e91 *tests/testthat/helper-rational.R d631192e912cbe5378eca69faf77752b *tests/testthat/helper-s3.R f1a6538c32d7adb0b67d38196afee201 *tests/testthat/helper-s4.R 1a6c1bf6b317385ea29d194695ceaf3d *tests/testthat/helper-shape.R 94900d6ac252d4dca59089862f0c02af *tests/testthat/helper-size.R a195f504649598e6e9cd5b41ac922300 *tests/testthat/helper-type-dplyr.R 4e96c9aba5d25277c76f2eafbf4797a0 *tests/testthat/helper-types.R 4504795a67b63dd8e608dfa66430ba6e *tests/testthat/helper-vctrs.R 929027f6e4855c4afe03934201f233c2 *tests/testthat/out/vec-ptype-0.txt 6041399246127cb2c6b37821a37e4c0a *tests/testthat/out/vec-ptype-1.txt 3cdee079cea92524e4767807d4637d1e *tests/testthat/out/vec-ptype-2.txt a001c92c68abacc2b3678bdb1392eb3f *tests/testthat/out/vec-ptype-3.txt 2a0d3ab6648b5865b7db646d2bf1a462 *tests/testthat/output/bind-name-repair.txt 785b4855a9420a1fbfd12d1599fcc768 *tests/testthat/output/test-vec-as-names.txt d687aa35de3c3021daf5e76f8e767fa2 *tests/testthat/performance/test-bind.txt 38a5d5444575a077d106776b24652b7c *tests/testthat/performance/test-c.txt 66aed74f1596d63d18733b1fe5bf6607 *tests/testthat/test-arith.R 338d389a07f4b5cf1e57a9104557528b *tests/testthat/test-assert-explanations.txt 05e10b84273cb18a3965d8601bfe5ac0 *tests/testthat/test-assert.R f4c36e3d4f9b20412fdd3e9a438013da *tests/testthat/test-bind.R 7cccca7e2391235763d150099c3fad25 *tests/testthat/test-c.R 6fa31052008e8a1ec4ecb0c40e1d5e72 *tests/testthat/test-cast-error-nested.txt 214575d94613e94ff35ff231317cc78e *tests/testthat/test-cast-list.R f7e4d5000430e57a9c5597d00d94afe8 *tests/testthat/test-cast.R cb38f372aa0b6c239048fabcc12766a9 *tests/testthat/test-compare.R a9f19979a9d879d1774d9dd355cb1277 *tests/testthat/test-complete.R 97dcd15685b9fd96d882b31ba7f28303 *tests/testthat/test-conditions.R 73d0dabd613be499eb98189645317742 *tests/testthat/test-dictionary.R d67c0927e0bbee60571e58452b9c0b5a *tests/testthat/test-dim.R c3eada52c55600a0b22b0e57a45a9ea3 *tests/testthat/test-empty.R fb8f10c1cf7774dbf3b83879cff13638 *tests/testthat/test-equal.R 82c226106468a9b9b0194c25b73faf69 *tests/testthat/test-fields.R bd48cd5bcd6b514e65ee591d83638329 *tests/testthat/test-fill.R 5ed0de9e8a6463fe36bc75538b9d5690 *tests/testthat/test-group.R 543408eef52312f76a20d50b95aa3c1e *tests/testthat/test-hash-hash.txt 731b89544b1b9b48c8fef61318071949 *tests/testthat/test-hash.R 3fcd7edef3d5c85088414f65a2232d38 *tests/testthat/test-lifecycle-deprecated.R f9fe97611639e49dc50a46bee23d915e *tests/testthat/test-list_of-print.txt 4bcfd8ed7c141308a0764999e185cdd9 *tests/testthat/test-list_of-str-empty.txt 3e8821e1f513884c455ae501899c14cb *tests/testthat/test-list_of-str.txt 8d40169d77a512b45e61d1945f57f996 *tests/testthat/test-list_of-type.txt b64816369c2b34ef8c6efa7ceab49c71 *tests/testthat/test-names.R bf3f0e8913a6e189f00ba1e3cd8ec9a5 *tests/testthat/test-order-radix.R e1109e186dfda039e3a4b61800a900ac *tests/testthat/test-order.R b225ebb4e55e1f3dc11e69b741936bfe *tests/testthat/test-partial-factor-print-both.txt 7505e7573c3547f6a2ed243b1f6e4132 *tests/testthat/test-partial-factor-print-empty.txt a98c487631a789583987cf73bcbb590e *tests/testthat/test-partial-factor-print-learned.txt e1e03fa88ae63f338c9397904dc11ec1 *tests/testthat/test-partial-factor-print-partial.txt e193a3ede017ee46e6ec486bc5a6c8cb *tests/testthat/test-partial-factor.R 5c33c00aa55262152308c627371c3500 *tests/testthat/test-partial-frame-print.txt 3d20d54393c04c213f8f49471bb6615d *tests/testthat/test-partial-frame.R f47c9cf5606d4451dd7420ef72e9b57b *tests/testthat/test-print-str-attr.txt 1a661d02e1b10e92c710b44398a6f647 *tests/testthat/test-print-str-mtcars.txt 0bb30c63929d984f234026bd1a4b8c65 *tests/testthat/test-print-str.R aa4ccd8b5bee35e1eb170a187f1cc95f *tests/testthat/test-proxy-restore.R 1fdf7a083296a9fd11c9cee793d168a4 *tests/testthat/test-proxy.R 074fc1b7bd532783cae64461da410c0e *tests/testthat/test-ptype-abbr-full.R 15637ccf42d4f3f8546a879308e4a591 *tests/testthat/test-rcrd-format.txt 44106467eb97ffea9a2883d4e32644f0 *tests/testthat/test-recycle.R 5770aa190056c9fa9acef7d9bf3dbc30 *tests/testthat/test-rep.R 2cb930e8f917c7b736d6ddd123834efb *tests/testthat/test-runs.R 240acbbf684c59e1be0a64cb362ecea2 *tests/testthat/test-s4.R c81119483f19f1298b51e5a2fa97102e *tests/testthat/test-shape-print.txt dda07116c0e45d03d591bc484c222dfb *tests/testthat/test-shape.R 9343c9751844038db90d31a25a7f185d *tests/testthat/test-size.R 2f2d4131636863f37136dd28b9ebc44b *tests/testthat/test-slice-assign.R 1beb188870602c8c46ca66b8220dcb87 *tests/testthat/test-slice-chop.R 03100108073c6d7f3392d6784955a374 *tests/testthat/test-slice.R ade7ef62ecdbac77c01275c67ba1bc0b *tests/testthat/test-split.R e865fa701116503057b5f31e5d0e5aca *tests/testthat/test-subscript-loc.R 1caf683115c9c0245d6a6d82f0170298 *tests/testthat/test-subscript.R 8eaae25fb4ec6dbb832ddb714a3d2636 *tests/testthat/test-translate.R 89081113f18615c5ad1892b43a58c0bf *tests/testthat/test-type-asis.R 079c3e3cbbc0011feeb8b22516a9e957 *tests/testthat/test-type-bare.R 14f57c2e4b95238f8985301655190f66 *tests/testthat/test-type-data-frame-embedded.txt 01b3c8f4af7cfece02e912d2b074ce4c *tests/testthat/test-type-data-frame.R 1c8d54d674c77ccacd4c7bfc0eae6449 *tests/testthat/test-type-data-frame.txt 617872678c1e28a855f4437b1f2d9c1b *tests/testthat/test-type-date-time.R f795b221de83dbbc5c3add632db18264 *tests/testthat/test-type-date-time.txt 345625c3a2089ae223aa916e5abda775 *tests/testthat/test-type-dplyr.R 0d2a0111dde527d86a9cb462aafe3c3b *tests/testthat/test-type-factor.R ece1b7198e31c938df834f1e20b76094 *tests/testthat/test-type-factor.txt 46842fad3e4dbba1fdc190c4602defd6 *tests/testthat/test-type-group-rle.txt 0ad7bc32ddbaaa1331376c89c66713b5 *tests/testthat/test-type-integer64.R 3659d16770c4b51c733fb5876000b490 *tests/testthat/test-type-list-of.R cf48c291e86acce844b243e57de362b7 *tests/testthat/test-type-misc.R 3710026045ab905e8fcaa42fa04920fc *tests/testthat/test-type-rational.R a7d04ee4ae3d1db20d42df9637156c34 *tests/testthat/test-type-rcrd.R eabcddd56636ae5f54d21f0d2c9296f3 *tests/testthat/test-type-sclr.R f0d35cdc1a61875a11cfb26afe8fb0d5 *tests/testthat/test-type-sf.R 800a6d975a769d4113cd5b1113a1cb35 *tests/testthat/test-type-table.R cda13d25b1b2ff9e8420ef4e0eb2a995 *tests/testthat/test-type-tibble.R ac4577e324ff22ecacf3ff868fb97ee1 *tests/testthat/test-type-unspecified.R 6301d24246251a8d40cc03c41f393038 *tests/testthat/test-type-unspecified.txt bcbe604bc2db54e83a18531e6250d133 *tests/testthat/test-type-vctr.R b0696d5312766c905427e616f46cf457 *tests/testthat/test-type-vec-c-error.txt ec0a66ecd6d8e2b5a2ed2a79fd3d61cf *tests/testthat/test-type-vec-size-common-error.txt 3a87fb4511e6b173e863b89c0b540f8d *tests/testthat/test-type-vec-type-common-error.txt 4c53b74af00d9953ed674fd558d9a5c2 *tests/testthat/test-type.R 2fa057c9f35dabc52ae499b3d0bb999e *tests/testthat/test-type2-error-messages.txt 9bb2f0982b3a74083f40c371305c504c *tests/testthat/test-type2.R 60e429f9a5fe38018fd3c7d03ef1d929 *tests/testthat/test-type2.txt 197c30fe0d707de9b6e50ca89f25b917 *tests/testthat/test-utils.R bcf4a0cda7d861e924cba4a7c3b75722 *tests/testthat/test-vctr-print-names.txt f04a12e223a1d31f957df65a40f660d5 *tests/testthat/test-vctr-print.txt 9c1793ea42fd231a18fd557062f59736 *tests/testthat/test-vctrs.R 37de0c2501ece054db90cdd36536cfbb *vignettes/pillar.Rmd 7aae14131fd15f5c67106e33f8a3b263 *vignettes/s3-vector.Rmd a16ae23dfc9ed0d60015bcfa8e899618 *vignettes/stability.Rmd 72c63cb0dd9c179724530995e4cdf7f1 *vignettes/type-size.Rmd vctrs/inst/0000755000176200001440000000000014042546502012372 5ustar liggesusersvctrs/inst/doc/0000755000176200001440000000000014042546502013137 5ustar liggesusersvctrs/inst/doc/pillar.R0000644000176200001440000001032014042546477014554 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = pillar::style_subtle("#>")) ## ----setup-------------------------------------------------------------------- library(vctrs) library(pillar) ## ---- eval = FALSE------------------------------------------------------------ # usethis::use_package("vctrs") # usethis::use_package("pillar") ## ----------------------------------------------------------------------------- #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ## ----------------------------------------------------------------------------- library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ## ----------------------------------------------------------------------------- #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ## ----------------------------------------------------------------------------- deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ## ----------------------------------------------------------------------------- data ## ----------------------------------------------------------------------------- print(data, width = 30) ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ## ----------------------------------------------------------------------------- deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ## ----------------------------------------------------------------------------- #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ## ----eval = FALSE------------------------------------------------------------- # expect_snapshot(pillar_shaft(data$loc)) vctrs/inst/doc/s3-vector.html0000644000176200001440000044113614042546501015662 0ustar liggesusers S3 vectors

S3 vectors

This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you’ll also need to provide methods that actually make the vector useful.

I assume that you’re already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of the S3 chapter of Advanced R.

This article refers to “vectors of numbers” as double vectors. Here, “double” stands for “double precision floating point number”, see also double().

library(vctrs)
library(zeallot)

This vignette works through five big topics:

  • The basics of creating a new vector class with vctrs.
  • The coercion and casting system.
  • The record and list-of types.
  • Equality and comparison proxies.
  • Arithmetic operators.

They’re collectively demonstrated with a number of simple S3 classes:

  • Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting.

  • Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions.

  • Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care.

  • Rational: a pair of integer vectors that defines a rational number like 2 / 3. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for +, -, and friends.

  • Polynomial: a list of integer vectors that define polynomials like 1 + x - x^3. Sorting such vectors correctly requires a custom equality method.

  • Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties.

  • Period and frequency: a pair of classes represent a period, or it’s inverse, frequency. This allows us to explore more arithmetic operators.

Basics

In this section you’ll learn how to create a new vctrs class by calling new_vctr(). This creates an object with class vctrs_vctr which has a number of methods. These are designed to make your life as easy as possible. For example:

  • The print() and str() methods are defined in terms of format() so you get a pleasant, consistent display as soon as you’ve made your format() method.

  • You can immediately put your new vector class in a data frame because as.data.frame.vctrs_vctr() does the right thing.

  • Subsetting ([, [[, and $), length<-, and rep() methods automatically preserve attributes because they use vec_restore(). A default vec_restore() works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data.

  • Default subset-assignment methods ([<-, [[<-, and $<-) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages.

Percent class

In this section, I’ll show you how to make a percent class, i.e., a double vector that is printed as a percentage. We start by defining a low-level constructor that uses vec_assert() to checks types and/or sizes then calls new_vctr().

percent is built on a double vector of any length and doesn’t have any attributes.

new_percent <- function(x = double()) {
  vec_assert(x, double())
  new_vctr(x, class = "vctrs_percent")
}

x <- new_percent(c(seq(0, 1, length.out = 4), NA))
x
#> <vctrs_percent[5]>
#> [1] 0.0000000 0.3333333 0.6666667 1.0000000        NA

str(x)
#>  vctrs_pr [1:5] 0.0000000, 0.3333333, 0.6666667, 1.0000000,        NA

Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as blob), it’s fine to use the package name without prefix as the class name.

We then follow up with a user friendly helper. Here we’ll use vec_cast() to allow it to accept anything coercible to a double:

percent <- function(x = double()) {
  x <- vec_cast(x, double())
  new_percent(x)
}

Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype.

new_percent()
#> <vctrs_percent[0]>
percent()
#> <vctrs_percent[0]>

For the convenience of your users, consider implementing an is_percent() function:

is_percent <- function(x) {
  inherits(x, "vctrs_percent")
}

format() method

The first method for every class should almost always be a format() method. This should return a character vector the same length as x. The easiest way to do this is to rely on one of R’s low-level formatting functions like formatC():

format.vctrs_percent <- function(x, ...) {
  out <- formatC(signif(vec_data(x) * 100, 3))
  out[is.na(x)] <- NA
  out[!is.na(x)] <- paste0(out[!is.na(x)], "%")
  out
}
x
#> <vctrs_percent[5]>
#> [1] 0%    33.3% 66.7% 100%  <NA>

(Note the use of vec_data() so format() doesn’t get stuck in an infinite loop, and that I take a little care to not convert NA to "NA"; this leads to better printing.)

The format method is also used by data frames, tibbles, and str():

data.frame(x)
#>       x
#> 1    0%
#> 2 33.3%
#> 3 66.7%
#> 4  100%
#> 5  <NA>

For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in str():

vec_ptype_abbr.vctrs_percent <- function(x, ...) {
  "prcnt"
}

tibble::tibble(x)
#> # A tibble: 5 x 1
#>         x
#>   <prcnt>
#> 1      0%
#> 2   33.3%
#> 3   66.7%
#> 4    100%
#> 5      NA

str(x)
#>  prcnt [1:5] 0%, 33.3%, 66.7%, 100%, <NA>

If you need more control over printing in tibbles, implement a method for pillar::pillar_shaft(). See vignette("pillar", package = "vctrs") for details.

Casting and coercion

The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens implicitly (e.g in c()) we call it coercion; when the change happens explicitly (e.g. with as.integer(x)), we call it casting.

One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it’s possible to make accurate predictions about what (e.g.) c(x, y) should do when x and y have different prototypes. vctrs achieves this goal through two generics:

  • vec_ptype2(x, y) defines possible set of coercions. It returns a prototype if x and y can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes.

  • vec_cast(x, to) defines the possible sets of casts. It returns x translated to have prototype to, or throws an error if the conversion isn’t possible. The set of possible casts is a superset of possible coercions because they’re requested explicitly.

Double dispatch

Both generics use double dispatch which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means:

  • You end up with method names with two classes, like vec_ptype2.foo.bar().

  • You don’t need to implement default methods (they would never be called if you do).

  • You can’t call NextMethod().

Percent class

We’ll make our percent class coercible back and forth with double vectors.

vec_ptype2() provides a user friendly error message if the coercion doesn’t exist and makes sure NA is handled in a standard way. NA is technically a logical vector, but we want to stand in for a missing value of any type.

vec_ptype2("bogus", percent())
#> Error: Can't combine <character> and <vctrs_percent>.
vec_ptype2(percent(), NA)
#> <vctrs_percent[0]>
vec_ptype2(NA, percent())
#> <vctrs_percent[0]>

By default and in simple cases, an object of the same class is compatible with itself:

vec_ptype2(percent(), percent())
#> <vctrs_percent[0]>

However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we’ll start by saying that a vctrs_percent combined with a vctrs_percent yields a vctrs_percent, which we indicate by returning a prototype generated by the constructor.

vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent()

Next we define methods that say that combining a percent and double should yield a double. We avoid returning a percent here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers.

Because double dispatch is a bit of a hack, we need to provide two methods. It’s your responsibility to ensure that each member of the pair returns the same result: if they don’t you will get weird and unpredictable behaviour.

The double dispatch mechanism requires us to refer to the underlying type, double, in the method name. If we implemented vec_ptype2.vctrs_percent.numeric(), it would never be called.

vec_ptype2.vctrs_percent.double <- function(x, y, ...) double()
vec_ptype2.double.vctrs_percent <- function(x, y, ...) double()

We can check that we’ve implemented this correctly with vec_ptype_show():

vec_ptype_show(percent(), double(), percent())
#> Prototype: <double>
#> 0. (                 , <vctrs_percent> ) = <vctrs_percent>
#> 1. ( <vctrs_percent> , <double>        ) = <double>       
#> 2. ( <double>        , <vctrs_percent> ) = <double>

The vec_ptype2() methods define which input is the richer type that vctrs should coerce to. However, they don’t perform any conversion. This is the job of vec_cast(), which we implement next. We’ll provide a method to cast a percent to a percent:

vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x

And then for converting back and forth between doubles. To convert a double to a percent we use the percent() helper (not the constructor; this is unvalidated user input). To convert a percent to a double, we strip the attributes.

Note that for historical reasons the order of argument in the signature is the opposite as for vec_ptype2(). The class for to comes first, and the class for x comes second.

Again, the double dispatch mechanism requires us to refer to the underlying type, double, in the method name. Implementing vec_cast.vctrs_percent.numeric() has no effect.

vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x)
vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x)

Then we can check this works with vec_cast():

vec_cast(0.5, percent())
#> <vctrs_percent[1]>
#> [1] 50%
vec_cast(percent(0.5), double())
#> [1] 0.5

Once you’ve implemented vec_ptype2() and vec_cast(), you get vec_c(), [<-, and [[<- implementations for free.

vec_c(percent(0.5), 1)
#> [1] 0.5 1.0
vec_c(NA, percent(0.5))
#> <vctrs_percent[2]>
#> [1] <NA> 50%
# but
vec_c(TRUE, percent(0.5))
#> Error: Can't combine `..1` <logical> and `..2` <vctrs_percent>.

x <- percent(c(0.5, 1, 2))
x[1:2] <- 2:1
#> Error: Can't convert <integer> to <vctrs_percent>.
x[[3]] <- 0.5
x
#> <vctrs_percent[3]>
#> [1] 50%  100% 50%

You’ll also get mostly correct behaviour for c(). The exception is when you use c() with a base R class:

# Correct
c(percent(0.5), 1)
#> [1] 0.5 1.0
c(percent(0.5), factor(1))
#> Error: Can't combine `..1` <vctrs_percent> and `..2` <factor<25c7e>>.

# Incorrect
c(factor(1), percent(0.5))
#> [1] 1.0 0.5

Unfortunately there’s no way to fix this problem with the current design of c().

Again, as a convenience, consider providing an as_percent() function that makes use of the casts defined in your vec_cast.vctrs_percent() methods:

as_percent <- function(x) {
  vec_cast(x, new_percent())
}

Occasionally, it is useful to provide conversions that go beyond what’s allowed in casting. For example, we could offer a parsing method for character vectors. In this case, as_percent() should be generic, the default method should cast, and then additional methods should implement more flexible conversion:

as_percent <- function(x, ...) {
  UseMethod("as_percent")
}

as_percent.default <- function(x, ...) {
  vec_cast(x, new_percent())
}

as_percent.character <- function(x) {
  value <- as.numeric(gsub(" *% *$", "", x)) / 100
  new_percent(value)
}

Decimal class

Now that you’ve seen the basics with a very simple S3 class, we’ll gradually explore more complicated scenarios. This section creates a decimal class that prints with the specified number of decimal places. This is very similar to percent but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1).

We start off as before, defining a low-level constructor, a user-friendly constructor, a format() method, and a vec_ptype_abbr(). Note that additional object attributes are simply passed along to new_vctr():

new_decimal <- function(x = double(), digits = 2L) {
  vec_assert(x, ptype = double())
  vec_assert(digits, ptype = integer(), size = 1)

  new_vctr(x, digits = digits, class = "vctrs_decimal")
}

decimal <- function(x = double(), digits = 2L) {
  x <- vec_cast(x, double())
  digits <- vec_recycle(vec_cast(digits, integer()), 1L)

  new_decimal(x, digits = digits)
}

digits <- function(x) attr(x, "digits")

format.vctrs_decimal <- function(x, ...) {
  sprintf(paste0("%-0.", digits(x), "f"), x)
}

vec_ptype_abbr.vctrs_decimal <- function(x, ...) {
  "dec"
}

x <- decimal(runif(10), 1L)
x
#> <vctrs_decimal[10]>
#>  [1] 0.1 0.8 0.6 0.2 0.0 0.5 0.5 0.3 0.7 0.8

Note that I provide a little helper to extract the digits attribute. This makes the code a little easier to read and should not be exported.

By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You’ll see what to do if the attributes are data dependent in the next section.

x[1:2]
#> <vctrs_decimal[2]>
#> [1] 0.1 0.8
x[[1]]
#> <vctrs_decimal[1]>
#> [1] 0.1

For the sake of exposition, we’ll assume that digits is an important attribute of the class and should be included in the full type:

vec_ptype_full.vctrs_decimal <- function(x, ...) {
  paste0("decimal<", digits(x), ">")
}

x
#> <decimal<1>[10]>
#>  [1] 0.1 0.8 0.6 0.2 0.0 0.5 0.5 0.3 0.7 0.8

Now consider vec_cast() and vec_ptype2(). Casting and coercing from one decimal to another requires a little thought as the values of the digits attribute might be different, and we need some way to reconcile them. Here I’ve decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error.

vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) {
  new_decimal(digits = max(digits(x), digits(y)))
}
vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) {
  new_decimal(vec_data(x), digits = digits(to))
}

vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2))
#> <decimal<3>[2]>
#> [1] 0.010 0.020

Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal).

vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x
vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y

vec_cast.vctrs_decimal.double  <- function(x, to, ...) new_decimal(x, digits = digits(to))
vec_cast.double.vctrs_decimal  <- function(x, to, ...) vec_data(x)

vec_c(decimal(1, digits = 1), pi)
#> <decimal<1>[2]>
#> [1] 1.0 3.1
vec_c(pi, decimal(1, digits = 1))
#> <decimal<1>[2]>
#> [1] 3.1 1.0

If type x has greater resolution than y, there will be some inputs that lose precision. These should generate errors using stop_lossy_cast(). You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution.

vec_cast(c(1, 2, 10), to = integer())
#> [1]  1  2 10

vec_cast(c(1.5, 2, 10.5), to = integer())
#> Error: Can't convert from <double> to <integer> due to loss of precision.
#> • Locations: 1, 3

Cached sum class

The next level up in complexity is an object that has data-dependent attributes. To explore this idea we’ll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors:

new_cached_sum <- function(x = double(), sum = 0L) {
  vec_assert(x, ptype = double())
  vec_assert(sum, ptype = double(), size = 1L)

  new_vctr(x, sum = sum, class = "vctrs_cached_sum")
}

cached_sum <- function(x) {
  x <- vec_cast(x, double())
  new_cached_sum(x, sum(x))
}

For this class, we can use the default format() method, and instead, we’ll customise the obj_print_footer() method. This is a good place to display user facing attributes.

obj_print_footer.vctrs_cached_sum <- function(x, ...) {
  cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "")
}

x <- cached_sum(runif(10))
x
#> <vctrs_cached_sum[10]>
#>  [1] 0.87460066 0.17494063 0.03424133 0.32038573 0.40232824 0.19566983
#>  [7] 0.40353812 0.06366146 0.38870131 0.97554784
#> # Sum: 3.83

We’ll also override sum() and mean() to use the attribute. This is easiest to do with vec_math(), which you’ll learn about later.

vec_math.vctrs_cached_sum <- function(.fn, .x, ...) {
  cat("Using cache\n")
  switch(.fn,
    sum = attr(.x, "sum"),
    mean = attr(.x, "sum") / length(.x),
    vec_math_base(.fn, .x, ...)
  )
}

sum(x)
#> Using cache
#> [1] 3.833615

As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they’ll work, but return the incorrect result:

x[1:2]
#> <vctrs_cached_sum[2]>
#> [1] 0.8746007 0.1749406
#> # Sum: 3.83

To fix this, you need to provide a vec_restore() method. Note that this method dispatches on the to argument.

vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) {
  new_cached_sum(x, sum(x))
}

x[1]
#> <vctrs_cached_sum[1]>
#> [1] 0.8746007
#> # Sum: 0.875

This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with vec_data() and then reapplying them again with vec_restore(). The default vec_restore() method copies over all attributes, which is not appropriate when the attributes depend on the data.

Note that vec_restore.class is subtly different from vec_cast.class.class(). vec_restore() is used when restoring attributes that have been lost; vec_cast() is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with new_vctr(). vec_restore.factor() would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with vec_cast().

Record-style objects

Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is POSIXlt, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override length() and subsetting methods to conceal this implementation detail.

x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3))
x
#> [1] "2020-01-01 00:00:01 CET" "2020-01-01 00:00:02 CET"
#> [3] "2020-01-01 00:00:03 CET"

length(x)
#> [1] 3
length(unclass(x))
#> [1] 11

x[[1]] # the first date time
#> [1] "2020-01-01 00:00:01 CET"
unclass(x)[[1]] # the first component, the number of seconds
#> [1] 1 2 3

vctrs makes it easy to create new record-style classes using new_rcrd(), which has a wide selection of default methods.

Rational class

A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short).

As usual we start with low-level and user-friendly constructors. The low-level constructor calls new_rcrd(), which needs a named list of equal-length vectors.

new_rational <- function(n = integer(), d = integer()) {
  vec_assert(n, ptype = integer())
  vec_assert(d, ptype = integer())

  new_rcrd(list(n = n, d = d), class = "vctrs_rational")
}

Our user friendly constructor casts n and d to integers and recycles them to the same length.

rational <- function(n = integer(), d = integer()) {
  c(n, d) %<-% vec_cast_common(n, d, .to = integer())
  c(n, d) %<-% vec_recycle_common(n, d)

  new_rational(n, d)
}

x <- rational(1, 1:10)

Behind the scenes, x is a named list with two elements. But those details are hidden so that it behaves like a vector:

names(x)
#> NULL
length(x)
#> [1] 10

To access the underlying fields we need to use field() and fields():

fields(x)
#> [1] "n" "d"
field(x, "n")
#>  [1] 1 1 1 1 1 1 1 1 1 1

Notice that we can’t print() or str() the new rational vector x yet. Printing causes an error:

x
#> <vctrs_rational[10]>
#> Error: `format.vctrs_rational()` not implemented.

str(x)
#> Error: `format.vctrs_rational()` not implemented.

This is because we haven’t defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call vec_data(x).

vec_data(x)
#>    n  d
#> 1  1  1
#> 2  1  2
#> 3  1  3
#> 4  1  4
#> 5  1  5
#> 6  1  6
#> 7  1  7
#> 8  1  8
#> 9  1  9
#> 10 1 10

str(vec_data(x))
#> 'data.frame':    10 obs. of  2 variables:
#>  $ n: int  1 1 1 1 1 1 1 1 1 1
#>  $ d: int  1 2 3 4 5 6 7 8 9 10

It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way:

format.vctrs_rational <- function(x, ...) {
  n <- field(x, "n")
  d <- field(x, "d")

  out <- paste0(n, "/", d)
  out[is.na(n) | is.na(d)] <- NA

  out
}

vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl"
vec_ptype_full.vctrs_rational <- function(x, ...) "rational"

x
#> <rational[10]>
#>  [1] 1/1  1/2  1/3  1/4  1/5  1/6  1/7  1/8  1/9  1/10

vctrs uses the format() method in str(), hiding the underlying implementation details from the user:

str(x)
#>  rtnl [1:10] 1/1, 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10

For rational, vec_ptype2() and vec_cast() follow the same pattern as percent(). We allow coercion from integer and to doubles.

vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational()
vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational()
vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational()

vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x
vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d")
vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1)

vec_c(rational(1, 2), 1L, NA)
#> <rational[3]>
#> [1] 1/2  1/1  <NA>

Decimal2 class

The previous implementation of decimal was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems.

A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a scale). The following code is a very quick sketch of how you might start creating such a class:

new_decimal2 <- function(l, r, scale = 2L) {
  vec_assert(l, ptype = integer())
  vec_assert(r, ptype = integer())
  vec_assert(scale, ptype = integer(), size = 1L)

  new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2")
}

decimal2 <- function(l, r, scale = 2L) {
  l <- vec_cast(l, integer())
  r <- vec_cast(r, integer())
  c(l, r) %<-% vec_recycle_common(l, r)
  scale <- vec_cast(scale, integer())

  # should check that r < 10^scale
  new_decimal2(l = l, r = r, scale = scale)
}

format.vctrs_decimal2 <- function(x, ...) {
  val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale")
  sprintf(paste0("%.0", attr(x, "scale"), "f"), val)
}

decimal2(10, c(0, 5, 99))
#> <vctrs_decimal2[3]>
#> [1] 10.00 10.05 10.99

Equality and comparison

vctrs provides three “proxy” generics. Two of these let you control how your class determines equality and ordering:

  • vec_proxy_equal() returns a data vector suitable for comparison. It underpins ==, !=, unique(), anyDuplicated(), and is.na().

  • vec_proxy_compare() specifies how to compare the elements of your vector. This proxy is used in <, <=, >=, >, min(), max(), median(), quantile(), and xtfrm() (used in order() and sort()) methods.

By default, vec_proxy_equal() and vec_proxy_compare() just call vec_proxy().

  • vec_proxy() returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn’t need to implement vec_proxy().

You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work.

These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C.

Rational class

Let’s explore these ideas by with the rational class we started on above. By default, vec_proxy() converts a record to a data frame, and the default comparison works column by column:

x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2))
x
#> <rational[4]>
#> [1] 1/1 2/1 1/2 2/2

vec_proxy(x)
#>   n d
#> 1 1 1
#> 2 2 1
#> 3 1 2
#> 4 2 2

x == rational(1, 1)
#> [1]  TRUE FALSE FALSE FALSE

This makes sense as a default but isn’t correct here because rational(1, 1) represents the same number as rational(2, 2), so they should be equal. We can fix that by implementing a vec_proxy_equal() method that divides n and d by their greatest common divisor:

# Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632
gcd <- function(x, y) {
  r <- x %% y
  ifelse(r, gcd(y, r), y)
}

vec_proxy_equal.vctrs_rational <- function(x, ...) {
  n <- field(x, "n")
  d <- field(x, "d")
  gcd <- gcd(n, d)

  data.frame(n = n / gcd, d = d / gcd)
}
vec_proxy_equal(x)
#>   n d
#> 1 1 1
#> 2 2 1
#> 3 1 2
#> 4 1 1

x == rational(1, 1)
#> [1]  TRUE FALSE FALSE  TRUE

vec_proxy_equal() is also used by unique():

unique(x)
#> <rational[3]>
#> [1] 1/1 2/1 1/2

We now need to fix sort() similarly, since it currently sorts by n, then by d:

sort(x)
#> <rational[4]>
#> [1] 1/1 1/2 2/1 2/2

The easiest fix is to convert the fraction to a decimal and then sort that:

vec_proxy_compare.vctrs_rational <- function(x, ...) {
  field(x, "n") / field(x, "d")
}

sort(x)
#> <rational[4]>
#> [1] 1/2 1/1 2/2 2/1

(We could have used the same approach in vec_proxy_equal(), but when working with floating point numbers it’s not necessarily true that x == y implies that d * x == d * y.)

Polynomial class

A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like 1 + 3x - 2x^2) using a list of integer vectors (like c(1, 3, -2)). Note the use of new_list_of() in the constructor.

new_poly <- function(x) {
  new_list_of(x, ptype = integer(), class = "vctrs_poly")
}

poly <- function(...) {
  x <- list(...)
  x <- lapply(x, vec_cast, integer())
  new_poly(x)
}

vec_ptype_full.vctrs_poly <- function(x, ...) "polynomial"
vec_ptype_abbr.vctrs_poly <- function(x, ...) "poly"

format.vctrs_poly <- function(x, ...) {
  format_one <- function(x) {
    if (length(x) == 0) {
      return("")
    } else if (length(x) == 1) {
      format(x)
    } else {
      suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "")
      out <- paste0(x, suffix)
      out <- out[x != 0L]
      paste0(out, collapse = " + ")
    }
  }
  vapply(x, format_one, character(1))
}

obj_print_data.vctrs_poly <- function(x, ...) {
  if (length(x) == 0)
    return()
  print(format(x), quote = FALSE)
}

p <- poly(1, c(1, 0, 1), c(1, 0, 0, 0, 2))
p
#> <polynomial[3]>
#> [1] 1         1⋅x^2 + 1 1⋅x^4 + 2

The resulting objects will inherit from the vctrs_list_of class, which provides tailored methods for $, [[, the corresponding assignment operators, and other methods.

class(p)
#> [1] "vctrs_poly"    "vctrs_list_of" "vctrs_vctr"    "list"
p[2]
#> <polynomial[1]>
#> [1] 1⋅x^2 + 1
p[[2]]
#> [1] 1 0 1

Equality works out of the box because we can tell if two integer vectors are equal:

p == poly(c(1, 0, 1))
#> [1] FALSE  TRUE FALSE

But we can’t order them because lists are not comparable:

sort(p)
#> <polynomial[3]>
#> [1] 1         1⋅x^2 + 1 1⋅x^4 + 2

So we need to define a vec_proxy_compare() method:

vec_proxy_compare.vctrs_poly <- function(x, ...) {
  x_raw <- vec_data(x)
  # First figure out the maximum length
  n <- max(vapply(x_raw, length, integer(1)))

  # Then expand all vectors to this length by filling in with zeros
  full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x))

  # Then turn into a data frame
  as.data.frame(do.call(rbind, full))
}

sort(poly(3, 2, 1))
#> <polynomial[3]>
#> [1] 3 2 1
sort(poly(1, c(1, 0, 0), c(1, 0)))
#> <polynomial[3]>
#> [1] 1     1⋅x^2 1⋅x^1

Arithmetic

vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once:

  • vec_math(fn, x, ...) specifies the behaviour of mathematical functions like abs(), sum(), and mean(). (Note that var() and sd() can’t be overridden, see ?vec_math() for the complete list supported by vec_math().)

  • vec_arith(op, x, y) specifies the behaviour of the arithmetic operations like +, -, and %%. (See ?vec_arith() for the complete list.)

Both generics define the behaviour for multiple functions because sum.vctrs_vctr(x) calls vec_math.vctrs_vctr("sum", x), and x + y calls vec_math.x_class.y_class("+", x, y). They’re accompanied by vec_math_base() and vec_arith_base() which make it easy to call the underlying base R functions.

vec_arith() uses double dispatch and needs the following standard boilerplate:

vec_arith.MYCLASS <- function(op, x, y, ...) {
  UseMethod("vec_arith.MYCLASS", y)
}
vec_arith.MYCLASS.default <- function(op, x, y, ...) {
  stop_incompatible_op(op, x, y)
}

Cached sum class

I showed an example of vec_math() to define sum() and mean() methods for cached_sum. Now let’s talk about exactly how it works. Most vec_math() functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to vec_math_base() for those that you don’t care about.

vec_math.vctrs_cached_sum <- function(.fn, .x, ...) {
  switch(.fn,
    sum = attr(.x, "sum"),
    mean = attr(.x, "sum") / length(.x),
    vec_math_base(.fn, .x, ...)
  )
}

Meter class

To explore the infix arithmetic operators exposed by vec_arith() I’ll create a new class that represents a measurement in meters:

new_meter <- function(x) {
  stopifnot(is.double(x))
  new_vctr(x, class = "vctrs_meter")
}

format.vctrs_meter <- function(x, ...) {
  paste0(format(vec_data(x)), " m")
}

meter <- function(x) {
  x <- vec_cast(x, double())
  new_meter(x)
}

x <- meter(1:10)
x
#> <vctrs_meter[10]>
#>  [1]  1 m  2 m  3 m  4 m  5 m  6 m  7 m  8 m  9 m 10 m

Because meter is built on top of a double vector, basic mathematic operations work:

sum(x)
#> <vctrs_meter[1]>
#> [1] 55 m
mean(x)
#> <vctrs_meter[1]>
#> [1] 5.5 m

But we can’t do arithmetic:

x + 1
#> Error: <vctrs_meter> + <double> is not permitted
meter(10) + meter(1)
#> Error: <vctrs_meter> + <vctrs_meter> is not permitted
meter(10) * 3
#> Error: <vctrs_meter> * <double> is not permitted

To allow these infix functions to work, we’ll need to provide vec_arith() generic. But before we do that, let’s think about what combinations of inputs we should support:

  • It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can’t multiply meters (because that would yield an area).

  • For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don’t make much sense as we, strictly speaking, are dealing with objects of different nature.

vec_arith() is another function that uses double dispatch, so as usual we start with a template.

vec_arith.vctrs_meter <- function(op, x, y, ...) {
  UseMethod("vec_arith.vctrs_meter", y)
}
vec_arith.vctrs_meter.default <- function(op, x, y, ...) {
  stop_incompatible_op(op, x, y)
}

Then write the method for two meter objects. We use a switch statement to cover the cases we care about and stop_incompatible_op() to throw an informative error message for everything else.

vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) {
  switch(
    op,
    "+" = ,
    "-" = new_meter(vec_arith_base(op, x, y)),
    "/" = vec_arith_base(op, x, y),
    stop_incompatible_op(op, x, y)
  )
}

meter(10) + meter(1)
#> <vctrs_meter[1]>
#> [1] 11 m
meter(10) - meter(1)
#> <vctrs_meter[1]>
#> [1] 9 m
meter(10) / meter(1)
#> [1] 10
meter(10) * meter(1)
#> Error: <vctrs_meter> * <vctrs_meter> is not permitted

Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while meter(10) / 2 makes sense, 2 / meter(10) does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over numeric here instead of double.

vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) {
  switch(
    op,
    "/" = ,
    "*" = new_meter(vec_arith_base(op, x, y)),
    stop_incompatible_op(op, x, y)
  )
}
vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) {
  switch(
    op,
    "*" = new_meter(vec_arith_base(op, x, y)),
    stop_incompatible_op(op, x, y)
  )
}

meter(2) * 10
#> <vctrs_meter[1]>
#> [1] 20 m
meter(2) * as.integer(10)
#> <vctrs_meter[1]>
#> [1] 20 m
10 * meter(2)
#> <vctrs_meter[1]>
#> [1] 20 m
meter(20) / 10
#> <vctrs_meter[1]>
#> [1] 2 m
10 / meter(20)
#> Error: <double> / <vctrs_meter> is not permitted
meter(20) + 10
#> Error: <vctrs_meter> + <double> is not permitted

For completeness, we also need vec_arith.vctrs_meter.MISSING for the unary + and - operators:

vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) {
  switch(op,
    `-` = x * -1,
    `+` = x,
    stop_incompatible_op(op, x, y)
  )
}
-meter(1)
#> <vctrs_meter[1]>
#> [1] -1 m
+meter(1)
#> <vctrs_meter[1]>
#> [1] 1 m

Implementing a vctrs S3 class in a package

Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things:

  • Register the S3 methods by listing them in the NAMESPACE file.

  • Create documentation around your methods, for the sake of your user and to satisfy R CMD check.

Let’s assume that the percent class is implemented in the pizza package in the file R/percent.R. Here we walk through the major sections of this hypothetical file. You’ve seen all of this code before, but now it’s augmented by the roxygen2 directives that produce the correct NAMESPACE entries and help topics.

Getting started

First, the pizza package needs to include vctrs in the Imports section of its DESCRIPTION (perhaps by calling usethis::use_package("vctrs"). While vctrs is under very active development, it probably makes sense to state a minimum version.

Imports:
    a_package,
    another_package,
    ...
    vctrs (>= x.y.z),
    ...

Then we make all vctrs functions available within the pizza package by including the directive #' @import vctrs somewhere. Usually, it’s not good practice to @import the entire namespace of a package, but vctrs is deliberately designed with this use case in mind.

Where should we put #' @import vctrs? There are two natural locations:

  • With package-level docs in R/pizza-doc.R. You can use usethis::use_package_doc() to initiate this package-level documentation.

  • In R/percent.R. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package.

We also must use one of these locations to dump some internal documentation that’s needed to avoid R CMD check complaints. We don’t expect any human to ever read this documentation. Here’s how this dummy documentation should look, combined with the #' @import vctrs directive described above.

#' Internal vctrs methods
#'
#' @import vctrs
#' @keywords internal
#' @name pizza-vctrs
NULL

This should appear in R/pizza-doc.R (package-level docs) or in R/percent.R (class-focused file).

Remember to call devtools::document() regularly, as you develop, to regenerate NAMESPACE and the .Rd files.

From this point on, the code shown is expected to appear in R/percent.R.

Low-level and user-friendly constructors

Next we add our constructor:

new_percent <- function(x = double()) {
  vec_assert(x, double())
  new_vctr(x, class = "pizza_percent")
}

Note that the name of the package must be included in the class name (pizza_percent), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class.

We can also add a call to setOldClass() for compatibility with S4:

# for compatibility with the S4 system
methods::setOldClass(c("pizza_percent", "vctrs_vctr"))

Because we’ve used a function from the methods package, you’ll also need to add methods to Imports, with (e.g.) usethis::use_package("methods"). This is a “free” dependency because methods is bundled with every R install.

Next we implement, export, and document a user-friendly helper: percent().

#' `percent` vector
#'
#' This creates a double vector that represents percentages so when it is
#' printed, it is multiplied by 100 and suffixed with `%`.
#'
#' @param x A numeric vector
#' @return An S3 vector of class `pizza_percent`.
#' @export
#' @examples
#' percent(c(0.25, 0.5, 0.75))
percent <- function(x = double()) {
  x <- vec_cast(x, double())
  new_percent(x)
}

(Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do pizza::percent(); it would be redundant to have pizza::pizza_percent().)

Other helpers

It’s a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor percent():

#' @export
#' @rdname percent
is_percent <- function(x) {
  inherits(x, "pizza_percent")
}

You’ll also need to update the percent() documentation to reflect that x now means two different things:

#' @param x
#'  * For `percent()`: A numeric vector
#'  * For `is_percent()`: An object to test.

Next we provide the key methods to make printing work. These are S3 methods, so they don’t need to be documented, but they do need to be exported.

#' @export
format.pizza_percent <- function(x, ...) {
  out <- formatC(signif(vec_data(x) * 100, 3))
  out[is.na(x)] <- NA
  out[!is.na(x)] <- paste0(out[!is.na(x)], "%")
  out
}

#' @export
vec_ptype_abbr.pizza_percent <- function(x, ...) {
  "prcnt"
}

Finally, we implement methods for vec_ptype2() and vec_cast().

#' @export
vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent()
#' @export
vec_ptype2.double.vctrs_percent <- function(x, y, ...) double()

#' @export
vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x
#' @export
vec_cast.pizza_percent.double <- function(x, to, ...) percent(x)
#' @export
vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x)

Testing

It’s good practice to test your new class. Specific recommendations:

  • R/percent.R is the type of file where you really do want 100% test coverage. You can use devtools::test_coverage_file() to check this.

  • Make sure to test behaviour with zero-length inputs and missing values.

  • Use testthat::verify_output() to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about verify_output() in the testthat v2.3.0 blog post; it’s an example of a so-called golden test.

  • Check for method symmetry; use expect_s3_class(), probably with exact = TRUE, to ensure that vec_c(x, y) and vec_c(y, x) return the same type of output for the important xs and ys in your domain.

  • Use testthat::expect_error() to check that inputs that can’t be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include vctrs_error_assert_ptype, vctrs_error_assert_size, and vctrs_error_incompatible_type.

    expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type")

If your tests pass when run by devtools::test(), but fail when run in R CMD check, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated NAMESPACE.

vctrs/inst/doc/s3-vector.R0000644000176200001440000004226314042546500015114 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ## ----setup-------------------------------------------------------------------- library(vctrs) library(zeallot) ## ----------------------------------------------------------------------------- new_percent <- function(x = double()) { vec_assert(x, double()) new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ## ----------------------------------------------------------------------------- percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ## ----------------------------------------------------------------------------- new_percent() percent() ## ----------------------------------------------------------------------------- is_percent <- function(x) { inherits(x, "vctrs_percent") } ## ----------------------------------------------------------------------------- format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ## ---- include = FALSE--------------------------------------------------------- # As of R 3.5, print.vctr can not find format.percent since it's not in # it's lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ## ----------------------------------------------------------------------------- x ## ----------------------------------------------------------------------------- data.frame(x) ## ----------------------------------------------------------------------------- vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ## ---- error = TRUE------------------------------------------------------------ vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ## ----------------------------------------------------------------------------- vec_ptype2(percent(), percent()) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ## ----------------------------------------------------------------------------- vec_ptype_show(percent(), double(), percent()) ## ----------------------------------------------------------------------------- vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ## ----------------------------------------------------------------------------- vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ## ----------------------------------------------------------------------------- vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ## ---- error = TRUE------------------------------------------------------------ vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ## ---- error = TRUE------------------------------------------------------------ # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ## ----------------------------------------------------------------------------- as_percent <- function(x) { vec_cast(x, new_percent()) } ## ----------------------------------------------------------------------------- as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ## ----------------------------------------------------------------------------- new_decimal <- function(x = double(), digits = 2L) { vec_assert(x, ptype = double()) vec_assert(digits, ptype = integer(), size = 1) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ## ----------------------------------------------------------------------------- x[1:2] x[[1]] ## ----------------------------------------------------------------------------- vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ## ---- error = TRUE------------------------------------------------------------ vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ## ----------------------------------------------------------------------------- new_cached_sum <- function(x = double(), sum = 0L) { vec_assert(x, ptype = double()) vec_assert(sum, ptype = double(), size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ## ----------------------------------------------------------------------------- obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ## ----------------------------------------------------------------------------- vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ## ----------------------------------------------------------------------------- x[1:2] ## ----------------------------------------------------------------------------- vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ## ----------------------------------------------------------------------------- x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ## ----------------------------------------------------------------------------- new_rational <- function(n = integer(), d = integer()) { vec_assert(n, ptype = integer()) vec_assert(d, ptype = integer()) new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ## ----------------------------------------------------------------------------- rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ## ----------------------------------------------------------------------------- names(x) length(x) ## ----------------------------------------------------------------------------- fields(x) field(x, "n") ## ---- error = TRUE------------------------------------------------------------ x str(x) ## ----------------------------------------------------------------------------- vec_data(x) str(vec_data(x)) ## ----------------------------------------------------------------------------- format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ## ----------------------------------------------------------------------------- str(x) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ## ----------------------------------------------------------------------------- new_decimal2 <- function(l, r, scale = 2L) { vec_assert(l, ptype = integer()) vec_assert(r, ptype = integer()) vec_assert(scale, ptype = integer(), size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ## ----------------------------------------------------------------------------- x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ## ----------------------------------------------------------------------------- # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ## ----------------------------------------------------------------------------- unique(x) ## ----------------------------------------------------------------------------- sort(x) ## ----------------------------------------------------------------------------- vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } sort(x) ## ----------------------------------------------------------------------------- new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly") } poly <- function(...) { x <- list(...) x <- lapply(x, vec_cast, integer()) new_poly(x) } vec_ptype_full.vctrs_poly <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly <- function(x, ...) "poly" format.vctrs_poly <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } else if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly <- function(x, ...) { if (length(x) == 0) return() print(format(x), quote = FALSE) } p <- poly(1, c(1, 0, 1), c(1, 0, 0, 0, 2)) p ## ----------------------------------------------------------------------------- class(p) p[2] p[[2]] ## ----------------------------------------------------------------------------- p == poly(c(1, 0, 1)) ## ---- error = TRUE------------------------------------------------------------ sort(p) ## ----------------------------------------------------------------------------- vec_proxy_compare.vctrs_poly <- function(x, ...) { x_raw <- vec_data(x) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } sort(poly(3, 2, 1)) sort(poly(1, c(1, 0, 0), c(1, 0))) ## ----------------------------------------------------------------------------- vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ## ----------------------------------------------------------------------------- vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ## ----------------------------------------------------------------------------- new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ## ----------------------------------------------------------------------------- sum(x) mean(x) ## ---- error = TRUE------------------------------------------------------------ x + 1 meter(10) + meter(1) meter(10) * 3 ## ----------------------------------------------------------------------------- vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ## ---- error = TRUE------------------------------------------------------------ vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ## ---- error = TRUE------------------------------------------------------------ vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ## ----------------------------------------------------------------------------- vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ## ----eval = FALSE------------------------------------------------------------- # #' Internal vctrs methods # #' # #' @import vctrs # #' @keywords internal # #' @name pizza-vctrs # NULL ## ----------------------------------------------------------------------------- new_percent <- function(x = double()) { vec_assert(x, double()) new_vctr(x, class = "pizza_percent") } ## ----------------------------------------------------------------------------- # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ## ----------------------------------------------------------------------------- #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ## ----------------------------------------------------------------------------- #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ## ----------------------------------------------------------------------------- #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ## ----eval = FALSE------------------------------------------------------------- # #' @export # format.pizza_percent <- function(x, ...) { # out <- formatC(signif(vec_data(x) * 100, 3)) # out[is.na(x)] <- NA # out[!is.na(x)] <- paste0(out[!is.na(x)], "%") # out # } # # #' @export # vec_ptype_abbr.pizza_percent <- function(x, ...) { # "prcnt" # } ## ---- eval = FALSE------------------------------------------------------------ # #' @export # vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() # #' @export # vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() # # #' @export # vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x # #' @export # vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) # #' @export # vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ## ---- eval = FALSE------------------------------------------------------------ # expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") vctrs/inst/doc/type-size.Rmd0000644000176200001440000003155113650511520015535 0ustar liggesusers--- title: "Prototypes and sizes" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Prototypes and sizes} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Rather than using `class()` and `length()`, vctrs has notions of prototype (`vec_ptype_show()`) and size (`vec_size()`). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules. Size and prototype are motivated by thinking about the optimal behaviour for `c()` and `rbind()`, particularly inspired by data frames with columns that are matrices or data frames. ```{r} library(vctrs) ``` ## Prototype The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the `class()` of an object is inadequate for this purpose: * The `class()` doesn't include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a `POSIXct`. You cannot combine two factors or two `POSIXct`s without thinking about the attributes. * The `class()` of a matrix is "matrix" and doesn't include the type of the underlying vector or the dimensionality. Instead, vctrs takes advantage of R's vectorised nature and uses a __prototype__, a 0-observation slice of the vector (this is basically `x[0]` but with some subtleties we'll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data. Conveniently, you can create many prototypes using existing base functions (e.g, `double()` and `factor(levels = c("a", "b"))`). vctrs provides a few helpers (e.g. `new_date()`, `new_datetime()`, and `new_duration()`) where the equivalents in base R are missing. ### Base prototypes `vec_ptype()` creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides `vec_ptype_show()`, which prints the prototype in a friendly way (and returns nothing). Using `vec_ptype_show()` allows us to see the prototypes base R classes: * Atomic vectors have no attributes and just display the underlying `typeof()`: ```{r} vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ``` * The prototype of matrices and arrays include the base type and the dimensions after the first: ```{r} vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ``` * The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it's highly likely that their levels are also equal. ```{r} vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ``` While `vec_ptype_show()` prints only the hash, the prototype object itself does contain all levels: ```{r} vec_ptype(factor("a")) ``` * Base R has three key date time classes: dates, date-times (`POSIXct`), and durations (`difftime)`. Date-times have a timezone, and durations have a unit. ```{r} vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ``` * Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column: ```{r} vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ``` Data frames can have columns that are themselves data frames, making this a "recursive" type: ```{r} df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ``` ### Coercing to common type It's often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via `vec_ptype_common()`. `vec_ptype_common()` possesses the following invariants: * `class(vec_ptype_common(x, y))` equals `class(vec_ptype_common(y, x))`. * `class(vec_ptype_common(x, vec_ptype_common(y, z))` equals `class(vec_ptype_common(vec_ptype_common(x, y), z))`. * `vec_ptype_common(x, NULL) == vec_ptype(x)`. i.e., `vec_ptype_common()` is both commutative and associative (with respect to class) and has an identity element, `NULL`; i.e., it's a __commutative monoid__. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects. Like with `vec_ptype()`, the easiest way to explore `vec_ptype_common()` is with `vec_ptype_show()`: when given multiple inputs, it will print their common prototype. (In other words: program with `vec_ptype_common()` but play with `vec_ptype_show()`.) * The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically: ```{r, error = TRUE} vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ``` * Matrices and arrays are automatically broadcast to higher dimensions: ```{r} vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ``` Provided that the dimensions follow the vctrs recycling rules: ```{r, error = TRUE} vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ``` * Factors combine levels in the order in which they appear. ```{r} fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ``` * Combining a date and date-time yields a date-time: ```{r} vec_ptype_show(new_date(), new_datetime()) ``` When combining two date times, the timezone is taken from the first input: ```{r} vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ``` Unless it's the local timezone, in which case any explicit time zone will win: ```{r} vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ``` * The common type of two data frames is the common type of each column that occurs in both data frames: ```{r} vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ``` And the union of the columns that only occur in one: ```{r} vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ``` Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled. ### Casting to specified type `vec_ptype_common()` finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That's the job of `vec_cast_common()`: ```{r} str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ``` Alternatively, you can cast to a specific prototype using `vec_cast()`: ```{r, error = TRUE} # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ``` If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error. ```{r, error = TRUE} vec_cast(c(1.5, 2), integer()) ``` You can suppress the lossy cast errors with `allow_lossy_cast()`: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ``` This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ``` The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound. ## Size `vec_size()` was motivated by the need to have an invariant that describes the number of "observations" in a data structure. This is particularly important for data frames, as it's useful to have some function such that `f(data.frame(x))` equals `f(x)`. No base function has this property: * `length(data.frame(x))` equals `1` because the length of a data frame is the number of columns. * `nrow(data.frame(x))` does not equal `nrow(x)` because `nrow()` of a vector is `NULL`. * `NROW(data.frame(x))` equals `NROW(x)` for vector `x`, so is almost what we want. But because `NROW()` is defined in terms of `length()`, it returns a value for every object, even types that can't go in a data frame, e.g. `data.frame(mean)` errors even though `NROW(mean)` is `1`. We define `vec_size()` as follows: * It is the length of 1d vectors. * It is the number of rows of data frames, matrices, and arrays. * It throws error for non vectors. Given `vec_size()`, we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns. ### Slicing `vec_slice()` is to `vec_size()` as `[` is to `length()`; i.e., it allows you to select observations regardless of the dimensionality of the underlying object. `vec_slice(x, i)` is equivalent to: * `x[i]` when `x` is a vector. * `x[i, , drop = FALSE]` when `x` is a data frame. * `x[i, , , drop = FALSE]` when `x` is a 3d array. ```{r} x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ``` `vec_slice(data.frame(x), i)` equals `data.frame(vec_slice(x, i))` (modulo variable and row names). Prototypes are generated with `vec_slice(x, 0L)`; given a prototype, you can initialize a vector of given size (filled with `NA`s) with `vec_init()`. ### Common sizes: recycling rules Closely related to the definition of size are the __recycling rules__. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in `vec_size_common()`, which gives the common size of a set of vectors: ```{r} vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ``` vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like `dest == c("IAH", "HOU"))`, at the cost of occasionally requiring an explicit calls to `rep()`. ```{r, echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates n error"} knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ``` You can apply the recycling rules in two ways: * If you have a vector and desired size, use `vec_recycle()`: ```{r} vec_recycle(1:3, 3) vec_recycle(1, 10) ``` * If you have multiple vectors and you want to recycle them to the same size, use `vec_recycle_common()`: ```{r} vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ``` ## Appendix: recycling in base R The recycling rules in base R are described in [The R Language Definition](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Recycling-rules) but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions. Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer: ```{r} rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ``` If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning: ```{r} invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ``` But some functions recycle silently: ```{r} length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ``` And `data.frame()` throws an error: ```{r, error = TRUE} data.frame(1:2, 1:3) ``` The R language definition states that "any arithmetic operation involving a zero-length vector has a zero-length result". But outside of arithmetic, this rule is not consistently followed: ```{r, error = TRUE} # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) ``` vctrs/inst/doc/pillar.Rmd0000644000176200001440000002130513753015426015073 0ustar liggesusers--- title: "Printing vectors nicely in tibbles" author: "Kirill Müller, Hadley Wickham" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Printing vectors nicely in tibbles} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = pillar::style_subtle("#>")) ``` You can get basic control over how a vector is printed in a tibble by providing a `format()` method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics: * `vctrs::vec_ptype_abbr()` determines what goes into the column header. * `pillar::pillar_shaft()` determines what goes into the body, or the shaft, of the column. Technically a [*pillar*](https://en.wikipedia.org/wiki/Column#Nomenclature) is composed of a *shaft* (decorated with an *ornament*), with a *capital* above and a *base* below. Multiple pillars form a *colonnade*, which can be stacked in multiple *tiers*. This is the motivation behind the names in our API. This short vignette shows the basics of column styling using a `"latlon"` vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you'll need to create documentation and the `NAMESPACE` file. In this vignette, we'll attach pillar and vctrs: ```{r setup} library(vctrs) library(pillar) ``` You don't need to do this in a package. Instead, you'll need to _import_ the packages by then to the `Imports:` section of your `DESCRIPTION`. The following helper does this for you: ```{r, eval = FALSE} usethis::use_package("vctrs") usethis::use_package("pillar") ``` ## Prerequisites To illustrate the basic ideas we're going to create a `"latlon"` class that encodes geographic coordinates in a record. We'll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using `vctrs_rcrd()`, we already get the infrastructure to make this class fully compatible with data frames for free. See `vignette("s3-vector", package = "vctrs")` for details on the record data type. ```{r} #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ``` ## Using in a tibble Columns of this class can be used in a tibble right away because we've made a class using the vctrs infrastructure and have provided a `format()` method: ```{r} library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ``` This output is ok, but we could improve it by: 1. Using a more description type abbreviation than ``. 1. Using a dash of colour to highlight the most important parts of the value. 1. Providing a narrower view when horizontal space is at a premium. The following sections show how to enhance the rendering. ## Fixing the data type Instead of `` we'd prefer to use ``. We can do that by implementing the `vec_ptype_abbr()` method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that's under 6 characters. ```{r} #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ``` ## Custom rendering The `format()` method is used by default for rendering. For custom formatting you need to implement the `pillar_shaft()` method. This function should always return a pillar shaft object, created by `new_pillar_shaft_simple()` or similar. `new_pillar_shaft_simple()` accepts ANSI escape codes for colouring, and pillar includes some built in styles like `style_subtle()`. We can use subtle style for the degree and minute separators to make the data more obvious. First we define a degree formatter that makes use of `style_subtle()`: ```{r} deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ``` And then we pass that to our `format()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ``` Currently, ANSI escapes are not rendered in vignettes, so this result doesn't look any different, but if you run the code yourself you'll see an improved display. ```{r} data ``` As well as the functions in pillar, the [cli](http://cli.r-lib.org/) package provides a variety of tools for styling text. ## Truncation Tibbles can automatically compacts columns when there's no enough horizontal space to display everything: ```{r} print(data, width = 30) ``` Currently the latlon class isn't ever compacted because we haven't specified a minimum width when constructing the shaft. Let's fix that and re-print the data: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ``` ## Adaptive rendering Truncation may be useful for character data, but for lat-lon data it'd be nicer to show full degrees and remove the minutes. We'll first write a function that does this: ```{r} deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ``` Then use it as part of more sophisticated implementation of the `pillar_shaft()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ``` Now the `pillar_shaft()` method returns an object of class `"pillar_shaft_latlon"` created by `new_pillar_shaft()`. This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (`get_max_extent()` is a helper that computes the maximum display width occupied by the values in a character vector.) All that's left to do is to implement a `format()` method for our new `"pillar_shaft_latlon"` class. This method will be called with a `width` argument, which then determines which of the formats to choose. The formatting of our choice is passed to the `new_ornament()` function: ```{r} #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ``` ## Testing If you want to test the output of your code, you can compare it with a known state recorded in a text file. The `testthat::expect_snapshot()` function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won't make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN. Use this testthat expectation in one of your test files to create a snapshot test: ```{r eval = FALSE} expect_snapshot(pillar_shaft(data$loc)) ``` See for more information. vctrs/inst/doc/stability.R0000644000176200001440000001164614042546501015275 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(vctrs) library(zeallot) ## ----------------------------------------------------------------------------- vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ## ----------------------------------------------------------------------------- vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ## ----------------------------------------------------------------------------- vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ## ----------------------------------------------------------------------------- env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ## ----------------------------------------------------------------------------- vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ## ----------------------------------------------------------------------------- c(FALSE, 1L, 2.5) ## ----------------------------------------------------------------------------- vec_c(FALSE, 1L, 2.5) ## ---- error = TRUE------------------------------------------------------------ c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ## ----------------------------------------------------------------------------- c(10.5, factor("x")) ## ----------------------------------------------------------------------------- c(mean, globalenv()) ## ---- error = TRUE------------------------------------------------------------ c(getRversion(), "x") c("x", getRversion()) ## ---- error = TRUE------------------------------------------------------------ vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ## ----------------------------------------------------------------------------- fa <- factor("a") fb <- factor("b") c(fa, fb) ## ----------------------------------------------------------------------------- vec_c(fa, fb) vec_c(fb, fa) ## ----------------------------------------------------------------------------- datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ## ----------------------------------------------------------------------------- vec_c(datetime_nz) ## ----------------------------------------------------------------------------- datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ## ----------------------------------------------------------------------------- date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ## ----------------------------------------------------------------------------- vec_c(date, datetime) vec_c(date, datetime_nz) ## ----------------------------------------------------------------------------- c(NA, fa) c(NA, date) c(NA, datetime) ## ----------------------------------------------------------------------------- vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ## ----------------------------------------------------------------------------- df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ## ----------------------------------------------------------------------------- vec_c(df1, df2) ## ----------------------------------------------------------------------------- m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ## ----------------------------------------------------------------------------- c(m, 1) vec_c(m, 1) ## ---- eval = FALSE------------------------------------------------------------ # vec_c <- function(...) { # args <- compact(list2(...)) # # ptype <- vec_ptype_common(!!!args) # if (is.null(ptype)) # return(NULL) # # ns <- map_int(args, vec_size) # out <- vec_init(ptype, sum(ns)) # # pos <- 1 # for (i in seq_along(ns)) { # n <- ns[[i]] # # x <- vec_cast(args[[i]], to = ptype) # vec_slice(out, pos:(pos + n - 1)) <- x # pos <- pos + n # } # # out # } ## ----------------------------------------------------------------------------- if_else <- function(test, yes, no) { vec_assert(test, logical()) c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ## ----------------------------------------------------------------------------- if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) vctrs/inst/doc/s3-vector.Rmd0000644000176200001440000011552014027052305015430 0ustar liggesusers--- title: "S3 vectors" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{S3 vectors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ``` This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you'll also need to provide methods that actually make the vector useful. I assume that you're already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of [the S3 chapter](https://adv-r.hadley.nz/s3.html) of _Advanced R_. This article refers to "vectors of numbers" as *double vectors*. Here, "double" stands for ["double precision floating point number"](https://en.wikipedia.org/wiki/Double-precision_floating-point_format), see also `double()`. ```{r setup} library(vctrs) library(zeallot) ``` This vignette works through five big topics: * The basics of creating a new vector class with vctrs. * The coercion and casting system. * The record and list-of types. * Equality and comparison proxies. * Arithmetic operators. They're collectively demonstrated with a number of simple S3 classes: * Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting. * Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions. * Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care. * Rational: a pair of integer vectors that defines a rational number like `2 / 3`. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for `+`, `-`, and friends. * Polynomial: a list of integer vectors that define polynomials like `1 + x - x^3`. Sorting such vectors correctly requires a custom equality method. * Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties. * Period and frequency: a pair of classes represent a period, or it's inverse, frequency. This allows us to explore more arithmetic operators. ## Basics In this section you'll learn how to create a new vctrs class by calling `new_vctr()`. This creates an object with class `vctrs_vctr` which has a number of methods. These are designed to make your life as easy as possible. For example: * The `print()` and `str()` methods are defined in terms of `format()` so you get a pleasant, consistent display as soon as you've made your `format()` method. * You can immediately put your new vector class in a data frame because `as.data.frame.vctrs_vctr()` does the right thing. * Subsetting (`[`, `[[`, and `$`), `length<-`, and `rep()` methods automatically preserve attributes because they use `vec_restore()`. A default `vec_restore()` works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data. * Default subset-assignment methods (`[<-`, `[[<-`, and `$<-`) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages. ### Percent class In this section, I'll show you how to make a `percent` class, i.e., a double vector that is printed as a percentage. We start by defining a low-level [constructor](https://adv-r.hadley.nz/s3.html#s3-constrcutor) that uses `vec_assert()` to checks types and/or sizes then calls `new_vctr()`. `percent` is built on a double vector of any length and doesn't have any attributes. ```{r} new_percent <- function(x = double()) { vec_assert(x, double()) new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ``` Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as [blob](https://blob.tidyverse.org/)), it's fine to use the package name without prefix as the class name. We then follow up with a user friendly [helper](https://adv-r.hadley.nz/s3.html#helpers). Here we'll use `vec_cast()` to allow it to accept anything coercible to a double: ```{r} percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype. ```{r} new_percent() percent() ``` For the convenience of your users, consider implementing an `is_percent()` function: ```{r} is_percent <- function(x) { inherits(x, "vctrs_percent") } ``` ### `format()` method The first method for every class should almost always be a `format()` method. This should return a character vector the same length as `x`. The easiest way to do this is to rely on one of R's low-level formatting functions like `formatC()`: ```{r} format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ``` ```{r, include = FALSE} # As of R 3.5, print.vctr can not find format.percent since it's not in # it's lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ``` ```{r} x ``` (Note the use of `vec_data()` so `format()` doesn't get stuck in an infinite loop, and that I take a little care to not convert `NA` to `"NA"`; this leads to better printing.) The format method is also used by data frames, tibbles, and `str()`: ```{r} data.frame(x) ``` For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in `str()`: ```{r} vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ``` If you need more control over printing in tibbles, implement a method for `pillar::pillar_shaft()`. See `vignette("pillar", package = "vctrs")` for details. ## Casting and coercion The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens _implicitly_ (e.g in `c()`) we call it __coercion__; when the change happens _explicitly_ (e.g. with `as.integer(x)`), we call it __casting__. One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it's possible to make accurate predictions about what (e.g.) `c(x, y)` should do when `x` and `y` have different prototypes. vctrs achieves this goal through two generics: * `vec_ptype2(x, y)` defines possible set of coercions. It returns a prototype if `x` and `y` can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes. * `vec_cast(x, to)` defines the possible sets of casts. It returns `x` translated to have prototype `to`, or throws an error if the conversion isn't possible. The set of possible casts is a superset of possible coercions because they're requested explicitly. ### Double dispatch Both generics use __[double dispatch](https://en.wikipedia.org/wiki/Double_dispatch)__ which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means: - You end up with method names with two classes, like `vec_ptype2.foo.bar()`. - You don't need to implement default methods (they would never be called if you do). - You can't call `NextMethod()`. ### Percent class {#percent} We'll make our percent class coercible back and forth with double vectors. `vec_ptype2()` provides a user friendly error message if the coercion doesn't exist and makes sure `NA` is handled in a standard way. `NA` is technically a logical vector, but we want to stand in for a missing value of any type. ```{r, error = TRUE} vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ``` By default and in simple cases, an object of the same class is compatible with itself: ```{r} vec_ptype2(percent(), percent()) ``` However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we'll start by saying that a `vctrs_percent` combined with a `vctrs_percent` yields a `vctrs_percent`, which we indicate by returning a prototype generated by the constructor. ```{r} vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ``` Next we define methods that say that combining a `percent` and double should yield a `double`. We avoid returning a `percent` here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers. Because double dispatch is a bit of a hack, we need to provide two methods. It's your responsibility to ensure that each member of the pair returns the same result: if they don't you will get weird and unpredictable behaviour. The double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. If we implemented `vec_ptype2.vctrs_percent.numeric()`, it would never be called. ```{r} vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ``` We can check that we've implemented this correctly with `vec_ptype_show()`: ```{r} vec_ptype_show(percent(), double(), percent()) ``` The `vec_ptype2()` methods define which input is the richer type that vctrs should coerce to. However, they don't perform any conversion. This is the job of `vec_cast()`, which we implement next. We'll provide a method to cast a percent to a percent: ```{r} vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ``` And then for converting back and forth between doubles. To convert a double to a percent we use the `percent()` helper (not the constructor; this is unvalidated user input). To convert a `percent` to a double, we strip the attributes. Note that for historical reasons the order of argument in the signature is the opposite as for `vec_ptype2()`. The class for `to` comes first, and the class for `x` comes second. Again, the double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. Implementing `vec_cast.vctrs_percent.numeric()` has no effect. ```{r} vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ``` Then we can check this works with `vec_cast()`: ```{r} vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ``` Once you've implemented `vec_ptype2()` and `vec_cast()`, you get `vec_c()`, `[<-`, and `[[<-` implementations for free. ```{r, error = TRUE} vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ``` You'll also get mostly correct behaviour for `c()`. The exception is when you use `c()` with a base R class: ```{r, error = TRUE} # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ``` Unfortunately there's no way to fix this problem with the current design of `c()`. Again, as a convenience, consider providing an `as_percent()` function that makes use of the casts defined in your `vec_cast.vctrs_percent()` methods: ```{r} as_percent <- function(x) { vec_cast(x, new_percent()) } ``` Occasionally, it is useful to provide conversions that go beyond what's allowed in casting. For example, we could offer a parsing method for character vectors. In this case, `as_percent()` should be generic, the default method should cast, and then additional methods should implement more flexible conversion: ```{r} as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ``` ### Decimal class Now that you've seen the basics with a very simple S3 class, we'll gradually explore more complicated scenarios. This section creates a `decimal` class that prints with the specified number of decimal places. This is very similar to `percent` but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1). We start off as before, defining a low-level constructor, a user-friendly constructor, a `format()` method, and a `vec_ptype_abbr()`. Note that additional object attributes are simply passed along to `new_vctr()`: ```{r} new_decimal <- function(x = double(), digits = 2L) { vec_assert(x, ptype = double()) vec_assert(digits, ptype = integer(), size = 1) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ``` Note that I provide a little helper to extract the `digits` attribute. This makes the code a little easier to read and should not be exported. By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You'll see what to do if the attributes are data dependent in the next section. ```{r} x[1:2] x[[1]] ``` For the sake of exposition, we'll assume that `digits` is an important attribute of the class and should be included in the full type: ```{r} vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ``` Now consider `vec_cast()` and `vec_ptype2()`. Casting and coercing from one decimal to another requires a little thought as the values of the `digits` attribute might be different, and we need some way to reconcile them. Here I've decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error. ```{r} vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ``` Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal). ```{r} vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ``` If type `x` has greater resolution than `y`, there will be some inputs that lose precision. These should generate errors using `stop_lossy_cast()`. You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution. ```{r, error = TRUE} vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ``` ### Cached sum class {#cached-sum} The next level up in complexity is an object that has data-dependent attributes. To explore this idea we'll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors: ```{r} new_cached_sum <- function(x = double(), sum = 0L) { vec_assert(x, ptype = double()) vec_assert(sum, ptype = double(), size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ``` For this class, we can use the default `format()` method, and instead, we'll customise the `obj_print_footer()` method. This is a good place to display user facing attributes. ```{r} obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ``` We'll also override `sum()` and `mean()` to use the attribute. This is easiest to do with `vec_math()`, which you'll learn about later. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ``` As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they'll work, but return the incorrect result: ```{r} x[1:2] ``` To fix this, you need to provide a `vec_restore()` method. Note that this method dispatches on the `to` argument. ```{r} vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ``` This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with `vec_data()` and then reapplying them again with `vec_restore()`. The default `vec_restore()` method copies over all attributes, which is not appropriate when the attributes depend on the data. Note that `vec_restore.class` is subtly different from `vec_cast.class.class()`. `vec_restore()` is used when restoring attributes that have been lost; `vec_cast()` is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with `new_vctr()`. `vec_restore.factor()` would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with `vec_cast()`. ## Record-style objects Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is `POSIXlt`, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override `length()` and subsetting methods to conceal this implementation detail. ```{r} x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ``` vctrs makes it easy to create new record-style classes using `new_rcrd()`, which has a wide selection of default methods. ### Rational class A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short). As usual we start with low-level and user-friendly constructors. The low-level constructor calls `new_rcrd()`, which needs a named list of equal-length vectors. ```{r} new_rational <- function(n = integer(), d = integer()) { vec_assert(n, ptype = integer()) vec_assert(d, ptype = integer()) new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ``` Our user friendly constructor casts `n` and `d` to integers and recycles them to the same length. ```{r} rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ``` Behind the scenes, `x` is a named list with two elements. But those details are hidden so that it behaves like a vector: ```{r} names(x) length(x) ``` To access the underlying fields we need to use `field()` and `fields()`: ```{r} fields(x) field(x, "n") ``` Notice that we can't `print()` or `str()` the new rational vector `x` yet. Printing causes an error: ```{r, error = TRUE} x str(x) ``` This is because we haven't defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call `vec_data(x)`. ```{r} vec_data(x) str(vec_data(x)) ``` It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way: ```{r} format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ``` vctrs uses the `format()` method in `str()`, hiding the underlying implementation details from the user: ```{r} str(x) ``` For `rational`, `vec_ptype2()` and `vec_cast()` follow the same pattern as `percent()`. We allow coercion from integer and to doubles. ```{r} vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ``` ### Decimal2 class The previous implementation of `decimal` was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems. A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a `scale`). The following code is a very quick sketch of how you might start creating such a class: ```{r} new_decimal2 <- function(l, r, scale = 2L) { vec_assert(l, ptype = integer()) vec_assert(r, ptype = integer()) vec_assert(scale, ptype = integer(), size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ``` ## Equality and comparison vctrs provides three "proxy" generics. Two of these let you control how your class determines equality and ordering: * `vec_proxy_equal()` returns a data vector suitable for comparison. It underpins `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()`. * `vec_proxy_compare()` specifies how to compare the elements of your vector. This proxy is used in `<`, `<=`, `>=`, `>`, `min()`, `max()`, `median()`, `quantile()`, and `xtfrm()` (used in `order()` and `sort()`) methods. By default, `vec_proxy_equal()` and `vec_proxy_compare()` just call `vec_proxy()`. * `vec_proxy()` returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn't need to implement `vec_proxy()`. You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work. These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C. ### Rational class Let's explore these ideas by with the rational class we started on above. By default, `vec_proxy()` converts a record to a data frame, and the default comparison works column by column: ```{r} x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ``` This makes sense as a default but isn't correct here because `rational(1, 1)` represents the same number as `rational(2, 2)`, so they should be equal. We can fix that by implementing a `vec_proxy_equal()` method that divides `n` and `d` by their greatest common divisor: ```{r} # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ``` `vec_proxy_equal()` is also used by `unique()`: ```{r} unique(x) ``` We now need to fix `sort()` similarly, since it currently sorts by `n`, then by `d`: ```{r} sort(x) ``` The easiest fix is to convert the fraction to a decimal and then sort that: ```{r} vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } sort(x) ``` (We could have used the same approach in `vec_proxy_equal()`, but when working with floating point numbers it's not necessarily true that `x == y` implies that `d * x == d * y`.) ### Polynomial class A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like `1 + 3x - 2x^2`) using a list of integer vectors (like `c(1, 3, -2)`). Note the use of `new_list_of()` in the constructor. ```{r} new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly") } poly <- function(...) { x <- list(...) x <- lapply(x, vec_cast, integer()) new_poly(x) } vec_ptype_full.vctrs_poly <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly <- function(x, ...) "poly" format.vctrs_poly <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } else if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly <- function(x, ...) { if (length(x) == 0) return() print(format(x), quote = FALSE) } p <- poly(1, c(1, 0, 1), c(1, 0, 0, 0, 2)) p ``` The resulting objects will inherit from the `vctrs_list_of` class, which provides tailored methods for `$`, `[[`, the corresponding assignment operators, and other methods. ```{r} class(p) p[2] p[[2]] ``` Equality works out of the box because we can tell if two integer vectors are equal: ```{r} p == poly(c(1, 0, 1)) ``` But we can't order them because lists are not comparable: ```{r, error = TRUE} sort(p) ``` So we need to define a `vec_proxy_compare()` method: ```{r} vec_proxy_compare.vctrs_poly <- function(x, ...) { x_raw <- vec_data(x) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } sort(poly(3, 2, 1)) sort(poly(1, c(1, 0, 0), c(1, 0))) ``` ## Arithmetic vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once: * `vec_math(fn, x, ...)` specifies the behaviour of mathematical functions like `abs()`, `sum()`, and `mean()`. (Note that `var()` and `sd()` can't be overridden, see `?vec_math()` for the complete list supported by `vec_math()`.) * `vec_arith(op, x, y)` specifies the behaviour of the arithmetic operations like `+`, `-`, and `%%`. (See `?vec_arith()` for the complete list.) Both generics define the behaviour for multiple functions because `sum.vctrs_vctr(x)` calls `vec_math.vctrs_vctr("sum", x)`, and `x + y` calls `vec_math.x_class.y_class("+", x, y)`. They're accompanied by `vec_math_base()` and `vec_arith_base()` which make it easy to call the underlying base R functions. `vec_arith()` uses double dispatch and needs the following standard boilerplate: ```{r} vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` ### Cached sum class I showed an example of `vec_math()` to define `sum()` and `mean()` methods for `cached_sum`. Now let's talk about exactly how it works. Most `vec_math()` functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to `vec_math_base()` for those that you don't care about. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ``` ### Meter class To explore the infix arithmetic operators exposed by `vec_arith()` I'll create a new class that represents a measurement in `meter`s: ```{r} new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ``` Because `meter` is built on top of a double vector, basic mathematic operations work: ```{r} sum(x) mean(x) ``` But we can't do arithmetic: ```{r, error = TRUE} x + 1 meter(10) + meter(1) meter(10) * 3 ``` To allow these infix functions to work, we'll need to provide `vec_arith()` generic. But before we do that, let's think about what combinations of inputs we should support: * It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can't multiply meters (because that would yield an area). * For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don't make much sense as we, strictly speaking, are dealing with objects of different nature. `vec_arith()` is another function that uses double dispatch, so as usual we start with a template. ```{r} vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Then write the method for two meter objects. We use a switch statement to cover the cases we care about and `stop_incompatible_op()` to throw an informative error message for everything else. ```{r, error = TRUE} vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ``` Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while `meter(10) / 2` makes sense, `2 / meter(10)` does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over `numeric` here instead of `double`. ```{r, error = TRUE} vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ``` For completeness, we also need `vec_arith.vctrs_meter.MISSING` for the unary `+` and `-` operators: ```{r} vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ``` ## Implementing a vctrs S3 class in a package Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things: * Register the S3 methods by listing them in the `NAMESPACE` file. * Create documentation around your methods, for the sake of your user and to satisfy `R CMD check`. Let's assume that the `percent` class is implemented in the pizza package in the file `R/percent.R`. Here we walk through the major sections of this hypothetical file. You've seen all of this code before, but now it's augmented by the roxygen2 directives that produce the correct `NAMESPACE` entries and help topics. ### Getting started First, the pizza package needs to include vctrs in the `Imports` section of its `DESCRIPTION` (perhaps by calling `usethis::use_package("vctrs")`. While vctrs is under very active development, it probably makes sense to state a minimum version. ``` Imports: a_package, another_package, ... vctrs (>= x.y.z), ... ``` Then we make all vctrs functions available within the pizza package by including the directive `#' @import vctrs` somewhere. Usually, it's not good practice to `@import` the entire namespace of a package, but vctrs is deliberately designed with this use case in mind. Where should we put `#' @import vctrs`? There are two natural locations: * With package-level docs in `R/pizza-doc.R`. You can use `usethis::use_package_doc()` to initiate this package-level documentation. * In `R/percent.R`. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package. We also must use one of these locations to dump some internal documentation that's needed to avoid `R CMD check` complaints. We don't expect any human to ever read this documentation. Here's how this dummy documentation should look, combined with the `#' @import vctrs` directive described above. ```{r eval = FALSE} #' Internal vctrs methods #' #' @import vctrs #' @keywords internal #' @name pizza-vctrs NULL ``` This should appear in `R/pizza-doc.R` (package-level docs) or in `R/percent.R` (class-focused file). Remember to call `devtools::document()` regularly, as you develop, to regenerate `NAMESPACE` and the `.Rd` files. From this point on, the code shown is expected to appear in `R/percent.R`. ### Low-level and user-friendly constructors Next we add our constructor: ```{r} new_percent <- function(x = double()) { vec_assert(x, double()) new_vctr(x, class = "pizza_percent") } ``` Note that the name of the package must be included in the class name (`pizza_percent`), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class. We can also add a call to `setOldClass()` for compatibility with S4: ```{r} # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ``` Because we've used a function from the methods package, you'll also need to add methods to `Imports`, with (e.g.) `usethis::use_package("methods")`. This is a "free" dependency because methods is bundled with every R install. Next we implement, export, and document a user-friendly helper: `percent()`. ```{r} #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` (Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do `pizza::percent()`; it would be redundant to have `pizza::pizza_percent()`.) ### Other helpers It's a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor `percent()`: ```{r} #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ``` You'll also need to update the `percent()` documentation to reflect that `x` now means two different things: ```{r} #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ``` Next we provide the key methods to make printing work. These are S3 methods, so they don't need to be documented, but they do need to be exported. ```{r eval = FALSE} #' @export format.pizza_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } #' @export vec_ptype_abbr.pizza_percent <- function(x, ...) { "prcnt" } ``` Finally, we implement methods for `vec_ptype2()` and `vec_cast()`. ```{r, eval = FALSE} #' @export vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() #' @export vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() #' @export vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x #' @export vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) #' @export vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ``` ### Testing It's good practice to test your new class. Specific recommendations: * `R/percent.R` is the type of file where you really do want 100% test coverage. You can use `devtools::test_coverage_file()` to check this. * Make sure to test behaviour with zero-length inputs and missing values. * Use `testthat::verify_output()` to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about `verify_output()` in the [testthat v2.3.0 blog post](https://www.tidyverse.org/blog/2019/11/testthat-2-3-0/); it's an example of a so-called [golden test](https://ro-che.info/articles/2017-12-04-golden-tests). * Check for method symmetry; use `expect_s3_class()`, probably with `exact = TRUE`, to ensure that `vec_c(x, y)` and `vec_c(y, x)` return the same type of output for the important `x`s and `y`s in your domain. * Use `testthat::expect_error()` to check that inputs that can't be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include `vctrs_error_assert_ptype`, `vctrs_error_assert_size`, and `vctrs_error_incompatible_type`. ```{r, eval = FALSE} expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") ``` If your tests pass when run by `devtools::test()`, but fail when run in `R CMD check`, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated `NAMESPACE`. vctrs/inst/doc/stability.Rmd0000644000176200001440000003132513654033264015617 0ustar liggesusers--- title: "Type and size stability" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Type and size stability} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the "shape" of the output you only need to know the "shape"s of the inputs. This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can't predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases. ```{r setup} library(vctrs) library(zeallot) ``` ## Definitions We say a function is __type-stable__ iff: 1. You can predict the output type knowing only the input types. 1. The order of arguments in ... does not affect the output type. Similarly, a function is __size-stable__ iff: 1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size. Very few base R functions are size-stable, so I'll also define a slightly weaker condition. I'll call a function __length-stable__ iff: 1. You can predict the output _length_ knowing only the input _lengths_, or there is a single numeric input that specifies the output _length_. (But note that length-stable is not a particularly robust definition because `length()` returns a value for things that are not vectors.) We'll call functions that don't obey these principles __type-unstable__ and __size-unstable__ respectively. On top of type- and size-stability it's also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions. The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again. ### Examples To make these ideas concrete, let's apply them to a few base functions: 1. `mean()` is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error). 1. Surprisingly, `median()` is type-unstable: ```{r} vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ``` It is, however, size-stable, since it always returns a vector of length 1. 1. `sapply()` is type-unstable because you can't predict the output type only knowing the input types: ```{r} vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ``` It's not quite size-stable; `vec_size(sapply(x, f))` is `vec_size(x)` for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns). 1. `vapply()` is a type-stable version of `sapply()` because `vec_ptype_show(vapply(x, fn, template))` is always `vec_ptype_show(template)`. It is size-unstable for the same reasons as `sapply()`. 1. `c()` is type-unstable because `c(x, y)` doesn't always output the same type as `c(y, x)`. ```{r} vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ``` `c()` is *almost always* length-stable because `length(c(x, y))` *almost always* equals `length(x) + length(y)`. One common source of instability here is dealing with non-vectors (see the later section "Non-vectors"): ```{r} env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ``` 1. `paste(x1, x2)` is length-stable because `length(paste(x1, x2))` equals `max(length(x1), length(x2))`. However, it doesn't follow the usual arithmetic recycling rules because `paste(1:2, 1:3)` doesn't generate a warning. 1. `ifelse()` is length-stable because `length(ifelse(cond, true, false))` is always `length(cond)`. `ifelse()` is type-unstable because the output type depends on the value of `cond`: ```{r} vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ``` 1. `read.csv(file)` is type-unstable and size-unstable because, while you know it will return a data frame, you don't know what columns it will return or how many rows it will have. Similarly, `df[[i]]` is not type-stable because the result depends on the _value_ of `i`. There are many important functions that can not be made type-stable or size-stable! With this understanding of type- and size-stability in hand, we'll use them to analyse some base R functions in greater depth and then propose alternatives with better properties. ## `c()` and `vctrs::vec_c()` In this section we'll compare and contrast `c()` and `vec_c()`. `vec_c()` is both type- and size-stable because it possesses the following invariants: * `vec_ptype(vec_c(x, y))` equals `vec_ptype_common(x, y)`. * `vec_size(vec_c(x, y))` equals `vec_size(x) + vec_size(y)`. `c()` has another undesirable property in that it's not consistent with `unlist()`; i.e., `unlist(list(x, y))` does not always equal `c(x, y)`; i.e., base R has multiple sets of type-coercion rules. I won't consider this problem further here. I have two goals here: * To fully document the quirks of `c()`, hence motivating the development of an alternative. * To discuss non-obvious consequences of the type- and size-stability above. ### Atomic vectors If we only consider atomic vectors, `c()` is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical. ```{r} c(FALSE, 1L, 2.5) ``` `vec_c()` obeys similar rules: ```{r} vec_c(FALSE, 1L, 2.5) ``` But it does not automatically coerce to character vectors or lists: ```{r, error = TRUE} c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ``` ### Incompatible vectors and non-vectors In general, most base methods do not throw an error: ```{r} c(10.5, factor("x")) ``` If the inputs aren't vectors, `c()` automatically puts them in a list: ```{r} c(mean, globalenv()) ``` For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list: ```{r, error = TRUE} c(getRversion(), "x") c("x", getRversion()) ``` `vec_c()` throws an error if the inputs are not vectors or not automatically coercible: ```{r, error = TRUE} vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ``` ### Factors Combining two factors returns an integer vector: ```{r} fa <- factor("a") fb <- factor("b") c(fa, fb) ``` (This is documented in `c()` but is still undesirable.) `vec_c()` returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with `dplyr::bind_rows()`, which is stricter and is a common source of user difficulty.) ```{r} vec_c(fa, fb) vec_c(fb, fa) ``` ### Date-times `c()` strips the time zone associated with date-times: ```{r} datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ``` This behaviour is documented in `?DateTimeClasses` but is the source of considerable user pain. `vec_c()` preserves time zones: ```{r} vec_c(datetime_nz) ``` What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there's no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone: ```{r} datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ``` ### Dates and date-times Combining dates and date-times with `c()` gives silently incorrect results: ```{r} date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ``` This behaviour arises because neither `c.Date()` nor `c.POSIXct()` check that all inputs are of the same type. `vec_c()` uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time). ```{r} vec_c(date, datetime) vec_c(date, datetime_nz) ``` ### Missing values If a missing value comes at the beginning of the inputs, `c()` falls back to the internal behaviour, which strips all attributes: ```{r} c(NA, fa) c(NA, date) c(NA, datetime) ``` `vec_c()` takes a different approach treating a logical vector consisting only of `NA` as the `unspecified()` class which can be converted to any other 1d type: ```{r} vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ``` ### Data frames Because it is *almost always* length-stable, `c()` combines data frames column wise (into a list): ```{r} df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ``` `vec_c()` is size-stable, which implies it will row-bind data frames: ```{r} vec_c(df1, df2) ``` ### Matrices and arrays The same reasoning applies to matrices: ```{r} m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ``` One difference is that `vec_c()` will "broadcast" a vector to match the dimensions of a matrix: ```{r} c(m, 1) vec_c(m, 1) ``` ### Implementation The basic implementation of `vec_c()` is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with `vec_init()`, and then insert each input into the correct place in the output. ```{r, eval = FALSE} vec_c <- function(...) { args <- compact(list2(...)) ptype <- vec_ptype_common(!!!args) if (is.null(ptype)) return(NULL) ns <- map_int(args, vec_size) out <- vec_init(ptype, sum(ns)) pos <- 1 for (i in seq_along(ns)) { n <- ns[[i]] x <- vec_cast(args[[i]], to = ptype) vec_slice(out, pos:(pos + n - 1)) <- x pos <- pos + n } out } ``` (The real `vec_c()` is a bit more complicated in order to handle inner and outer names). ## `ifelse()` One of the functions that motivate the development of vctrs is `ifelse()`. It has the surprising property that the result value is "A vector of the same length and attributes (including dimensions and class) as `test`". To me, it seems more reasonable for the type of the output to be controlled by the type of the `yes` and `no` arguments. In `dplyr::if_else()` I swung too far towards strictness: it throws an error if `yes` and `no` are not the same type. This is annoying in practice because it requires typed missing values (`NA_character_` etc), and because the checks are only on the class (not the full prototype), it's easy to create invalid output. I found it much easier to understand what `ifelse()` _should_ do once I internalised the ideas of type- and size-stability: * The first argument must be logical. * `vec_ptype(if_else(test, yes, no))` equals `vec_ptype_common(yes, no)`. Unlike `ifelse()` this implies that `if_else()` must always evaluate both `yes` and `no` in order to figure out the correct type. I think this is consistent with `&&` (scalar operation, short circuits) and `&` (vectorised, evaluates both sides). * `vec_size(if_else(test, yes, no))` equals `vec_size_common(test, yes, no)`. I think the output could have the same size as `test` (i.e., the same behaviour as `ifelse`), but I _think_ as a general rule that your inputs should either be mutually recycling or not. This leads to the following implementation: ```{r} if_else <- function(test, yes, no) { vec_assert(test, logical()) c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ``` By using `vec_size()` and `vec_slice()`, this definition of `if_else()` automatically works with data.frames and matrices: ```{r} if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) ``` vctrs/inst/doc/type-size.R0000644000176200001440000001254314042546502015220 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- library(vctrs) ## ----------------------------------------------------------------------------- vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ## ----------------------------------------------------------------------------- vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ## ----------------------------------------------------------------------------- vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ## ----------------------------------------------------------------------------- vec_ptype(factor("a")) ## ----------------------------------------------------------------------------- vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ## ----------------------------------------------------------------------------- vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ## ----------------------------------------------------------------------------- df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ## ---- error = TRUE------------------------------------------------------------ vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ## ----------------------------------------------------------------------------- vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ## ---- error = TRUE------------------------------------------------------------ vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ## ----------------------------------------------------------------------------- fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ## ----------------------------------------------------------------------------- vec_ptype_show(new_date(), new_datetime()) ## ----------------------------------------------------------------------------- vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ## ----------------------------------------------------------------------------- vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ## ----------------------------------------------------------------------------- vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ## ----------------------------------------------------------------------------- vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ## ----------------------------------------------------------------------------- str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ## ---- error = TRUE------------------------------------------------------------ # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ## ---- error = TRUE------------------------------------------------------------ vec_cast(c(1.5, 2), integer()) ## ----------------------------------------------------------------------------- allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ## ----------------------------------------------------------------------------- allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ## ----------------------------------------------------------------------------- x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ## ----------------------------------------------------------------------------- vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ## ---- echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates n error"---- knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ## ----------------------------------------------------------------------------- vec_recycle(1:3, 3) vec_recycle(1, 10) ## ----------------------------------------------------------------------------- vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ## ----------------------------------------------------------------------------- rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ## ----------------------------------------------------------------------------- invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ## ----------------------------------------------------------------------------- length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ## ---- error = TRUE------------------------------------------------------------ data.frame(1:2, 1:3) ## ---- error = TRUE------------------------------------------------------------ # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) vctrs/inst/doc/type-size.html0000644000176200001440000023152614042546502015767 0ustar liggesusers Prototypes and sizes

Prototypes and sizes

Rather than using class() and length(), vctrs has notions of prototype (vec_ptype_show()) and size (vec_size()). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules.

Size and prototype are motivated by thinking about the optimal behaviour for c() and rbind(), particularly inspired by data frames with columns that are matrices or data frames.

library(vctrs)

Prototype

The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the class() of an object is inadequate for this purpose:

  • The class() doesn’t include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a POSIXct. You cannot combine two factors or two POSIXcts without thinking about the attributes.

  • The class() of a matrix is “matrix” and doesn’t include the type of the underlying vector or the dimensionality.

Instead, vctrs takes advantage of R’s vectorised nature and uses a prototype, a 0-observation slice of the vector (this is basically x[0] but with some subtleties we’ll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data.

Conveniently, you can create many prototypes using existing base functions (e.g, double() and factor(levels = c("a", "b"))). vctrs provides a few helpers (e.g. new_date(), new_datetime(), and new_duration()) where the equivalents in base R are missing.

Base prototypes

vec_ptype() creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides vec_ptype_show(), which prints the prototype in a friendly way (and returns nothing).

Using vec_ptype_show() allows us to see the prototypes base R classes:

  • Atomic vectors have no attributes and just display the underlying typeof():

    vec_ptype_show(FALSE)
    #> Prototype: logical
    vec_ptype_show(1L)
    #> Prototype: integer
    vec_ptype_show(2.5)
    #> Prototype: double
    vec_ptype_show("three")
    #> Prototype: character
    vec_ptype_show(list(1, 2, 3))
    #> Prototype: list
  • The prototype of matrices and arrays include the base type and the dimensions after the first:

    vec_ptype_show(array(logical(), c(2, 3)))
    #> Prototype: logical[,3]
    vec_ptype_show(array(integer(), c(2, 3, 4)))
    #> Prototype: integer[,3,4]
    vec_ptype_show(array(character(), c(2, 3, 4, 5)))
    #> Prototype: character[,3,4,5]
  • The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it’s highly likely that their levels are also equal.

    vec_ptype_show(factor("a"))
    #> Prototype: factor<4d52a>
    vec_ptype_show(ordered("b"))
    #> Prototype: ordered<9b7e3>

    While vec_ptype_show() prints only the hash, the prototype object itself does contain all levels:

    vec_ptype(factor("a"))
    #> factor(0)
    #> Levels: a
  • Base R has three key date time classes: dates, date-times (POSIXct), and durations (difftime). Date-times have a timezone, and durations have a unit.

    vec_ptype_show(Sys.Date())
    #> Prototype: date
    vec_ptype_show(Sys.time())
    #> Prototype: datetime<local>
    vec_ptype_show(as.difftime(10, units = "mins"))
    #> Prototype: duration<mins>
  • Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column:

    vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x"))
    #> Prototype: data.frame<
    #>   a: logical
    #>   b: integer
    #>   c: double
    #>   d: character
    #> >

    Data frames can have columns that are themselves data frames, making this a “recursive” type:

    df <- data.frame(x = FALSE)
    df$y <- data.frame(a = 1L, b = 2.5)
    vec_ptype_show(df)
    #> Prototype: data.frame<
    #>   x: logical
    #>   y: 
    #>     data.frame<
    #>       a: integer
    #>       b: double
    #>     >
    #> >

Coercing to common type

It’s often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via vec_ptype_common(). vec_ptype_common() possesses the following invariants:

  • class(vec_ptype_common(x, y)) equals class(vec_ptype_common(y, x)).

  • class(vec_ptype_common(x, vec_ptype_common(y, z)) equals class(vec_ptype_common(vec_ptype_common(x, y), z)).

  • vec_ptype_common(x, NULL) == vec_ptype(x).

i.e., vec_ptype_common() is both commutative and associative (with respect to class) and has an identity element, NULL; i.e., it’s a commutative monoid. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects.

Like with vec_ptype(), the easiest way to explore vec_ptype_common() is with vec_ptype_show(): when given multiple inputs, it will print their common prototype. (In other words: program with vec_ptype_common() but play with vec_ptype_show().)

  • The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically:

    vec_ptype_show(logical(), integer(), double())
    #> Prototype: <double>
    #> 0. (           , <logical> ) = <logical>
    #> 1. ( <logical> , <integer> ) = <integer>
    #> 2. ( <integer> , <double>  ) = <double>
    
    vec_ptype_show(logical(), character())
    #> Error: Can't combine <logical> and <character>.
  • Matrices and arrays are automatically broadcast to higher dimensions:

    vec_ptype_show(
      array(1, c(0, 1)), 
      array(1, c(0, 2))
    )
    #> Prototype: <double[,2]>
    #> 0. (              , <double[,1]> ) = <double[,1]>
    #> 1. ( <double[,1]> , <double[,2]> ) = <double[,2]>
    
    vec_ptype_show(
      array(1, c(0, 1)), 
      array(1, c(0, 3)),
      array(1, c(0, 3, 4)),
      array(1, c(0, 3, 4, 5))
    )
    #> Prototype: <double[,3,4,5]>
    #> 0. (                , <double[,1]>     ) = <double[,1]>    
    #> 1. ( <double[,1]>   , <double[,3]>     ) = <double[,3]>    
    #> 2. ( <double[,3]>   , <double[,3,4]>   ) = <double[,3,4]>  
    #> 3. ( <double[,3,4]> , <double[,3,4,5]> ) = <double[,3,4,5]>

    Provided that the dimensions follow the vctrs recycling rules:

    vec_ptype_show(
      array(1, c(0, 2)), 
      array(1, c(0, 3))
    )
    #> Error: Can't combine <double[,2]> and <double[,3]>.
    #> ✖ Incompatible sizes 2 and 3 along axis 2.
  • Factors combine levels in the order in which they appear.

    fa <- factor("a")
    fb <- factor("b")
    
    levels(vec_ptype_common(fa, fb))
    #> [1] "a" "b"
    levels(vec_ptype_common(fb, fa))
    #> [1] "b" "a"
  • Combining a date and date-time yields a date-time:

    vec_ptype_show(new_date(), new_datetime())
    #> Prototype: <datetime<local>>
    #> 0. (        , <date>            ) = <date>           
    #> 1. ( <date> , <datetime<local>> ) = <datetime<local>>

    When combining two date times, the timezone is taken from the first input:

    vec_ptype_show(
      new_datetime(tzone = "US/Central"), 
      new_datetime(tzone = "Pacific/Auckland")
    )
    #> Prototype: <datetime<US/Central>>
    #> 0. (                        , <datetime<US/Central>>       ) = <datetime<US/Central>>
    #> 1. ( <datetime<US/Central>> , <datetime<Pacific/Auckland>> ) = <datetime<US/Central>>

    Unless it’s the local timezone, in which case any explicit time zone will win:

    vec_ptype_show(
      new_datetime(tzone = ""), 
      new_datetime(tzone = ""), 
      new_datetime(tzone = "Pacific/Auckland")
    )
    #> Prototype: <datetime<Pacific/Auckland>>
    #> 0. (                   , <datetime<local>>            ) = <datetime<local>>           
    #> 1. ( <datetime<local>> , <datetime<local>>            ) = <datetime<local>>           
    #> 2. ( <datetime<local>> , <datetime<Pacific/Auckland>> ) = <datetime<Pacific/Auckland>>
  • The common type of two data frames is the common type of each column that occurs in both data frames:

    vec_ptype_show(
      data.frame(x = FALSE), 
      data.frame(x = 1L),
      data.frame(x = 2.5)
    )
    #> Prototype: <data.frame<x:double>>
    #> 0. (                         , <data.frame<x:logical>> ) = <data.frame<x:logical>>
    #> 1. ( <data.frame<x:logical>> , <data.frame<x:integer>> ) = <data.frame<x:integer>>
    #> 2. ( <data.frame<x:integer>> , <data.frame<x:double>>  ) = <data.frame<x:double>>

    And the union of the columns that only occur in one:

    vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1))
    #> Prototype: <data.frame<
    #>   x: double
    #>   y: double
    #>   z: double
    #> >>
    #> 0. ┌              , <data.frame< ┐ = <data.frame<
    #>    │                  x: double  │     x: double 
    #>    │                  y: double  │     y: double 
    #>    └                >>           ┘   >>          
    #> 1. ┌ <data.frame< , <data.frame< ┐ = <data.frame<
    #>    │   x: double      y: double  │     x: double 
    #>    │   y: double      z: double  │     y: double 
    #>    │ >>             >>           │     z: double 
    #>    └                             ┘   >>

    Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled.

Casting to specified type

vec_ptype_common() finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That’s the job of vec_cast_common():

str(vec_cast_common(
  FALSE, 
  1:5, 
  2.5
))
#> List of 3
#>  $ : num 0
#>  $ : num [1:5] 1 2 3 4 5
#>  $ : num 2.5

str(vec_cast_common(
  factor("x"), 
  factor("y")
))
#> List of 2
#>  $ : Factor w/ 2 levels "x","y": 1
#>  $ : Factor w/ 2 levels "x","y": 2

str(vec_cast_common(
  data.frame(x = 1),
  data.frame(y = 1:2)
))
#> List of 2
#>  $ :'data.frame':    1 obs. of  2 variables:
#>   ..$ x: num 1
#>   ..$ y: int NA
#>  $ :'data.frame':    2 obs. of  2 variables:
#>   ..$ x: num [1:2] NA NA
#>   ..$ y: int [1:2] 1 2

Alternatively, you can cast to a specific prototype using vec_cast():

# Cast succeeds
vec_cast(c(1, 2), integer())
#> [1] 1 2

# Cast fails
vec_cast(c(1.5, 2.5), factor("a"))
#> Error: Can't convert <double> to <factor<4d52a>>.

If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error.

vec_cast(c(1.5, 2), integer())
#> Error: Can't convert from <double> to <integer> due to loss of precision.
#> • Locations: 1

You can suppress the lossy cast errors with allow_lossy_cast():

allow_lossy_cast(
  vec_cast(c(1.5, 2), integer())
)
#> [1] 1 2

This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed:

allow_lossy_cast(
  vec_cast(c(1.5, 2), integer()),
  x_ptype = double(),
  to_ptype = integer()
)
#> [1] 1 2

The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound.

Size

vec_size() was motivated by the need to have an invariant that describes the number of “observations” in a data structure. This is particularly important for data frames, as it’s useful to have some function such that f(data.frame(x)) equals f(x). No base function has this property:

  • length(data.frame(x)) equals 1 because the length of a data frame is the number of columns.

  • nrow(data.frame(x)) does not equal nrow(x) because nrow() of a vector is NULL.

  • NROW(data.frame(x)) equals NROW(x) for vector x, so is almost what we want. But because NROW() is defined in terms of length(), it returns a value for every object, even types that can’t go in a data frame, e.g. data.frame(mean) errors even though NROW(mean) is 1.

We define vec_size() as follows:

  • It is the length of 1d vectors.
  • It is the number of rows of data frames, matrices, and arrays.
  • It throws error for non vectors.

Given vec_size(), we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns.

Slicing

vec_slice() is to vec_size() as [ is to length(); i.e., it allows you to select observations regardless of the dimensionality of the underlying object. vec_slice(x, i) is equivalent to:

  • x[i] when x is a vector.
  • x[i, , drop = FALSE] when x is a data frame.
  • x[i, , , drop = FALSE] when x is a 3d array.
x <- sample(1:10)
df <- data.frame(x = x)

vec_slice(x, 5:6)
#> [1] 8 2
vec_slice(df, 5:6)
#>   x
#> 1 8
#> 2 2

vec_slice(data.frame(x), i) equals data.frame(vec_slice(x, i)) (modulo variable and row names).

Prototypes are generated with vec_slice(x, 0L); given a prototype, you can initialize a vector of given size (filled with NAs) with vec_init().

Common sizes: recycling rules

Closely related to the definition of size are the recycling rules. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in vec_size_common(), which gives the common size of a set of vectors:

vec_size_common(1:3, 1:3, 1:3)
#> [1] 3
vec_size_common(1:10, 1)
#> [1] 10
vec_size_common(integer(), 1)
#> [1] 0

vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like dest == c("IAH", "HOU")), at the cost of occasionally requiring an explicit calls to rep().

Summary of vctrs recycling rules. X indicates n error

Summary of vctrs recycling rules. X indicates n error

You can apply the recycling rules in two ways:

  • If you have a vector and desired size, use vec_recycle():

    vec_recycle(1:3, 3)
    #> [1] 1 2 3
    vec_recycle(1, 10)
    #>  [1] 1 1 1 1 1 1 1 1 1 1
  • If you have multiple vectors and you want to recycle them to the same size, use vec_recycle_common():

    vec_recycle_common(1:3, 1:3)
    #> [[1]]
    #> [1] 1 2 3
    #> 
    #> [[2]]
    #> [1] 1 2 3
    vec_recycle_common(1:10, 1)
    #> [[1]]
    #>  [1]  1  2  3  4  5  6  7  8  9 10
    #> 
    #> [[2]]
    #>  [1] 1 1 1 1 1 1 1 1 1 1

Appendix: recycling in base R

The recycling rules in base R are described in The R Language Definition but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions.

Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer:

rep(1, 6) + 1
#> [1] 2 2 2 2 2 2
rep(1, 6) + 1:2
#> [1] 2 3 2 3 2 3
rep(1, 6) + 1:3
#> [1] 2 3 4 2 3 4

If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning:

invisible(pmax(1:2, 1:3))
#> Warning in pmax(1:2, 1:3): an argument will be fractionally recycled
invisible(1:2 + 1:3)
#> Warning in 1:2 + 1:3: longer object length is not a multiple of shorter object
#> length
invisible(cbind(1:2, 1:3))
#> Warning in cbind(1:2, 1:3): number of rows of result is not a multiple of vector
#> length (arg 1)

But some functions recycle silently:

length(atan2(1:3, 1:2))
#> [1] 3
length(paste(1:3, 1:2))
#> [1] 3
length(ifelse(1:3, 1:2, 1:2))
#> [1] 3

And data.frame() throws an error:

data.frame(1:2, 1:3)
#> Error in data.frame(1:2, 1:3): arguments imply differing number of rows: 2, 3

The R language definition states that “any arithmetic operation involving a zero-length vector has a zero-length result”. But outside of arithmetic, this rule is not consistently followed:

# length-0 output
1:2 + integer()
#> integer(0)
atan2(1:2, integer())
#> numeric(0)
pmax(1:2, integer())
#> integer(0)

# dropped
cbind(1:2, integer())
#>      [,1]
#> [1,]    1
#> [2,]    2

# recycled to length of first
ifelse(rep(TRUE, 4), integer(), character())
#> [1] NA NA NA NA

# preserved-ish
paste(1:2, integer())
#> [1] "1 " "2 "

# Errors
data.frame(1:2, integer())
#> Error in data.frame(1:2, integer()): arguments imply differing number of rows: 2, 0
vctrs/inst/doc/pillar.html0000644000176200001440000014062214042546500015313 0ustar liggesusers Printing vectors nicely in tibbles

Printing vectors nicely in tibbles

Kirill Müller, Hadley Wickham

You can get basic control over how a vector is printed in a tibble by providing a format() method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics:

  • vctrs::vec_ptype_abbr() determines what goes into the column header.
  • pillar::pillar_shaft() determines what goes into the body, or the shaft, of the column.

Technically a pillar is composed of a shaft (decorated with an ornament), with a capital above and a base below. Multiple pillars form a colonnade, which can be stacked in multiple tiers. This is the motivation behind the names in our API.

This short vignette shows the basics of column styling using a "latlon" vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you’ll need to create documentation and the NAMESPACE file. In this vignette, we’ll attach pillar and vctrs:

library(vctrs)
library(pillar)

You don’t need to do this in a package. Instead, you’ll need to import the packages by then to the Imports: section of your DESCRIPTION. The following helper does this for you:

usethis::use_package("vctrs")
usethis::use_package("pillar")

Prerequisites

To illustrate the basic ideas we’re going to create a "latlon" class that encodes geographic coordinates in a record. We’ll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using vctrs_rcrd(), we already get the infrastructure to make this class fully compatible with data frames for free. See vignette("s3-vector", package = "vctrs") for details on the record data type.

#' @export
latlon <- function(lat, lon) {
  new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon")
}

#' @export
format.earth_latlon <- function(x, ..., formatter = deg_min) {
  x_valid <- which(!is.na(x))

  lat <- field(x, "lat")[x_valid]
  lon <- field(x, "lon")[x_valid]

  ret <- rep(NA_character_, vec_size(x))
  ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon"))
  # It's important to keep NA in the vector!
  ret
}

deg_min <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- trunc(x)
  x <- x - deg
  min <- round(x * 60)

  # Ensure the columns are always the same width so they line up nicely
  ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]]))
  format(ret, justify = "right")
}

latlon(c(32.71, 2.95), c(-117.17, 1.67))
#> <earth_latlon[2]>
#> [1] 32°43'N 117°10'W  2°57'N   1°40'E

Using in a tibble

Columns of this class can be used in a tibble right away because we’ve made a class using the vctrs infrastructure and have provided a format() method:

library(tibble)
#> 
#> Attaching package: 'tibble'
#> The following object is masked from 'package:vctrs':
#> 
#>     data_frame

loc <- latlon(
  c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA),
  c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA)
)

data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc)

data
#> # A tibble: 6 x 3
#>   venue          year              loc
#>   <chr>         <int>       <erth_ltl>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

This output is ok, but we could improve it by:

  1. Using a more description type abbreviation than <erth_ltl>.

  2. Using a dash of colour to highlight the most important parts of the value.

  3. Providing a narrower view when horizontal space is at a premium.

The following sections show how to enhance the rendering.

Fixing the data type

Instead of <erth_ltl> we’d prefer to use <latlon>. We can do that by implementing the vec_ptype_abbr() method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that’s under 6 characters.

#' @export
vec_ptype_abbr.earth_latlon <- function(x) {
  "latlon"
}

data
#> # A tibble: 6 x 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

Custom rendering

The format() method is used by default for rendering. For custom formatting you need to implement the pillar_shaft() method. This function should always return a pillar shaft object, created by new_pillar_shaft_simple() or similar. new_pillar_shaft_simple() accepts ANSI escape codes for colouring, and pillar includes some built in styles like style_subtle(). We can use subtle style for the degree and minute separators to make the data more obvious.

First we define a degree formatter that makes use of style_subtle():

deg_min_color <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- trunc(x)
  x <- x - deg
  rad <- round(x * 60)
  ret <- sprintf(
    "%d%s%.2d%s%s",
    deg,
    pillar::style_subtle("°"),
    rad,
    pillar::style_subtle("'"),
    pm[ifelse(sign >= 0, 1, 2)]
  )
  format(ret, justify = "right")
}

And then we pass that to our format() method:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  out <- format(x, formatter = deg_min_color)
  pillar::new_pillar_shaft_simple(out, align = "right")
}

Currently, ANSI escapes are not rendered in vignettes, so this result doesn’t look any different, but if you run the code yourself you’ll see an improved display.

data
#> # A tibble: 6 x 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

As well as the functions in pillar, the cli package provides a variety of tools for styling text.

Truncation

Tibbles can automatically compacts columns when there’s no enough horizontal space to display everything:

print(data, width = 30)
#> # A tibble: 6 x 3
#>   venue  year              loc
#>   <chr> <int>         <latlon>
#> 1 rstu…  2017 28°20'N  81°33'W
#> 2 rstu…  2018 32°43'N 117°10'W
#> 3 rstu…  2019 30°16'N  97°44'W
#> 4 rstu…  2020 37°47'N 122°25'W
#> 5 rstu…  2021 28°30'N  81°24'W
#> 6 rstu…  2022               NA

Currently the latlon class isn’t ever compacted because we haven’t specified a minimum width when constructing the shaft. Let’s fix that and re-print the data:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  out <- format(x)
  pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10)
}

print(data, width = 30)
#> # A tibble: 6 x 3
#>   venue      year          loc
#>   <chr>     <int>     <latlon>
#> 1 rstudio:…  2017 28°20'N  81…
#> 2 rstudio:…  2018 32°43'N 117…
#> 3 rstudio:…  2019 30°16'N  97…
#> 4 rstudio:…  2020 37°47'N 122…
#> 5 rstudio:…  2021 28°30'N  81…
#> 6 rstudio:…  2022           NA

Adaptive rendering

Truncation may be useful for character data, but for lat-lon data it’d be nicer to show full degrees and remove the minutes. We’ll first write a function that does this:

deg <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- round(x)

  ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)])
  format(ret, justify = "right")
}

Then use it as part of more sophisticated implementation of the pillar_shaft() method:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  deg <- format(x, formatter = deg)
  deg_min <- format(x)

  pillar::new_pillar_shaft(
    list(deg = deg, deg_min = deg_min),
    width = pillar::get_max_extent(deg_min),
    min_width = pillar::get_max_extent(deg),
    class = "pillar_shaft_latlon"
  )
}

Now the pillar_shaft() method returns an object of class "pillar_shaft_latlon" created by new_pillar_shaft(). This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (get_max_extent() is a helper that computes the maximum display width occupied by the values in a character vector.)

All that’s left to do is to implement a format() method for our new "pillar_shaft_latlon" class. This method will be called with a width argument, which then determines which of the formats to choose. The formatting of our choice is passed to the new_ornament() function:

#' @export
format.pillar_shaft_latlon <- function(x, width, ...) {
  if (get_max_extent(x$deg_min) <= width) {
    ornament <- x$deg_min
  } else {
    ornament <- x$deg
  }

  pillar::new_ornament(ornament, align = "right")
}

data
#> # A tibble: 6 x 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA
print(data, width = 30)
#> # A tibble: 6 x 3
#>   venue      year          loc
#>   <chr>     <int>     <latlon>
#> 1 rstudio:…  2017   28°N  82°W
#> 2 rstudio:…  2018   33°N 117°W
#> 3 rstudio:…  2019   30°N  98°W
#> 4 rstudio:…  2020   38°N 122°W
#> 5 rstudio:…  2021   28°N  81°W
#> 6 rstudio:…  2022           NA

Testing

If you want to test the output of your code, you can compare it with a known state recorded in a text file. The testthat::expect_snapshot() function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won’t make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN.

Use this testthat expectation in one of your test files to create a snapshot test:

expect_snapshot(pillar_shaft(data$loc))

See https://testthat.r-lib.org/articles/snapshotting.html for more information.

vctrs/inst/doc/stability.html0000644000176200001440000015560614042546501016045 0ustar liggesusers Type and size stability

Type and size stability

This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the “shape” of the output you only need to know the “shape”s of the inputs.

This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can’t predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases.

library(vctrs)
library(zeallot)

Definitions

We say a function is type-stable iff:

  1. You can predict the output type knowing only the input types.
  2. The order of arguments in … does not affect the output type.

Similarly, a function is size-stable iff:

  1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size.

Very few base R functions are size-stable, so I’ll also define a slightly weaker condition. I’ll call a function length-stable iff:

  1. You can predict the output length knowing only the input lengths, or there is a single numeric input that specifies the output length.

(But note that length-stable is not a particularly robust definition because length() returns a value for things that are not vectors.)

We’ll call functions that don’t obey these principles type-unstable and size-unstable respectively.

On top of type- and size-stability it’s also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions.

The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again.

Examples

To make these ideas concrete, let’s apply them to a few base functions:

  1. mean() is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error).

  2. Surprisingly, median() is type-unstable:

    vec_ptype_show(median(c(1L, 1L)))
    #> Prototype: double
    vec_ptype_show(median(c(1L, 1L, 1L)))
    #> Prototype: integer

    It is, however, size-stable, since it always returns a vector of length 1.

  3. sapply() is type-unstable because you can’t predict the output type only knowing the input types:

    vec_ptype_show(sapply(1L, function(x) c(x, x)))
    #> Prototype: integer[,1]
    vec_ptype_show(sapply(integer(), function(x) c(x, x)))
    #> Prototype: list

    It’s not quite size-stable; vec_size(sapply(x, f)) is vec_size(x) for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns).

  4. vapply() is a type-stable version of sapply() because vec_ptype_show(vapply(x, fn, template)) is always vec_ptype_show(template).
    It is size-unstable for the same reasons as sapply().

  5. c() is type-unstable because c(x, y) doesn’t always output the same type as c(y, x).

    vec_ptype_show(c(NA, Sys.Date()))
    #> Prototype: double
    vec_ptype_show(c(Sys.Date(), NA))
    #> Prototype: date

    c() is almost always length-stable because length(c(x, y)) almost always equals length(x) + length(y). One common source of instability here is dealing with non-vectors (see the later section “Non-vectors”):

    env <- new.env(parent = emptyenv())
    length(env)
    #> [1] 0
    length(mean)
    #> [1] 1
    length(c(env, mean))
    #> [1] 2
  6. paste(x1, x2) is length-stable because length(paste(x1, x2)) equals max(length(x1), length(x2)). However, it doesn’t follow the usual arithmetic recycling rules because paste(1:2, 1:3) doesn’t generate a warning.

  7. ifelse() is length-stable because length(ifelse(cond, true, false)) is always length(cond). ifelse() is type-unstable because the output type depends on the value of cond:

    vec_ptype_show(ifelse(NA, 1L, 1L))
    #> Prototype: logical
    vec_ptype_show(ifelse(FALSE, 1L, 1L))
    #> Prototype: integer
  8. read.csv(file) is type-unstable and size-unstable because, while you know it will return a data frame, you don’t know what columns it will return or how many rows it will have. Similarly, df[[i]] is not type-stable because the result depends on the value of i. There are many important functions that can not be made type-stable or size-stable!

With this understanding of type- and size-stability in hand, we’ll use them to analyse some base R functions in greater depth and then propose alternatives with better properties.

c() and vctrs::vec_c()

In this section we’ll compare and contrast c() and vec_c(). vec_c() is both type- and size-stable because it possesses the following invariants:

  • vec_ptype(vec_c(x, y)) equals vec_ptype_common(x, y).
  • vec_size(vec_c(x, y)) equals vec_size(x) + vec_size(y).

c() has another undesirable property in that it’s not consistent with unlist(); i.e., unlist(list(x, y)) does not always equal c(x, y); i.e., base R has multiple sets of type-coercion rules. I won’t consider this problem further here.

I have two goals here:

  • To fully document the quirks of c(), hence motivating the development of an alternative.

  • To discuss non-obvious consequences of the type- and size-stability above.

Atomic vectors

If we only consider atomic vectors, c() is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical.

c(FALSE, 1L, 2.5)
#> [1] 0.0 1.0 2.5

vec_c() obeys similar rules:

vec_c(FALSE, 1L, 2.5)
#> [1] 0.0 1.0 2.5

But it does not automatically coerce to character vectors or lists:

c(FALSE, "x")
#> [1] "FALSE" "x"
vec_c(FALSE, "x")
#> Error: Can't combine `..1` <logical> and `..2` <character>.

c(FALSE, list(1))
#> [[1]]
#> [1] FALSE
#> 
#> [[2]]
#> [1] 1
vec_c(FALSE, list(1))
#> Error: Can't combine `..1` <logical> and `..2` <list>.

Incompatible vectors and non-vectors

In general, most base methods do not throw an error:

c(10.5, factor("x"))
#> [1] 10.5  1.0

If the inputs aren’t vectors, c() automatically puts them in a list:

c(mean, globalenv())
#> [[1]]
#> function (x, ...) 
#> UseMethod("mean")
#> <bytecode: 0x7f87de841660>
#> <environment: namespace:base>
#> 
#> [[2]]
#> <environment: R_GlobalEnv>

For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list:

c(getRversion(), "x")
#> Error: invalid version specification 'x'

c("x", getRversion())
#> [[1]]
#> [1] "x"
#> 
#> [[2]]
#> [1] 4 0 4

vec_c() throws an error if the inputs are not vectors or not automatically coercible:

vec_c(mean, globalenv())
#> Error: `..1` must be a vector, not a function.

vec_c(Sys.Date(), factor("x"), "x")
#> Error: Can't combine `..1` <date> and `..2` <factor<bf275>>.

Factors

Combining two factors returns an integer vector:

fa <- factor("a")
fb <- factor("b")

c(fa, fb)
#> [1] 1 1

(This is documented in c() but is still undesirable.)

vec_c() returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with dplyr::bind_rows(), which is stricter and is a common source of user difficulty.)

vec_c(fa, fb)
#> [1] a b
#> Levels: a b
vec_c(fb, fa)
#> [1] b a
#> Levels: b a

Date-times

c() strips the time zone associated with date-times:

datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland")
c(datetime_nz)
#> [1] "2019-12-31 21:00:00 CET"

This behaviour is documented in ?DateTimeClasses but is the source of considerable user pain.

vec_c() preserves time zones:

vec_c(datetime_nz)
#> [1] "2020-01-01 09:00:00 NZDT"

What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there’s no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone:

datetime_local <- as.POSIXct("2020-01-01 09:00")
datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central")

vec_c(datetime_local, datetime_houston, datetime_nz)
#> [1] "2020-01-01 02:00:00 CST" "2020-01-01 09:00:00 CST"
#> [3] "2019-12-31 14:00:00 CST"
vec_c(datetime_houston, datetime_nz)
#> [1] "2020-01-01 09:00:00 CST" "2019-12-31 14:00:00 CST"
vec_c(datetime_nz, datetime_houston)
#> [1] "2020-01-01 09:00:00 NZDT" "2020-01-02 04:00:00 NZDT"

Dates and date-times

Combining dates and date-times with c() gives silently incorrect results:

date <- as.Date("2020-01-01")
datetime <- as.POSIXct("2020-01-01 09:00")

c(date, datetime)
#> [1] "2020-01-01" "2020-01-01"
c(datetime, date)
#> [1] "2020-01-01 09:00:00 CET" "2020-01-01 01:00:00 CET"

This behaviour arises because neither c.Date() nor c.POSIXct() check that all inputs are of the same type.

vec_c() uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time).

vec_c(date, datetime)
#> [1] "2020-01-01 00:00:00 CET" "2020-01-01 09:00:00 CET"
vec_c(date, datetime_nz)
#> [1] "2020-01-01 00:00:00 NZDT" "2020-01-01 09:00:00 NZDT"

Missing values

If a missing value comes at the beginning of the inputs, c() falls back to the internal behaviour, which strips all attributes:

c(NA, fa)
#> [1] NA  1
c(NA, date)
#> [1]    NA 18262
c(NA, datetime)
#> [1]         NA 1577865600

vec_c() takes a different approach treating a logical vector consisting only of NA as the unspecified() class which can be converted to any other 1d type:

vec_c(NA, fa)
#> [1] <NA> a   
#> Levels: a
vec_c(NA, date)
#> [1] NA           "2020-01-01"
vec_c(NA, datetime)
#> [1] NA                        "2020-01-01 09:00:00 CET"

Data frames

Because it is almost always length-stable, c() combines data frames column wise (into a list):

df1 <- data.frame(x = 1)
df2 <- data.frame(x = 2)
str(c(df1, df1))
#> List of 2
#>  $ x: num 1
#>  $ x: num 1

vec_c() is size-stable, which implies it will row-bind data frames:

vec_c(df1, df2)
#>   x
#> 1 1
#> 2 2

Matrices and arrays

The same reasoning applies to matrices:

m <- matrix(1:4, nrow = 2)
c(m, m)
#> [1] 1 2 3 4 1 2 3 4
vec_c(m, m)
#>      [,1] [,2]
#> [1,]    1    3
#> [2,]    2    4
#> [3,]    1    3
#> [4,]    2    4

One difference is that vec_c() will “broadcast” a vector to match the dimensions of a matrix:

c(m, 1)
#> [1] 1 2 3 4 1

vec_c(m, 1)
#>      [,1] [,2]
#> [1,]    1    3
#> [2,]    2    4
#> [3,]    1    1

Implementation

The basic implementation of vec_c() is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with vec_init(), and then insert each input into the correct place in the output.

vec_c <- function(...) {
  args <- compact(list2(...))

  ptype <- vec_ptype_common(!!!args)
  if (is.null(ptype))
    return(NULL)

  ns <- map_int(args, vec_size)
  out <- vec_init(ptype, sum(ns))

  pos <- 1
  for (i in seq_along(ns)) {
    n <- ns[[i]]
    
    x <- vec_cast(args[[i]], to = ptype)
    vec_slice(out, pos:(pos + n - 1)) <- x
    pos <- pos + n
  }

  out
}

(The real vec_c() is a bit more complicated in order to handle inner and outer names).

ifelse()

One of the functions that motivate the development of vctrs is ifelse(). It has the surprising property that the result value is “A vector of the same length and attributes (including dimensions and class) as test”. To me, it seems more reasonable for the type of the output to be controlled by the type of the yes and no arguments.

In dplyr::if_else() I swung too far towards strictness: it throws an error if yes and no are not the same type. This is annoying in practice because it requires typed missing values (NA_character_ etc), and because the checks are only on the class (not the full prototype), it’s easy to create invalid output.

I found it much easier to understand what ifelse() should do once I internalised the ideas of type- and size-stability:

  • The first argument must be logical.

  • vec_ptype(if_else(test, yes, no)) equals vec_ptype_common(yes, no). Unlike ifelse() this implies that if_else() must always evaluate both yes and no in order to figure out the correct type. I think this is consistent with && (scalar operation, short circuits) and & (vectorised, evaluates both sides).

  • vec_size(if_else(test, yes, no)) equals vec_size_common(test, yes, no). I think the output could have the same size as test (i.e., the same behaviour as ifelse), but I think as a general rule that your inputs should either be mutually recycling or not.

This leads to the following implementation:

if_else <- function(test, yes, no) {
  vec_assert(test, logical())
  c(yes, no) %<-% vec_cast_common(yes, no)
  c(test, yes, no) %<-% vec_recycle_common(test, yes, no)

  out <- vec_init(yes, vec_size(yes))
  vec_slice(out, test) <- vec_slice(yes, test)
  vec_slice(out, !test) <- vec_slice(no, !test)

  out
}

x <- c(NA, 1:4)
if_else(x > 2, "small", "big")
#> [1] NA      "big"   "big"   "small" "small"
if_else(x > 2, factor("small"), factor("big"))
#> [1] <NA>  big   big   small small
#> Levels: small big
if_else(x > 2, Sys.Date(), Sys.Date() + 7)
#> [1] NA           "2021-05-06" "2021-05-06" "2021-04-29" "2021-04-29"

By using vec_size() and vec_slice(), this definition of if_else() automatically works with data.frames and matrices:

if_else(x > 2, data.frame(x = 1), data.frame(y = 2))
#>    x  y
#> 1 NA NA
#> 2 NA  2
#> 3 NA  2
#> 4  1 NA
#> 5  1 NA

if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30))
#>      [,1] [,2]
#> [1,]   NA   NA
#> [2,]   30   30
#> [3,]   30   30
#> [4,]    4    9
#> [5,]    5   10
vctrs/inst/include/0000755000176200001440000000000014042546503014016 5ustar liggesusersvctrs/inst/include/vctrs.c0000644000176200001440000000065113650511520015320 0ustar liggesusers#include "vctrs.h" bool (*vec_is_vector)(SEXP) = NULL; R_len_t (*short_vec_size)(SEXP) = NULL; SEXP (*short_vec_recycle)(SEXP, R_len_t) = NULL; void vctrs_init_api() { vec_is_vector = (bool (*)(SEXP)) R_GetCCallable("vctrs", "vec_is_vector"); short_vec_size = (R_len_t (*)(SEXP)) R_GetCCallable("vctrs", "short_vec_size"); short_vec_recycle = (SEXP (*)(SEXP, R_len_t)) R_GetCCallable("vctrs", "short_vec_recycle"); } vctrs/inst/include/vctrs.h0000644000176200001440000000041013650511520015316 0ustar liggesusers#ifndef VCTRS_H #define VCTRS_H #include #include #include extern bool (*vec_is_vector)(SEXP); extern R_len_t (*short_vec_size)(SEXP); extern SEXP (*short_vec_recycle)(SEXP, R_len_t); void vctrs_init_api(); #endif vctrs/inst/WORDLIST0000644000176200001440000000001313473164157013567 0ustar liggesusersvectorised