vctrs/0000755000176200001440000000000014532470452011421 5ustar liggesusersvctrs/NAMESPACE0000644000176200001440000004724714465453271012663 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("!",vctrs_vctr) S3method("!=",vctrs_vctr) S3method("$",vctrs_list_of) S3method("$",vctrs_rcrd) S3method("$",vctrs_sclr) S3method("$",vctrs_vctr) S3method("$<-",vctrs_list_of) S3method("$<-",vctrs_rcrd) S3method("$<-",vctrs_sclr) S3method("$<-",vctrs_vctr) S3method("%%",vctrs_vctr) S3method("%/%",vctrs_vctr) S3method("&",vctrs_vctr) S3method("*",vctrs_vctr) S3method("+",vctrs_vctr) S3method("-",vctrs_vctr) S3method("/",vctrs_vctr) S3method("<",vctrs_vctr) S3method("<=",vctrs_vctr) S3method("==",vctrs_vctr) S3method(">",vctrs_vctr) S3method(">=",vctrs_vctr) S3method("[",vctrs_rcrd) S3method("[",vctrs_sclr) S3method("[",vctrs_unspecified) S3method("[",vctrs_vctr) S3method("[<-",vctrs_list_of) S3method("[<-",vctrs_rcrd) S3method("[<-",vctrs_sclr) S3method("[<-",vctrs_vctr) S3method("[[",vctrs_list_of) S3method("[[",vctrs_rcrd) S3method("[[",vctrs_sclr) S3method("[[",vctrs_vctr) S3method("[[<-",vctrs_list_of) S3method("[[<-",vctrs_rcrd) S3method("[[<-",vctrs_sclr) S3method("[[<-",vctrs_vctr) S3method("^",vctrs_vctr) S3method("dim<-",vctrs_sclr) S3method("dim<-",vctrs_vctr) S3method("dimnames<-",vctrs_sclr) S3method("dimnames<-",vctrs_vctr) S3method("is.na<-",vctrs_sclr) S3method("is.na<-",vctrs_vctr) S3method("length<-",vctrs_rcrd) S3method("length<-",vctrs_vctr) S3method("levels<-",vctrs_sclr) S3method("levels<-",vctrs_vctr) S3method("names<-",vctrs_rcrd) S3method("names<-",vctrs_sclr) S3method("names<-",vctrs_vctr) S3method("|",vctrs_vctr) S3method(Complex,vctrs_sclr) S3method(Math,vctrs_sclr) S3method(Math,vctrs_vctr) S3method(Ops,vctrs_sclr) S3method(Summary,vctrs_sclr) S3method(Summary,vctrs_vctr) S3method(anyDuplicated,vctrs_sclr) S3method(anyDuplicated,vctrs_vctr) S3method(anyNA,vctrs_vctr) S3method(as.Date,vctrs_sclr) S3method(as.Date,vctrs_vctr) S3method(as.POSIXct,vctrs_sclr) S3method(as.POSIXct,vctrs_vctr) S3method(as.POSIXlt,vctrs_vctr) S3method(as.character,vctrs_list_of) S3method(as.character,vctrs_sclr) S3method(as.character,vctrs_vctr) S3method(as.data.frame,vctrs_sclr) S3method(as.data.frame,vctrs_vctr) S3method(as.double,vctrs_sclr) S3method(as.double,vctrs_vctr) S3method(as.integer,vctrs_sclr) S3method(as.integer,vctrs_vctr) S3method(as.list,vctrs_list_of) S3method(as.list,vctrs_sclr) S3method(as.list,vctrs_vctr) S3method(as.logical,vctrs_sclr) S3method(as.logical,vctrs_vctr) S3method(as_list_of,list) S3method(as_list_of,vctrs_list_of) S3method(c,vctrs_sclr) S3method(c,vctrs_vctr) S3method(can_fall_back,"vctrs:::common_class_fallback") S3method(can_fall_back,data.frame) S3method(can_fall_back,default) S3method(can_fall_back,ts) S3method(can_fall_back,vctrs_vctr) S3method(cnd_body,vctrs_error_cast_lossy) S3method(cnd_body,vctrs_error_incompatible_size) S3method(cnd_body,vctrs_error_matches_incomplete) S3method(cnd_body,vctrs_error_matches_multiple) S3method(cnd_body,vctrs_error_matches_nothing) S3method(cnd_body,vctrs_error_matches_relationship_many_to_one) S3method(cnd_body,vctrs_error_matches_relationship_one_to_many) S3method(cnd_body,vctrs_error_matches_relationship_one_to_one) S3method(cnd_body,vctrs_error_matches_remaining) S3method(cnd_body,vctrs_error_names_cannot_be_dot_dot) S3method(cnd_body,vctrs_error_names_cannot_be_empty) S3method(cnd_body,vctrs_error_names_must_be_unique) S3method(cnd_body,vctrs_error_subscript_oob) S3method(cnd_body,vctrs_error_subscript_type) S3method(cnd_header,vctrs_error_cast_lossy) S3method(cnd_header,vctrs_error_incompatible_size) S3method(cnd_header,vctrs_error_matches_incomplete) S3method(cnd_header,vctrs_error_matches_multiple) S3method(cnd_header,vctrs_error_matches_nothing) S3method(cnd_header,vctrs_error_matches_relationship_many_to_one) S3method(cnd_header,vctrs_error_matches_relationship_one_to_many) S3method(cnd_header,vctrs_error_matches_relationship_one_to_one) S3method(cnd_header,vctrs_error_matches_remaining) S3method(cnd_header,vctrs_error_names_cannot_be_dot_dot) S3method(cnd_header,vctrs_error_names_cannot_be_empty) S3method(cnd_header,vctrs_error_names_must_be_unique) S3method(cnd_header,vctrs_error_subscript_oob) S3method(cnd_header,vctrs_error_subscript_size) S3method(cnd_header,vctrs_error_subscript_type) S3method(diff,vctrs_vctr) S3method(duplicated,vctrs_sclr) S3method(duplicated,vctrs_vctr) S3method(format,vctrs_bytes) S3method(format,vctrs_group_rle) S3method(format,vctrs_list_of) S3method(format,vctrs_rcrd) S3method(format,vctrs_vctr) S3method(is.finite,vctrs_vctr) S3method(is.infinite,vctrs_vctr) S3method(is.na,vctrs_vctr) S3method(is.nan,vctrs_vctr) S3method(length,vctrs_rcrd) S3method(levels,vctrs_sclr) S3method(levels,vctrs_vctr) S3method(max,vctrs_vctr) S3method(mean,vctrs_vctr) S3method(median,vctrs_vctr) S3method(min,vctrs_vctr) S3method(na.exclude,vctrs_vctr) S3method(na.fail,vctrs_vctr) S3method(na.omit,vctrs_vctr) S3method(names,vctrs_rcrd) S3method(obj_print_data,default) S3method(obj_print_data,vctrs_list_of) S3method(obj_print_data,vctrs_partial) S3method(obj_print_footer,default) S3method(obj_print_header,default) S3method(obj_print_header,vctrs_group_rle) S3method(obj_print_header,vctrs_partial) S3method(obj_str_data,default) S3method(obj_str_data,vctrs_rcrd) S3method(obj_str_footer,default) S3method(obj_str_header,default) S3method(print,vctrs_bytes) S3method(print,vctrs_sclr) S3method(print,vctrs_unspecified) S3method(print,vctrs_vctr) S3method(quantile,vctrs_vctr) S3method(range,vctrs_vctr) S3method(rep,vctrs_rcrd) S3method(rep,vctrs_vctr) S3method(str,vctrs_vctr) S3method(summary,vctrs_sclr) S3method(summary,vctrs_vctr) S3method(t,vctrs_sclr) S3method(t,vctrs_vctr) S3method(unique,vctrs_sclr) S3method(unique,vctrs_vctr) S3method(vec_arith,Date) S3method(vec_arith,POSIXct) S3method(vec_arith,POSIXlt) S3method(vec_arith,default) S3method(vec_arith,difftime) S3method(vec_arith,factor) S3method(vec_arith,logical) S3method(vec_arith,numeric) S3method(vec_arith.Date,Date) S3method(vec_arith.Date,POSIXct) S3method(vec_arith.Date,POSIXlt) S3method(vec_arith.Date,default) S3method(vec_arith.Date,difftime) S3method(vec_arith.Date,numeric) S3method(vec_arith.POSIXct,Date) S3method(vec_arith.POSIXct,POSIXct) S3method(vec_arith.POSIXct,POSIXlt) S3method(vec_arith.POSIXct,default) S3method(vec_arith.POSIXct,difftime) S3method(vec_arith.POSIXct,numeric) S3method(vec_arith.POSIXlt,Date) S3method(vec_arith.POSIXlt,POSIXct) S3method(vec_arith.POSIXlt,POSIXlt) S3method(vec_arith.POSIXlt,default) S3method(vec_arith.POSIXlt,difftime) S3method(vec_arith.POSIXlt,numeric) S3method(vec_arith.difftime,Date) S3method(vec_arith.difftime,MISSING) S3method(vec_arith.difftime,POSIXct) S3method(vec_arith.difftime,POSIXlt) S3method(vec_arith.difftime,default) S3method(vec_arith.difftime,difftime) S3method(vec_arith.difftime,numeric) S3method(vec_arith.logical,default) S3method(vec_arith.logical,logical) S3method(vec_arith.logical,numeric) S3method(vec_arith.numeric,Date) S3method(vec_arith.numeric,POSIXct) S3method(vec_arith.numeric,POSIXlt) S3method(vec_arith.numeric,default) S3method(vec_arith.numeric,difftime) S3method(vec_arith.numeric,logical) S3method(vec_arith.numeric,numeric) S3method(vec_cast,Date) S3method(vec_cast,POSIXct) S3method(vec_cast,POSIXlt) S3method(vec_cast,character) S3method(vec_cast,character.factor) S3method(vec_cast,character.ordered) S3method(vec_cast,complex) S3method(vec_cast,data.frame) S3method(vec_cast,data.frame.data.table) S3method(vec_cast,data.table.data.frame) S3method(vec_cast,data.table.data.table) S3method(vec_cast,difftime) S3method(vec_cast,double) S3method(vec_cast,double.exclude) S3method(vec_cast,double.omit) S3method(vec_cast,exclude.double) S3method(vec_cast,exclude.exclude) S3method(vec_cast,exclude.integer) S3method(vec_cast,factor) S3method(vec_cast,factor.character) S3method(vec_cast,factor.factor) S3method(vec_cast,integer) S3method(vec_cast,integer.exclude) S3method(vec_cast,integer.omit) S3method(vec_cast,integer64) S3method(vec_cast,list) S3method(vec_cast,list.vctrs_list_of) S3method(vec_cast,logical) S3method(vec_cast,omit.double) S3method(vec_cast,omit.integer) S3method(vec_cast,omit.omit) S3method(vec_cast,ordered) S3method(vec_cast,ordered.character) S3method(vec_cast,ordered.ordered) S3method(vec_cast,raw) S3method(vec_cast,table.table) S3method(vec_cast,vctrs_list_of) S3method(vec_cast,vctrs_list_of.list) S3method(vec_cast,vctrs_rcrd) S3method(vec_cast,vctrs_rcrd.vctrs_rcrd) S3method(vec_cast,vctrs_vctr) S3method(vec_cast.Date,Date) S3method(vec_cast.Date,POSIXct) S3method(vec_cast.Date,POSIXlt) S3method(vec_cast.POSIXct,Date) S3method(vec_cast.POSIXct,POSIXct) S3method(vec_cast.POSIXct,POSIXlt) S3method(vec_cast.POSIXlt,Date) S3method(vec_cast.POSIXlt,POSIXct) S3method(vec_cast.POSIXlt,POSIXlt) S3method(vec_cast.character,character) S3method(vec_cast.complex,complex) S3method(vec_cast.complex,double) S3method(vec_cast.complex,integer) S3method(vec_cast.complex,logical) S3method(vec_cast.data.frame,data.frame) S3method(vec_cast.difftime,difftime) S3method(vec_cast.double,double) S3method(vec_cast.double,integer) S3method(vec_cast.double,integer64) S3method(vec_cast.double,logical) S3method(vec_cast.integer,double) S3method(vec_cast.integer,integer) S3method(vec_cast.integer,integer64) S3method(vec_cast.integer,logical) S3method(vec_cast.integer64,double) S3method(vec_cast.integer64,integer) S3method(vec_cast.integer64,integer64) S3method(vec_cast.integer64,logical) S3method(vec_cast.list,list) S3method(vec_cast.logical,double) S3method(vec_cast.logical,integer) S3method(vec_cast.logical,integer64) S3method(vec_cast.logical,logical) S3method(vec_cast.raw,raw) S3method(vec_cast.vctrs_list_of,vctrs_list_of) S3method(vec_cbind_frame_ptype,default) S3method(vec_cbind_frame_ptype,sf) S3method(vec_math,Date) S3method(vec_math,POSIXct) S3method(vec_math,POSIXlt) S3method(vec_math,default) S3method(vec_math,factor) S3method(vec_math,vctrs_rcrd) S3method(vec_proxy,"vctrs:::common_class_fallback") S3method(vec_proxy,AsIs) S3method(vec_proxy,Date) S3method(vec_proxy,POSIXct) S3method(vec_proxy,POSIXlt) S3method(vec_proxy,default) S3method(vec_proxy,exclude) S3method(vec_proxy,factor) S3method(vec_proxy,numeric_version) S3method(vec_proxy,omit) S3method(vec_proxy,ordered) S3method(vec_proxy,vctrs_list_of) S3method(vec_proxy,vctrs_rcrd) S3method(vec_proxy,vctrs_vctr) S3method(vec_proxy_compare,AsIs) S3method(vec_proxy_compare,POSIXlt) S3method(vec_proxy_compare,array) S3method(vec_proxy_compare,default) S3method(vec_proxy_compare,list) S3method(vec_proxy_compare,raw) S3method(vec_proxy_equal,AsIs) S3method(vec_proxy_equal,POSIXlt) S3method(vec_proxy_equal,array) S3method(vec_proxy_equal,default) S3method(vec_proxy_equal,integer64) S3method(vec_proxy_equal,numeric_version) S3method(vec_proxy_order,AsIs) S3method(vec_proxy_order,array) S3method(vec_proxy_order,default) S3method(vec_proxy_order,list) S3method(vec_proxy_order,raw) S3method(vec_ptype2,AsIs) S3method(vec_ptype2,Date) S3method(vec_ptype2,POSIXct) S3method(vec_ptype2,POSIXlt) S3method(vec_ptype2,character) S3method(vec_ptype2,character.factor) S3method(vec_ptype2,character.ordered) S3method(vec_ptype2,complex) S3method(vec_ptype2,data.frame) S3method(vec_ptype2,data.frame.data.table) S3method(vec_ptype2,data.table.data.frame) S3method(vec_ptype2,data.table.data.table) S3method(vec_ptype2,difftime) S3method(vec_ptype2,double) S3method(vec_ptype2,double.exclude) S3method(vec_ptype2,double.omit) S3method(vec_ptype2,exclude.double) S3method(vec_ptype2,exclude.exclude) S3method(vec_ptype2,exclude.integer) S3method(vec_ptype2,factor) S3method(vec_ptype2,factor.character) S3method(vec_ptype2,factor.factor) S3method(vec_ptype2,factor.ordered) S3method(vec_ptype2,integer) S3method(vec_ptype2,integer.exclude) S3method(vec_ptype2,integer.omit) S3method(vec_ptype2,integer64) S3method(vec_ptype2,list) S3method(vec_ptype2,list.vctrs_list_of) S3method(vec_ptype2,logical) S3method(vec_ptype2,omit.double) S3method(vec_ptype2,omit.integer) S3method(vec_ptype2,omit.omit) S3method(vec_ptype2,ordered) S3method(vec_ptype2,ordered.character) S3method(vec_ptype2,ordered.factor) S3method(vec_ptype2,ordered.ordered) S3method(vec_ptype2,raw) S3method(vec_ptype2,table.table) S3method(vec_ptype2,vctrs_list_of) S3method(vec_ptype2,vctrs_list_of.list) S3method(vec_ptype2,vctrs_partial_factor) S3method(vec_ptype2,vctrs_partial_frame) S3method(vec_ptype2.AsIs,AsIs) S3method(vec_ptype2.Date,Date) S3method(vec_ptype2.Date,POSIXct) S3method(vec_ptype2.Date,POSIXlt) S3method(vec_ptype2.POSIXct,Date) S3method(vec_ptype2.POSIXct,POSIXct) S3method(vec_ptype2.POSIXct,POSIXlt) S3method(vec_ptype2.POSIXlt,Date) S3method(vec_ptype2.POSIXlt,POSIXct) S3method(vec_ptype2.POSIXlt,POSIXlt) S3method(vec_ptype2.character,character) S3method(vec_ptype2.complex,complex) S3method(vec_ptype2.complex,double) S3method(vec_ptype2.complex,integer) S3method(vec_ptype2.data.frame,data.frame) S3method(vec_ptype2.data.frame,vctrs_partial_frame) S3method(vec_ptype2.difftime,difftime) S3method(vec_ptype2.double,complex) S3method(vec_ptype2.double,double) S3method(vec_ptype2.double,integer) S3method(vec_ptype2.double,logical) S3method(vec_ptype2.factor,vctrs_partial_factor) S3method(vec_ptype2.integer,complex) S3method(vec_ptype2.integer,double) S3method(vec_ptype2.integer,integer) S3method(vec_ptype2.integer,integer64) S3method(vec_ptype2.integer,logical) S3method(vec_ptype2.integer64,integer) S3method(vec_ptype2.integer64,integer64) S3method(vec_ptype2.integer64,logical) S3method(vec_ptype2.list,list) S3method(vec_ptype2.logical,double) S3method(vec_ptype2.logical,integer) S3method(vec_ptype2.logical,integer64) S3method(vec_ptype2.logical,logical) S3method(vec_ptype2.raw,raw) S3method(vec_ptype2.vctrs_list_of,vctrs_list_of) S3method(vec_ptype2.vctrs_partial_factor,factor) S3method(vec_ptype2.vctrs_partial_factor,vctrs_partial_factor) S3method(vec_ptype2.vctrs_partial_frame,data.frame) S3method(vec_ptype2.vctrs_partial_frame,vctrs_partial_frame) S3method(vec_ptype_abbr,"NULL") S3method(vec_ptype_abbr,AsIs) S3method(vec_ptype_abbr,Date) S3method(vec_ptype_abbr,POSIXct) S3method(vec_ptype_abbr,POSIXlt) S3method(vec_ptype_abbr,data.frame) S3method(vec_ptype_abbr,data.table) S3method(vec_ptype_abbr,default) S3method(vec_ptype_abbr,difftime) S3method(vec_ptype_abbr,factor) S3method(vec_ptype_abbr,integer64) S3method(vec_ptype_abbr,ordered) S3method(vec_ptype_abbr,table) S3method(vec_ptype_abbr,vctrs_list_of) S3method(vec_ptype_abbr,vctrs_partial_factor) S3method(vec_ptype_abbr,vctrs_partial_frame) S3method(vec_ptype_abbr,vctrs_unspecified) S3method(vec_ptype_finalise,default) S3method(vec_ptype_finalise,vctrs_partial) S3method(vec_ptype_finalise,vctrs_partial_factor) S3method(vec_ptype_finalise,vctrs_partial_frame) S3method(vec_ptype_full,"NULL") S3method(vec_ptype_full,AsIs) S3method(vec_ptype_full,Date) S3method(vec_ptype_full,POSIXct) S3method(vec_ptype_full,POSIXlt) S3method(vec_ptype_full,default) S3method(vec_ptype_full,difftime) S3method(vec_ptype_full,factor) S3method(vec_ptype_full,integer64) S3method(vec_ptype_full,ordered) S3method(vec_ptype_full,table) S3method(vec_ptype_full,vctrs_list_of) S3method(vec_ptype_full,vctrs_partial_factor) S3method(vec_ptype_full,vctrs_partial_frame) S3method(vec_restore,AsIs) S3method(vec_restore,Date) S3method(vec_restore,POSIXct) S3method(vec_restore,POSIXlt) S3method(vec_restore,data.frame) S3method(vec_restore,default) S3method(vec_restore,exclude) S3method(vec_restore,factor) S3method(vec_restore,omit) S3method(vec_restore,ordered) S3method(vec_restore,table) S3method(vec_restore,vctrs_rcrd) S3method(vec_restore,vctrs_vctr) S3method(xtfrm,vctrs_sclr) S3method(xtfrm,vctrs_vctr) export("%0%") export("field<-") export("vec_slice<-") export(MISSING) export(allow_lossy_cast) export(as_list_of) export(data_frame) export(df_cast) export(df_list) export(df_ptype2) export(field) export(fields) export(is_list_of) export(is_partial) export(list_all_size) export(list_all_vectors) export(list_check_all_size) export(list_check_all_vectors) export(list_drop_empty) export(list_of) export(list_sizes) export(list_unchop) export(maybe_lossy_cast) export(n_fields) export(new_data_frame) export(new_date) export(new_datetime) export(new_duration) export(new_factor) export(new_list_of) export(new_ordered) export(new_partial) export(new_rcrd) export(new_vctr) export(num_as_location) export(num_as_location2) export(obj_check_list) export(obj_check_vector) export(obj_is_list) export(obj_is_vector) export(obj_print) export(obj_print_data) export(obj_print_footer) export(obj_print_header) export(obj_str) export(obj_str_data) export(obj_str_footer) export(obj_str_header) export(partial_factor) export(partial_frame) export(s3_register) export(stop_incompatible_cast) export(stop_incompatible_op) export(stop_incompatible_size) export(stop_incompatible_type) export(tib_cast) export(tib_ptype2) export(unspecified) export(vec_any_missing) export(vec_arith) export(vec_arith.Date) export(vec_arith.POSIXct) export(vec_arith.POSIXlt) export(vec_arith.difftime) export(vec_arith.logical) export(vec_arith.numeric) export(vec_arith_base) export(vec_as_index) export(vec_as_location) export(vec_as_location2) export(vec_as_names) export(vec_as_names_legacy) export(vec_as_subscript) export(vec_as_subscript2) export(vec_assert) export(vec_assign) export(vec_c) export(vec_cast) export(vec_cast.Date) export(vec_cast.POSIXct) export(vec_cast.POSIXlt) export(vec_cast.character) export(vec_cast.complex) export(vec_cast.data.frame) export(vec_cast.difftime) export(vec_cast.double) export(vec_cast.factor) export(vec_cast.integer) export(vec_cast.integer64) export(vec_cast.list) export(vec_cast.logical) export(vec_cast.ordered) export(vec_cast.raw) export(vec_cast.vctrs_list_of) export(vec_cast_common) export(vec_cbind) export(vec_cbind_frame_ptype) export(vec_check_list) export(vec_check_size) export(vec_chop) export(vec_compare) export(vec_count) export(vec_data) export(vec_default_cast) export(vec_default_ptype2) export(vec_detect_complete) export(vec_detect_missing) export(vec_duplicate_any) export(vec_duplicate_detect) export(vec_duplicate_id) export(vec_empty) export(vec_equal) export(vec_equal_na) export(vec_expand_grid) export(vec_fill_missing) export(vec_group_id) export(vec_group_loc) export(vec_group_rle) export(vec_identify_runs) export(vec_in) export(vec_init) export(vec_init_along) export(vec_interleave) export(vec_is) export(vec_is_empty) export(vec_is_list) export(vec_locate_matches) export(vec_locate_sorted_groups) export(vec_match) export(vec_math) export(vec_math_base) export(vec_names) export(vec_names2) export(vec_order) export(vec_proxy) export(vec_proxy_compare) export(vec_proxy_equal) export(vec_proxy_order) export(vec_ptype) export(vec_ptype2) export(vec_ptype2.AsIs) export(vec_ptype2.Date) export(vec_ptype2.POSIXct) export(vec_ptype2.POSIXlt) export(vec_ptype2.character) export(vec_ptype2.complex) export(vec_ptype2.data.frame) export(vec_ptype2.difftime) export(vec_ptype2.double) export(vec_ptype2.factor) export(vec_ptype2.integer) export(vec_ptype2.integer64) export(vec_ptype2.list) export(vec_ptype2.logical) export(vec_ptype2.ordered) export(vec_ptype2.raw) export(vec_ptype2.vctrs_list_of) export(vec_ptype_abbr) export(vec_ptype_common) export(vec_ptype_finalise) export(vec_ptype_full) export(vec_ptype_show) export(vec_rank) export(vec_rbind) export(vec_recycle) export(vec_recycle_common) export(vec_rep) export(vec_rep_each) export(vec_repeat) export(vec_restore) export(vec_run_sizes) export(vec_seq_along) export(vec_set_difference) export(vec_set_intersect) export(vec_set_names) export(vec_set_symmetric_difference) export(vec_set_union) export(vec_size) export(vec_size_common) export(vec_slice) export(vec_sort) export(vec_split) export(vec_type) export(vec_type2) export(vec_type_common) export(vec_unchop) export(vec_unique) export(vec_unique_count) export(vec_unique_loc) export(vec_unrep) import(rlang) importFrom(stats,median) importFrom(stats,na.exclude) importFrom(stats,na.fail) importFrom(stats,na.omit) importFrom(stats,quantile) useDynLib(vctrs, .registration = TRUE) vctrs/LICENSE.note0000644000176200001440000004136714276722575013420 0ustar liggesusersThe implementation of vec_order() is based on data.table’s forder() and their earlier contribution to R’s order(). This warrants placing specific files in the vctrs package under the MPL-2.0 license used by data.table. Files named with the pattern of `src/order-*.c` and `src/order-*.h` are additionally under the MPL-2.0 license. MPL-2.0 License ---------------------------------------------------------------- Mozilla Public License Version 2.0 ================================== 1. Definitions -------------- 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. "Modifications" means any of the following: (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or (b) any new file in Source Code Form that contains any Covered Software. 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions -------------------------------- 2.1. Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: (a) for any code that a Contributor has removed from Covered Software; or (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or (c) under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities ------------------- 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation --------------------------------------------------- If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination -------------- 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ************************************************************************ * * * 6. Disclaimer of Warranty * * ------------------------- * * * * Covered Software is provided under this License on an "as is" * * basis, without warranty of any kind, either expressed, implied, or * * statutory, including, without limitation, warranties that the * * Covered Software is free of defects, merchantable, fit for a * * particular purpose or non-infringing. The entire risk as to the * * quality and performance of the Covered Software is with You. * * Should any Covered Software prove defective in any respect, You * * (not any Contributor) assume the cost of any necessary servicing, * * repair, or correction. This disclaimer of warranty constitutes an * * essential part of this License. No use of any Covered Software is * * authorized under this License except under this disclaimer. * * * ************************************************************************ ************************************************************************ * * * 7. Limitation of Liability * * -------------------------- * * * * Under no circumstances and under no legal theory, whether tort * * (including negligence), contract, or otherwise, shall any * * Contributor, or anyone who distributes Covered Software as * * permitted above, be liable to You for any direct, indirect, * * special, incidental, or consequential damages of any character * * including, without limitation, damages for lost profits, loss of * * goodwill, work stoppage, computer failure or malfunction, or any * * and all other commercial damages or losses, even if such party * * shall have been informed of the possibility of such damages. This * * limitation of liability shall not apply to liability for death or * * personal injury resulting from such party's negligence to the * * extent applicable law prohibits such limitation. Some * * jurisdictions do not allow the exclusion or limitation of * * incidental or consequential damages, so this exclusion and * * limitation may not apply to You. * * * ************************************************************************ 8. Litigation ------------- Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. Miscellaneous ---------------- This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License --------------------------- 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice ------------------------------------------- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. Exhibit B - "Incompatible With Secondary Licenses" Notice --------------------------------------------------------- This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. vctrs/LICENSE0000644000176200001440000000005314401442234012414 0ustar liggesusersYEAR: 2023 COPYRIGHT HOLDER: vctrs authors vctrs/README.md0000644000176200001440000000744114532374113012703 0ustar liggesusers # vctrs [![Codecov test coverage](https://codecov.io/gh/r-lib/vctrs/branch/main/graph/badge.svg)](https://app.codecov.io/gh/r-lib/vctrs?branch=main) ![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg) [![R-CMD-check](https://github.com/r-lib/vctrs/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/vctrs/actions/workflows/R-CMD-check.yaml) There are three main goals to the vctrs package, each described in a vignette: - To propose `vec_size()` and `vec_ptype()` as alternatives to `length()` and `class()`; `vignette("type-size")`. These definitions are paired with a framework for size-recycling and type-coercion. `ptype` should evoke the notion of a prototype, i.e. the original or typical form of something. - To define size- and type-stability as desirable function properties, use them to analyse existing base functions, and to propose better alternatives; `vignette("stability")`. This work has been particularly motivated by thinking about the ideal properties of `c()`, `ifelse()`, and `rbind()`. - To provide a new `vctr` base class that makes it easy to create new S3 vectors; `vignette("s3-vector")`. vctrs provides methods for many base generics in terms of a few new vctrs generics, making implementation considerably simpler and more robust. vctrs is a developer-focussed package. Understanding and extending vctrs requires some effort from developers, but should be invisible to most users. It’s our hope that having an underlying theory will mean that users can build up an accurate mental model without explicitly learning the theory. vctrs will typically be used by other packages, making it easy for them to provide new classes of S3 vectors that are supported throughout the tidyverse (and beyond). For that reason, vctrs has few dependencies. ## Installation Install vctrs from CRAN with: ``` r install.packages("vctrs") ``` Alternatively, if you need the development version, install it with: ``` r # install.packages("pak") pak::pak("r-lib/vctrs") ``` ## Usage ``` r library(vctrs) # Sizes str(vec_size_common(1, 1:10)) #> int 10 str(vec_recycle_common(1, 1:10)) #> List of 2 #> $ : num [1:10] 1 1 1 1 1 1 1 1 1 1 #> $ : int [1:10] 1 2 3 4 5 6 7 8 9 10 # Prototypes str(vec_ptype_common(FALSE, 1L, 2.5)) #> num(0) str(vec_cast_common(FALSE, 1L, 2.5)) #> List of 3 #> $ : num 0 #> $ : num 1 #> $ : num 2.5 ``` ## Motivation The original motivation for vctrs comes from two separate but related problems. The first problem is that `base::c()` has rather undesirable behaviour when you mix different S3 vectors: ``` r # combining factors makes integers c(factor("a"), factor("b")) #> [1] 1 1 # combining dates and date-times gives incorrect values; also, order matters dt <- as.Date("2020-01-01") dttm <- as.POSIXct(dt) c(dt, dttm) #> [1] "2020-01-01" "4321940-06-07" c(dttm, dt) #> [1] "2019-12-31 19:00:00 EST" "1970-01-01 00:04:22 EST" ``` This behaviour arises because `c()` has dual purposes: as well as its primary duty of combining vectors, it has a secondary duty of stripping attributes. For example, `?POSIXct` suggests that you should use `c()` if you want to reset the timezone. The second problem is that `dplyr::bind_rows()` is not extensible by others. Currently, it handles arbitrary S3 classes using heuristics, but these often fail, and it feels like we really need to think through the problem in order to build a principled solution. This intersects with the need to cleanly support more types of data frame columns, including lists of data frames, data frames, and matrices. vctrs/man/0000755000176200001440000000000014532404540012167 5ustar liggesusersvctrs/man/vctrs-data-frame.Rd0000644000176200001440000000075714276722575015647 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{vctrs-data-frame} \alias{vctrs-data-frame} \alias{vec_ptype2.data.frame} \alias{vec_cast.data.frame} \title{vctrs methods for data frames} \usage{ \method{vec_ptype2}{data.frame}(x, y, ...) \method{vec_cast}{data.frame}(x, to, ...) } \description{ These functions help the base data.frame class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/theory-faq-coercion.Rd0000644000176200001440000002713114511320527016337 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{theory-faq-coercion} \alias{theory-faq-coercion} \title{FAQ - How does coercion work in vctrs?} \description{ This is an overview of the usage of \code{vec_ptype2()} and \code{vec_cast()} and their role in the vctrs coercion mechanism. Related topics: \itemize{ \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } \subsection{Combination mechanism in vctrs}{ The coercion system in vctrs is designed to make combination of multiple inputs consistent and extensible. Combinations occur in many places, such as row-binding, joins, subset-assignment, or grouped summary functions that use the split-apply-combine strategy. For example: \if{html}{\out{
}}\preformatted{vec_c(TRUE, 1) #> [1] 1 1 vec_c("a", 1) #> Error in `vec_c()`: #> ! Can't combine `..1` and `..2` . vec_rbind( data.frame(x = TRUE), data.frame(x = 1, y = 2) ) #> x y #> 1 1 NA #> 2 1 2 vec_rbind( data.frame(x = "a"), data.frame(x = 1, y = 2) ) #> Error in `vec_rbind()`: #> ! Can't combine `..1$x` and `..2$x` . }\if{html}{\out{
}} One major goal of vctrs is to provide a central place for implementing the coercion methods that make generic combinations possible. The two relevant generics are \code{vec_ptype2()} and \code{vec_cast()}. They both take two arguments and perform \strong{double dispatch}, meaning that a method is selected based on the classes of both inputs. The general mechanism for combining multiple inputs is: \enumerate{ \item Find the common type of a set of inputs by reducing (as in \code{base::Reduce()} or \code{purrr::reduce()}) the \code{vec_ptype2()} binary function over the set. \item Convert all inputs to the common type with \code{vec_cast()}. \item Initialise the output vector as an instance of this common type with \code{vec_init()}. \item Fill the output vector with the elements of the inputs using \code{vec_assign()}. } The last two steps may require \code{vec_proxy()} and \code{vec_restore()} implementations, unless the attributes of your class are constant and do not depend on the contents of the vector. We focus here on the first two steps, which require \code{vec_ptype2()} and \code{vec_cast()} implementations. } \subsection{\code{vec_ptype2()}}{ Methods for \code{vec_ptype2()} are passed two \emph{prototypes}, i.e. two inputs emptied of their elements. They implement two behaviours: \itemize{ \item If the types of their inputs are compatible, indicate which of them is the richer type by returning it. If the types are of equal resolution, return any of the two. \item Throw an error with \code{stop_incompatible_type()} when it can be determined from the attributes that the types of the inputs are not compatible. } \subsection{Type compatibility}{ A type is \strong{compatible} with another type if the values it represents are a subset or a superset of the values of the other type. The notion of “value” is to be interpreted at a high level, in particular it is not the same as the memory representation. For example, factors are represented in memory with integers but their values are more related to character vectors than to round numbers: \if{html}{\out{
}}\preformatted{# Two factors are compatible vec_ptype2(factor("a"), factor("b")) #> factor() #> Levels: a b # Factors are compatible with a character vec_ptype2(factor("a"), "b") #> character(0) # But they are incompatible with integers vec_ptype2(factor("a"), 1L) #> Error: #> ! Can't combine `factor("a")` > and `1L` . }\if{html}{\out{
}} } \subsection{Richness of type}{ Richness of type is not a very precise notion. It can be about richer data (for instance a \code{double} vector covers more values than an integer vector), richer behaviour (a \code{data.table} has richer behaviour than a \code{data.frame}), or both. If you have trouble determining which one of the two types is richer, it probably means they shouldn’t be automatically coercible. Let’s look again at what happens when we combine a factor and a character: \if{html}{\out{
}}\preformatted{vec_ptype2(factor("a"), "b") #> character(0) }\if{html}{\out{
}} The ptype2 method for \verb{} and \verb{>} returns \verb{} because the former is a richer type. The factor can only contain \code{"a"} strings, whereas the character can contain any strings. In this sense, factors are a \emph{subset} of character. Note that another valid behaviour would be to throw an incompatible type error. This is what a strict factor implementation would do. We have decided to be laxer in vctrs because it is easy to inadvertently create factors instead of character vectors, especially with older versions of R where \code{stringsAsFactors} is still true by default. } \subsection{Consistency and symmetry on permutation}{ Each ptype2 method should strive to have exactly the same behaviour when the inputs are permuted. This is not always possible, for example factor levels are aggregated in order: \if{html}{\out{
}}\preformatted{vec_ptype2(factor(c("a", "c")), factor("b")) #> factor() #> Levels: a c b vec_ptype2(factor("b"), factor(c("a", "c"))) #> factor() #> Levels: b a c }\if{html}{\out{
}} In any case, permuting the input should not return a fundamentally different type or introduce an incompatible type error. } \subsection{Coercion hierarchy}{ The classes that you can coerce together form a coercion (or subtyping) hierarchy. Below is a schema of the hierarchy for the base types like integer and factor. In this diagram the directions of the arrows express which type is richer. They flow from the bottom (more constrained types) to the top (richer types). \figure{coerce.png} A coercion hierarchy is distinct from the structural hierarchy implied by memory types and classes. For instance, in a structural hierarchy, factors are built on top of integers. But in the coercion hierarchy they are more related to character vectors. Similarly, subclasses are not necessarily coercible with their superclasses because the coercion and structural hierarchies are separate. } \subsection{Implementing a coercion hierarchy}{ As a class implementor, you have two options. The simplest is to create an entirely separate hierarchy. The date and date-time classes are an example of an S3-based hierarchy that is completely separate. Alternatively, you can integrate your class in an existing hierarchy, typically by adding parent nodes on top of the hierarchy (your class is richer), by adding children node at the root of the hierarchy (your class is more constrained), or by inserting a node in the tree. These coercion hierarchies are \emph{implicit}, in the sense that they are implied by the \code{vec_ptype2()} implementations. There is no structured way to create or modify a hierarchy, instead you need to implement the appropriate coercion methods for all the types in your hierarchy, and diligently return the richer type in each case. The \code{vec_ptype2()} implementations are not transitive nor inherited, so all pairwise methods between classes lying on a given path must be implemented manually. This is something we might make easier in the future. } } \subsection{\code{vec_cast()}}{ The second generic, \code{vec_cast()}, is the one that looks at the data and actually performs the conversion. Because it has access to more information than \code{vec_ptype2()}, it may be stricter and cause an error in more cases. \code{vec_cast()} has three possible behaviours: \itemize{ \item Determine that the prototypes of the two inputs are not compatible. This must be decided in exactly the same way as for \code{vec_ptype2()}. Call \code{stop_incompatible_cast()} if you can determine from the attributes that the types are not compatible. \item Detect incompatible values. Usually this is because the target type is too restricted for the values supported by the input type. For example, a fractional number can’t be converted to an integer. The method should throw an error in that case. \item Return the input vector converted to the target type if all values are compatible. Whereas \code{vec_ptype2()} must return the same type when the inputs are permuted, \code{vec_cast()} is \emph{directional}. It always returns the type of the right-hand side, or dies trying. } } \subsection{Double dispatch}{ The dispatch mechanism for \code{vec_ptype2()} and \code{vec_cast()} looks like S3 but is actually a custom mechanism. Compared to S3, it has the following differences: \itemize{ \item It dispatches on the classes of the first two inputs. \item There is no inheritance of ptype2 and cast methods. This is because the S3 class hierarchy is not necessarily the same as the coercion hierarchy. \item \code{NextMethod()} does not work. Parent methods must be called explicitly if necessary. \item The default method is hard-coded. } } \subsection{Data frames}{ The determination of the common type of data frames with \code{vec_ptype2()} happens in three steps: \enumerate{ \item Match the columns of the two input data frames. If some columns don’t exist, they are created and filled with adequately typed \code{NA} values. \item Find the common type for each column by calling \code{vec_ptype2()} on each pair of matched columns. \item Find the common data frame type. For example the common type of a grouped tibble and a tibble is a grouped tibble because the latter is the richer type. The common type of a data table and a data frame is a data table. } \code{vec_cast()} operates similarly. If a data frame is cast to a target type that has fewer columns, this is an error. If you are implementing coercion methods for data frames, you will need to explicitly call the parent methods that perform the common type determination or the type conversion described above. These are exported as \code{\link[=df_ptype2]{df_ptype2()}} and \code{\link[=df_cast]{df_cast()}}. \subsection{Data frame fallbacks}{ Being too strict with data frame combinations would cause too much pain because there are many data frame subclasses in the wild that don’t implement vctrs methods. We have decided to implement a special fallback behaviour for foreign data frames. Incompatible data frames fall back to a base data frame: \if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1) df2 <- structure(df1, class = c("foreign_df", "data.frame")) vec_rbind(df1, df2) #> x #> 1 1 #> 2 1 }\if{html}{\out{
}} When a tibble is involved, we fall back to tibble: \if{html}{\out{
}}\preformatted{df3 <- tibble::as_tibble(df1) vec_rbind(df1, df3) #> # A tibble: 2 x 1 #> x #> #> 1 1 #> 2 1 }\if{html}{\out{
}} These fallbacks are not ideal but they make sense because all data frames share a common data structure. This is not generally the case for vectors. For example factors and characters have different representations, and it is not possible to find a fallback time mechanically. However this fallback has a big downside: implementing vctrs methods for your data frame subclass is a breaking behaviour change. The proper coercion behaviour for your data frame class should be specified as soon as possible to limit the consequences of changing the behaviour of your class in R scripts. } } } vctrs/man/vec_duplicate.Rd0000644000176200001440000000413114276722575015304 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_duplicate} \alias{vec_duplicate} \alias{vec_duplicate_any} \alias{vec_duplicate_detect} \alias{vec_duplicate_id} \title{Find duplicated values} \usage{ vec_duplicate_any(x) vec_duplicate_detect(x) vec_duplicate_id(x) } \arguments{ \item{x}{A vector (including a data frame).} } \value{ \itemize{ \item \code{vec_duplicate_any()}: a logical vector of length 1. \item \code{vec_duplicate_detect()}: a logical vector the same length as \code{x}. \item \code{vec_duplicate_id()}: an integer vector the same length as \code{x}. } } \description{ \itemize{ \item \code{vec_duplicate_any()}: detects the presence of duplicated values, similar to \code{\link[=anyDuplicated]{anyDuplicated()}}. \item \code{vec_duplicate_detect()}: returns a logical vector describing if each element of the vector is duplicated elsewhere. Unlike \code{\link[=duplicated]{duplicated()}}, it reports all duplicated values, not just the second and subsequent repetitions. \item \code{vec_duplicate_id()}: returns an integer vector giving the location of the first occurrence of the value. } } \section{Missing values}{ In most cases, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. This behaviour would be unappealing here, so these functions consider all \code{NAs} to be equal. (Similarly, all \code{NaN} are also considered to be equal.) } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ vec_duplicate_any(1:10) vec_duplicate_any(c(1, 1:10)) x <- c(10, 10, 20, 30, 30, 40) vec_duplicate_detect(x) # Note that `duplicated()` doesn't consider the first instance to # be a duplicate duplicated(x) # Identify elements of a vector by the location of the first element that # they're equal to: vec_duplicate_id(x) # Location of the unique values: vec_unique_loc(x) # Equivalent to `duplicated()`: vec_duplicate_id(x) == seq_along(x) } \seealso{ \code{\link[=vec_unique]{vec_unique()}} for functions that work with the dual of duplicated values: unique values. } vctrs/man/faq-error-incompatible-attributes.Rd0000644000176200001440000000233714276722575021231 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-error-incompatible-attributes} \alias{faq-error-incompatible-attributes} \title{FAQ - Error/Warning: Some attributes are incompatible} \description{ This error occurs when \code{\link[=vec_ptype2]{vec_ptype2()}} or \code{\link[=vec_cast]{vec_cast()}} are supplied vectors of the same classes with different attributes. In this case, vctrs doesn't know how to combine the inputs. To fix this error, the maintainer of the class should implement self-to-self coercion methods for \code{\link[=vec_ptype2]{vec_ptype2()}} and \code{\link[=vec_cast]{vec_cast()}}. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } vctrs/man/internal-faq-matches-algorithm.Rd0000644000176200001440000002751214315060307020452 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match.R \name{internal-faq-matches-algorithm} \alias{internal-faq-matches-algorithm} \title{Internal FAQ - Implementation of \code{vec_locate_matches()}} \description{ \code{vec_locate_matches()} is similar to \code{vec_match()}, but detects \emph{all} matches by default, and can match on conditions other than equality (like \code{>=} and \code{<}). There are also various other arguments to limit or adjust exactly which kinds of matches are returned. Here is an example: \if{html}{\out{
}}\preformatted{x <- c("a", "b", "a", "c", "d") y <- c("d", "b", "a", "d", "a", "e") # For each value of `x`, find all matches in `y` # - The "c" in `x` doesn't have a match, so it gets an NA location by default # - The "e" in `y` isn't matched by anything in `x`, so it is dropped by default vec_locate_matches(x, y) #> needles haystack #> 1 1 3 #> 2 1 5 #> 3 2 2 #> 4 3 3 #> 5 3 5 #> 6 4 NA #> 7 5 1 #> 8 5 4 }\if{html}{\out{
}} } \section{Algorithm description}{ \subsection{Overview and \code{==}}{ The simplest (approximate) way to think about the algorithm that \code{df_locate_matches_recurse()} uses is that it sorts both inputs, and then starts at the midpoint in \code{needles} and uses a binary search to find each needle in \code{haystack}. Since there might be multiple of the same needle, we find the location of the lower and upper duplicate of that needle to handle all duplicates of that needle at once. Similarly, if there are duplicates of a matching \code{haystack} value, we find the lower and upper duplicates of the match. If the condition is \code{==}, that is pretty much all we have to do. For each needle, we then record 3 things: the location of the needle, the location of the lower match in the haystack, and the match size (i.e. \code{loc_upper_match - loc_lower_match + 1}). This later gets expanded in \code{expand_compact_indices()} into the actual output. After recording the matches for a single needle, we perform the same procedure on the LHS and RHS of that needle (remember we started on the midpoint needle). i.e. from \verb{[1, loc_needle-1]} and \verb{[loc_needle+1, size_needles]}, again taking the midpoint of those two ranges, finding their respective needle in the haystack, recording matches, and continuing on to the next needle. This iteration proceeds until we run out of needles. When we have a data frame with multiple columns, we add a layer of recursion to this. For the first column, we find the locations of the lower/upper duplicate of the current needle, and we find the locations of the lower/upper matches in the haystack. If we are on the final column in the data frame, we record the matches, otherwise we pass this information on to another call to \code{df_locate_matches_recurse()}, bumping the column index and using these refined lower/upper bounds as the starting bounds for the next column. I think an example would be useful here, so below I step through this process for a few iterations: \if{html}{\out{
}}\preformatted{# these are sorted already for simplicity needles <- data_frame(x = c(1, 1, 2, 2, 2, 3), y = c(1, 2, 3, 4, 5, 3)) haystack <- data_frame(x = c(1, 1, 2, 2, 3), y = c(2, 3, 4, 4, 1)) needles #> x y #> 1 1 1 #> 2 1 2 #> 3 2 3 #> 4 2 4 #> 5 2 5 #> 6 3 3 haystack #> x y #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 #> 5 3 1 ## Column 1, iteration 1 # start at midpoint in needles # this corresponds to x==2 loc_mid_needles <- 3L # finding all x==2 values in needles gives us: loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # compute LHS/RHS bounds for next needle lhs_loc_lower_bound_needles <- 1L # original lower bound lhs_loc_upper_bound_needles <- 2L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 6L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 6L # original upper bound # We still have a 2nd column to check. So recurse and pass on the current # duplicate and match bounds to start the 2nd column with. ## Column 2, iteration 1 # midpoint of [3, 5] # value y==4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # last column, so record matches # - this was location 4 in needles # - lower match in haystack is at loc 3 # - match size is 2 # Now handle LHS and RHS of needle midpoint lhs_loc_lower_bound_needles <- 3L # original lower bound lhs_loc_upper_bound_needles <- 3L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 5L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 5L # original upper bound ## Column 2, iteration 2 (using LHS bounds) # midpoint of [3,3] # value of y==3 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 3L # no match! no y==3 in haystack for x==2 # lower-match will always end up > upper-match in this case loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 2L # no LHS or RHS needle values to do, so we are done here ## Column 2, iteration 3 (using RHS bounds) # same as above, range of [5,5], value of y==5, which has no match in haystack ## Column 1, iteration 2 (LHS of first x needle) # Now we are done with the x needles from [3,5], so move on to the LHS and RHS # of that. Here we would do the LHS: # midpoint of [1,2] loc_mid_needles <- 1L # ... ## Column 1, iteration 3 (RHS of first x needle) # midpoint of [6,6] loc_mid_needles <- 6L # ... }\if{html}{\out{
}} In the real code, rather than comparing the double values of the columns directly, we replace each column with pseudo "joint ranks" computed between the i-th column of \code{needles} and the i-th column of \code{haystack}. It is approximately like doing \code{vec_rank(vec_c(needles$x, haystack$x), type = "dense")}, then splitting the resulting ranks back up into their corresponding needle/haystack columns. This keeps the recursion code simpler, because we only have to worry about comparing integers. } \subsection{Non-equi conditions and containers}{ At this point we can talk about non-equi conditions like \code{<} or \code{>=}. The general idea is pretty simple, and just builds on the above algorithm. For example, start with the \code{x} column from needles/haystack above: \if{html}{\out{
}}\preformatted{needles$x #> [1] 1 1 2 2 2 3 haystack$x #> [1] 1 1 2 2 3 }\if{html}{\out{
}} If we used a condition of \code{<=}, then we'd do everything the same as before: \itemize{ \item Midpoint in needles is location 3, value \code{x==2} \item Find lower/upper duplicates in needles, giving locations \verb{[3, 5]} \item Find lower/upper \emph{exact} match in haystack, giving locations \verb{[3, 4]} } At this point, we need to "adjust" the \code{haystack} match bounds to account for the condition. Since \code{haystack} is ordered, our "rule" for \code{<=} is to keep the lower match location the same, but extend the upper match location to the upper bound, so we end up with \verb{[3, 5]}. We know we can extend the upper match location because every haystack value after the exact match should be less than the needle. Then we just record the matches and continue on normally. This approach is really nice, because we only have to exactly match the \code{needle} in \code{haystack}. We don't have to compare each needle against every value in \code{haystack}, which would take a massive amount of time. However, it gets slightly more complex with data frames with multiple columns. Let's go back to our original \code{needles} and \code{haystack} data frames and apply the condition \code{<=} to each column. Here is another worked example, which shows a case where our "rule" falls apart on the second column. \if{html}{\out{
}}\preformatted{needles #> x y #> 1 1 1 #> 2 1 2 #> 3 2 3 #> 4 2 4 #> 5 2 5 #> 6 3 3 haystack #> x y #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 #> 5 3 1 # `condition = c("<=", "<=")` ## Column 1, iteration 1 # x == 2 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # because haystack is ordered we know we can expand the upper bound automatically # to include everything past the match. i.e. needle of x==2 must be less than # the haystack value at loc 5, which we can check by seeing that it is x==3. loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L ## Column 2, iteration 1 # needles range of [3, 5] # y == 4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # lets try using our rule, which tells us we should be able to extend the upper # bound: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L # but the haystack value of y at location 5 is y==1, which is not less than y==4 # in the needles! looks like our rule failed us. }\if{html}{\out{
}} If you read through the above example, you'll see that the rule didn't work here. The problem is that while \code{haystack} is ordered (by \code{vec_order()}s standards), each column isn't ordered \emph{independently} of the others. Instead, each column is ordered within the "group" created by previous columns. Concretely, \code{haystack} here has an ordered \code{x} column, but if you look at \code{haystack$y} by itself, it isn't ordered (because of that 1 at the end). That is what causes the rule to fail. \if{html}{\out{
}}\preformatted{haystack #> x y #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 #> 5 3 1 }\if{html}{\out{
}} To fix this, we need to create haystack "containers" where the values within each container are all \emph{totally} ordered. For \code{haystack} that would create 2 containers and look like: \if{html}{\out{
}}\preformatted{haystack[1:4,] #> # A tibble: 4 × 2 #> x y #> #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 haystack[5,] #> # A tibble: 1 × 2 #> x y #> #> 1 3 1 }\if{html}{\out{
}} This is essentially what \code{computing_nesting_container_ids()} does. You can actually see these ids with the helper, \code{compute_nesting_container_info()}: \if{html}{\out{
}}\preformatted{haystack2 <- haystack # we really pass along the integer ranks, but in this case that is equivalent # to converting our double columns to integers haystack2$x <- as.integer(haystack2$x) haystack2$y <- as.integer(haystack2$y) info <- compute_nesting_container_info(haystack2, condition = c("<=", "<=")) # the ids are in the second slot. # container ids break haystack into [1, 4] and [5, 5]. info[[2]] #> [1] 0 0 0 0 1 }\if{html}{\out{
}} So the idea is that for each needle, we look in each haystack container and find all the matches, then we aggregate all of the matches once at the end. \code{df_locate_matches_with_containers()} has the job of iterating over the containers. Computing totally ordered containers can be expensive, but luckily it doesn't happen very often in normal usage. \itemize{ \item If there are all \code{==} conditions, we don't need containers (i.e. any equi join) \item If there is only 1 non-equi condition and no conditions after it, we don't need containers (i.e. most rolling joins) \item Otherwise the typical case where we need containers is if we have something like \verb{date >= lower, date <= upper}. Even so, the computation cost generally scales with the number of columns in \code{haystack} you compute containers with (here 2), and it only really slows down around 4 columns or so, which I haven't ever seen a real life example of. } } } vctrs/man/vec_seq_along.Rd0000644000176200001440000000135513505165544015276 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{vec_seq_along} \alias{vec_seq_along} \alias{vec_init_along} \title{Useful sequences} \usage{ vec_seq_along(x) vec_init_along(x, y = x) } \arguments{ \item{x, y}{Vectors} } \value{ \itemize{ \item \code{vec_seq_along()} an integer vector with the same size as \code{x}. \item \code{vec_init_along()} a vector with the same type as \code{x} and the same size as \code{y}. } } \description{ \code{vec_seq_along()} is equivalent to \code{\link[=seq_along]{seq_along()}} but uses size, not length. \code{vec_init_along()} creates a vector of missing values with size matching an existing object. } \examples{ vec_seq_along(mtcars) vec_init_along(head(mtcars)) } vctrs/man/vec-set.Rd0000644000176200001440000001141114362266120014023 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/set.R \name{vec-set} \alias{vec-set} \alias{vec_set_intersect} \alias{vec_set_difference} \alias{vec_set_union} \alias{vec_set_symmetric_difference} \title{Set operations} \usage{ vec_set_intersect( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) vec_set_difference( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) vec_set_union( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) vec_set_symmetric_difference( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) } \arguments{ \item{x, y}{A pair of vectors.} \item{...}{These dots are for future extensions and must be empty.} \item{ptype}{If \code{NULL}, the default, the output type is determined by computing the common type between \code{x} and \code{y}. If supplied, both \code{x} and \code{y} will be cast to this type.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector of the common type of \code{x} and \code{y} (or \code{ptype}, if supplied) containing the result of the corresponding set function. } \description{ \itemize{ \item \code{vec_set_intersect()} returns all values in both \code{x} and \code{y}. \item \code{vec_set_difference()} returns all values in \code{x} but not \code{y}. Note that this is an asymmetric set difference, meaning it is not commutative. \item \code{vec_set_union()} returns all values in either \code{x} or \code{y}. \item \code{vec_set_symmetric_difference()} returns all values in either \code{x} or \code{y} but not both. This is a commutative difference. } Because these are \emph{set} operations, these functions only return unique values from \code{x} and \code{y}, returned in the order they first appeared in the original input. Names of \code{x} and \code{y} are retained on the result, but names are always taken from \code{x} if the value appears in both inputs. These functions work similarly to \code{\link[=intersect]{intersect()}}, \code{\link[=setdiff]{setdiff()}}, and \code{\link[=union]{union()}}, but don't strip attributes and can be used with data frames. } \details{ Missing values are treated as equal to other missing values. For doubles and complexes, \code{NaN} are equal to other \code{NaN}, but not to \code{NA}. } \section{Dependencies}{ \subsection{\code{vec_set_intersect()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } } \subsection{\code{vec_set_difference()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } } \subsection{\code{vec_set_union()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} \item \code{\link[=vec_c]{vec_c()}} } } \subsection{\code{vec_set_symmetric_difference()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} \item \code{\link[=vec_c]{vec_c()}} } } } \examples{ x <- c(1, 2, 1, 4, 3) y <- c(2, 5, 5, 1) # All unique values in both `x` and `y`. # Duplicates in `x` and `y` are always removed. vec_set_intersect(x, y) # All unique values in `x` but not `y` vec_set_difference(x, y) # All unique values in either `x` or `y` vec_set_union(x, y) # All unique values in either `x` or `y` but not both vec_set_symmetric_difference(x, y) # These functions can also be used with data frames x <- data_frame( a = c(2, 3, 2, 2), b = c("j", "k", "j", "l") ) y <- data_frame( a = c(1, 2, 2, 2, 3), b = c("j", "l", "j", "l", "j") ) vec_set_intersect(x, y) vec_set_difference(x, y) vec_set_union(x, y) vec_set_symmetric_difference(x, y) # Vector names don't affect set membership, but if you'd like to force # them to, you can transform the vector into a two column data frame x <- c(a = 1, b = 2, c = 2, d = 3) y <- c(c = 2, b = 1, a = 3, d = 3) vec_set_intersect(x, y) x <- data_frame(name = names(x), value = unname(x)) y <- data_frame(name = names(y), value = unname(y)) vec_set_intersect(x, y) } vctrs/man/vec_rank.Rd0000644000176200001440000001236314315060307014251 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rank.R \name{vec_rank} \alias{vec_rank} \title{Compute ranks} \usage{ vec_rank( x, ..., ties = c("min", "max", "sequential", "dense"), incomplete = c("rank", "na"), direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{ties}{Ranking of duplicate values. \itemize{ \item \code{"min"}: Use the current rank for all duplicates. The next non-duplicate value will have a rank incremented by the number of duplicates present. \item \code{"max"}: Use the current rank \code{+ n_duplicates - 1} for all duplicates. The next non-duplicate value will have a rank incremented by the number of duplicates present. \item \code{"sequential"}: Use an increasing sequence of ranks starting at the current rank, applied to duplicates in order of appearance. \item \code{"dense"}: Use the current rank for all duplicates. The next non-duplicate value will have a rank incremented by \code{1}, effectively removing any gaps in the ranking. }} \item{incomplete}{Ranking of missing and \link[=vec_detect_complete]{incomplete} observations. \itemize{ \item \code{"rank"}: Rank incomplete observations normally. Missing values within incomplete observations will be affected by \code{na_value} and \code{nan_distinct}. \item \code{"na"}: Don't rank incomplete observations at all. Instead, they are given a rank of \code{NA}. In this case, \code{na_value} and \code{nan_distinct} have no effect. }} \item{direction}{Direction to sort in. \itemize{ \item A single \code{"asc"} or \code{"desc"} for ascending or descending order respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"asc"} or \code{"desc"}, specifying the direction for each column. }} \item{na_value}{Ordering of missing values. \itemize{ \item A single \code{"largest"} or \code{"smallest"} for ordering missing values as the largest or smallest values respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"largest"} or \code{"smallest"}, specifying how missing values should be ordered within each column. }} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} } \description{ \code{vec_rank()} computes the sample ranks of a vector. For data frames, ranks are computed along the rows, using all columns after the first to break ties. } \details{ Unlike \code{\link[base:rank]{base::rank()}}, when \code{incomplete = "rank"} all missing values are given the same rank, rather than an increasing sequence of ranks. When \code{nan_distinct = FALSE}, \code{NaN} values are given the same rank as \code{NA}, otherwise they are given a rank that differentiates them from \code{NA}. Like \code{\link[=vec_order_radix]{vec_order_radix()}}, ordering is done in the C-locale. This can affect the ranks of character vectors, especially regarding how uppercase and lowercase letters are ranked. See the documentation of \code{\link[=vec_order_radix]{vec_order_radix()}} for more information. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_order_radix]{vec_order_radix()}} \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ x <- c(5L, 6L, 3L, 3L, 5L, 3L) vec_rank(x, ties = "min") vec_rank(x, ties = "max") # Sequential ranks use an increasing sequence for duplicates vec_rank(x, ties = "sequential") # Dense ranks remove gaps between distinct values, # even if there are duplicates vec_rank(x, ties = "dense") y <- c(NA, x, NA, NaN) # Incomplete values match other incomplete values by default, and their # overall position can be adjusted with `na_value` vec_rank(y, na_value = "largest") vec_rank(y, na_value = "smallest") # NaN can be ranked separately from NA if required vec_rank(y, nan_distinct = TRUE) # Rank in descending order. Since missing values are the largest value, # they are given a rank of `1` when ranking in descending order. vec_rank(y, direction = "desc", na_value = "largest") # Give incomplete values a rank of `NA` by setting `incomplete = "na"` vec_rank(y, incomplete = "na") # Can also rank data frames, using columns after the first to break ties z <- c(2L, 3L, 4L, 4L, 5L, 2L) df <- data_frame(x = x, z = z) df vec_rank(df) } vctrs/man/maybe_lossy_cast.Rd0000644000176200001440000000574314315060307016025 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conditions.R \name{maybe_lossy_cast} \alias{maybe_lossy_cast} \title{Lossy cast error} \usage{ maybe_lossy_cast( result, x, to, lossy = NULL, locations = NULL, ..., loss_type = c("precision", "generality"), x_arg, to_arg, call = caller_env(), details = NULL, message = NULL, class = NULL, .deprecation = FALSE ) } \arguments{ \item{result}{The result of a potentially lossy cast.} \item{x}{Vectors to cast.} \item{to}{Type to cast to.} \item{lossy}{A logical vector indicating which elements of \code{result} were lossy. Can also be a single \code{TRUE}, but note that \code{locations} picks up locations from this vector by default. In this case, supply your own location vector, possibly empty.} \item{locations}{An optional integer vector giving the locations where \code{x} lost information.} \item{..., class}{Only use these fields when creating a subclass.} \item{loss_type}{The kind of lossy cast to be mentioned in error messages. Can be loss of precision (for instance from double to integer) or loss of generality (from character to factor).} \item{x_arg}{Argument name for \code{x}, used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{details}{Any additional human readable details.} \item{message}{An overriding message for the error. \code{details} and \code{message} are mutually exclusive, supplying both is an error.} \item{.deprecation}{If \code{TRUE}, the error is downgraded to a deprecation warning. This is useful for transitioning your class to a stricter conversion scheme. The warning advises your users to wrap their code with \code{allow_lossy_cast()}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} By default, lossy casts are an error. Use \code{allow_lossy_cast()} to silence these errors and continue with the partial results. In this case the lost values are typically set to \code{NA} or to a lower value resolution, depending on the type of cast. Lossy cast errors are thrown by \code{maybe_lossy_cast()}. Unlike functions prefixed with \code{stop_}, \code{maybe_lossy_cast()} usually returns a result. If a lossy cast is detected, it throws an error, unless it's been wrapped in \code{allow_lossy_cast()}. In that case, it returns the result silently. } \keyword{internal} vctrs/man/vec-rep.Rd0000644000176200001440000000657314511524374014037 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rep.R \name{vec-rep} \alias{vec-rep} \alias{vec_rep} \alias{vec_rep_each} \alias{vec_unrep} \title{Repeat a vector} \usage{ vec_rep( x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times" ) vec_rep_each( x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times" ) vec_unrep(x) } \arguments{ \item{x}{A vector.} \item{times}{For \code{vec_rep()}, a single integer for the number of times to repeat the entire vector. For \code{vec_rep_each()}, an integer vector of the number of times to repeat each element of \code{x}. \code{times} will be \link[=theory-faq-recycling]{recycled} to the size of \code{x}.} \item{...}{These dots are for future extensions and must be empty.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{x_arg, times_arg}{Argument names for errors.} } \value{ For \code{vec_rep()}, a vector the same type as \code{x} with size \code{vec_size(x) * times}. For \code{vec_rep_each()}, a vector the same type as \code{x} with size \code{sum(vec_recycle(times, vec_size(x)))}. For \code{vec_unrep()}, a data frame with two columns, \code{key} and \code{times}. \code{key} is a vector with the same type as \code{x}, and \code{times} is an integer vector. } \description{ \itemize{ \item \code{vec_rep()} repeats an entire vector a set number of \code{times}. \item \code{vec_rep_each()} repeats each element of a vector a set number of \code{times}. \item \code{vec_unrep()} compresses a vector with repeated values. The repeated values are returned as a \code{key} alongside the number of \code{times} each key is repeated. } } \details{ Using \code{vec_unrep()} and \code{vec_rep_each()} together is similar to using \code{\link[base:rle]{base::rle()}} and \code{\link[base:rle]{base::inverse.rle()}}. The following invariant shows the relationship between the two functions: \if{html}{\out{
}}\preformatted{compressed <- vec_unrep(x) identical(x, vec_rep_each(compressed$key, compressed$times)) }\if{html}{\out{
}} There are two main differences between \code{vec_unrep()} and \code{\link[base:rle]{base::rle()}}: \itemize{ \item \code{vec_unrep()} treats adjacent missing values as equivalent, while \code{rle()} treats them as different values. \item \code{vec_unrep()} works along the size of \code{x}, while \code{rle()} works along its length. This means that \code{vec_unrep()} works on data frames by compressing repeated rows. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ # Repeat the entire vector vec_rep(1:2, 3) # Repeat within each vector vec_rep_each(1:2, 3) x <- vec_rep_each(1:2, c(3, 4)) x # After using `vec_rep_each()`, you can recover the original vector # with `vec_unrep()` vec_unrep(x) df <- data.frame(x = 1:2, y = 3:4) # `rep()` repeats columns of data frames, and returns lists rep(df, each = 2) # `vec_rep()` and `vec_rep_each()` repeat rows, and return data frames vec_rep(df, 2) vec_rep_each(df, 2) # `rle()` treats adjacent missing values as different y <- c(1, NA, NA, 2) rle(y) # `vec_unrep()` treats them as equivalent vec_unrep(y) } vctrs/man/vec_ptype2.Rd0000644000176200001440000000565414315060307014546 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-bare.R, R/type2.R \name{vec_ptype2.logical} \alias{vec_ptype2.logical} \alias{vec_ptype2.integer} \alias{vec_ptype2.double} \alias{vec_ptype2.complex} \alias{vec_ptype2.character} \alias{vec_ptype2.raw} \alias{vec_ptype2.list} \alias{vec_ptype2} \title{Find the common type for a pair of vectors} \usage{ \method{vec_ptype2}{logical}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{integer}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{double}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{complex}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{character}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{raw}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{list}(x, y, ..., x_arg = "", y_arg = "") vec_ptype2( x, y, ..., x_arg = caller_arg(x), y_arg = caller_arg(y), call = caller_env() ) } \arguments{ \item{x, y}{Vector types.} \item{...}{These dots are for future extensions and must be empty.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{vec_ptype2()} defines the coercion hierarchy for a set of related vector types. Along with \code{\link[=vec_cast]{vec_cast()}}, this generic forms the foundation of type coercions in vctrs. \code{vec_ptype2()} is relevant when you are implementing vctrs methods for your class, but it should not usually be called directly. If you need to find the common type of a set of inputs, call \code{\link[=vec_ptype_common]{vec_ptype_common()}} instead. This function supports multiple inputs and \link[=vec_ptype_finalise]{finalises} the common type. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_ptype]{vec_ptype()}} is applied to \code{x} and \code{y} } } \seealso{ \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} when you determine from the attributes that an input can't be cast to the target type. } vctrs/man/vec_interleave.Rd0000644000176200001440000000537214511524374015465 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice-interleave.R \name{vec_interleave} \alias{vec_interleave} \title{Interleave many vectors into one vector} \usage{ vec_interleave( ..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet") ) } \arguments{ \item{...}{Vectors to interleave. These will be \link[=theory-faq-recycling]{recycled} to a common size.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{.name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} } \description{ \code{vec_interleave()} combines multiple vectors together, much like \code{\link[=vec_c]{vec_c()}}, but does so in such a way that the elements of each vector are interleaved together. It is a more efficient equivalent to the following usage of \code{vec_c()}: \if{html}{\out{
}}\preformatted{vec_interleave(x, y) == vec_c(x[1], y[1], x[2], y[2], ..., x[n], y[n]) }\if{html}{\out{
}} } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=list_unchop]{list_unchop()}} } } } \examples{ # The most common case is to interleave two vectors vec_interleave(1:3, 4:6) # But you aren't restricted to just two vec_interleave(1:3, 4:6, 7:9, 10:12) # You can also interleave data frames x <- data_frame(x = 1:2, y = c("a", "b")) y <- data_frame(x = 3:4, y = c("c", "d")) vec_interleave(x, y) } vctrs/man/new_rcrd.Rd0000644000176200001440000000170214276722575014301 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-rcrd.R \name{new_rcrd} \alias{new_rcrd} \alias{ses} \alias{rcrd} \title{rcrd (record) S3 class} \usage{ new_rcrd(fields, ..., class = character()) } \arguments{ \item{fields}{A list or a data frame. Lists must be rectangular (same sizes), and contain uniquely named vectors (at least one). \code{fields} is validated with \code{\link[=df_list]{df_list()}} to ensure uniquely named vectors.} \item{...}{Additional attributes} \item{class}{Name of subclass.} } \description{ The rcrd class extends \link{vctr}. A rcrd is composed of 1 or more \link{field}s, which must be vectors of the same length. Is designed specifically for classes that can naturally be decomposed into multiple vectors of the same length, like \link{POSIXlt}, but where the organisation should be considered an implementation detail invisible to the user (unlike a \link{data.frame}). } \keyword{internal} vctrs/man/new_partial.Rd0000644000176200001440000000220414276722575015001 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/partial.R \name{new_partial} \alias{new_partial} \alias{is_partial} \alias{vec_ptype_finalise} \title{Partial type} \usage{ new_partial(..., class = character()) is_partial(x) vec_ptype_finalise(x, ...) } \arguments{ \item{...}{Attributes of the partial type} \item{class}{Name of subclass.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use \code{new_partial()} when constructing a new partial type subclass; and use \code{is_partial()} to test if a type is partial. All subclasses need to provide a \code{vec_ptype_finalise()} method. } \details{ As the name suggests, a partial type \emph{partially} specifies a type, and it must be combined with data to yield a full type. A useful example of a partial type is \code{\link[=partial_frame]{partial_frame()}}, which makes it possible to specify the type of just a few columns in a data frame. Use this constructor if you're making your own partial type. } \keyword{internal} vctrs/man/vector-checks.Rd0000644000176200001440000001013114401377400015211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{vector-checks} \alias{vector-checks} \alias{obj_is_vector} \alias{obj_check_vector} \alias{vec_check_size} \title{Vector checks} \usage{ obj_is_vector(x) obj_check_vector(x, ..., arg = caller_arg(x), call = caller_env()) vec_check_size(x, size, ..., arg = caller_arg(x), call = caller_env()) } \arguments{ \item{x}{For \verb{obj_*()} functions, an object. For \verb{vec_*()} functions, a vector.} \item{...}{These dots are for future extensions and must be empty.} \item{arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{size}{The size to check for.} } \value{ \itemize{ \item \code{obj_is_vector()} returns a single \code{TRUE} or \code{FALSE}. \item \code{obj_check_vector()} returns \code{NULL} invisibly, or errors. \item \code{vec_check_size()} returns \code{NULL} invisibly, or errors. } } \description{ \itemize{ \item \code{obj_is_vector()} tests if \code{x} is considered a vector in the vctrs sense. See \emph{Vectors and scalars} below for the exact details. \item \code{obj_check_vector()} uses \code{obj_is_vector()} and throws a standardized and informative error if it returns \code{FALSE}. \item \code{vec_check_size()} tests if \code{x} has size \code{size}, and throws an informative error if it doesn't. } } \section{Vectors and scalars}{ Informally, a vector is a collection that makes sense to use as column in a data frame. The following rules define whether or not \code{x} is considered a vector. If no \code{\link[=vec_proxy]{vec_proxy()}} method has been registered, \code{x} is a vector if: \itemize{ \item The \link[=typeof]{base type} of the object is atomic: \code{"logical"}, \code{"integer"}, \code{"double"}, \code{"complex"}, \code{"character"}, or \code{"raw"}. \item \code{x} is a list, as defined by \code{\link[=obj_is_list]{obj_is_list()}}. \item \code{x} is a \link{data.frame}. } If a \code{vec_proxy()} method has been registered, \code{x} is a vector if: \itemize{ \item The proxy satisfies one of the above conditions. \item The base type of the proxy is \code{"list"}, regardless of its class. S3 lists are thus treated as scalars unless they implement a \code{vec_proxy()} method. } Otherwise an object is treated as scalar and cannot be used as a vector. In particular: \itemize{ \item \code{NULL} is not a vector. \item S3 lists like \code{lm} objects are treated as scalars by default. \item Objects of type \link{expression} are not treated as vectors. } } \section{Technical limitations}{ \itemize{ \item Support for S4 vectors is currently limited to objects that inherit from an atomic type. \item Subclasses of \link{data.frame} that \emph{append} their class to the back of the \code{"class"} attribute are not treated as vectors. If you inherit from an S3 class, always prepend your class to the front of the \code{"class"} attribute for correct dispatch. This matches our general principle of allowing subclasses but not mixins. } } \examples{ obj_is_vector(1) # Data frames are vectors obj_is_vector(data_frame()) # Bare lists are vectors obj_is_vector(list()) # S3 lists are vectors if they explicitly inherit from `"list"` x <- structure(list(), class = c("my_list", "list")) obj_is_list(x) obj_is_vector(x) # But if they don't explicitly inherit from `"list"`, they aren't # automatically considered to be vectors. Instead, vctrs considers this # to be a scalar object, like a linear model returned from `lm()`. y <- structure(list(), class = "my_list") obj_is_list(y) obj_is_vector(y) # `obj_check_vector()` throws an informative error if the input # isn't a vector try(obj_check_vector(y)) # `vec_check_size()` throws an informative error if the size of the # input doesn't match `size` vec_check_size(1:5, size = 5) try(vec_check_size(1:5, size = 4)) } vctrs/man/faq/0000755000176200001440000000000014532470452012743 5ustar liggesusersvctrs/man/faq/internal/0000755000176200001440000000000014315060307014550 5ustar liggesusersvctrs/man/faq/internal/matches-algorithm.Rmd0000644000176200001440000002456514315060307020640 0ustar liggesusers--- output: html_document editor_options: chunk_output_type: console --- ```{r, child = "../setup.Rmd", include = FALSE} ``` `vec_locate_matches()` is similar to `vec_match()`, but detects _all_ matches by default, and can match on conditions other than equality (like `>=` and `<`). There are also various other arguments to limit or adjust exactly which kinds of matches are returned. Here is an example: ```{r} x <- c("a", "b", "a", "c", "d") y <- c("d", "b", "a", "d", "a", "e") # For each value of `x`, find all matches in `y` # - The "c" in `x` doesn't have a match, so it gets an NA location by default # - The "e" in `y` isn't matched by anything in `x`, so it is dropped by default vec_locate_matches(x, y) ``` # Algorithm description ## Overview and `==` The simplest (approximate) way to think about the algorithm that `df_locate_matches_recurse()` uses is that it sorts both inputs, and then starts at the midpoint in `needles` and uses a binary search to find each needle in `haystack`. Since there might be multiple of the same needle, we find the location of the lower and upper duplicate of that needle to handle all duplicates of that needle at once. Similarly, if there are duplicates of a matching `haystack` value, we find the lower and upper duplicates of the match. If the condition is `==`, that is pretty much all we have to do. For each needle, we then record 3 things: the location of the needle, the location of the lower match in the haystack, and the match size (i.e. `loc_upper_match - loc_lower_match + 1`). This later gets expanded in `expand_compact_indices()` into the actual output. After recording the matches for a single needle, we perform the same procedure on the LHS and RHS of that needle (remember we started on the midpoint needle). i.e. from `[1, loc_needle-1]` and `[loc_needle+1, size_needles]`, again taking the midpoint of those two ranges, finding their respective needle in the haystack, recording matches, and continuing on to the next needle. This iteration proceeds until we run out of needles. When we have a data frame with multiple columns, we add a layer of recursion to this. For the first column, we find the locations of the lower/upper duplicate of the current needle, and we find the locations of the lower/upper matches in the haystack. If we are on the final column in the data frame, we record the matches, otherwise we pass this information on to another call to `df_locate_matches_recurse()`, bumping the column index and using these refined lower/upper bounds as the starting bounds for the next column. I think an example would be useful here, so below I step through this process for a few iterations: ```{r} # these are sorted already for simplicity needles <- data_frame(x = c(1, 1, 2, 2, 2, 3), y = c(1, 2, 3, 4, 5, 3)) haystack <- data_frame(x = c(1, 1, 2, 2, 3), y = c(2, 3, 4, 4, 1)) needles haystack ## Column 1, iteration 1 # start at midpoint in needles # this corresponds to x==2 loc_mid_needles <- 3L # finding all x==2 values in needles gives us: loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # compute LHS/RHS bounds for next needle lhs_loc_lower_bound_needles <- 1L # original lower bound lhs_loc_upper_bound_needles <- 2L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 6L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 6L # original upper bound # We still have a 2nd column to check. So recurse and pass on the current # duplicate and match bounds to start the 2nd column with. ## Column 2, iteration 1 # midpoint of [3, 5] # value y==4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # last column, so record matches # - this was location 4 in needles # - lower match in haystack is at loc 3 # - match size is 2 # Now handle LHS and RHS of needle midpoint lhs_loc_lower_bound_needles <- 3L # original lower bound lhs_loc_upper_bound_needles <- 3L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 5L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 5L # original upper bound ## Column 2, iteration 2 (using LHS bounds) # midpoint of [3,3] # value of y==3 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 3L # no match! no y==3 in haystack for x==2 # lower-match will always end up > upper-match in this case loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 2L # no LHS or RHS needle values to do, so we are done here ## Column 2, iteration 3 (using RHS bounds) # same as above, range of [5,5], value of y==5, which has no match in haystack ## Column 1, iteration 2 (LHS of first x needle) # Now we are done with the x needles from [3,5], so move on to the LHS and RHS # of that. Here we would do the LHS: # midpoint of [1,2] loc_mid_needles <- 1L # ... ## Column 1, iteration 3 (RHS of first x needle) # midpoint of [6,6] loc_mid_needles <- 6L # ... ``` In the real code, rather than comparing the double values of the columns directly, we replace each column with pseudo "joint ranks" computed between the i-th column of `needles` and the i-th column of `haystack`. It is approximately like doing `vec_rank(vec_c(needles$x, haystack$x), type = "dense")`, then splitting the resulting ranks back up into their corresponding needle/haystack columns. This keeps the recursion code simpler, because we only have to worry about comparing integers. ## Non-equi conditions and containers At this point we can talk about non-equi conditions like `<` or `>=`. The general idea is pretty simple, and just builds on the above algorithm. For example, start with the `x` column from needles/haystack above: ```{r} needles$x haystack$x ``` If we used a condition of `<=`, then we'd do everything the same as before: - Midpoint in needles is location 3, value `x==2` - Find lower/upper duplicates in needles, giving locations `[3, 5]` - Find lower/upper _exact_ match in haystack, giving locations `[3, 4]` At this point, we need to "adjust" the `haystack` match bounds to account for the condition. Since `haystack` is ordered, our "rule" for `<=` is to keep the lower match location the same, but extend the upper match location to the upper bound, so we end up with `[3, 5]`. We know we can extend the upper match location because every haystack value after the exact match should be less than the needle. Then we just record the matches and continue on normally. This approach is really nice, because we only have to exactly match the `needle` in `haystack`. We don't have to compare each needle against every value in `haystack`, which would take a massive amount of time. However, it gets slightly more complex with data frames with multiple columns. Let's go back to our original `needles` and `haystack` data frames and apply the condition `<=` to each column. Here is another worked example, which shows a case where our "rule" falls apart on the second column. ```{r} needles haystack # `condition = c("<=", "<=")` ## Column 1, iteration 1 # x == 2 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # because haystack is ordered we know we can expand the upper bound automatically # to include everything past the match. i.e. needle of x==2 must be less than # the haystack value at loc 5, which we can check by seeing that it is x==3. loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L ## Column 2, iteration 1 # needles range of [3, 5] # y == 4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # lets try using our rule, which tells us we should be able to extend the upper # bound: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L # but the haystack value of y at location 5 is y==1, which is not less than y==4 # in the needles! looks like our rule failed us. ``` If you read through the above example, you'll see that the rule didn't work here. The problem is that while `haystack` is ordered (by `vec_order()`s standards), each column isn't ordered _independently_ of the others. Instead, each column is ordered within the "group" created by previous columns. Concretely, `haystack` here has an ordered `x` column, but if you look at `haystack$y` by itself, it isn't ordered (because of that 1 at the end). That is what causes the rule to fail. ```{r} haystack ``` To fix this, we need to create haystack "containers" where the values within each container are all _totally_ ordered. For `haystack` that would create 2 containers and look like: ``` r haystack[1:4,] #> # A tibble: 4 × 2 #> x y #> #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 haystack[5,] #> # A tibble: 1 × 2 #> x y #> #> 1 3 1 ``` This is essentially what `computing_nesting_container_ids()` does. You can actually see these ids with the helper, `compute_nesting_container_info()`: ```{r} haystack2 <- haystack # we really pass along the integer ranks, but in this case that is equivalent # to converting our double columns to integers haystack2$x <- as.integer(haystack2$x) haystack2$y <- as.integer(haystack2$y) info <- compute_nesting_container_info(haystack2, condition = c("<=", "<=")) # the ids are in the second slot. # container ids break haystack into [1, 4] and [5, 5]. info[[2]] ``` So the idea is that for each needle, we look in each haystack container and find all the matches, then we aggregate all of the matches once at the end. `df_locate_matches_with_containers()` has the job of iterating over the containers. Computing totally ordered containers can be expensive, but luckily it doesn't happen very often in normal usage. - If there are all `==` conditions, we don't need containers (i.e. any equi join) - If there is only 1 non-equi condition and no conditions after it, we don't need containers (i.e. most rolling joins) - Otherwise the typical case where we need containers is if we have something like `date >= lower, date <= upper`. Even so, the computation cost generally scales with the number of columns in `haystack` you compute containers with (here 2), and it only really slows down around 4 columns or so, which I haven't ever seen a real life example of. vctrs/man/faq/internal/ptype2-identity.Rmd0000644000176200001440000000612314276722575020312 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ## Promotion monoid Promotions (i.e. automatic coercions) should always transform inputs to their richer type to avoid losing values of precision. `vec_ptype2()` returns the _richer_ type of two vectors, or throws an incompatible type error if none of the two vector types include the other. For example, the richer type of integer and double is the latter because double covers a larger range of values than integer. `vec_ptype2()` is a [monoid](https://en.wikipedia.org/wiki/Monoid) over vectors, which in practical terms means that it is a well behaved operation for [reduction](https://purrr.tidyverse.org/reference/reduce.html). Reduction is an important operation for promotions because that is how the richer type of multiple elements is computed. As a monoid, `vec_ptype2()` needs an identity element, i.e. a value that doesn't change the result of the reduction. vctrs has two identity values, `NULL` and __unspecified__ vectors. ## The `NULL` identity As an identity element that shouldn't influence the determination of the common type of a set of vectors, `NULL` is promoted to any type: ```{r} vec_ptype2(NULL, "") vec_ptype2(1L, NULL) ``` The common type of `NULL` and `NULL` is the identity `NULL`: ```{r} vec_ptype2(NULL, NULL) ``` This way the result of `vec_ptype2(NULL, NULL)` does not influence subsequent promotions: ```{r} vec_ptype2( vec_ptype2(NULL, NULL), "" ) ``` ## Unspecified vectors In the vctrs coercion system, logical vectors of missing values are also automatically promoted to the type of any other vector, just like `NULL`. We call these vectors unspecified. The special coercion semantics of unspecified vectors serve two purposes: 1. It makes it possible to assign vectors of `NA` inside any type of vectors, even when they are not coercible with logical: ```{r} x <- letters[1:5] vec_assign(x, 1:2, c(NA, NA)) ``` 2. We can't put `NULL` in a data frame, so we need an identity element that behaves more like a vector. Logical vectors of `NA` seem a natural fit for this. Unspecified vectors are thus promoted to any other type, just like `NULL`: ```{r} vec_ptype2(NA, "") vec_ptype2(1L, c(NA, NA)) ``` ## Finalising common types vctrs has an internal vector type of class `vctrs_unspecified`. Users normally don't see such vectors in the wild, but they do come up when taking the common type of an unspecified vector with another identity value: ```{r} vec_ptype2(NA, NA) vec_ptype2(NA, NULL) vec_ptype2(NULL, NA) ``` We can't return `NA` here because `vec_ptype2()` normally returns empty vectors. We also can't return `NULL` because unspecified vectors need to be recognised as logical vectors if they haven't been promoted at the end of the reduction. ```{r} vec_ptype_finalise(vec_ptype2(NULL, NA)) ``` See the output of `vec_ptype_common()` which performs the reduction and finalises the type, ready to be used by the caller: ```{r} vec_ptype_common(NULL, NULL) vec_ptype_common(NA, NULL) ``` Note that __partial__ types in vctrs make use of the same mechanism. They are finalised with `vec_ptype_finalise()`. vctrs/man/faq/developer/0000755000176200001440000000000014511524374014730 5ustar liggesusersvctrs/man/faq/developer/reference-compatibility.Rmd0000644000176200001440000000576214276722575022226 0ustar liggesusers vctrs provides a framework for working with vector classes in a generic way. However, it implements several compatibility fallbacks to base R methods. In this reference you will find how vctrs tries to be compatible with your vector class, and what base methods you need to implement for compatibility. If you're starting from scratch, we think you'll find it easier to start using [new_vctr()] as documented in `vignette("s3-vector")`. This guide is aimed for developers with existing vector classes. ## Aggregate operations with fallbacks All vctrs operations are based on four primitive generics described in the next section. However there are many higher level operations. The most important ones implement fallbacks to base generics for maximum compatibility with existing classes. - [vec_slice()] falls back to the base `[` generic if no [vec_proxy()] method is implemented. This way foreign classes that do not implement [vec_restore()] can restore attributes based on the new subsetted contents. - [vec_c()] and [vec_rbind()] now fall back to [base::c()] if the inputs have a common parent class with a `c()` method (only if they have no self-to-self `vec_ptype2()` method). vctrs works hard to make your `c()` method success in various situations (with `NULL` and `NA` inputs, even as first input which would normally prevent dispatch to your method). The main downside compared to using vctrs primitives is that you can't combine vectors of different classes since there is no extensible mechanism of coercion in `c()`, and it is less efficient in some cases. ## The vctrs primitives Most functions in vctrs are aggregate operations: they call other vctrs functions which themselves call other vctrs functions. The dependencies of a vctrs functions are listed in the Dependencies section of its documentation page. Take a look at [vec_count()] for an example. These dependencies form a tree whose leaves are the four vctrs primitives. Here is the diagram for `vec_count()`: \\figure{vec-count-deps.png} ### The coercion generics The coercion mechanism in vctrs is based on two generics: - [vec_ptype2()] - [vec_cast()] See the [theory overview][theory-faq-coercion]. Two objects with the same class and the same attributes are always considered compatible by ptype2 and cast. If the attributes or classes differ, they throw an incompatible type error. Coercion errors are the main source of incompatibility with vctrs. See the [howto guide][howto-faq-coercion] if you need to implement methods for these generics. ### The proxy and restoration generics - [vec_proxy()] - [vec_restore()] These generics are essential for vctrs but mostly optional. `vec_proxy()` defaults to an [identity][identity] function and you normally don't need to implement it. The proxy a vector must be one of the atomic vector types, a list, or a data frame. By default, S3 lists that do not inherit from `"list"` do not have an identity proxy. In that case, you need to explicitly implement `vec_proxy()` or make your class inherit from list. vctrs/man/faq/developer/snippet-roxy-workflow.Rmd0000644000176200001440000000122414276722575021737 0ustar liggesusers To implement methods for generics, first import the generics in your namespace and redocument: ```{r, eval = FALSE} #' @importFrom vctrs vec_ptype2 vec_cast NULL ``` Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won't be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with `s3_register()`. vctrs/man/faq/developer/howto-faq-fix-scalar-type-error.Rmd0000644000176200001440000000445114376223321023437 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} stopifnot(rlang::is_installed("dplyr")) ``` The tidyverse is a bit stricter than base R regarding what kind of objects are considered as vectors (see the [user FAQ][faq-error-scalar-type] about this topic). Sometimes vctrs won't treat your class as a vector when it should. ## Why isn't my list class considered a vector? By default, S3 lists are not considered to be vectors by vctrs: ```{r} my_list <- structure(list(), class = "my_class") vctrs::vec_is(my_list) ``` To be treated as a vector, the class must either inherit from `"list"` explicitly: ```{r} my_explicit_list <- structure(list(), class = c("my_class", "list")) vctrs::vec_is(my_explicit_list) ``` Or it should implement a `vec_proxy()` method that returns its input if explicit inheritance is not possible or troublesome: ```{r} #' @export vec_proxy.my_class <- function(x, ...) x vctrs::vec_is(my_list) ``` Note that explicit inheritance is the preferred way because this makes it possible for your class to dispatch on `list` methods of S3 generics: ```{r, error = TRUE} my_generic <- function(x) UseMethod("my_generic") my_generic.list <- function(x) "dispatched!" my_generic(my_list) my_generic(my_explicit_list) ``` ## Why isn't my data frame class considered a vector? The most likely explanation is that the data frame has not been properly constructed. However, if you get an "Input must be a vector" error with a data frame subclass, it probably means that the data frame has not been properly constructed. The main cause of these errors are data frames whose _base class_ is not `"data.frame"`: ```{r, error = TRUE} my_df <- data.frame(x = 1) class(my_df) <- c("data.frame", "my_class") vctrs::obj_check_vector(my_df) ``` This is problematic as many tidyverse functions won't work properly: ```{r, error = TRUE} dplyr::slice(my_df, 1) ``` It is generally not appropriate to declare your class to be a superclass of another class. We generally consider this undefined behaviour (UB). To fix these errors, you can simply change the construction of your data frame class so that `"data.frame"` is a base class, i.e. it should come last in the class vector: ```{r} class(my_df) <- c("my_class", "data.frame") vctrs::obj_check_vector(my_df) dplyr::slice(my_df, 1) ``` vctrs/man/faq/developer/howto-coercion.Rmd0000644000176200001440000002106014511320530020316 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} old_warn_on_fallback <- options(`vctrs:::warn_on_fallback` = FALSE) knitr_defer(options(old_warn_on_fallback)) ``` This guide illustrates how to implement `vec_ptype2()` and `vec_cast()` methods for existing classes. Related topics: - For an overview of how these generics work and their roles in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")` ## The natural number class We'll illustrate how to implement coercion methods with a simple class that represents natural numbers. In this scenario we have an existing class that already features a constructor and methods for `print()` and subset. ```{r} #' @export new_natural <- function(x) { if (is.numeric(x) || is.logical(x)) { stopifnot(is_whole(x)) x <- as.integer(x) } else { stop("Can't construct natural from unknown type.") } structure(x, class = "my_natural") } is_whole <- function(x) { all(x %% 1 == 0 | is.na(x)) } #' @export print.my_natural <- function(x, ...) { cat("\n") x <- unclass(x) NextMethod() } #' @export `[.my_natural` <- function(x, i, ...) { new_natural(NextMethod()) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("base::print", "my_natural") knitr_local_registration("base::[", "my_natural") ``` ```{r} new_natural(1:3) new_natural(c(1, NA)) ``` ## Roxygen workflow ```{r, child = "snippet-roxy-workflow.Rmd"} ``` ## Implementing `vec_ptype2()` ### The self-self method The first method to implement is the one that signals that your class is compatible with itself: ```{r} #' @export vec_ptype2.my_natural.my_natural <- function(x, y, ...) { x } vec_ptype2(new_natural(1), new_natural(2:3)) ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.my_natural") ``` `vec_ptype2()` implements a fallback to try and be compatible with simple classes, so it may seem that you don't need to implement the self-self coercion method. However, you must implement it explicitly because this is how vctrs knows that a class that is implementing vctrs methods (for instance this disable fallbacks to `base::c()`). Also, it makes your class a bit more efficient. ### The parent and children methods Our natural number class is conceptually a parent of `` and a child of ``, but the class is not compatible with logical, integer, or double vectors yet: ```{r, error = TRUE} vec_ptype2(TRUE, new_natural(2:3)) vec_ptype2(new_natural(1), 2:3) ``` We'll specify the twin methods for each of these classes, returning the richer class in each case. ```{r} #' @export vec_ptype2.my_natural.logical <- function(x, y, ...) { # The order of the classes in the method name follows the order of # the arguments in the function signature, so `x` is the natural # number and `y` is the logical x } #' @export vec_ptype2.logical.my_natural <- function(x, y, ...) { # In this case `y` is the richer natural number y } ``` Between a natural number and an integer, the latter is the richer class: ```{r} #' @export vec_ptype2.my_natural.integer <- function(x, y, ...) { y } #' @export vec_ptype2.integer.my_natural <- function(x, y, ...) { x } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.logical") knitr_local_registration("vctrs::vec_ptype2", "my_natural.integer") knitr_local_registration("vctrs::vec_ptype2", "integer.my_natural") knitr_local_registration("vctrs::vec_ptype2", "logical.my_natural") ``` We no longer get common type errors for logical and integer: ```{r} vec_ptype2(TRUE, new_natural(2:3)) vec_ptype2(new_natural(1), 2:3) ``` We are not done yet. Pairwise coercion methods must be implemented for all the connected nodes in the coercion hierarchy, which include double vectors further up. The coercion methods for grand-parent types must be implemented separately: ```{r} #' @export vec_ptype2.my_natural.double <- function(x, y, ...) { y } #' @export vec_ptype2.double.my_natural <- function(x, y, ...) { x } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.double") knitr_local_registration("vctrs::vec_ptype2", "double.my_natural") ``` ### Incompatible attributes Most of the time, inputs are incompatible because they have different classes for which no `vec_ptype2()` method is implemented. More rarely, inputs could be incompatible because of their attributes. In that case incompatibility is signalled by calling `stop_incompatible_type()`. In the following example, we implement a self-self ptype2 method for a hypothetical subclass of `` that has stricter combination semantics. The method throws an error when the levels of the two factors are not compatible. ```{r, eval = FALSE} #' @export vec_ptype2.my_strict_factor.my_strict_factor <- function(x, y, ..., x_arg = "", y_arg = "") { if (!setequal(levels(x), levels(y))) { stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) } x } ``` Note how the methods need to take `x_arg` and `y_arg` parameters and pass them on to `stop_incompatible_type()`. These argument tags help create more informative error messages when the common type determination is for a column of a data frame. They are part of the generic signature but can usually be left out if not used. ## Implementing `vec_cast()` Corresponding `vec_cast()` methods must be implemented for all `vec_ptype2()` methods. The general pattern is to convert the argument `x` to the type of `to`. The methods should validate the values in `x` and make sure they conform to the values of `to`. Please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents `to`, whereas the second class represents `x`. The self-self method is easy in this case, it just returns the target input: ```{r} #' @export vec_cast.my_natural.my_natural <- function(x, to, ...) { x } ``` The other types need to be validated. We perform input validation in the `new_natural()` constructor, so that's a good fit for our `vec_cast()` implementations. ```{r} #' @export vec_cast.my_natural.logical <- function(x, to, ...) { # The order of the classes in the method name is in reverse order # of the arguments in the function signature, so `to` is the natural # number and `x` is the logical new_natural(x) } vec_cast.my_natural.integer <- function(x, to, ...) { new_natural(x) } vec_cast.my_natural.double <- function(x, to, ...) { new_natural(x) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_cast", "my_natural.my_natural") knitr_local_registration("vctrs::vec_cast", "my_natural.logical") knitr_local_registration("vctrs::vec_cast", "my_natural.integer") knitr_local_registration("vctrs::vec_cast", "my_natural.double") ``` With these methods, vctrs is now able to combine logical and natural vectors. It properly returns the richer type of the two, a natural vector: ```{r} vec_c(TRUE, new_natural(1), FALSE) ``` Because we haven't implemented conversions _from_ natural, it still doesn't know how to combine natural with the richer integer and double types: ```{r, error = TRUE} vec_c(new_natural(1), 10L) vec_c(1.5, new_natural(1)) ``` This is quick work which completes the implementation of coercion methods for vctrs: ```{r} #' @export vec_cast.logical.my_natural <- function(x, to, ...) { # In this case `to` is the logical and `x` is the natural number attributes(x) <- NULL as.logical(x) } #' @export vec_cast.integer.my_natural <- function(x, to, ...) { attributes(x) <- NULL as.integer(x) } #' @export vec_cast.double.my_natural <- function(x, to, ...) { attributes(x) <- NULL as.double(x) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_cast", "logical.my_natural") knitr_local_registration("vctrs::vec_cast", "integer.my_natural") knitr_local_registration("vctrs::vec_cast", "double.my_natural") ``` And we now get the expected combinations. ```{r} vec_c(new_natural(1), 10L) vec_c(1.5, new_natural(1)) ``` vctrs/man/faq/developer/theory-recycling.Rmd0000644000176200001440000000321714511524374020666 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` Recycling describes the concept of repeating elements of one vector to match the size of another. There are two rules that underlie the "tidyverse" recycling rules: - Vectors of size 1 will be recycled to the size of any other vector - Otherwise, all vectors must have the same size # Examples ```{r, warning = FALSE, message = FALSE, include = FALSE} library(tibble) ``` Vectors of size 1 are recycled to the size of any other vector: ```{r} tibble(x = 1:3, y = 1L) ``` This includes vectors of size 0: ```{r} tibble(x = integer(), y = 1L) ``` If vectors aren't size 1, they must all be the same size. Otherwise, an error is thrown: ```{r, error = TRUE} tibble(x = 1:3, y = 4:7) ``` # vctrs backend Packages in r-lib and the tidyverse generally use [vec_size_common()] and [vec_recycle_common()] as the backends for handling recycling rules. - `vec_size_common()` returns the common size of multiple vectors, after applying the recycling rules - `vec_recycle_common()` goes one step further, and actually recycles the vectors to their common size ```{r, error = TRUE} vec_size_common(1:3, "x") vec_recycle_common(1:3, "x") vec_size_common(1:3, c("x", "y")) ``` # Base R recycling rules The recycling rules described here are stricter than the ones generally used by base R, which are: - If any vector is length 0, the output will be length 0 - Otherwise, the output will be length `max(length_x, length_y)`, and a warning will be thrown if the length of the longer vector is not an integer multiple of the length of the shorter vector. We explore the base R rules in detail in `vignette("type-size")`. vctrs/man/faq/developer/theory-coercion.Rmd0000644000176200001440000002400014315060307020472 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` This is an overview of the usage of `vec_ptype2()` and `vec_cast()` and their role in the vctrs coercion mechanism. Related topics: - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")`. ## Combination mechanism in vctrs The coercion system in vctrs is designed to make combination of multiple inputs consistent and extensible. Combinations occur in many places, such as row-binding, joins, subset-assignment, or grouped summary functions that use the split-apply-combine strategy. For example: ```{r, error = TRUE} vec_c(TRUE, 1) vec_c("a", 1) vec_rbind( data.frame(x = TRUE), data.frame(x = 1, y = 2) ) vec_rbind( data.frame(x = "a"), data.frame(x = 1, y = 2) ) ``` One major goal of vctrs is to provide a central place for implementing the coercion methods that make generic combinations possible. The two relevant generics are `vec_ptype2()` and `vec_cast()`. They both take two arguments and perform __double dispatch__, meaning that a method is selected based on the classes of both inputs. The general mechanism for combining multiple inputs is: 1. Find the common type of a set of inputs by reducing (as in `base::Reduce()` or `purrr::reduce()`) the `vec_ptype2()` binary function over the set. 2. Convert all inputs to the common type with `vec_cast()`. 3. Initialise the output vector as an instance of this common type with `vec_init()`. 4. Fill the output vector with the elements of the inputs using `vec_assign()`. The last two steps may require `vec_proxy()` and `vec_restore()` implementations, unless the attributes of your class are constant and do not depend on the contents of the vector. We focus here on the first two steps, which require `vec_ptype2()` and `vec_cast()` implementations. ## `vec_ptype2()` Methods for `vec_ptype2()` are passed two _prototypes_, i.e. two inputs emptied of their elements. They implement two behaviours: * If the types of their inputs are compatible, indicate which of them is the richer type by returning it. If the types are of equal resolution, return any of the two. * Throw an error with `stop_incompatible_type()` when it can be determined from the attributes that the types of the inputs are not compatible. ### Type compatibility A type is __compatible__ with another type if the values it represents are a subset or a superset of the values of the other type. The notion of "value" is to be interpreted at a high level, in particular it is not the same as the memory representation. For example, factors are represented in memory with integers but their values are more related to character vectors than to round numbers: ```{r, error = TRUE} # Two factors are compatible vec_ptype2(factor("a"), factor("b")) # Factors are compatible with a character vec_ptype2(factor("a"), "b") # But they are incompatible with integers vec_ptype2(factor("a"), 1L) ``` ### Richness of type Richness of type is not a very precise notion. It can be about richer data (for instance a `double` vector covers more values than an integer vector), richer behaviour (a `data.table` has richer behaviour than a `data.frame`), or both. If you have trouble determining which one of the two types is richer, it probably means they shouldn't be automatically coercible. Let's look again at what happens when we combine a factor and a character: ```{r} vec_ptype2(factor("a"), "b") ``` The ptype2 method for `` and `>` returns `` because the former is a richer type. The factor can only contain `"a"` strings, whereas the character can contain any strings. In this sense, factors are a _subset_ of character. Note that another valid behaviour would be to throw an incompatible type error. This is what a strict factor implementation would do. We have decided to be laxer in vctrs because it is easy to inadvertently create factors instead of character vectors, especially with older versions of R where `stringsAsFactors` is still true by default. ### Consistency and symmetry on permutation Each ptype2 method should strive to have exactly the same behaviour when the inputs are permuted. This is not always possible, for example factor levels are aggregated in order: ```{r} vec_ptype2(factor(c("a", "c")), factor("b")) vec_ptype2(factor("b"), factor(c("a", "c"))) ``` In any case, permuting the input should not return a fundamentally different type or introduce an incompatible type error. ### Coercion hierarchy The classes that you can coerce together form a coercion (or subtyping) hierarchy. Below is a schema of the hierarchy for the base types like integer and factor. In this diagram the directions of the arrows express which type is richer. They flow from the bottom (more constrained types) to the top (richer types). \\figure{coerce.png} A coercion hierarchy is distinct from the structural hierarchy implied by memory types and classes. For instance, in a structural hierarchy, factors are built on top of integers. But in the coercion hierarchy they are more related to character vectors. Similarly, subclasses are not necessarily coercible with their superclasses because the coercion and structural hierarchies are separate. ### Implementing a coercion hierarchy As a class implementor, you have two options. The simplest is to create an entirely separate hierarchy. The date and date-time classes are an example of an S3-based hierarchy that is completely separate. Alternatively, you can integrate your class in an existing hierarchy, typically by adding parent nodes on top of the hierarchy (your class is richer), by adding children node at the root of the hierarchy (your class is more constrained), or by inserting a node in the tree. These coercion hierarchies are _implicit_, in the sense that they are implied by the `vec_ptype2()` implementations. There is no structured way to create or modify a hierarchy, instead you need to implement the appropriate coercion methods for all the types in your hierarchy, and diligently return the richer type in each case. The `vec_ptype2()` implementations are not transitive nor inherited, so all pairwise methods between classes lying on a given path must be implemented manually. This is something we might make easier in the future. ## `vec_cast()` The second generic, `vec_cast()`, is the one that looks at the data and actually performs the conversion. Because it has access to more information than `vec_ptype2()`, it may be stricter and cause an error in more cases. `vec_cast()` has three possible behaviours: - Determine that the prototypes of the two inputs are not compatible. This must be decided in exactly the same way as for `vec_ptype2()`. Call `stop_incompatible_cast()` if you can determine from the attributes that the types are not compatible. - Detect incompatible values. Usually this is because the target type is too restricted for the values supported by the input type. For example, a fractional number can't be converted to an integer. The method should throw an error in that case. - Return the input vector converted to the target type if all values are compatible. Whereas `vec_ptype2()` must return the same type when the inputs are permuted, `vec_cast()` is _directional_. It always returns the type of the right-hand side, or dies trying. ## Double dispatch The dispatch mechanism for `vec_ptype2()` and `vec_cast()` looks like S3 but is actually a custom mechanism. Compared to S3, it has the following differences: * It dispatches on the classes of the first two inputs. * There is no inheritance of ptype2 and cast methods. This is because the S3 class hierarchy is not necessarily the same as the coercion hierarchy. * `NextMethod()` does not work. Parent methods must be called explicitly if necessary. * The default method is hard-coded. ## Data frames The determination of the common type of data frames with `vec_ptype2()` happens in three steps: 1. Match the columns of the two input data frames. If some columns don't exist, they are created and filled with adequately typed `NA` values. 2. Find the common type for each column by calling `vec_ptype2()` on each pair of matched columns. 3. Find the common data frame type. For example the common type of a grouped tibble and a tibble is a grouped tibble because the latter is the richer type. The common type of a data table and a data frame is a data table. `vec_cast()` operates similarly. If a data frame is cast to a target type that has fewer columns, this is an error. If you are implementing coercion methods for data frames, you will need to explicitly call the parent methods that perform the common type determination or the type conversion described above. These are exported as [df_ptype2()] and [df_cast()]. ### Data frame fallbacks Being too strict with data frame combinations would cause too much pain because there are many data frame subclasses in the wild that don't implement vctrs methods. We have decided to implement a special fallback behaviour for foreign data frames. Incompatible data frames fall back to a base data frame: ```{r} df1 <- data.frame(x = 1) df2 <- structure(df1, class = c("foreign_df", "data.frame")) vec_rbind(df1, df2) ``` When a tibble is involved, we fall back to tibble: ```{r} df3 <- tibble::as_tibble(df1) vec_rbind(df1, df3) ``` These fallbacks are not ideal but they make sense because all data frames share a common data structure. This is not generally the case for vectors. For example factors and characters have different representations, and it is not possible to find a fallback time mechanically. However this fallback has a big downside: implementing vctrs methods for your data frame subclass is a breaking behaviour change. The proper coercion behaviour for your data frame class should be specified as soon as possible to limit the consequences of changing the behaviour of your class in R scripts. vctrs/man/faq/developer/howto-coercion-data-frame.Rmd0000644000176200001440000003051714276722575022353 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} old_warn_on_fallback <- options(`vctrs:::warn_on_fallback` = FALSE) knitr_defer(options(old_warn_on_fallback)) ``` This guide provides a practical recipe for implementing `vec_ptype2()` and `vec_cast()` methods for coercions of data frame subclasses. Related topics: - For an overview of the coercion mechanism in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. Coercion of data frames occurs when different data frame classes are combined in some way. The two main methods of combination are currently row-binding with [vec_rbind()] and col-binding with [vec_cbind()] (which are in turn used by a number of dplyr and tidyr functions). These functions take multiple data frame inputs and automatically coerce them to their common type. vctrs is generally strict about the kind of automatic coercions that are performed when combining inputs. In the case of data frames we have decided to be a bit less strict for convenience. Instead of throwing an incompatible type error, we fall back to a base data frame or a tibble if we don't know how to combine two data frame subclasses. It is still a good idea to specify the proper coercion behaviour for your data frame subclasses as soon as possible. We will see two examples in this guide. The first example is about a data frame subclass that has no particular attributes to manage. In the second example, we implement coercion methods for a tibble subclass that includes potentially incompatible attributes. ## Roxygen workflow ```{r, child = "snippet-roxy-workflow.Rmd"} ``` ## Parent methods Most of the common type determination should be performed by the parent class. In vctrs, double dispatch is implemented in such a way that you need to call the methods for the parent class manually. For `vec_ptype2()` this means you need to call `df_ptype2()` (for data frame subclasses) or `tib_ptype2()` (for tibble subclasses). Similarly, `df_cast()` and `tib_cast()` are the workhorses for `vec_cast()` methods of subtypes of `data.frame` and `tbl_df`. These functions take the union of the columns in `x` and `y`, and ensure shared columns have the same type. These functions are much less strict than `vec_ptype2()` and `vec_cast()` as they accept any subclass of data frame as input. They always return a `data.frame` or a `tbl_df`. You will probably want to write similar functions for your subclass to avoid repetition in your code. You may want to export them as well if you are expecting other people to derive from your class. ## A `data.table` example ```{r, include = FALSE} delayedAssign("as.data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "as.data.table") } else { function(...) abort("`data.table` must be installed.") } }) delayedAssign("data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "data.table") } else { function(...) abort("`data.table` must be installed.") } }) ``` This example is the actual implementation of vctrs coercion methods for `data.table`. This is a simple example because we don't have to keep track of attributes for this class or manage incompatibilities. See the tibble section for a more complicated example. We first create the `dt_ptype2()` and `dt_cast()` helpers. They wrap around the parent methods `df_ptype2()` and `df_cast()`, and transform the common type or converted input to a data table. You may want to export these helpers if you expect other packages to derive from your data frame class. These helpers should always return data tables. To this end we use the conversion generic `as.data.table()`. Depending on the tools available for the particular class at hand, a constructor might be appropriate as well. ```{r} dt_ptype2 <- function(x, y, ...) { as.data.table(df_ptype2(x, y, ...)) } dt_cast <- function(x, to, ...) { as.data.table(df_cast(x, to, ...)) } ``` We start with the self-self method: ```{r} #' @export vec_ptype2.data.table.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } ``` Between a data frame and a data table, we consider the richer type to be data table. This decision is not based on the value coverage of each data structures, but on the idea that data tables have richer behaviour. Since data tables are the richer type, we call `dt_type2()` from the `vec_ptype2()` method. It always returns a data table, no matter the order of arguments: ```{r} #' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } ``` The `vec_cast()` methods follow the same pattern, but note how the method for coercing to data frame uses `df_cast()` rather than `dt_cast()`. Also, please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents `to`, whereas the second class represents `x`. ```{r} #' @export vec_cast.data.table.data.table <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.table.data.frame <- function(x, to, ...) { # `x` is a data.frame to be converted to a data.table dt_cast(x, to, ...) } #' @export vec_cast.data.frame.data.table <- function(x, to, ...) { # `x` is a data.table to be converted to a data.frame df_cast(x, to, ...) } ``` With these methods vctrs is now able to combine data tables with data frames: ```{r} vec_cbind(data.frame(x = 1:3), data.table(y = "foo")) ``` ## A tibble example In this example we implement coercion methods for a tibble subclass that carries a colour as a scalar metadata: ```{r} # User constructor my_tibble <- function(colour = NULL, ...) { new_my_tibble(tibble::tibble(...), colour = colour) } # Developer constructor new_my_tibble <- function(x, colour = NULL) { stopifnot(is.data.frame(x)) tibble::new_tibble( x, colour = colour, class = "my_tibble", nrow = nrow(x) ) } df_colour <- function(x) { if (inherits(x, "my_tibble")) { attr(x, "colour") } else { NULL } } #'@export print.my_tibble <- function(x, ...) { cat(sprintf("<%s: %s>\n", class(x)[[1]], df_colour(x))) cli::cat_line(format(x)[-1]) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("base::print", "my_tibble") ``` This subclass is very simple. All it does is modify the header. ```{r} red <- my_tibble("red", x = 1, y = 1:2) red red[2] green <- my_tibble("green", z = TRUE) green ``` Combinations do not work properly out of the box, instead vctrs falls back to a bare tibble: ```{r} vec_rbind(red, tibble::tibble(x = 10:12)) ``` Instead of falling back to a data frame, we would like to return a `` when combined with a data frame or a tibble. Because this subclass has more metadata than normal data frames (it has a colour), it is a _supertype_ of tibble and data frame, i.e. it is the richer type. This is similar to how a grouped tibble is a more general type than a tibble or a data frame. Conceptually, the latter are pinned to a single constant group. The coercion methods for data frames operate in two steps: - They check for compatible subclass attributes. In our case the tibble colour has to be the same, or be undefined. - They call their parent methods, in this case [tib_ptype2()] and [tib_cast()] because we have a subclass of tibble. This eventually calls the data frame methods [df_ptype2()] and [tib_ptype2()] which match the columns and their types. This process should usually be wrapped in two functions to avoid repetition. Consider exporting these if you expect your class to be derived by other subclasses. We first implement a helper to determine if two data frames have compatible colours. We use the `df_colour()` accessor which returns `NULL` when the data frame colour is undefined. ```{r} has_compatible_colours <- function(x, y) { x_colour <- df_colour(x) %||% df_colour(y) y_colour <- df_colour(y) %||% x_colour identical(x_colour, y_colour) } ``` Next we implement the coercion helpers. If the colours are not compatible, we call `stop_incompatible_cast()` or `stop_incompatible_type()`. These strict coercion semantics are justified because in this class colour is a _data_ attribute. If it were a non essential _detail_ attribute, like the timezone in a datetime, we would just standardise it to the value of the left-hand side. In simpler cases (like the data.table example), these methods do not need to take the arguments suffixed in `_arg`. Here we do need to take these arguments so we can pass them to the `stop_` functions when we detect an incompatibility. They also should be passed to the parent methods. ```{r} #' @export my_tib_cast <- function(x, to, ..., x_arg = "", to_arg = "") { out <- tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) if (!has_compatible_colours(x, to)) { stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, details = "Can't combine colours." ) } colour <- df_colour(x) %||% df_colour(to) new_my_tibble(out, colour = colour) } #' @export my_tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { out <- tib_ptype2(x, y, ..., x_arg = x_arg, y_arg = y_arg) if (!has_compatible_colours(x, y)) { stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = "Can't combine colours." ) } colour <- df_colour(x) %||% df_colour(y) new_my_tibble(out, colour = colour) } ``` Let's now implement the coercion methods, starting with the self-self methods. ```{r} #' @export vec_ptype2.my_tibble.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_cast.my_tibble.my_tibble <- function(x, to, ...) { my_tib_cast(x, to, ...) } ``` ```{r, include = FALSE} knitr_local_registration("vctrs::vec_ptype2", "my_tibble.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.my_tibble") ``` We can now combine compatible instances of our class! ```{r, error = TRUE} vec_rbind(red, red) vec_rbind(green, green) vec_rbind(green, red) ``` The methods for combining our class with tibbles follow the same pattern. For ptype2 we return our class in both cases because it is the richer type: ```{r} #' @export vec_ptype2.my_tibble.tbl_df <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_ptype2.tbl_df.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } ``` For cast are careful about returning a tibble when casting to a tibble. Note the call to `vctrs::tib_cast()`: ```{r} #' @export vec_cast.my_tibble.tbl_df <- function(x, to, ...) { my_tib_cast(x, to, ...) } #' @export vec_cast.tbl_df.my_tibble <- function(x, to, ...) { tib_cast(x, to, ...) } ``` ```{r, include = FALSE} knitr_local_registration("vctrs::vec_ptype2", "my_tibble.tbl_df") knitr_local_registration("vctrs::vec_ptype2", "tbl_df.my_tibble") knitr_local_registration("vctrs::vec_cast", "tbl_df.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.tbl_df") ``` From this point, we get correct combinations with tibbles: ```{r} vec_rbind(red, tibble::tibble(x = 10:12)) ``` However we are not done yet. Because the coercion hierarchy is different from the class hierarchy, there is no inheritance of coercion methods. We're not getting correct behaviour for data frames yet because we haven't explicitly specified the methods for this class: ```{r} vec_rbind(red, data.frame(x = 10:12)) ``` Let's finish up the boiler plate: ```{r} #' @export vec_ptype2.my_tibble.data.frame <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_cast.my_tibble.data.frame <- function(x, to, ...) { my_tib_cast(x, to, ...) } #' @export vec_cast.data.frame.my_tibble <- function(x, to, ...) { df_cast(x, to, ...) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_tibble.data.frame") knitr_local_registration("vctrs::vec_ptype2", "data.frame.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.data.frame") knitr_local_registration("vctrs::vec_cast", "data.frame.my_tibble") ``` This completes the implementation: ```{r} vec_rbind(red, data.frame(x = 10:12)) ``` vctrs/man/faq/developer/links-coercion.Rmd0000644000176200001440000000100714276722575020324 0ustar liggesusers # Implementing coercion methods - For an overview of how these generics work and their roles in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")`. vctrs/man/faq/user/0000755000176200001440000000000014315060307013712 5ustar liggesusersvctrs/man/faq/user/faq-error-scalar-type.Rmd0000644000176200001440000000305314315060307020477 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` This error occurs when a function expects a vector and gets a scalar object instead. This commonly happens when some code attempts to assign a scalar object as column in a data frame: ```{r, error = TRUE} fn <- function() NULL tibble::tibble(x = fn) fit <- lm(1:3 ~ 1) tibble::tibble(x = fit) ``` # Vectorness in base R and in the tidyverse In base R, almost everything is a vector or behaves like a vector. In the tidyverse we have chosen to be a bit stricter about what is considered a vector. The main question we ask ourselves to decide on the vectorness of a type is whether it makes sense to include that object as a column in a data frame. The main difference is that S3 lists are considered vectors by base R but in the tidyverse that's not the case by default: ```{r, error = TRUE} fit <- lm(1:3 ~ 1) typeof(fit) class(fit) # S3 lists can be subset like a vector using base R: fit[c(1, 4)] # But not in vctrs vctrs::vec_slice(fit, c(1, 4)) ``` Defused function calls are another (more esoteric) example: ```{r, error = TRUE} call <- quote(foo(bar = TRUE, baz = FALSE)) call # They can be subset like a vector using base R: call[1:2] lapply(call, function(x) x) # But not with vctrs: vctrs::vec_slice(call, 1:2) ``` # I get a scalar type error but I think this is a bug It's possible the author of the class needs to do some work to declare their class a vector. Consider reaching out to the author. We have written a [developer FAQ page][howto-faq-fix-scalar-type-error] to help them fix the issue. vctrs/man/faq/user/faq-compatibility-types.Rmd0000644000176200001440000000517614276722575021171 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} stopifnot(rlang::is_installed("dplyr")) ``` Two vectors are __compatible__ when you can safely: - Combine them into one larger vector. - Assign values from one of the vectors into the other vector. Examples of compatible types are integer and double vectors. On the other hand, integer and character vectors are not compatible. # Common type of multiple vectors There are two possible outcomes when multiple vectors of different types are combined into a larger vector: - An incompatible type error is thrown because some of the types are not compatible: ```{r, error = TRUE} df1 <- data.frame(x = 1:3) df2 <- data.frame(x = "foo") dplyr::bind_rows(df1, df2) ``` - The vectors are combined into a vector that has the common type of all inputs. In this example, the common type of integer and logical is integer: ```{r} df1 <- data.frame(x = 1:3) df2 <- data.frame(x = FALSE) dplyr::bind_rows(df1, df2) ``` In general, the common type is the _richer_ type, in other words the type that can represent the most values. Logical vectors are at the bottom of the hierarchy of numeric types because they can only represent two values (not counting missing values). Then come integer vectors, and then doubles. Here is the vctrs type hierarchy for the fundamental vectors: \\figure{coerce.png} # Type conversion and lossy cast errors Type compatibility does not necessarily mean that you can __convert__ one type to the other type. That's because one of the types might support a larger set of possible values. For instance, integer and double vectors are compatible, but double vectors can't be converted to integer if they contain fractional values. When vctrs can't convert a vector because the target type is not as rich as the source type, it throws a lossy cast error. Assigning a fractional number to an integer vector is a typical example of a lossy cast error: ```{r, error = TRUE} int_vector <- 1:3 vec_assign(int_vector, 2, 0.001) ``` # How to make two vector classes compatible? If you encounter two vector types that you think should be compatible, they might need to implement coercion methods. Reach out to the author(s) of the classes and ask them if it makes sense for their classes to be compatible. These developer FAQ items provide guides for implementing coercion methods: - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. vctrs/man/faq/setup.Rmd0000644000176200001440000000026114315060307014537 0ustar liggesusers ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options( cli.unicode = FALSE, rlang_call_format_srcrefs = FALSE ) library(vctrs) ``` vctrs/man/vec_is_list.Rd0000644000176200001440000000244014401377400014760 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_is_list} \alias{vec_is_list} \alias{vec_check_list} \title{List checks} \usage{ vec_is_list(x) vec_check_list(x, ..., arg = caller_arg(x), call = caller_env()) } \arguments{ \item{x}{For \verb{vec_*()} functions, an object. For \verb{list_*()} functions, a list.} \item{...}{These dots are for future extensions and must be empty.} \item{arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions have been deprecated as of vctrs 0.6.0. \itemize{ \item \code{vec_is_list()} has been renamed to \code{\link[=obj_is_list]{obj_is_list()}}. \item \code{vec_check_list()} has been renamed to \code{\link[=obj_check_list]{obj_check_list()}}. } } \keyword{internal} vctrs/man/vec_unchop.Rd0000644000176200001440000000624114402367170014615 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_unchop} \alias{vec_unchop} \title{Chopping} \usage{ vec_unchop( x, indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal") ) } \arguments{ \item{x}{A vector} \item{indices}{For \code{vec_chop()}, a list of positive integer vectors to slice \code{x} with, or \code{NULL}. Can't be used if \code{sizes} is already specified. If both \code{indices} and \code{sizes} are \code{NULL}, \code{x} is split into its individual elements, equivalent to using an \code{indices} of \code{as.list(vec_seq_along(x))}. For \code{list_unchop()}, a list of positive integer vectors specifying the locations to place elements of \code{x} in. Each element of \code{x} is recycled to the size of the corresponding index vector. The size of \code{indices} must match the size of \code{x}. If \code{NULL}, \code{x} is combined in the order it is provided in, which is equivalent to using \code{\link[=vec_c]{vec_c()}}.} \item{ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{x}. Alternatively, you can supply \code{ptype} to give the output a known type.} \item{name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} } \value{ \itemize{ \item \code{vec_chop()}: A list where each element has the same type as \code{x}. The size of the list is equal to \code{vec_size(indices)}, \code{vec_size(sizes)}, or \code{vec_size(x)} depending on whether or not \code{indices} or \code{sizes} is provided. \item \code{list_unchop()}: A vector of type \code{vec_ptype_common(!!!x)}, or \code{ptype}, if specified. The size is computed as \code{vec_size_common(!!!indices)} unless the indices are \code{NULL}, in which case the size is \code{vec_size_common(!!!x)}. } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_unchop()} has been renamed to \code{\link[=list_unchop]{list_unchop()}} and is deprecated as of vctrs 0.5.0. } \keyword{internal} vctrs/man/vctrs-conditions.Rd0000644000176200001440000000564414512002263015771 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conditions.R \name{vctrs-conditions} \alias{vctrs-conditions} \alias{stop_incompatible_type} \alias{stop_incompatible_cast} \alias{stop_incompatible_op} \alias{stop_incompatible_size} \alias{allow_lossy_cast} \title{Custom conditions for vctrs package} \usage{ stop_incompatible_type( x, y, ..., x_arg, y_arg, action = c("combine", "convert"), details = NULL, message = NULL, class = NULL, call = caller_env() ) stop_incompatible_cast( x, to, ..., x_arg, to_arg, details = NULL, message = NULL, class = NULL, call = caller_env() ) stop_incompatible_op( op, x, y, details = NULL, ..., message = NULL, class = NULL, call = caller_env() ) stop_incompatible_size( x, y, x_size, y_size, ..., x_arg, y_arg, details = NULL, message = NULL, class = NULL, call = caller_env() ) allow_lossy_cast(expr, x_ptype = NULL, to_ptype = NULL) } \arguments{ \item{x, y, to}{Vectors} \item{..., class}{Only use these fields when creating a subclass.} \item{x_arg, y_arg, to_arg}{Argument names for \code{x}, \code{y}, and \code{to}. Used in error messages to inform the user about the locations of incompatible types.} \item{action}{An option to customize the incompatible type message depending on the context. Errors thrown from \code{\link[=vec_ptype2]{vec_ptype2()}} use \code{"combine"} and those thrown from \code{\link[=vec_cast]{vec_cast()}} use \code{"convert"}.} \item{details}{Any additional human readable details.} \item{message}{An overriding message for the error. \code{details} and \code{message} are mutually exclusive, supplying both is an error.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{x_ptype, to_ptype}{Suppress only the casting errors where \code{x} or \code{to} match these \link[=vec_ptype]{prototypes}.} } \value{ \verb{stop_incompatible_*()} unconditionally raise an error of class \code{"vctrs_error_incompatible_*"} and \code{"vctrs_error_incompatible"}. } \description{ These functions are called for their side effect of raising errors and warnings. These conditions have custom classes and structures to make testing easier. } \examples{ # Most of the time, `maybe_lossy_cast()` returns its input normally: maybe_lossy_cast( c("foo", "bar"), NA, "", lossy = c(FALSE, FALSE), x_arg = "", to_arg = "" ) # If `lossy` has any `TRUE`, an error is thrown: try(maybe_lossy_cast( c("foo", "bar"), NA, "", lossy = c(FALSE, TRUE), x_arg = "", to_arg = "" )) # Unless lossy casts are allowed: allow_lossy_cast( maybe_lossy_cast( c("foo", "bar"), NA, "", lossy = c(FALSE, TRUE), x_arg = "", to_arg = "" ) ) } \keyword{internal} vctrs/man/vec_c.Rd0000644000176200001440000000773614362266120013553 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/c.R \name{vec_c} \alias{vec_c} \title{Combine many vectors into one vector} \usage{ vec_c( ..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), .error_arg = "", .error_call = current_env() ) } \arguments{ \item{...}{Vectors to coerce.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{.name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} \item{.error_arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector with class given by \code{.ptype}, and length equal to the sum of the \code{vec_size()} of the contents of \code{...}. The vector will have names if the individual components have names (inner names) or if the arguments are named (outer names). If both inner and outer names are present, an error is thrown unless a \code{.name_spec} is provided. } \description{ Combine all arguments into a new vector of common type. } \section{Invariants}{ \itemize{ \item \code{vec_size(vec_c(x, y)) == vec_size(x) + vec_size(y)} \item \code{vec_ptype(vec_c(x, y)) == vec_ptype_common(x, y)}. } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies}{ \itemize{ \item \code{\link[base:c]{base::c()}} } If inputs inherit from a common class hierarchy, \code{vec_c()} falls back to \code{base::c()} if there exists a \code{c()} method implemented for this class hierarchy. } } \examples{ vec_c(FALSE, 1L, 1.5) # Date/times -------------------------- c(Sys.Date(), Sys.time()) c(Sys.time(), Sys.Date()) vec_c(Sys.Date(), Sys.time()) vec_c(Sys.time(), Sys.Date()) # Factors ----------------------------- c(factor("a"), factor("b")) vec_c(factor("a"), factor("b")) # By default, named inputs must be length 1: vec_c(name = 1) try(vec_c(name = 1:3)) # Pass a name specification to work around this: vec_c(name = 1:3, .name_spec = "{outer}_{inner}") # See `?name_spec` for more examples of name specifications. } \seealso{ \code{\link[=vec_cbind]{vec_cbind()}}/\code{\link[=vec_rbind]{vec_rbind()}} for combining data frames by rows or columns. } vctrs/man/vec_empty.Rd0000644000176200001440000000075314276722575014476 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_empty} \alias{vec_empty} \title{Is a vector empty} \usage{ vec_empty(x) } \arguments{ \item{x}{An object.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} This function is defunct, please use \code{\link[=vec_is_empty]{vec_is_empty()}}. } \keyword{internal} vctrs/man/internal-faq-ptype2-identity.Rd0000644000176200001440000001036314315060307020110 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-internal.R \name{internal-faq-ptype2-identity} \alias{internal-faq-ptype2-identity} \title{Internal FAQ - \code{vec_ptype2()}, \code{NULL}, and unspecified vectors} \description{ \subsection{Promotion monoid}{ Promotions (i.e. automatic coercions) should always transform inputs to their richer type to avoid losing values of precision. \code{vec_ptype2()} returns the \emph{richer} type of two vectors, or throws an incompatible type error if none of the two vector types include the other. For example, the richer type of integer and double is the latter because double covers a larger range of values than integer. \code{vec_ptype2()} is a \href{https://en.wikipedia.org/wiki/Monoid}{monoid} over vectors, which in practical terms means that it is a well behaved operation for \href{https://purrr.tidyverse.org/reference/reduce.html}{reduction}. Reduction is an important operation for promotions because that is how the richer type of multiple elements is computed. As a monoid, \code{vec_ptype2()} needs an identity element, i.e. a value that doesn’t change the result of the reduction. vctrs has two identity values, \code{NULL} and \strong{unspecified} vectors. } \subsection{The \code{NULL} identity}{ As an identity element that shouldn’t influence the determination of the common type of a set of vectors, \code{NULL} is promoted to any type: \if{html}{\out{
}}\preformatted{vec_ptype2(NULL, "") #> character(0) vec_ptype2(1L, NULL) #> integer(0) }\if{html}{\out{
}} The common type of \code{NULL} and \code{NULL} is the identity \code{NULL}: \if{html}{\out{
}}\preformatted{vec_ptype2(NULL, NULL) #> NULL }\if{html}{\out{
}} This way the result of \code{vec_ptype2(NULL, NULL)} does not influence subsequent promotions: \if{html}{\out{
}}\preformatted{vec_ptype2( vec_ptype2(NULL, NULL), "" ) #> character(0) }\if{html}{\out{
}} } \subsection{Unspecified vectors}{ In the vctrs coercion system, logical vectors of missing values are also automatically promoted to the type of any other vector, just like \code{NULL}. We call these vectors unspecified. The special coercion semantics of unspecified vectors serve two purposes: \enumerate{ \item It makes it possible to assign vectors of \code{NA} inside any type of vectors, even when they are not coercible with logical: \if{html}{\out{
}}\preformatted{x <- letters[1:5] vec_assign(x, 1:2, c(NA, NA)) #> [1] NA NA "c" "d" "e" }\if{html}{\out{
}} \item We can’t put \code{NULL} in a data frame, so we need an identity element that behaves more like a vector. Logical vectors of \code{NA} seem a natural fit for this. } Unspecified vectors are thus promoted to any other type, just like \code{NULL}: \if{html}{\out{
}}\preformatted{vec_ptype2(NA, "") #> character(0) vec_ptype2(1L, c(NA, NA)) #> integer(0) }\if{html}{\out{
}} } \subsection{Finalising common types}{ vctrs has an internal vector type of class \code{vctrs_unspecified}. Users normally don’t see such vectors in the wild, but they do come up when taking the common type of an unspecified vector with another identity value: \if{html}{\out{
}}\preformatted{vec_ptype2(NA, NA) #> [0] vec_ptype2(NA, NULL) #> [0] vec_ptype2(NULL, NA) #> [0] }\if{html}{\out{
}} We can’t return \code{NA} here because \code{vec_ptype2()} normally returns empty vectors. We also can’t return \code{NULL} because unspecified vectors need to be recognised as logical vectors if they haven’t been promoted at the end of the reduction. \if{html}{\out{
}}\preformatted{vec_ptype_finalise(vec_ptype2(NULL, NA)) #> logical(0) }\if{html}{\out{
}} See the output of \code{vec_ptype_common()} which performs the reduction and finalises the type, ready to be used by the caller: \if{html}{\out{
}}\preformatted{vec_ptype_common(NULL, NULL) #> NULL vec_ptype_common(NA, NULL) #> logical(0) }\if{html}{\out{
}} Note that \strong{partial} types in vctrs make use of the same mechanism. They are finalised with \code{vec_ptype_finalise()}. } } vctrs/man/unspecified.Rd0000644000176200001440000000120614276722575014773 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-unspecified.R \name{unspecified} \alias{unspecified} \title{A 1d vector of unspecified type} \usage{ unspecified(n = 0) } \arguments{ \item{n}{Length of vector} } \description{ This is a \link[=new_partial]{partial type} used to represent logical vectors that only contain \code{NA}. These require special handling because we want to allow \code{NA} to specify missingness without requiring a type. } \examples{ vec_ptype_show() vec_ptype_show(NA) vec_c(NA, factor("x")) vec_c(NA, Sys.Date()) vec_c(NA, Sys.time()) vec_c(NA, list(1:3, 4:5)) } \keyword{internal} vctrs/man/reference-faq-compatibility.Rd0000644000176200001440000000723614315612253020041 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{reference-faq-compatibility} \alias{reference-faq-compatibility} \title{FAQ - Is my class compatible with vctrs?} \description{ vctrs provides a framework for working with vector classes in a generic way. However, it implements several compatibility fallbacks to base R methods. In this reference you will find how vctrs tries to be compatible with your vector class, and what base methods you need to implement for compatibility. If you’re starting from scratch, we think you’ll find it easier to start using \code{\link[=new_vctr]{new_vctr()}} as documented in \code{vignette("s3-vector")}. This guide is aimed for developers with existing vector classes. \subsection{Aggregate operations with fallbacks}{ All vctrs operations are based on four primitive generics described in the next section. However there are many higher level operations. The most important ones implement fallbacks to base generics for maximum compatibility with existing classes. \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} falls back to the base \code{[} generic if no \code{\link[=vec_proxy]{vec_proxy()}} method is implemented. This way foreign classes that do not implement \code{\link[=vec_restore]{vec_restore()}} can restore attributes based on the new subsetted contents. \item \code{\link[=vec_c]{vec_c()}} and \code{\link[=vec_rbind]{vec_rbind()}} now fall back to \code{\link[base:c]{base::c()}} if the inputs have a common parent class with a \code{c()} method (only if they have no self-to-self \code{vec_ptype2()} method). vctrs works hard to make your \code{c()} method success in various situations (with \code{NULL} and \code{NA} inputs, even as first input which would normally prevent dispatch to your method). The main downside compared to using vctrs primitives is that you can’t combine vectors of different classes since there is no extensible mechanism of coercion in \code{c()}, and it is less efficient in some cases. } } \subsection{The vctrs primitives}{ Most functions in vctrs are aggregate operations: they call other vctrs functions which themselves call other vctrs functions. The dependencies of a vctrs functions are listed in the Dependencies section of its documentation page. Take a look at \code{\link[=vec_count]{vec_count()}} for an example. These dependencies form a tree whose leaves are the four vctrs primitives. Here is the diagram for \code{vec_count()}: \figure{vec-count-deps.png} \subsection{The coercion generics}{ The coercion mechanism in vctrs is based on two generics: \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } See the \link[=theory-faq-coercion]{theory overview}. Two objects with the same class and the same attributes are always considered compatible by ptype2 and cast. If the attributes or classes differ, they throw an incompatible type error. Coercion errors are the main source of incompatibility with vctrs. See the \link[=howto-faq-coercion]{howto guide} if you need to implement methods for these generics. } \subsection{The proxy and restoration generics}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } These generics are essential for vctrs but mostly optional. \code{vec_proxy()} defaults to an \link{identity} function and you normally don’t need to implement it. The proxy a vector must be one of the atomic vector types, a list, or a data frame. By default, S3 lists that do not inherit from \code{"list"} do not have an identity proxy. In that case, you need to explicitly implement \code{vec_proxy()} or make your class inherit from list. } } } vctrs/man/vec_init.Rd0000644000176200001440000000066714276722575014307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{vec_init} \alias{vec_init} \title{Initialize a vector} \usage{ vec_init(x, n = 1L) } \arguments{ \item{x}{Template of vector to initialize.} \item{n}{Desired size of result.} } \description{ Initialize a vector } \section{Dependencies}{ \itemize{ \item vec_slice() } } \examples{ vec_init(1:10, 3) vec_init(Sys.Date(), 5) vec_init(mtcars, 2) } vctrs/man/vec_proxy_compare.Rd0000644000176200001440000000610614315060307016203 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare.R \name{vec_proxy_compare} \alias{vec_proxy_compare} \alias{vec_proxy_order} \title{Comparison and order proxy} \usage{ vec_proxy_compare(x, ...) vec_proxy_order(x, ...) } \arguments{ \item{x}{A vector x.} \item{...}{These dots are for future extensions and must be empty.} } \value{ A 1d atomic vector or a data frame. } \description{ \code{vec_proxy_compare()} and \code{vec_proxy_order()} return proxy objects, i.e. an atomic vector or data frame of atomic vectors. For \code{\link[=vctr]{vctrs_vctr}} objects: \itemize{ \item \code{vec_proxy_compare()} determines the behavior of \code{<}, \code{>}, \code{>=} and \code{<=} (via \code{\link[=vec_compare]{vec_compare()}}); and \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}}, and \code{\link[=quantile]{quantile()}}. \item \code{vec_proxy_order()} determines the behavior of \code{order()} and \code{sort()} (via \code{xtfrm()}). } } \details{ The default method of \code{vec_proxy_compare()} assumes that all classes built on top of atomic vectors or records are comparable. Internally the default calls \code{\link[=vec_proxy_equal]{vec_proxy_equal()}}. If your class is not comparable, you will need to provide a \code{vec_proxy_compare()} method that throws an error. The behavior of \code{vec_proxy_order()} is identical to \code{vec_proxy_compare()}, with the exception of lists. Lists are not comparable, as comparing elements of different types is undefined. However, to allow ordering of data frames containing list-columns, the ordering proxy of a list is generated as an integer vector that can be used to order list elements by first appearance. If a class implements a \code{vec_proxy_compare()} method, it usually doesn't need to provide a \code{vec_proxy_order()} method, because the latter is implemented by forwarding to \code{vec_proxy_compare()} by default. Classes inheriting from list are an exception: due to the default \code{vec_proxy_order()} implementation, \code{vec_proxy_compare()} and \code{vec_proxy_order()} should be provided for such classes (with identical implementations) to avoid mismatches between comparison and sorting. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} called by default in \code{vec_proxy_compare()} \item \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} called by default in \code{vec_proxy_order()} } } \section{Data frames}{ If the proxy for \code{x} is a data frame, the proxy function is automatically recursively applied on all columns as well. After applying the proxy recursively, if there are any data frame columns present in the proxy, then they are unpacked. Finally, if the resulting data frame only has a single column, then it is unwrapped and a vector is returned as the proxy. } \examples{ # Lists are not comparable x <- list(1:2, 1, 1:2, 3) try(vec_compare(x, x)) # But lists are orderable by first appearance to allow for # ordering data frames with list-cols df <- new_data_frame(list(x = x)) vec_sort(df) } \keyword{internal} vctrs/man/new_date.Rd0000644000176200001440000000440214276722575014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-date-time.R \name{new_date} \alias{new_date} \alias{new_datetime} \alias{new_duration} \alias{vec_ptype2.Date} \alias{vec_ptype2.POSIXct} \alias{vec_ptype2.POSIXlt} \alias{vec_ptype2.difftime} \alias{vec_cast.Date} \alias{vec_cast.POSIXct} \alias{vec_cast.POSIXlt} \alias{vec_cast.difftime} \alias{vec_arith.Date} \alias{vec_arith.POSIXct} \alias{vec_arith.POSIXlt} \alias{vec_arith.difftime} \title{Date, date-time, and duration S3 classes} \usage{ new_date(x = double()) new_datetime(x = double(), tzone = "") new_duration(x = double(), units = c("secs", "mins", "hours", "days", "weeks")) \method{vec_ptype2}{Date}(x, y, ...) \method{vec_ptype2}{POSIXct}(x, y, ...) \method{vec_ptype2}{POSIXlt}(x, y, ...) \method{vec_ptype2}{difftime}(x, y, ...) \method{vec_cast}{Date}(x, to, ...) \method{vec_cast}{POSIXct}(x, to, ...) \method{vec_cast}{POSIXlt}(x, to, ...) \method{vec_cast}{difftime}(x, to, ...) \method{vec_arith}{Date}(op, x, y, ...) \method{vec_arith}{POSIXct}(op, x, y, ...) \method{vec_arith}{POSIXlt}(op, x, y, ...) \method{vec_arith}{difftime}(op, x, y, ...) } \arguments{ \item{x}{A double vector representing the number of days since UNIX epoch for \code{new_date()}, number of seconds since UNIX epoch for \code{new_datetime()}, and number of \code{units} for \code{new_duration()}.} \item{tzone}{Time zone. A character vector of length 1. Either \code{""} for the local time zone, or a value from \code{\link[=OlsonNames]{OlsonNames()}}} \item{units}{Units of duration.} } \description{ \itemize{ \item A \code{date} (\link{Date}) is a double vector. Its value represent the number of days since the Unix "epoch", 1970-01-01. It has no attributes. \item A \code{datetime} (\link{POSIXct} is a double vector. Its value represents the number of seconds since the Unix "Epoch", 1970-01-01. It has a single attribute: the timezone (\code{tzone})) \item A \code{duration} (\link{difftime}) } } \details{ These function help the base \code{Date}, \code{POSIXct}, and \code{difftime} classes fit into the vctrs type system by providing constructors, coercion functions, and casting functions. } \examples{ new_date(0) new_datetime(0, tzone = "UTC") new_duration(1, "hours") } \keyword{internal} vctrs/man/fields.Rd0000644000176200001440000000154213566016500013727 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fields.R \name{fields} \alias{fields} \alias{n_fields} \alias{field} \alias{field<-} \title{Tools for accessing the fields of a record.} \usage{ fields(x) n_fields(x) field(x, i) field(x, i) <- value } \arguments{ \item{x}{A \link{rcrd}, i.e. a list of equal length vectors with unique names.} } \description{ A \link{rcrd} behaves like a vector, so \code{length()}, \code{names()}, and \code{$} can not provide access to the fields of the underlying list. These helpers do: \code{fields()} is equivalent to \code{names()}; \code{n_fields()} is equivalent to \code{length()}; \code{field()} is equivalent to \code{$}. } \examples{ x <- new_rcrd(list(x = 1:3, y = 3:1, z = letters[1:3])) n_fields(x) fields(x) field(x, "y") field(x, "y") <- runif(3) field(x, "y") } \keyword{internal} vctrs/man/theory-faq-recycling.Rd0000644000176200001440000000521714511524374016524 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{theory-faq-recycling} \alias{theory-faq-recycling} \alias{vector_recycling_rules} \title{FAQ - How does recycling work in vctrs and the tidyverse?} \description{ Recycling describes the concept of repeating elements of one vector to match the size of another. There are two rules that underlie the “tidyverse” recycling rules: \itemize{ \item Vectors of size 1 will be recycled to the size of any other vector \item Otherwise, all vectors must have the same size } } \section{Examples}{ Vectors of size 1 are recycled to the size of any other vector: \if{html}{\out{
}}\preformatted{tibble(x = 1:3, y = 1L) #> # A tibble: 3 x 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 3 1 }\if{html}{\out{
}} This includes vectors of size 0: \if{html}{\out{
}}\preformatted{tibble(x = integer(), y = 1L) #> # A tibble: 0 x 2 #> # i 2 variables: x , y }\if{html}{\out{
}} If vectors aren’t size 1, they must all be the same size. Otherwise, an error is thrown: \if{html}{\out{
}}\preformatted{tibble(x = 1:3, y = 4:7) #> Error in `tibble()`: #> ! Tibble columns must have compatible sizes. #> * Size 3: Existing data. #> * Size 4: Column `y`. #> i Only values of size one are recycled. }\if{html}{\out{
}} } \section{vctrs backend}{ Packages in r-lib and the tidyverse generally use \code{\link[=vec_size_common]{vec_size_common()}} and \code{\link[=vec_recycle_common]{vec_recycle_common()}} as the backends for handling recycling rules. \itemize{ \item \code{vec_size_common()} returns the common size of multiple vectors, after applying the recycling rules \item \code{vec_recycle_common()} goes one step further, and actually recycles the vectors to their common size } \if{html}{\out{
}}\preformatted{vec_size_common(1:3, "x") #> [1] 3 vec_recycle_common(1:3, "x") #> [[1]] #> [1] 1 2 3 #> #> [[2]] #> [1] "x" "x" "x" vec_size_common(1:3, c("x", "y")) #> Error: #> ! Can't recycle `..1` (size 3) to match `..2` (size 2). }\if{html}{\out{
}} } \section{Base R recycling rules}{ The recycling rules described here are stricter than the ones generally used by base R, which are: \itemize{ \item If any vector is length 0, the output will be length 0 \item Otherwise, the output will be length \code{max(length_x, length_y)}, and a warning will be thrown if the length of the longer vector is not an integer multiple of the length of the shorter vector. } We explore the base R rules in detail in \code{vignette("type-size")}. } vctrs/man/vctrs-package.Rd0000644000176200001440000000231314405105665015214 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-package.R \docType{package} \name{vctrs-package} \alias{vctrs} \alias{vctrs-package} \title{vctrs: Vector Helpers} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} Defines new notions of prototype and size that are used to provide tools for consistent and well-founded type-coercion and size-recycling, and are in turn connected to ideas of type- and size-stability useful for analysing function interfaces. } \seealso{ Useful links: \itemize{ \item \url{https://vctrs.r-lib.org/} \item \url{https://github.com/r-lib/vctrs} \item Report bugs at \url{https://github.com/r-lib/vctrs/issues} } } \author{ \strong{Maintainer}: Davis Vaughan \email{davis@posit.co} Authors: \itemize{ \item Hadley Wickham \email{hadley@posit.co} \item Lionel Henry \email{lionel@posit.co} } Other contributors: \itemize{ \item data.table team (Radix sort based on data.table's forder() and their contribution to R's order()) [copyright holder] \item Posit Software, PBC [copyright holder, funder] } } \keyword{internal} vctrs/man/vec_equal_na.Rd0000644000176200001440000000114614315060307015100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_equal_na} \alias{vec_equal_na} \title{Missing values} \usage{ vec_equal_na(x) } \arguments{ \item{x}{A vector} } \value{ A logical vector the same size as \code{x}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_equal_na()} has been renamed to \code{\link[=vec_detect_missing]{vec_detect_missing()}} and is deprecated as of vctrs 0.5.0. } \keyword{internal} vctrs/man/vec_equal.Rd0000644000176200001440000000220214315060307014414 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equal.R \name{vec_equal} \alias{vec_equal} \title{Equality} \usage{ vec_equal(x, y, na_equal = FALSE, .ptype = NULL) } \arguments{ \item{x, y}{Vectors with compatible types and lengths.} \item{na_equal}{Should \code{NA} values be considered equal?} \item{.ptype}{Override to optionally specify common type} } \value{ A logical vector the same size as the common size of \code{x} and \code{y}. Will only contain \code{NA}s if \code{na_equal} is \code{FALSE}. } \description{ \code{vec_equal()} tests if two vectors are equal. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_recycle_common]{vec_recycle_common()}} \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ vec_equal(c(TRUE, FALSE, NA), FALSE) vec_equal(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) vec_equal(5, 1:10) vec_equal("d", letters[1:10]) df <- data.frame(x = c(1, 1, 2, 1), y = c(1, 2, 1, NA)) vec_equal(df, data.frame(x = 1, y = 2)) } \seealso{ \code{\link[=vec_detect_missing]{vec_detect_missing()}} } vctrs/man/vec_group.Rd0000644000176200001440000000470414276722575014474 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group.R \name{vec_group} \alias{vec_group} \alias{vec_group_id} \alias{vec_group_loc} \alias{vec_group_rle} \title{Identify groups} \usage{ vec_group_id(x) vec_group_loc(x) vec_group_rle(x) } \arguments{ \item{x}{A vector} } \value{ \itemize{ \item \code{vec_group_id()}: An integer vector with the same size as \code{x}. \item \code{vec_group_loc()}: A two column data frame with size equal to \code{vec_size(vec_unique(x))}. \itemize{ \item A \code{key} column of type \code{vec_ptype(x)} \item A \code{loc} column of type list, with elements of type integer. } \item \code{vec_group_rle()}: A \code{vctrs_group_rle} rcrd object with two integer vector fields: \code{group} and \code{length}. } Note that when using \code{vec_group_loc()} for complex types, the default \code{data.frame} print method will be suboptimal, and you will want to coerce into a tibble to better understand the output. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \itemize{ \item \code{vec_group_id()} returns an identifier for the group that each element of \code{x} falls in, constructed in the order that they appear. The number of groups is also returned as an attribute, \code{n}. \item \code{vec_group_loc()} returns a data frame containing a \code{key} column with the unique groups, and a \code{loc} column with the locations of each group in \code{x}. \item \code{vec_group_rle()} locates groups in \code{x} and returns them run length encoded in the order that they appear. The return value is a rcrd object with fields for the \code{group} identifiers and the run \code{length} of the corresponding group. The number of groups is also returned as an attribute, \code{n}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ purrr <- c("p", "u", "r", "r", "r") vec_group_id(purrr) vec_group_rle(purrr) groups <- mtcars[c("vs", "am")] vec_group_id(groups) group_rle <- vec_group_rle(groups) group_rle # Access fields with `field()` field(group_rle, "group") field(group_rle, "length") # `vec_group_id()` is equivalent to vec_match(groups, vec_unique(groups)) vec_group_loc(mtcars$vs) vec_group_loc(mtcars[c("vs", "am")]) if (require("tibble")) { as_tibble(vec_group_loc(mtcars[c("vs", "am")])) } } \keyword{internal} vctrs/man/howto-faq-coercion-data-frame.Rd0000644000176200001440000003504614420027341020165 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-coercion-data-frame} \alias{howto-faq-coercion-data-frame} \title{FAQ - How to implement ptype2 and cast methods? (Data frames)} \description{ This guide provides a practical recipe for implementing \code{vec_ptype2()} and \code{vec_cast()} methods for coercions of data frame subclasses. Related topics: \itemize{ \item For an overview of the coercion mechanism in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. } Coercion of data frames occurs when different data frame classes are combined in some way. The two main methods of combination are currently row-binding with \code{\link[=vec_rbind]{vec_rbind()}} and col-binding with \code{\link[=vec_cbind]{vec_cbind()}} (which are in turn used by a number of dplyr and tidyr functions). These functions take multiple data frame inputs and automatically coerce them to their common type. vctrs is generally strict about the kind of automatic coercions that are performed when combining inputs. In the case of data frames we have decided to be a bit less strict for convenience. Instead of throwing an incompatible type error, we fall back to a base data frame or a tibble if we don’t know how to combine two data frame subclasses. It is still a good idea to specify the proper coercion behaviour for your data frame subclasses as soon as possible. We will see two examples in this guide. The first example is about a data frame subclass that has no particular attributes to manage. In the second example, we implement coercion methods for a tibble subclass that includes potentially incompatible attributes. \subsection{Roxygen workflow}{ To implement methods for generics, first import the generics in your namespace and redocument: \if{html}{\out{
}}\preformatted{#' @importFrom vctrs vec_ptype2 vec_cast NULL }\if{html}{\out{
}} Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won’t be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with \code{s3_register()}. } \subsection{Parent methods}{ Most of the common type determination should be performed by the parent class. In vctrs, double dispatch is implemented in such a way that you need to call the methods for the parent class manually. For \code{vec_ptype2()} this means you need to call \code{df_ptype2()} (for data frame subclasses) or \code{tib_ptype2()} (for tibble subclasses). Similarly, \code{df_cast()} and \code{tib_cast()} are the workhorses for \code{vec_cast()} methods of subtypes of \code{data.frame} and \code{tbl_df}. These functions take the union of the columns in \code{x} and \code{y}, and ensure shared columns have the same type. These functions are much less strict than \code{vec_ptype2()} and \code{vec_cast()} as they accept any subclass of data frame as input. They always return a \code{data.frame} or a \code{tbl_df}. You will probably want to write similar functions for your subclass to avoid repetition in your code. You may want to export them as well if you are expecting other people to derive from your class. } \subsection{A \code{data.table} example}{ This example is the actual implementation of vctrs coercion methods for \code{data.table}. This is a simple example because we don’t have to keep track of attributes for this class or manage incompatibilities. See the tibble section for a more complicated example. We first create the \code{dt_ptype2()} and \code{dt_cast()} helpers. They wrap around the parent methods \code{df_ptype2()} and \code{df_cast()}, and transform the common type or converted input to a data table. You may want to export these helpers if you expect other packages to derive from your data frame class. These helpers should always return data tables. To this end we use the conversion generic \code{as.data.table()}. Depending on the tools available for the particular class at hand, a constructor might be appropriate as well. \if{html}{\out{
}}\preformatted{dt_ptype2 <- function(x, y, ...) \{ as.data.table(df_ptype2(x, y, ...)) \} dt_cast <- function(x, to, ...) \{ as.data.table(df_cast(x, to, ...)) \} }\if{html}{\out{
}} We start with the self-self method: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.data.table.data.table <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} }\if{html}{\out{
}} Between a data frame and a data table, we consider the richer type to be data table. This decision is not based on the value coverage of each data structures, but on the idea that data tables have richer behaviour. Since data tables are the richer type, we call \code{dt_type2()} from the \code{vec_ptype2()} method. It always returns a data table, no matter the order of arguments: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} }\if{html}{\out{
}} The \code{vec_cast()} methods follow the same pattern, but note how the method for coercing to data frame uses \code{df_cast()} rather than \code{dt_cast()}. Also, please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents \code{to}, whereas the second class represents \code{x}. \if{html}{\out{
}}\preformatted{#' @export vec_cast.data.table.data.table <- function(x, to, ...) \{ dt_cast(x, to, ...) \} #' @export vec_cast.data.table.data.frame <- function(x, to, ...) \{ # `x` is a data.frame to be converted to a data.table dt_cast(x, to, ...) \} #' @export vec_cast.data.frame.data.table <- function(x, to, ...) \{ # `x` is a data.table to be converted to a data.frame df_cast(x, to, ...) \} }\if{html}{\out{
}} With these methods vctrs is now able to combine data tables with data frames: \if{html}{\out{
}}\preformatted{vec_cbind(data.frame(x = 1:3), data.table(y = "foo")) #> x y #> 1: 1 foo #> 2: 2 foo #> 3: 3 foo }\if{html}{\out{
}} } \subsection{A tibble example}{ In this example we implement coercion methods for a tibble subclass that carries a colour as a scalar metadata: \if{html}{\out{
}}\preformatted{# User constructor my_tibble <- function(colour = NULL, ...) \{ new_my_tibble(tibble::tibble(...), colour = colour) \} # Developer constructor new_my_tibble <- function(x, colour = NULL) \{ stopifnot(is.data.frame(x)) tibble::new_tibble( x, colour = colour, class = "my_tibble", nrow = nrow(x) ) \} df_colour <- function(x) \{ if (inherits(x, "my_tibble")) \{ attr(x, "colour") \} else \{ NULL \} \} #'@export print.my_tibble <- function(x, ...) \{ cat(sprintf("<\%s: \%s>\n", class(x)[[1]], df_colour(x))) cli::cat_line(format(x)[-1]) \} }\if{html}{\out{
}} This subclass is very simple. All it does is modify the header. \if{html}{\out{
}}\preformatted{red <- my_tibble("red", x = 1, y = 1:2) red #> #> x y #> #> 1 1 1 #> 2 1 2 red[2] #> #> y #> #> 1 1 #> 2 2 green <- my_tibble("green", z = TRUE) green #> #> z #> #> 1 TRUE }\if{html}{\out{
}} Combinations do not work properly out of the box, instead vctrs falls back to a bare tibble: \if{html}{\out{
}}\preformatted{vec_rbind(red, tibble::tibble(x = 10:12)) #> # A tibble: 5 x 2 #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} Instead of falling back to a data frame, we would like to return a \verb{} when combined with a data frame or a tibble. Because this subclass has more metadata than normal data frames (it has a colour), it is a \emph{supertype} of tibble and data frame, i.e. it is the richer type. This is similar to how a grouped tibble is a more general type than a tibble or a data frame. Conceptually, the latter are pinned to a single constant group. The coercion methods for data frames operate in two steps: \itemize{ \item They check for compatible subclass attributes. In our case the tibble colour has to be the same, or be undefined. \item They call their parent methods, in this case \code{\link[=tib_ptype2]{tib_ptype2()}} and \code{\link[=tib_cast]{tib_cast()}} because we have a subclass of tibble. This eventually calls the data frame methods \code{\link[=df_ptype2]{df_ptype2()}} and \code{\link[=tib_ptype2]{tib_ptype2()}} which match the columns and their types. } This process should usually be wrapped in two functions to avoid repetition. Consider exporting these if you expect your class to be derived by other subclasses. We first implement a helper to determine if two data frames have compatible colours. We use the \code{df_colour()} accessor which returns \code{NULL} when the data frame colour is undefined. \if{html}{\out{
}}\preformatted{has_compatible_colours <- function(x, y) \{ x_colour <- df_colour(x) \%||\% df_colour(y) y_colour <- df_colour(y) \%||\% x_colour identical(x_colour, y_colour) \} }\if{html}{\out{
}} Next we implement the coercion helpers. If the colours are not compatible, we call \code{stop_incompatible_cast()} or \code{stop_incompatible_type()}. These strict coercion semantics are justified because in this class colour is a \emph{data} attribute. If it were a non essential \emph{detail} attribute, like the timezone in a datetime, we would just standardise it to the value of the left-hand side. In simpler cases (like the data.table example), these methods do not need to take the arguments suffixed in \verb{_arg}. Here we do need to take these arguments so we can pass them to the \code{stop_} functions when we detect an incompatibility. They also should be passed to the parent methods. \if{html}{\out{
}}\preformatted{#' @export my_tib_cast <- function(x, to, ..., x_arg = "", to_arg = "") \{ out <- tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) if (!has_compatible_colours(x, to)) \{ stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, details = "Can't combine colours." ) \} colour <- df_colour(x) \%||\% df_colour(to) new_my_tibble(out, colour = colour) \} #' @export my_tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") \{ out <- tib_ptype2(x, y, ..., x_arg = x_arg, y_arg = y_arg) if (!has_compatible_colours(x, y)) \{ stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = "Can't combine colours." ) \} colour <- df_colour(x) \%||\% df_colour(y) new_my_tibble(out, colour = colour) \} }\if{html}{\out{
}} Let’s now implement the coercion methods, starting with the self-self methods. \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_cast.my_tibble.my_tibble <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} }\if{html}{\out{
}} We can now combine compatible instances of our class! \if{html}{\out{
}}\preformatted{vec_rbind(red, red) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 1 1 #> 4 1 2 vec_rbind(green, green) #> #> z #> #> 1 TRUE #> 2 TRUE vec_rbind(green, red) #> Error in `my_tib_ptype2()`: #> ! Can't combine `..1` and `..2` . #> Can't combine colours. }\if{html}{\out{
}} The methods for combining our class with tibbles follow the same pattern. For ptype2 we return our class in both cases because it is the richer type: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.tbl_df <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_ptype2.tbl_df.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} }\if{html}{\out{
}} For cast are careful about returning a tibble when casting to a tibble. Note the call to \code{vctrs::tib_cast()}: \if{html}{\out{
}}\preformatted{#' @export vec_cast.my_tibble.tbl_df <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} #' @export vec_cast.tbl_df.my_tibble <- function(x, to, ...) \{ tib_cast(x, to, ...) \} }\if{html}{\out{
}} From this point, we get correct combinations with tibbles: \if{html}{\out{
}}\preformatted{vec_rbind(red, tibble::tibble(x = 10:12)) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} However we are not done yet. Because the coercion hierarchy is different from the class hierarchy, there is no inheritance of coercion methods. We’re not getting correct behaviour for data frames yet because we haven’t explicitly specified the methods for this class: \if{html}{\out{
}}\preformatted{vec_rbind(red, data.frame(x = 10:12)) #> # A tibble: 5 x 2 #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} Let’s finish up the boiler plate: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.data.frame <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_ptype2.data.frame.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_cast.my_tibble.data.frame <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} #' @export vec_cast.data.frame.my_tibble <- function(x, to, ...) \{ df_cast(x, to, ...) \} }\if{html}{\out{
}} This completes the implementation: \if{html}{\out{
}}\preformatted{vec_rbind(red, data.frame(x = 10:12)) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} } } vctrs/man/vec_arith.Rd0000644000176200001440000000373513566016500014433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/arith.R \name{vec_arith} \alias{vec_arith} \alias{vec_arith.default} \alias{vec_arith.logical} \alias{vec_arith.numeric} \alias{vec_arith_base} \alias{MISSING} \title{Arithmetic operations} \usage{ vec_arith(op, x, y, ...) \method{vec_arith}{default}(op, x, y, ...) \method{vec_arith}{logical}(op, x, y, ...) \method{vec_arith}{numeric}(op, x, y, ...) vec_arith_base(op, x, y) MISSING() } \arguments{ \item{op}{An arithmetic operator as a string} \item{x, y}{A pair of vectors. For \code{!}, unary \code{+} and unary \code{-}, \code{y} will be a sentinel object of class \code{MISSING}, as created by \code{MISSING()}.} \item{...}{These dots are for future extensions and must be empty.} } \description{ This generic provides a common double dispatch mechanism for all infix operators (\code{+}, \code{-}, \code{/}, \code{*}, \code{^}, \code{\%\%}, \code{\%/\%}, \code{!}, \code{&}, \code{|}). It is used to power the default arithmetic and boolean operators for \link{vctr}s objects, overcoming the limitations of the base \link{Ops} generic. } \details{ \code{vec_arith_base()} is provided as a convenience for writing methods. It recycles \code{x} and \code{y} to common length then calls the base operator with the underlying \code{\link[=vec_data]{vec_data()}}. \code{vec_arith()} is also used in \code{diff.vctrs_vctr()} method via \code{-}. } \examples{ d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-02 12:00") t <- as.difftime(12, unit = "hours") vec_arith("-", dt, 1) vec_arith("-", dt, t) vec_arith("-", dt, d) vec_arith("+", dt, 86400) vec_arith("+", dt, t) vec_arith("+", t, t) vec_arith("/", t, t) vec_arith("/", t, 2) vec_arith("*", t, 2) } \seealso{ \code{\link[=stop_incompatible_op]{stop_incompatible_op()}} for signalling that an arithmetic operation is not permitted/supported. See \code{\link[=vec_math]{vec_math()}} for the equivalent for the unary mathematical functions. } \keyword{internal} vctrs/man/howto-faq-coercion.Rd0000644000176200001440000002271514511323761016173 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-coercion} \alias{howto-faq-coercion} \title{FAQ - How to implement ptype2 and cast methods?} \description{ This guide illustrates how to implement \code{vec_ptype2()} and \code{vec_cast()} methods for existing classes. Related topics: \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")} } \subsection{The natural number class}{ We’ll illustrate how to implement coercion methods with a simple class that represents natural numbers. In this scenario we have an existing class that already features a constructor and methods for \code{print()} and subset. \if{html}{\out{
}}\preformatted{#' @export new_natural <- function(x) \{ if (is.numeric(x) || is.logical(x)) \{ stopifnot(is_whole(x)) x <- as.integer(x) \} else \{ stop("Can't construct natural from unknown type.") \} structure(x, class = "my_natural") \} is_whole <- function(x) \{ all(x \%\% 1 == 0 | is.na(x)) \} #' @export print.my_natural <- function(x, ...) \{ cat("\n") x <- unclass(x) NextMethod() \} #' @export `[.my_natural` <- function(x, i, ...) \{ new_natural(NextMethod()) \} }\if{html}{\out{
}} \if{html}{\out{
}}\preformatted{new_natural(1:3) #> #> [1] 1 2 3 new_natural(c(1, NA)) #> #> [1] 1 NA }\if{html}{\out{
}} } \subsection{Roxygen workflow}{ To implement methods for generics, first import the generics in your namespace and redocument: \if{html}{\out{
}}\preformatted{#' @importFrom vctrs vec_ptype2 vec_cast NULL }\if{html}{\out{
}} Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won’t be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with \code{s3_register()}. } \subsection{Implementing \code{vec_ptype2()}}{ \subsection{The self-self method}{ The first method to implement is the one that signals that your class is compatible with itself: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.my_natural <- function(x, y, ...) \{ x \} vec_ptype2(new_natural(1), new_natural(2:3)) #> #> integer(0) }\if{html}{\out{
}} \code{vec_ptype2()} implements a fallback to try and be compatible with simple classes, so it may seem that you don’t need to implement the self-self coercion method. However, you must implement it explicitly because this is how vctrs knows that a class that is implementing vctrs methods (for instance this disable fallbacks to \code{base::c()}). Also, it makes your class a bit more efficient. } \subsection{The parent and children methods}{ Our natural number class is conceptually a parent of \verb{} and a child of \verb{}, but the class is not compatible with logical, integer, or double vectors yet: \if{html}{\out{
}}\preformatted{vec_ptype2(TRUE, new_natural(2:3)) #> Error: #> ! Can't combine `TRUE` and `new_natural(2:3)` . vec_ptype2(new_natural(1), 2:3) #> Error: #> ! Can't combine `new_natural(1)` and `2:3` . }\if{html}{\out{
}} We’ll specify the twin methods for each of these classes, returning the richer class in each case. \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.logical <- function(x, y, ...) \{ # The order of the classes in the method name follows the order of # the arguments in the function signature, so `x` is the natural # number and `y` is the logical x \} #' @export vec_ptype2.logical.my_natural <- function(x, y, ...) \{ # In this case `y` is the richer natural number y \} }\if{html}{\out{
}} Between a natural number and an integer, the latter is the richer class: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.integer <- function(x, y, ...) \{ y \} #' @export vec_ptype2.integer.my_natural <- function(x, y, ...) \{ x \} }\if{html}{\out{
}} We no longer get common type errors for logical and integer: \if{html}{\out{
}}\preformatted{vec_ptype2(TRUE, new_natural(2:3)) #> #> integer(0) vec_ptype2(new_natural(1), 2:3) #> integer(0) }\if{html}{\out{
}} We are not done yet. Pairwise coercion methods must be implemented for all the connected nodes in the coercion hierarchy, which include double vectors further up. The coercion methods for grand-parent types must be implemented separately: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.double <- function(x, y, ...) \{ y \} #' @export vec_ptype2.double.my_natural <- function(x, y, ...) \{ x \} }\if{html}{\out{
}} } \subsection{Incompatible attributes}{ Most of the time, inputs are incompatible because they have different classes for which no \code{vec_ptype2()} method is implemented. More rarely, inputs could be incompatible because of their attributes. In that case incompatibility is signalled by calling \code{stop_incompatible_type()}. In the following example, we implement a self-self ptype2 method for a hypothetical subclass of \verb{} that has stricter combination semantics. The method throws an error when the levels of the two factors are not compatible. \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_strict_factor.my_strict_factor <- function(x, y, ..., x_arg = "", y_arg = "") \{ if (!setequal(levels(x), levels(y))) \{ stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) \} x \} }\if{html}{\out{
}} Note how the methods need to take \code{x_arg} and \code{y_arg} parameters and pass them on to \code{stop_incompatible_type()}. These argument tags help create more informative error messages when the common type determination is for a column of a data frame. They are part of the generic signature but can usually be left out if not used. } } \subsection{Implementing \code{vec_cast()}}{ Corresponding \code{vec_cast()} methods must be implemented for all \code{vec_ptype2()} methods. The general pattern is to convert the argument \code{x} to the type of \code{to}. The methods should validate the values in \code{x} and make sure they conform to the values of \code{to}. Please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents \code{to}, whereas the second class represents \code{x}. The self-self method is easy in this case, it just returns the target input: \if{html}{\out{
}}\preformatted{#' @export vec_cast.my_natural.my_natural <- function(x, to, ...) \{ x \} }\if{html}{\out{
}} The other types need to be validated. We perform input validation in the \code{new_natural()} constructor, so that’s a good fit for our \code{vec_cast()} implementations. \if{html}{\out{
}}\preformatted{#' @export vec_cast.my_natural.logical <- function(x, to, ...) \{ # The order of the classes in the method name is in reverse order # of the arguments in the function signature, so `to` is the natural # number and `x` is the logical new_natural(x) \} vec_cast.my_natural.integer <- function(x, to, ...) \{ new_natural(x) \} vec_cast.my_natural.double <- function(x, to, ...) \{ new_natural(x) \} }\if{html}{\out{
}} With these methods, vctrs is now able to combine logical and natural vectors. It properly returns the richer type of the two, a natural vector: \if{html}{\out{
}}\preformatted{vec_c(TRUE, new_natural(1), FALSE) #> #> [1] 1 1 0 }\if{html}{\out{
}} Because we haven’t implemented conversions \emph{from} natural, it still doesn’t know how to combine natural with the richer integer and double types: \if{html}{\out{
}}\preformatted{vec_c(new_natural(1), 10L) #> Error in `vec_c()`: #> ! Can't convert `..1` to . vec_c(1.5, new_natural(1)) #> Error in `vec_c()`: #> ! Can't convert `..2` to . }\if{html}{\out{
}} This is quick work which completes the implementation of coercion methods for vctrs: \if{html}{\out{
}}\preformatted{#' @export vec_cast.logical.my_natural <- function(x, to, ...) \{ # In this case `to` is the logical and `x` is the natural number attributes(x) <- NULL as.logical(x) \} #' @export vec_cast.integer.my_natural <- function(x, to, ...) \{ attributes(x) <- NULL as.integer(x) \} #' @export vec_cast.double.my_natural <- function(x, to, ...) \{ attributes(x) <- NULL as.double(x) \} }\if{html}{\out{
}} And we now get the expected combinations. \if{html}{\out{
}}\preformatted{vec_c(new_natural(1), 10L) #> [1] 1 10 vec_c(1.5, new_natural(1)) #> [1] 1.5 1.0 }\if{html}{\out{
}} } } vctrs/man/missing.Rd0000644000176200001440000000362014315060307014126 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/missing.R \name{missing} \alias{missing} \alias{vec_detect_missing} \alias{vec_any_missing} \title{Missing values} \usage{ vec_detect_missing(x) vec_any_missing(x) } \arguments{ \item{x}{A vector} } \value{ \itemize{ \item \code{vec_detect_missing()} returns a logical vector the same size as \code{x}. \item \code{vec_any_missing()} returns a single \code{TRUE} or \code{FALSE}. } } \description{ \itemize{ \item \code{vec_detect_missing()} returns a logical vector the same size as \code{x}. For each element of \code{x}, it returns \code{TRUE} if the element is missing, and \code{FALSE} otherwise. \item \code{vec_any_missing()} returns a single \code{TRUE} or \code{FALSE} depending on whether or not \code{x} has \emph{any} missing values. } \subsection{Differences with \code{\link[=is.na]{is.na()}}}{ Data frame rows are only considered missing if every element in the row is missing. Similarly, \link[=new_rcrd]{record vector} elements are only considered missing if every field in the record is missing. Put another way, rows with \emph{any} missing values are considered \link[=vec_detect_complete]{incomplete}, but only rows with \emph{all} missing values are considered missing. List elements are only considered missing if they are \code{NULL}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ x <- c(1, 2, NA, 4, NA) vec_detect_missing(x) vec_any_missing(x) # Data frames are iterated over rowwise, and only report a row as missing # if every element of that row is missing. If a row is only partially # missing, it is said to be incomplete, but not missing. y <- c("a", "b", NA, "d", "e") df <- data_frame(x = x, y = y) df$missing <- vec_detect_missing(df) df$incomplete <- !vec_detect_complete(df) df } \seealso{ \code{\link[=vec_detect_complete]{vec_detect_complete()}} } vctrs/man/as-is.Rd0000644000176200001440000000060214276722575013510 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-asis.R \name{as-is} \alias{as-is} \alias{vec_ptype2.AsIs} \title{AsIs S3 class} \usage{ \method{vec_ptype2}{AsIs}(x, y, ..., x_arg = "", y_arg = "") } \description{ These functions help the base AsIs class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/runs.Rd0000644000176200001440000000346714363556517013475 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/runs.R \name{runs} \alias{runs} \alias{vec_identify_runs} \alias{vec_run_sizes} \title{Runs} \usage{ vec_identify_runs(x) vec_run_sizes(x) } \arguments{ \item{x}{A vector.} } \value{ \itemize{ \item For \code{vec_identify_runs()}, an integer vector with the same size as \code{x}. A scalar integer attribute, \code{n}, is attached. \item For \code{vec_run_sizes()}, an integer vector with size equal to the number of runs in \code{x}. } } \description{ \itemize{ \item \code{vec_identify_runs()} returns a vector of identifiers for the elements of \code{x} that indicate which run of repeated values they fall in. The number of runs is also returned as an attribute, \code{n}. \item \code{vec_run_sizes()} returns an integer vector corresponding to the size of each run. This is identical to the \code{times} column from \code{vec_unrep()}, but is faster if you don't need the run keys. \item \code{\link[=vec_unrep]{vec_unrep()}} is a generalized \code{\link[base:rle]{base::rle()}}. It is documented alongside the "repeat" functions of \code{\link[=vec_rep]{vec_rep()}} and \code{\link[=vec_rep_each]{vec_rep_each()}}; look there for more information. } } \details{ Unlike \code{\link[base:rle]{base::rle()}}, adjacent missing values are considered identical when constructing runs. For example, \code{vec_identify_runs(c(NA, NA))} will return \code{c(1, 1)}, not \code{c(1, 2)}. } \examples{ x <- c("a", "z", "z", "c", "a", "a") vec_identify_runs(x) vec_run_sizes(x) vec_unrep(x) y <- c(1, 1, 1, 2, 2, 3) # With multiple columns, the runs are constructed rowwise df <- data_frame( x = x, y = y ) vec_identify_runs(df) vec_run_sizes(df) vec_unrep(df) } \seealso{ \code{\link[=vec_unrep]{vec_unrep()}} for a generalized \code{\link[base:rle]{base::rle()}}. } vctrs/man/vec_cbind_frame_ptype.Rd0000644000176200001440000000134414276722575017007 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind.R \name{vec_cbind_frame_ptype} \alias{vec_cbind_frame_ptype} \title{Frame prototype} \usage{ vec_cbind_frame_ptype(x, ...) } \arguments{ \item{x}{A data frame.} \item{...}{These dots are for future extensions and must be empty.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This is an experimental generic that returns zero-columns variants of a data frame. It is needed for \code{\link[=vec_cbind]{vec_cbind()}}, to work around the lack of colwise primitives in vctrs. Expect changes. } \keyword{internal} vctrs/man/vec_chop.Rd0000644000176200001440000001574514402367170014263 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice-chop.R \name{vec_chop} \alias{vec_chop} \alias{list_unchop} \title{Chopping} \usage{ vec_chop(x, ..., indices = NULL, sizes = NULL) list_unchop( x, ..., indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), error_arg = "x", error_call = current_env() ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{indices}{For \code{vec_chop()}, a list of positive integer vectors to slice \code{x} with, or \code{NULL}. Can't be used if \code{sizes} is already specified. If both \code{indices} and \code{sizes} are \code{NULL}, \code{x} is split into its individual elements, equivalent to using an \code{indices} of \code{as.list(vec_seq_along(x))}. For \code{list_unchop()}, a list of positive integer vectors specifying the locations to place elements of \code{x} in. Each element of \code{x} is recycled to the size of the corresponding index vector. The size of \code{indices} must match the size of \code{x}. If \code{NULL}, \code{x} is combined in the order it is provided in, which is equivalent to using \code{\link[=vec_c]{vec_c()}}.} \item{sizes}{An integer vector of non-negative sizes representing sequential indices to slice \code{x} with, or \code{NULL}. Can't be used if \code{indices} is already specified. For example, \code{sizes = c(2, 4)} is equivalent to \code{indices = list(1:2, 3:6)}, but is typically faster. \code{sum(sizes)} must be equal to \code{vec_size(x)}, i.e. \code{sizes} must completely partition \code{x}, but an individual size is allowed to be \code{0}.} \item{ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{x}. Alternatively, you can supply \code{ptype} to give the output a known type.} \item{name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} \item{error_arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ \itemize{ \item \code{vec_chop()}: A list where each element has the same type as \code{x}. The size of the list is equal to \code{vec_size(indices)}, \code{vec_size(sizes)}, or \code{vec_size(x)} depending on whether or not \code{indices} or \code{sizes} is provided. \item \code{list_unchop()}: A vector of type \code{vec_ptype_common(!!!x)}, or \code{ptype}, if specified. The size is computed as \code{vec_size_common(!!!indices)} unless the indices are \code{NULL}, in which case the size is \code{vec_size_common(!!!x)}. } } \description{ \itemize{ \item \code{vec_chop()} provides an efficient method to repeatedly slice a vector. It captures the pattern of \code{map(indices, vec_slice, x = x)}. When no indices are supplied, it is generally equivalent to \code{\link[=as.list]{as.list()}}. \item \code{list_unchop()} combines a list of vectors into a single vector, placing elements in the output according to the locations specified by \code{indices}. It is similar to \code{\link[=vec_c]{vec_c()}}, but gives greater control over how the elements are combined. When no indices are supplied, it is identical to \code{vec_c()}, but typically a little faster. } If \code{indices} selects every value in \code{x} exactly once, in any order, then \code{list_unchop()} is the inverse of \code{vec_chop()} and the following invariant holds: \if{html}{\out{
}}\preformatted{list_unchop(vec_chop(x, indices = indices), indices = indices) == x }\if{html}{\out{
}} } \section{Dependencies of \code{vec_chop()}}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \section{Dependencies of \code{list_unchop()}}{ \itemize{ \item \code{\link[=vec_c]{vec_c()}} } } \examples{ vec_chop(1:5) # These two are equivalent vec_chop(1:5, indices = list(1:2, 3:5)) vec_chop(1:5, sizes = c(2, 3)) # Can also be used on data frames vec_chop(mtcars, indices = list(1:3, 4:6)) # If `indices` selects every value in `x` exactly once, # in any order, then `list_unchop()` inverts `vec_chop()` x <- c("a", "b", "c", "d") indices <- list(2, c(3, 1), 4) vec_chop(x, indices = indices) list_unchop(vec_chop(x, indices = indices), indices = indices) # When unchopping, size 1 elements of `x` are recycled # to the size of the corresponding index list_unchop(list(1, 2:3), indices = list(c(1, 3, 5), c(2, 4))) # Names are retained, and outer names can be combined with inner # names through the use of a `name_spec` lst <- list(x = c(a = 1, b = 2), y = 1) list_unchop(lst, indices = list(c(3, 2), c(1, 4)), name_spec = "{outer}_{inner}") # An alternative implementation of `ave()` can be constructed using # `vec_chop()` and `list_unchop()` in combination with `vec_group_loc()` ave2 <- function(.x, .by, .f, ...) { indices <- vec_group_loc(.by)$loc chopped <- vec_chop(.x, indices = indices) out <- lapply(chopped, .f, ...) list_unchop(out, indices = indices) } breaks <- warpbreaks$breaks wool <- warpbreaks$wool ave2(breaks, wool, mean) identical( ave2(breaks, wool, mean), ave(breaks, wool, FUN = mean) ) # If you know your input is sorted and you'd like to split on the groups, # `vec_run_sizes()` can be efficiently combined with `sizes` df <- data_frame( g = c(2, 5, 5, 6, 6, 6, 6, 8, 9, 9), x = 1:10 ) vec_chop(df, sizes = vec_run_sizes(df$g)) # If you have a list of homogeneous vectors, sometimes it can be useful to # unchop, apply a function to the flattened vector, and then rechop according # to the original indices. This can be done efficiently with `list_sizes()`. x <- list(c(1, 2, 1), c(3, 1), 5, double()) x_flat <- list_unchop(x) x_flat <- x_flat + max(x_flat) vec_chop(x_flat, sizes = list_sizes(x)) } vctrs/man/op-empty-default.Rd0000644000176200001440000000101114276722575015663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{\%0\%} \alias{\%0\%} \title{Default value for empty vectors} \usage{ x \%0\% y } \arguments{ \item{x}{A vector} \item{y}{Value to use if \code{x} is empty. To preserve type-stability, should be the same type as \code{x}.} } \description{ Use this inline operator when you need to provide a default value for empty (as defined by \code{\link[=vec_is_empty]{vec_is_empty()}}) vectors. } \examples{ 1:10 \%0\% 5 integer() \%0\% 5 } vctrs/man/name_spec.Rd0000644000176200001440000000543114315060307014411 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{name_spec} \alias{name_spec} \title{Name specifications} \arguments{ \item{name_spec, .name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} } \description{ A name specification describes how to combine an inner and outer names. This sort of name combination arises when concatenating vectors or flattening lists. There are two possible cases: \itemize{ \item Named vector: \if{html}{\out{
}}\preformatted{vec_c(outer = c(inner1 = 1, inner2 = 2)) }\if{html}{\out{
}} \item Unnamed vector: \if{html}{\out{
}}\preformatted{vec_c(outer = 1:2) }\if{html}{\out{
}} } In r-lib and tidyverse packages, these cases are errors by default, because there's no behaviour that works well for every case. Instead, you can provide a name specification that describes how to combine the inner and outer names of inputs. Name specifications can refer to: \itemize{ \item \code{outer}: The external name recycled to the size of the input vector. \item \code{inner}: Either the names of the input vector, or a sequence of integer from 1 to the size of the vector if it is unnamed. } } \examples{ # By default, named inputs must be length 1: vec_c(name = 1) # ok try(vec_c(name = 1:3)) # bad # They also can't have internal names, even if scalar: try(vec_c(name = c(internal = 1))) # bad # Pass a name specification to work around this. A specification # can be a glue string referring to `outer` and `inner`: vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}") vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}_{inner}") # They can also be functions: my_spec <- function(outer, inner) paste(outer, inner, sep = "_") vec_c(name = 1:3, other = 4:5, .name_spec = my_spec) # Or purrr-style formulas for anonymous functions: vec_c(name = 1:3, other = 4:5, .name_spec = ~ paste0(.x, .y)) } vctrs/man/df_list.Rd0000644000176200001440000000540014511524374014106 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{df_list} \alias{df_list} \title{Collect columns for data frame construction} \usage{ df_list( ..., .size = NULL, .unpack = TRUE, .name_repair = c("check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env() ) } \arguments{ \item{...}{Vectors of equal-length. When inputs are named, those names are used for names of the resulting list.} \item{.size}{The common size of vectors supplied in \code{...}. If \code{NULL}, this will be computed as the common size of the inputs.} \item{.unpack}{Should unnamed data frame inputs be unpacked? Defaults to \code{TRUE}.} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"}, \code{"minimal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{df_list()} constructs the data structure underlying a data frame, a named list of equal-length vectors. It is often used in combination with \code{\link[=new_data_frame]{new_data_frame()}} to safely and consistently create a helper function for data frame subclasses. } \section{Properties}{ \itemize{ \item Inputs are \link[=theory-faq-recycling]{recycled} to a common size with \code{\link[=vec_recycle_common]{vec_recycle_common()}}. \item With the exception of data frames, inputs are not modified in any way. Character vectors are never converted to factors, and lists are stored as-is for easy creation of list-columns. \item Unnamed data frame inputs are automatically unpacked. Named data frame inputs are stored unmodified as data frame columns. \item \code{NULL} inputs are completely ignored. \item The dots are dynamic, allowing for splicing of lists with \verb{!!!} and unquoting. } } \examples{ # `new_data_frame()` can be used to create custom data frame constructors new_fancy_df <- function(x = list(), n = NULL, ..., class = NULL) { new_data_frame(x, n = n, ..., class = c(class, "fancy_df")) } # Combine this constructor with `df_list()` to create a safe, # consistent helper function for your data frame subclass fancy_df <- function(...) { data <- df_list(...) new_fancy_df(data) } df <- fancy_df(x = 1) class(df) } \seealso{ \code{\link[=new_data_frame]{new_data_frame()}} for constructing data frame subclasses from a validated input. \code{\link[=data_frame]{data_frame()}} for a fast data frame creation helper. } vctrs/man/new_data_frame.Rd0000644000176200001440000000266314511320527015420 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{new_data_frame} \alias{new_data_frame} \title{Assemble attributes for data frame construction} \usage{ new_data_frame(x = list(), n = NULL, ..., class = NULL) } \arguments{ \item{x}{A named list of equal-length vectors. The lengths are not checked; it is responsibility of the caller to make sure they are equal.} \item{n}{Number of rows. If \code{NULL}, will be computed from the length of the first element of \code{x}.} \item{..., class}{Additional arguments for creating subclasses. The following attributes have special behavior: \itemize{ \item \code{"names"} is preferred if provided, overriding existing names in \code{x}. \item \code{"row.names"} is preferred if provided, overriding both \code{n} and the size implied by \code{x}. }} } \description{ \code{new_data_frame()} constructs a new data frame from an existing list. It is meant to be performant, and does not check the inputs for correctness in any way. It is only safe to use after a call to \code{\link[=df_list]{df_list()}}, which collects and validates the columns used to construct the data frame. } \examples{ new_data_frame(list(x = 1:10, y = 10:1)) } \seealso{ \code{\link[=df_list]{df_list()}} for a way to safely construct a data frame's underlying data structure from individual columns. This can be used to create a named list for further use by \code{new_data_frame()}. } vctrs/man/data_frame.Rd0000644000176200001440000000716114511524374014553 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{data_frame} \alias{data_frame} \title{Construct a data frame} \usage{ data_frame( ..., .size = NULL, .name_repair = c("check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env() ) } \arguments{ \item{...}{Vectors to become columns in the data frame. When inputs are named, those names are used for column names.} \item{.size}{The number of rows in the data frame. If \code{NULL}, this will be computed as the common size of the inputs.} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"}, \code{"minimal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{data_frame()} constructs a data frame. It is similar to \code{\link[base:data.frame]{base::data.frame()}}, but there are a few notable differences that make it more in line with vctrs principles. The Properties section outlines these. } \details{ If no column names are supplied, \code{""} will be used as a default name for all columns. This is applied before name repair occurs, so the default name repair of \code{"check_unique"} will error if any unnamed inputs are supplied and \code{"unique"} (or \code{"unique_quiet"}) will repair the empty string column names appropriately. If the column names don't matter, use a \code{"minimal"} name repair for convenience and performance. } \section{Properties}{ \itemize{ \item Inputs are \link[=theory-faq-recycling]{recycled} to a common size with \code{\link[=vec_recycle_common]{vec_recycle_common()}}. \item With the exception of data frames, inputs are not modified in any way. Character vectors are never converted to factors, and lists are stored as-is for easy creation of list-columns. \item Unnamed data frame inputs are automatically unpacked. Named data frame inputs are stored unmodified as data frame columns. \item \code{NULL} inputs are completely ignored. \item The dots are dynamic, allowing for splicing of lists with \verb{!!!} and unquoting. } } \examples{ data_frame(x = 1, y = 2) # Inputs are recycled using tidyverse recycling rules data_frame(x = 1, y = 1:3) # Strings are never converted to factors class(data_frame(x = "foo")$x) # List columns can be easily created df <- data_frame(x = list(1:2, 2, 3:4), y = 3:1) # However, the base print method is suboptimal for displaying them, # so it is recommended to convert them to tibble if (rlang::is_installed("tibble")) { tibble::as_tibble(df) } # Named data frame inputs create data frame columns df <- data_frame(x = data_frame(y = 1:2, z = "a")) # The `x` column itself is another data frame df$x # Again, it is recommended to convert these to tibbles for a better # print method if (rlang::is_installed("tibble")) { tibble::as_tibble(df) } # Unnamed data frame input is automatically unpacked data_frame(x = 1, data_frame(y = 1:2, z = "a")) } \seealso{ \code{\link[=df_list]{df_list()}} for safely creating a data frame's underlying data structure from individual columns. \code{\link[=new_data_frame]{new_data_frame()}} for constructing the actual data frame from that underlying data structure. Together, these can be useful for developers when creating new data frame subclasses supporting standard evaluation. } vctrs/man/vec_order.Rd0000644000176200001440000000373614315060307014435 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order.R \name{vec_order} \alias{vec_order} \alias{vec_sort} \title{Order and sort vectors} \usage{ vec_order( x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest") ) vec_sort( x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest") ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{direction}{Direction to sort in. Defaults to \code{asc}ending.} \item{na_value}{Should \code{NA}s be treated as the largest or smallest values?} } \value{ \itemize{ \item \code{vec_order()} an integer vector the same size as \code{x}. \item \code{vec_sort()} a vector with the same size and type as \code{x}. } } \description{ Order and sort vectors } \section{Differences with \code{order()}}{ Unlike the \code{na.last} argument of \code{order()} which decides the positions of missing values irrespective of the \code{decreasing} argument, the \code{na_value} argument of \code{vec_order()} interacts with \code{direction}. If missing values are considered the largest value, they will appear last in ascending order, and first in descending order. } \section{Dependencies of \code{vec_order()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} } } \section{Dependencies of \code{vec_sort()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} \item \code{\link[=vec_order]{vec_order()}} \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ x <- round(c(runif(9), NA), 3) vec_order(x) vec_sort(x) vec_sort(x, direction = "desc") # Can also handle data frames df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) vec_order(df) vec_sort(df) vec_sort(df, direction = "desc") # Missing values interpreted as largest values are last when # in increasing order: vec_order(c(1, NA), na_value = "largest", direction = "asc") vec_order(c(1, NA), na_value = "largest", direction = "desc") } vctrs/man/vec_fill_missing.Rd0000644000176200001440000000275614276722575016024 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fill.R \name{vec_fill_missing} \alias{vec_fill_missing} \title{Fill in missing values with the previous or following value} \usage{ vec_fill_missing( x, direction = c("down", "up", "downup", "updown"), max_fill = NULL ) } \arguments{ \item{x}{A vector} \item{direction}{Direction in which to fill missing values. Must be either \code{"down"}, \code{"up"}, \code{"downup"}, or \code{"updown"}.} \item{max_fill}{A single positive integer specifying the maximum number of sequential missing values that will be filled. If \code{NULL}, there is no limit.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_fill_missing()} fills gaps of missing values with the previous or following non-missing value. } \examples{ x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) # Filling down replaces missing values with the previous non-missing value vec_fill_missing(x, direction = "down") # To also fill leading missing values, use `"downup"` vec_fill_missing(x, direction = "downup") # Limit the number of sequential missing values to fill with `max_fill` vec_fill_missing(x, max_fill = 1) # Data frames are filled rowwise. Rows are only considered missing # if all elements of that row are missing. y <- c(1, NA, 2, NA, NA, 3, 4, NA, 5) df <- data_frame(x = x, y = y) df vec_fill_missing(df) } vctrs/man/vec_repeat.Rd0000644000176200001440000000151714276722575014617 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_repeat} \alias{vec_repeat} \title{Expand the length of a vector} \usage{ vec_repeat(x, each = 1L, times = 1L) } \arguments{ \item{x}{A vector.} \item{each}{Number of times to repeat each element of \code{x}.} \item{times}{Number of times to repeat the whole vector of \code{x}.} } \value{ A vector the same type as \code{x} with size \code{vec_size(x) * times * each}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_repeat()} has been replaced with \code{\link[=vec_rep]{vec_rep()}} and \code{\link[=vec_rep_each]{vec_rep_each()}} and is deprecated as of vctrs 0.3.0. } \keyword{internal} vctrs/man/vec_proxy_equal.Rd0000644000176200001440000000313514315060307015663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equal.R \name{vec_proxy_equal} \alias{vec_proxy_equal} \title{Equality proxy} \usage{ vec_proxy_equal(x, ...) } \arguments{ \item{x}{A vector x.} \item{...}{These dots are for future extensions and must be empty.} } \value{ A 1d atomic vector or a data frame. } \description{ Returns a proxy object (i.e. an atomic vector or data frame of atomic vectors). For \link{vctr}s, this determines the behaviour of \code{==} and \code{!=} (via \code{\link[=vec_equal]{vec_equal()}}); \code{\link[=unique]{unique()}}, \code{\link[=duplicated]{duplicated()}} (via \code{\link[=vec_unique]{vec_unique()}} and \code{\link[=vec_duplicate_detect]{vec_duplicate_detect()}}); \code{\link[=is.na]{is.na()}} and \code{\link[=anyNA]{anyNA()}} (via \code{\link[=vec_detect_missing]{vec_detect_missing()}}). } \details{ The default method calls \code{\link[=vec_proxy]{vec_proxy()}}, as the default underlying vector data should be equal-able in most cases. If your class is not equal-able, provide a \code{vec_proxy_equal()} method that throws an error. } \section{Data frames}{ If the proxy for \code{x} is a data frame, the proxy function is automatically recursively applied on all columns as well. After applying the proxy recursively, if there are any data frame columns present in the proxy, then they are unpacked. Finally, if the resulting data frame only has a single column, then it is unwrapped and a vector is returned as the proxy. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} called by default } } \keyword{internal} vctrs/man/obj_print.Rd0000644000176200001440000000166514202760666014464 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print-str.R \name{obj_print} \alias{obj_print} \alias{obj_print_header} \alias{obj_print_data} \alias{obj_print_footer} \alias{obj_str} \alias{obj_str_header} \alias{obj_str_data} \alias{obj_str_footer} \title{\code{print()} and \code{str()} generics.} \usage{ obj_print(x, ...) obj_print_header(x, ...) obj_print_data(x, ...) obj_print_footer(x, ...) obj_str(x, ...) obj_str_header(x, ...) obj_str_data(x, ...) obj_str_footer(x, ...) } \arguments{ \item{x}{A vector} \item{...}{Additional arguments passed on to methods. See \code{\link[=print]{print()}} and \code{\link[=str]{str()}} for commonly used options} } \description{ These are constructed to be more easily extensible since you can override the \verb{_header()}, \verb{_data()} or \verb{_footer()} components individually. The default methods are built on top of \code{format()}. } \keyword{internal} vctrs/man/vec_ptype.Rd0000644000176200001440000001143514315060307014456 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type.R \name{vec_ptype} \alias{vec_ptype} \alias{vec_ptype_common} \alias{vec_ptype_show} \title{Find the prototype of a set of vectors} \usage{ vec_ptype(x, ..., x_arg = "", call = caller_env()) vec_ptype_common(..., .ptype = NULL, .arg = "", .call = caller_env()) vec_ptype_show(...) } \arguments{ \item{x}{A vector} \item{...}{For \code{vec_ptype()}, these dots are for future extensions and must be empty. For \code{vec_ptype_common()} and \code{vec_ptype_show()}, vector inputs.} \item{x_arg}{Argument name for \code{x}. This is used in error messages to inform the user about the locations of incompatible types.} \item{call, .call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} } \value{ \code{vec_ptype()} and \code{vec_ptype_common()} return a prototype (a size-0 vector) } \description{ \code{vec_ptype()} returns the unfinalised prototype of a single vector. \code{vec_ptype_common()} finds the common type of multiple vectors. \code{vec_ptype_show()} nicely prints the common type of any number of inputs, and is designed for interactive exploration. } \section{\code{vec_ptype()}}{ \code{vec_ptype()} returns \link[=vec_size]{size} 0 vectors potentially containing attributes but no data. Generally, this is just \code{vec_slice(x, 0L)}, but some inputs require special handling. \itemize{ \item While you can't slice \code{NULL}, the prototype of \code{NULL} is itself. This is because we treat \code{NULL} as an identity value in the \code{vec_ptype2()} monoid. \item The prototype of logical vectors that only contain missing values is the special \link{unspecified} type, which can be coerced to any other 1d type. This allows bare \code{NA}s to represent missing values for any 1d vector type. } See \link{internal-faq-ptype2-identity} for more information about identity values. \code{vec_ptype()} is a \emph{performance} generic. It is not necessary to implement it because the default method will work for any vctrs type. However the default method builds around other vctrs primitives like \code{vec_slice()} which incurs performance costs. If your class has a static prototype, you might consider implementing a custom \code{vec_ptype()} method that returns a constant. This will improve the performance of your class in many cases (\link[=vec_ptype2]{common type} imputation in particular). Because it may contain unspecified vectors, the prototype returned by \code{vec_ptype()} is said to be \strong{unfinalised}. Call \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} to finalise it. Commonly you will need the finalised prototype as returned by \code{vec_slice(x, 0L)}. } \section{\code{vec_ptype_common()}}{ \code{vec_ptype_common()} first finds the prototype of each input, then successively calls \code{\link[=vec_ptype2]{vec_ptype2()}} to find a common type. It returns a \link[=vec_ptype_finalise]{finalised} prototype. } \section{Dependencies of \code{vec_ptype()}}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} for returning an empty slice } } \section{Dependencies of \code{vec_ptype_common()}}{ \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} } } \examples{ # Unknown types ------------------------------------------ vec_ptype_show() vec_ptype_show(NA) vec_ptype_show(NULL) # Vectors ------------------------------------------------ vec_ptype_show(1:10) vec_ptype_show(letters) vec_ptype_show(TRUE) vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(factor("a")) vec_ptype_show(ordered("a")) # Matrices ----------------------------------------------- # The prototype of a matrix includes the number of columns vec_ptype_show(array(1, dim = c(1, 2))) vec_ptype_show(array("x", dim = c(1, 2))) # Data frames -------------------------------------------- # The prototype of a data frame includes the prototype of # every column vec_ptype_show(iris) # The prototype of multiple data frames includes the prototype # of every column that in any data frame vec_ptype_show( data.frame(x = TRUE), data.frame(y = 2), data.frame(z = "a") ) } vctrs/man/faq-compatibility-types.Rd0000644000176200001440000000636414400165664017254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-compatibility-types} \alias{faq-compatibility-types} \title{FAQ - How is the compatibility of vector types decided?} \description{ Two vectors are \strong{compatible} when you can safely: \itemize{ \item Combine them into one larger vector. \item Assign values from one of the vectors into the other vector. } Examples of compatible types are integer and double vectors. On the other hand, integer and character vectors are not compatible. } \section{Common type of multiple vectors}{ There are two possible outcomes when multiple vectors of different types are combined into a larger vector: \itemize{ \item An incompatible type error is thrown because some of the types are not compatible: \if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1:3) df2 <- data.frame(x = "foo") dplyr::bind_rows(df1, df2) #> Error in `dplyr::bind_rows()`: #> ! Can't combine `..1$x` and `..2$x` . }\if{html}{\out{
}} \item The vectors are combined into a vector that has the common type of all inputs. In this example, the common type of integer and logical is integer: \if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1:3) df2 <- data.frame(x = FALSE) dplyr::bind_rows(df1, df2) #> x #> 1 1 #> 2 2 #> 3 3 #> 4 0 }\if{html}{\out{
}} } In general, the common type is the \emph{richer} type, in other words the type that can represent the most values. Logical vectors are at the bottom of the hierarchy of numeric types because they can only represent two values (not counting missing values). Then come integer vectors, and then doubles. Here is the vctrs type hierarchy for the fundamental vectors: \figure{coerce.png} } \section{Type conversion and lossy cast errors}{ Type compatibility does not necessarily mean that you can \strong{convert} one type to the other type. That’s because one of the types might support a larger set of possible values. For instance, integer and double vectors are compatible, but double vectors can’t be converted to integer if they contain fractional values. When vctrs can’t convert a vector because the target type is not as rich as the source type, it throws a lossy cast error. Assigning a fractional number to an integer vector is a typical example of a lossy cast error: \if{html}{\out{
}}\preformatted{int_vector <- 1:3 vec_assign(int_vector, 2, 0.001) #> Error in `vec_assign()`: #> ! Can't convert from to due to loss of precision. #> * Locations: 1 }\if{html}{\out{
}} } \section{How to make two vector classes compatible?}{ If you encounter two vector types that you think should be compatible, they might need to implement coercion methods. Reach out to the author(s) of the classes and ask them if it makes sense for their classes to be compatible. These developer FAQ items provide guides for implementing coercion methods: \itemize{ \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. } } vctrs/man/vec_as_names_legacy.Rd0000644000176200001440000000211413505165544016432 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_as_names_legacy} \alias{vec_as_names_legacy} \title{Repair names with legacy method} \usage{ vec_as_names_legacy(names, prefix = "V", sep = "") } \arguments{ \item{names}{A character vector.} \item{prefix, sep}{Prefix and separator for repaired names.} } \description{ This standardises names with the legacy approach that was used in tidyverse packages (such as tibble, tidyr, and readxl) before \code{\link[=vec_as_names]{vec_as_names()}} was implemented. This tool is meant to help transitioning to the new name repairing standard and will be deprecated and removed from the package some time in the future. } \examples{ if (rlang::is_installed("tibble")) { library(tibble) # Names repair is turned off by default in tibble: try(tibble(a = 1, a = 2)) # You can turn it on by supplying a repair method: tibble(a = 1, a = 2, .name_repair = "universal") # If you prefer the legacy method, use `vec_as_names_legacy()`: tibble(a = 1, a = 2, .name_repair = vec_as_names_legacy) } } \keyword{internal} vctrs/man/vec_data.Rd0000644000176200001440000000231214276722575014242 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/proxy.R \name{vec_data} \alias{vec_data} \title{Extract underlying data} \usage{ vec_data(x) } \arguments{ \item{x}{A vector or object implementing \code{vec_proxy()}.} } \value{ The data underlying \code{x}, free from any attributes except the names. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Extract the data underlying an S3 vector object, i.e. the underlying (named) atomic vector, data frame, or list. } \section{Difference with \code{vec_proxy()}}{ \itemize{ \item \code{vec_data()} returns unstructured data. The only attributes preserved are names, dims, and dimnames. Currently, due to the underlying memory architecture of R, this creates a full copy of the data for atomic vectors. \item \code{vec_proxy()} may return structured data. This generic is the main customisation point for accessing memory values in vctrs, along with \code{\link[=vec_restore]{vec_restore()}}. Methods must return a vector type. Records and data frames will be processed rowwise. } } \keyword{internal} vctrs/man/vec_math.Rd0000644000176200001440000000403314276722575014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/numeric.R \name{vec_math} \alias{vec_math} \alias{vec_math_base} \title{Mathematical operations} \usage{ vec_math(.fn, .x, ...) vec_math_base(.fn, .x, ...) } \arguments{ \item{.fn}{A mathematical function from the base package, as a string.} \item{.x}{A vector.} \item{...}{Additional arguments passed to \code{.fn}.} } \description{ This generic provides a common dispatch mechanism for all regular unary mathematical functions. It is used as a common wrapper around many of the Summary group generics, the Math group generics, and a handful of other mathematical functions like \code{mean()} (but not \code{var()} or \code{sd()}). } \details{ \code{vec_math_base()} is provided as a convenience for writing methods. It calls the base \code{.fn} on the underlying \code{\link[=vec_data]{vec_data()}}. } \section{Included functions}{ \itemize{ \item From the \link{Summary} group generic: \code{prod()}, \code{sum()}, \code{any()}, \code{all()}. \item From the \link{Math} group generic: \code{abs()}, \code{sign()}, \code{sqrt()}, \code{ceiling()}, \code{floor()}, \code{trunc()}, \code{cummax()}, \code{cummin()}, \code{cumprod()}, \code{cumsum()}, \code{log()}, \code{log10()}, \code{log2()}, \code{log1p()}, \code{acos()}, \code{acosh()}, \code{asin()}, \code{asinh()}, \code{atan()}, \code{atanh()}, \code{exp()}, \code{expm1()}, \code{cos()}, \code{cosh()}, \code{cospi()}, \code{sin()}, \code{sinh()}, \code{sinpi()}, \code{tan()}, \code{tanh()}, \code{tanpi()}, \code{gamma()}, \code{lgamma()}, \code{digamma()}, \code{trigamma()}. \item Additional generics: \code{mean()}, \code{is.nan()}, \code{is.finite()}, \code{is.infinite()}. } Note that \code{median()} is currently not implemented, and \code{sd()} and \code{var()} are currently not generic and so do not support custom classes. } \examples{ x <- new_vctr(c(1, 2.5, 10)) x abs(x) sum(x) cumsum(x) } \seealso{ \code{\link[=vec_arith]{vec_arith()}} for the equivalent for the arithmetic infix operators. } \keyword{internal} vctrs/man/list_of.Rd0000644000176200001440000000327414362266120014124 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-list-of.R \name{list_of} \alias{list_of} \alias{as_list_of} \alias{is_list_of} \alias{vec_ptype2.vctrs_list_of} \alias{vec_cast.vctrs_list_of} \title{\code{list_of} S3 class for homogenous lists} \usage{ list_of(..., .ptype = NULL) as_list_of(x, ...) is_list_of(x) \method{vec_ptype2}{vctrs_list_of}(x, y, ..., x_arg = "", y_arg = "") \method{vec_cast}{vctrs_list_of}(x, to, ...) } \arguments{ \item{...}{Vectors to coerce.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{x}{For \code{as_list_of()}, a vector to be coerced to list_of.} \item{y, to}{Arguments to \code{vec_ptype2()} and \code{vec_cast()}.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \description{ A \code{list_of} object is a list where each element has the same type. Modifying the list with \code{$}, \code{[}, and \code{[[} preserves the constraint by coercing all input items. } \details{ Unlike regular lists, setting a list element to \code{NULL} using \code{[[} does not remove it. } \examples{ x <- list_of(1:3, 5:6, 10:15) if (requireNamespace("tibble", quietly = TRUE)) { tibble::tibble(x = x) } vec_c(list_of(1, 2), list_of(FALSE, TRUE)) } vctrs/man/df_ptype2.Rd0000644000176200001440000000426714315060307014361 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R, R/type-tibble.R \name{df_ptype2} \alias{df_ptype2} \alias{df_cast} \alias{tib_ptype2} \alias{tib_cast} \title{Coercion between two data frames} \usage{ df_ptype2(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) df_cast(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) tib_ptype2(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) tib_cast(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) } \arguments{ \item{x, y, to}{Subclasses of data frame.} \item{...}{If you call \code{df_ptype2()} or \code{df_cast()} from a \code{vec_ptype2()} or \code{vec_cast()} method, you must forward the dots passed to your method on to \code{df_ptype2()} or \code{df_cast()}.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \value{ \itemize{ \item When \code{x} and \code{y} are not compatible, an error of class \code{vctrs_error_incompatible_type} is thrown. \item When \code{x} and \code{y} are compatible, \code{df_ptype2()} returns the common type as a bare data frame. \code{tib_ptype2()} returns the common type as a bare tibble. } } \description{ \code{df_ptype2()} and \code{df_cast()} are the two functions you need to call from \code{vec_ptype2()} and \code{vec_cast()} methods for data frame subclasses. See \link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}. Their main job is to determine the common type of two data frames, adding and coercing columns as needed, or throwing an incompatible type error when the columns are not compatible. } vctrs/man/faq-error-scalar-type.Rd0000644000176200001440000000472214404336324016605 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-error-scalar-type} \alias{faq-error-scalar-type} \title{FAQ - Error: Input must be a vector} \description{ This error occurs when a function expects a vector and gets a scalar object instead. This commonly happens when some code attempts to assign a scalar object as column in a data frame: \if{html}{\out{
}}\preformatted{fn <- function() NULL tibble::tibble(x = fn) #> Error in `tibble::tibble()`: #> ! All columns in a tibble must be vectors. #> x Column `x` is a function. fit <- lm(1:3 ~ 1) tibble::tibble(x = fit) #> Error in `tibble::tibble()`: #> ! All columns in a tibble must be vectors. #> x Column `x` is a `lm` object. }\if{html}{\out{
}} } \section{Vectorness in base R and in the tidyverse}{ In base R, almost everything is a vector or behaves like a vector. In the tidyverse we have chosen to be a bit stricter about what is considered a vector. The main question we ask ourselves to decide on the vectorness of a type is whether it makes sense to include that object as a column in a data frame. The main difference is that S3 lists are considered vectors by base R but in the tidyverse that’s not the case by default: \if{html}{\out{
}}\preformatted{fit <- lm(1:3 ~ 1) typeof(fit) #> [1] "list" class(fit) #> [1] "lm" # S3 lists can be subset like a vector using base R: fit[c(1, 4)] #> $coefficients #> (Intercept) #> 2 #> #> $rank #> [1] 1 # But not in vctrs vctrs::vec_slice(fit, c(1, 4)) #> Error in `vctrs::vec_slice()`: #> ! `x` must be a vector, not a object. }\if{html}{\out{
}} Defused function calls are another (more esoteric) example: \if{html}{\out{
}}\preformatted{call <- quote(foo(bar = TRUE, baz = FALSE)) call #> foo(bar = TRUE, baz = FALSE) # They can be subset like a vector using base R: call[1:2] #> foo(bar = TRUE) lapply(call, function(x) x) #> [[1]] #> foo #> #> $bar #> [1] TRUE #> #> $baz #> [1] FALSE # But not with vctrs: vctrs::vec_slice(call, 1:2) #> Error in `vctrs::vec_slice()`: #> ! `x` must be a vector, not a call. }\if{html}{\out{
}} } \section{I get a scalar type error but I think this is a bug}{ It’s possible the author of the class needs to do some work to declare their class a vector. Consider reaching out to the author. We have written a \link[=howto-faq-fix-scalar-type-error]{developer FAQ page} to help them fix the issue. } vctrs/man/vec_ptype_full.Rd0000644000176200001440000000257414276722575015526 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ptype-abbr-full.R \name{vec_ptype_full} \alias{vec_ptype_full} \alias{vec_ptype_abbr} \title{Vector type as a string} \usage{ vec_ptype_full(x, ...) vec_ptype_abbr(x, ..., prefix_named = FALSE, suffix_shape = TRUE) } \arguments{ \item{x}{A vector.} \item{...}{These dots are for future extensions and must be empty.} \item{prefix_named}{If \code{TRUE}, add a prefix for named vectors.} \item{suffix_shape}{If \code{TRUE} (the default), append the shape of the vector.} } \value{ A string. } \description{ \code{vec_ptype_full()} displays the full type of the vector. \code{vec_ptype_abbr()} provides an abbreviated summary suitable for use in a column heading. } \section{S3 dispatch}{ The default method for \code{vec_ptype_full()} uses the first element of the class vector. Override this method if your class has parameters that should be prominently displayed. The default method for \code{vec_ptype_abbr()} \code{\link[=abbreviate]{abbreviate()}}s \code{vec_ptype_full()} to 8 characters. You should almost always override, aiming for 4-6 characters where possible. These arguments are handled by the generic and not passed to methods: \itemize{ \item \code{prefix_named} \item \code{suffix_shape} } } \examples{ cat(vec_ptype_full(1:10)) cat(vec_ptype_full(iris)) cat(vec_ptype_abbr(1:10)) } \keyword{internal} vctrs/man/vec_type.Rd0000644000176200001440000000150214276722575014312 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_type} \alias{vec_type} \alias{vec_type_common} \alias{vec_type2} \title{Deprecated type functions} \usage{ vec_type(x) vec_type_common(..., .ptype = NULL) vec_type2(x, y, ...) } \arguments{ \item{x, y, ..., .ptype}{Arguments for deprecated functions.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions have been renamed: \itemize{ \item \code{vec_type()} => \code{\link[=vec_ptype]{vec_ptype()}} \item \code{vec_type2()} => \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{vec_type_common()} => \code{\link[=vec_ptype_common]{vec_ptype_common()}} } } \keyword{internal} vctrs/man/new_list_of.Rd0000644000176200001440000000075713465327536015014 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-list-of.R \name{new_list_of} \alias{new_list_of} \title{Create list_of subclass} \usage{ new_list_of(x = list(), ptype = logical(), ..., class = character()) } \arguments{ \item{x}{A list} \item{ptype}{The prototype which every element of \code{x} belongs to} \item{...}{Additional attributes used by subclass} \item{class}{Optional subclass name} } \description{ Create list_of subclass } \keyword{internal} vctrs/man/vec_cast.Rd0000644000176200001440000001050214315060307014241 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cast.R, R/type-bare.R \name{vec_cast} \alias{vec_cast} \alias{vec_cast_common} \alias{vec_cast.logical} \alias{vec_cast.integer} \alias{vec_cast.double} \alias{vec_cast.complex} \alias{vec_cast.raw} \alias{vec_cast.character} \alias{vec_cast.list} \title{Cast a vector to a specified type} \usage{ vec_cast(x, to, ..., x_arg = caller_arg(x), to_arg = "", call = caller_env()) vec_cast_common(..., .to = NULL, .arg = "", .call = caller_env()) \method{vec_cast}{logical}(x, to, ...) \method{vec_cast}{integer}(x, to, ...) \method{vec_cast}{double}(x, to, ...) \method{vec_cast}{complex}(x, to, ...) \method{vec_cast}{raw}(x, to, ...) \method{vec_cast}{character}(x, to, ...) \method{vec_cast}{list}(x, to, ...) } \arguments{ \item{x}{Vectors to cast.} \item{to, .to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} \item{...}{For \code{vec_cast_common()}, vectors to cast. For \code{vec_cast()}, \code{vec_cast_default()}, and \code{vec_restore()}, these dots are only for future extensions and should be empty.} \item{x_arg}{Argument name for \code{x}, used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call, .call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} } \value{ A vector the same length as \code{x} with the same type as \code{to}, or an error if the cast is not possible. An error is generated if information is lost when casting between compatible types (i.e. when there is no 1-to-1 mapping for a specific value). } \description{ \code{vec_cast()} provides directional conversions from one type of vector to another. Along with \code{\link[=vec_ptype2]{vec_ptype2()}}, this generic forms the foundation of type coercions in vctrs. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } \section{Dependencies of \code{vec_cast_common()}}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } } \subsection{base dependencies}{ Some functions enable a base-class fallback for \code{vec_cast_common()}. In that case the inputs are deemed compatible when they have the same \link[base:typeof]{base type} and inherit from the same base class. } } \examples{ # x is a double, but no information is lost vec_cast(1, integer()) # When information is lost the cast fails try(vec_cast(c(1, 1.5), integer())) try(vec_cast(c(1, 2), logical())) # You can suppress this error and get the partial results allow_lossy_cast(vec_cast(c(1, 1.5), integer())) allow_lossy_cast(vec_cast(c(1, 2), logical())) # By default this suppress all lossy cast errors without # distinction, but you can be specific about what cast is allowed # by supplying prototypes allow_lossy_cast(vec_cast(c(1, 1.5), integer()), to_ptype = integer()) try(allow_lossy_cast(vec_cast(c(1, 2), logical()), to_ptype = integer())) # No sensible coercion is possible so an error is generated try(vec_cast(1.5, factor("a"))) # Cast to common type vec_cast_common(factor("a"), factor(c("a", "b"))) } \seealso{ Call \code{\link[=stop_incompatible_cast]{stop_incompatible_cast()}} when you determine from the attributes that an input can't be cast to the target type. } vctrs/man/list_drop_empty.Rd0000644000176200001440000000116314315060307015672 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/empty.R \name{list_drop_empty} \alias{list_drop_empty} \title{Drop empty elements from a list} \usage{ list_drop_empty(x) } \arguments{ \item{x}{A list.} } \description{ \code{list_drop_empty()} removes empty elements from a list. This includes \code{NULL} elements along with empty vectors, like \code{integer(0)}. This is equivalent to, but faster than, \code{vec_slice(x, list_sizes(x) != 0L)}. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ x <- list(1, NULL, integer(), 2) list_drop_empty(x) } vctrs/man/figures/0000755000176200001440000000000014532404540013633 5ustar liggesusersvctrs/man/figures/sizes.graffle0000644000176200001440000000653014211412552016320 0ustar liggesusers]ks ]kFoN8& )" c3B'.[B:=}Q0{c?%h?'_ޞީ ZPa]Neu-X.u{(TFܸ: pߕ岤`TIu藻B/X]N@I #Tͺ:foNTݿYN:xOޜ+9s=g:)rIA/2䴼):TmtB1;m qYpELPYiժ,'EMu.2|#05_8Բ\ur7VNqŷk)sawJ+s6 }" vwN;^s^WMQy{L[ܮ+'[㙺i "7A۞9[hʑRL"E,G AÁT,cXpxWV,i"| *W ?U#sjo<-UZgnٍ%?0_nGW5i>XZkpy|ȿ^YZOJ}JZ3&[;ip 4DWNDYdQ S;@uPǔ1OT#оzʅM>h!luw-eWhe^:S|(:/QHLy/ˆJQT8VDAlX ]gLвX΢S1^gкZ~ )ۮL+8%EpY `p7-µJ]3vJ.m .;7A'g;1LIE A) /W?Q gВ$ "ا4Ṅ {5HTOiS>l[Y3Ӳ {,HKj E\@]QWjfr(1& 5 R((R+ 3<<1"2/& ߣO#6k<> lA/Ix=> ;pˆ5c)F,DܗT_KPgqL`&3QBd~/=OoMf5ѓmj(JŸbOF?'6>Z !*-]M^_>H4>la+!BO$! \ӳ`)䋊/xW3E ~6lDєXه B9%X8SoJzkwK0B^=}xU \^4Pom|*ΐnvt=ӹ*o ^{`]us=ٸ؈ "H$Qڽesl82r$VF9/A5UYnX@ý׆,{9qq:7] K5!D7mTW4=XQEPEW}6kj#m5Tu5?m v!jq˞j@^m]uK}n2 jOF.˗|WTn+-C>D)nŝІ"zhUHaFؗ4oaO/zޒyRɌJ-f<XA=PU[Yc!L|!>B|vK"sL /-*EW«dlQ-*EE_e-Ȓk^2 v6]Jx,a|ͤ#F>GH"=Loߦakm#2xz8k&>%GoR_$Qh !+^1#Gq,b1@+~h+=GxlڋtJ(i7 Ǒ5mABi8_{F L-٤(KZd(EԪ$/lR}vEl?/Yb8KKLI'dJ؊e;pB0!tN6Dٸ8"d44a)ÃFVJ%@хR2Hzc.>C:`ɶ9Od]!\5=չ4z*dž>DCa:F.9+Ta]8"~S71T|eC2eIh<úW7ѺXH2y|w?":3^HӸr<F|vctrs/man/figures/cast.png0000644000176200001440000003567513652627531015324 0ustar liggesusersPNG  IHDRTTSsRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$9KIDATxyM.%".[I"RT(6$Z e -(囥URٲ!!m,Bз|{?9{q95y{f޳! ?WdX" W;E@ъW_m@D1D+^b~!Qx[@DGUl! WE@ъW_m@D1D+^b~!Qx[@DGUl! W~z…7gΜ &r?*=zۤ=q~%τޡykAط Wdm֮]q׮];o޼a  fΝ;wyK,9֭[Z5ΝrJL>c\ ,[Hw IÆ ر#㏦M6i0`@z!t2nܸ-[nz̘1i$)# O*>_:u*e|>}o~!%Jl޼w$eWm ,YǏڵg5GYr 4~ҥVzwرc}~< ЩSM6͚5M6oFڵQ% On! w I nΝׯ}͟?׮]ӦM$-I1)r!Fboo_fM2)T֭[gΜɂy*! WS&rСCY+^x…CGpBG}tƍMÛ޺73%c_nML2_НㄊsE"xB x|!)bLScꪫg1{X*馛q7bd S4(_5kH\rWv+Y/HE[o붮};vmNJ}O4|G`ӦM%K$~x ѭ%ߟ?8$EmB-=z:aOs0֪$R@@b )Z*Uq>J~{!$GUR!b=ЋbԵJ*@@̿zA@VIC/B :S*1p%-!Cܹ3X $cɒ%:ur-?0娃=ae…ո@E*2s3,[_K.}r!G}?A*U:,viXgҥ7onذ!qCtbݶmۂ lN>SO-R!SҴiS%\ҡCvj*Iȸ2(V=>d5kb PBק?&/C~ b~۷k׮X"Ae WKc=6֭[GW__hıݻW_}4`~|^a~wOIB&ʗNg2B+V0 3p}AL^6 9?FO233_wu29G7ָ[֭l28Hq' $ߘLuA1X}.D^yD? 3AwF O:?7WqhyAƍQ `bge?ߘI a8brqe?qM' f̘a=?CFxdxϢ{c g}QG[x& z'x" "CW5uT8r _m v:q{^K.]t$1aE!ugA(F7JIEݾtC۷ogbč7os;&yG!/q{s6Cʕ+e~l 5j`+fM>l@Oc.QL/qY>O;w.zn裏"f*Ȇ]^#'fҼm@(Z|C/w}Bˆ@DМ?b 1ڀ(bV-jB #Z* " h{QիW3Ƌ9hРD/b|!R[qO?tڽ ?3PJvl9;ѐ;%{Üf|8BÁiӦ@ 0"~ #zpK/弐11#RG>`Ǿ`<!1(<,q~f4waGNmP:={b>|8lرcU_/G b@իW?~5Chb3h^~RЕW^i)# B{ M rao f9τ@@1c:&|,| Ӓ3jϙ &3jt~@B~9d0ztAtFXg \BPqCGAFx0s5 ]{cZ)ゐw}jٲ%6,D"Xv#Uݙ4hyӧon=Ps e{L3sL. U)7k N1Ӝ?ĕBьMx9#ưJ QbUEyG4h5k9s d9)pyJ:^}Xf'8ƷQFr̉,t]WNTDI n5o/3cߣG.v^fiK܁[Gy$m[:|݇q ¹LJ삜 ׎ߴߟM;>Pr>ϝ/* q6JK1|&M*!gΝbA0^y@b`"{0?ٱr=B៵ &py ?v}N]6Rl[JO6,s. s`H VU s/XZ]%Qb:di\M8c=5#E$9WvmU^@7QkY& }6Ӝ?g5xmUq¡`7|!q)֭6XbqBD @l8%h0hz_~2a! LYg1Ig26iҤjժ1[V[b@+ENl;>q~[L1VnV}M-8ioF9T|9s7's2$/ܹ37_{ v4~Ls]C : _q8J>RY8BU^xVZl#|b3X#B>gW/?qԩlFoɃpt$gX9́r [.Bޓ|[ntڻw7xÓ h PbS:sG?|$RTo&wCRiӦ8xWȟ?F]k*YP_tޝN( 1G,qg&*}8_|p1YƍIwOSc9 Q4{5^{-]Oe05ȟMR 7j]_Z^:FLG/";[T?E JڶmyguV58GE'x"fP7xcab)dM ;Egg3fmh.ڪ96ZJ+ǏpTQTtj87ie/З_~AVVL?^zI8?hɿ+ G#/^b!v%KШRfX`gÅp¨ ?ZO>=*0s JK" ZǎID_~1Q#=իWF)Rvꫯ|a.;^_b;;8l߾I>c|ߩP ҝ\MEs` eX.ƃ:IY&eTaYQn5`1wd*" _*c{̵i.bCE0 kB}Du7 TlE~=L3 %oxNٲeZwq!H1 j'xwއ[l@P&"‹~ٯ[vq2%?;""h ?DL`'C}` R(J L/?<&bW1\X)GX֭e˖ 3תU`뮳7s<OI > }{xvȐ!#z{v>rGV1g7fAwsμ#?&^C ΐ<2(w¸ݵkW4tZSN[lXI!FgJUF>Y;WLaVBeGGoίN8]ilH&,~!" 0n?~4tץKE/ .>c7nv\P=&^fΜ8b!W2q 0C ,]@Iep=EZ&MB+>oذ᭷ڵk W|^IjC%KZSLRm0&iT? LZO9SO۰Ƿp{(i'F?b4 @xy=>%1N@DvLH]vYiX1s1%f‡_|nqOccO/"{o 80]ʰGq^t琁cSp(|80'xۣrEa9x8uL̐yؓntu1;An X:xJ^̂6visA<DǤ[戲 mgqC@CSNEVbۑk49kmn͌Ƈu], ڙ &pDs%)NbR'*d_=jNN%cQL]|'(0pa~R2H׷o_:ܪr#s[ċ"2s=}9FȨ*_۞DX! ǜ9s69T712[xfUB ]B>g s4zmڴ)coVn+096׳fN*?餓VK/GU -#cs7d訣P 0R9@8…+WMcr9W=fO?0Q0 $Qq%J>/IsE裏.ж<|G=DDs=G _VRG TK}m#ur=?;?\/ 0?'9K;zk7Q.]Xhٲ%?iҤP%2@HkÃ܀#y7o^A̱+;<0swx9s&JriX"W\2NJ#ؚUm槉Ϣ^*y(3 D{X(Q w1v$ԧ"EH2ebgʮ¦@2~Z|y8>7U ` z˫f@N2?&.ex̕XX%o߾=PD cƌa1,t"r K2 }yUȱ|5`Gb5sVL!(XDȥ o TC\UEG+ v[.{ɋӛ]L;\bꫯ+Iq*^'LJ.&j*BK/r?Spo-]Gcn]6Wٱ]]ImӦMk|ڸq+)`.`wsQjA9??eU1jp5hs'{l/4:Xbl6 HjdI9..+y٣@tڻ 2uqL2izz"xZ֜8ulcz?C5]C! /0bGB}#ל lM'ra% x0Y 䏅:\RAis AԴKx cuhY|' E2RɚU2kgd+vOWaA:AVjjKgT?cTS+gi?'xg۷TQHGs'K1QGy+ GsFsښHŊYi{>4`(G @bV@:D1X+'!(#C@I 1CrB ϙ3+T b9*RuЦuk|'so38?qTp1??hРoذaÆJ,ט)7)=7ons,W9\G XLvBo׮]L 7&~m֬YXˠP1qSD5=N;\H^d4@߹s' +K,iܸ1EjTaqOΙ0c.Ŧ7FRqBt> H36fL8>L/Ias֦d>#CI磏>ӯZJ r֝?9B0 ۠AAseP͋ŋ&;)Y$ͩggB;w\,)\ZT\l׮]oD*TeZ85M"b9.M֩SN.ꔦtR[c^y3Qkܺu+ի#1OΝ'蠸O>ĨօRUW]EFQ|T]joJMk,|]R7n ݻwǐ.7ܱcG,K+4d U \$$R!jzyR(Cs2MaiE@G?;ZKLf֪U$qDƏ?F?B Xm|!Ʊl '`?F3biMMvGi^_y̔>G.Xh`3s߆aD sC0iS$5x`fٸ}s1nu|B7?p,_y5З*U0j^bƠz嗓 @M41 ЙVvy (O)MP/Ttz.bsGn&>EA4P|~~̛ho 4$1SM$Q(;%w 5UR%3d~pԨQÄ10x'db;shH"痻@< H!>  m62&n ^ڤ@v/3 yj]^eg Α%R0Z 1DB 7ߴE^njv|؋M~5IkLyI<{=gRpO^3'ƃ}ex7s?c˗/X؀^~#Q׳>*2əv:ANݺu.[bǵtL'bŭj_:J(a qXy'Iv͚5oo`d1pFcPot7lK<50H֘LYl"IO;[;bW\T̟JN1pc"C1f:?yM|Et*4¦c_hт7B *tZp5tHtfNd 2s'.E͒ѣf4غC0VgIgxhm٩$19S8I Bm޸k8 3nwf9^ \,J!C k0IAZ$|fttdǃ,LYL13w2KXL>TM+&Iyp Y?Byl{NiEqYoI< L@lje_#sG”~@;]y9L~<ӻwo> f ̢_E5'esoFϞ=Am۲b"#+ RF~Ybdke$<32K|yb|M5RI00h̙Dz!Zn >,"Ebbzeۮ +`*7mv`pMSgD̓U˖-NDZ[uĨ|Ex 6f9xm'y03&8vX4&{Aج%e$,y\q5LV5QFFCK̆3G4(v猛Udm1|ɦ]"22_f Q2wK}PkDѳS̃P$Zɀ`Fa2yX3HRQ]veNF$b$jW_}J0y~.ʙX r+, SO\WMv&f2q6#SK"#>4Ob}.O{ C+J̉7&:`&t9nk4RgD̓ˀHm2--1C͞&"! \G mL[V2|׌!Bx>^*P3(8JA"JEBu2_s%(D[y ! Pe!'b~FZrJHxW϶9sc}YJ_2EߘŒIA ̑9#TH# s4MDFBT<{y9 JeDiڴ) Myq\p|[r,"]t9蠃L7x0;ލ)@ &e~Wsk5~1T%PbŊmڴ a*2<K/ 7SFX}FC2& kqlق ~3<3r^J*X|_!Xb~ \VŋsemDF;R3 le0!DM>&[ouҤI|| 70w+22ѢE Eڴ"J<}>PY̚5 z"&cCB j/QB #P*?*@@JV@<PbT(! 揇@Hs̙0aBA@x.\q qI"~7C I ҧ\A O8_~)I=8՗q qI"~g:utW0|l޼y0*{){ʔ)f%kΛ7/+Y+S!"i0?@ްaC͚5O;4۷z3_[.0kk.K ,1|[֨Q~;o_~A Vj?IsbI&1ĥ+W΂ӧs1+W&P_|q's1IU@^֧sεk6cǎ?+VVd ?ZyF >FXE{\`,ѷo_cdɒݺu;}ƽ={/&'OFM6(9fYu@ իW0(E-ZDg$.J{wM|lRrHiϸkCD YjBpdzj$gHhHx%KR1gCfсC6"'Eʗ-[F:uЯh *T`SCnnL1 iR0f>? ?k&;Dtk:u*3o#$B(|NucɟFm65d C@Jb''01sڴi/lց82>p>| srm@ZHC+&&휆ؒN919rsy(>]L_Š!TĸEuDaÆaߪZlɌ_\ti޽Y7Y\tE<̜i0l$~ 0+6ì+PjP 0%a}f?9@ ƌe*V4X#AP*|F %m?41Gemz!Z)@÷1c#|ur/+V@Dꩧ̔A-'LYq6ԗ5k 4ӗ]FwGcט]E9 '۱c5W:> 30K[fnT̢a׭[ǀo֢qml9 ve.(D1Ud?/0?r>܎ ?0eϔ)SAOI^{m2u`Y ٞc-w\p )Jȋ OHA_\99R>B@w?% B@@JiB@b&)G@>JQb&)G[3:QàIENDB`vctrs/man/figures/lifecycle-defunct.svg0000644000176200001440000000170413505146267017753 0ustar liggesuserslifecyclelifecycledefunctdefunct vctrs/man/figures/lifecycle-maturing.svg0000644000176200001440000000170613505146267020153 0ustar liggesuserslifecyclelifecyclematuringmaturing vctrs/man/figures/logo.png0000644000176200001440000020176414402367166015323 0ustar liggesusersPNG  IHDRޫhgAMA a cHRMz&u0`:pQ<bKGDtIME%*IDATxgdv ε#Y{G")H4"E(C2=Ù^3gzj25"&ER|GP(+ ;ܸ*P*ʪȈks[{*4_EJ$!?;}[vLزOn$Hu~)GA{]Oo'4N}2 %ULKOhAUGpA{[mQ'/-+H/K) :#Z=3 <| oI)! '۝>- 3f D >R$'P[ EA3z2f '@,SB 8ezKHd ſ`JBpS޲M3`7% "e>jY_,pӿ;}[vbk+ x?B<uB\.^OJɥ3~/Ŗm`[߅6qO@D ZKFR"o@⟃wRͷOo|w$C7]oϰh7+!G2Z 0,1n޶}RxU+;6IxR4m]߀ȥs߻ӷsk[ EaЅA=$ x >ǵ푒 &׃Wmǁ! ꫗Nߚϝm|m?C!G  qx[>Q|3ׁm|}xeg˸1x_ElA|m [d \[} >B~KP{|sF۞[,bCxM+g 6mw?co om mC?( ɶv8oh7kF6{orUw@;n~-?m ?: I=b;X?ʪ ƻ ^@u_Q.g7;}+?߄{gj.Bʧ<^ gqxx7,Ees7 ~ x/|NϜm|B@_G {]$,ow oS\}ĩ7@޼m ϩJ!mܶ{z?7y-x+6 o= -g[_ǚJH)DoGB2/u3rm&6'ܵBB &R/_;jxkuu8! 3 o踚;Y o[ o$5ϑ6BJ*6k'O-_D([DormھA؊z.RoMo \+ 5r]Y킷ymxӄE`ʶ=^SHCnxo@~L«W!ގux#P@ǁT> 79)ȿ}F Vock; ?eO /$ \vW6 o;8^/k鄷""Wo~AB\h@ܳ-^o!k?vngǁKo~@9PVuwwcgPt7bxn3MsQA( {zxrc~Msm'_c7? n/F%Dn[ q5}Bx;e![F@9Ov.0| OB8HDZ \"!a穟w5}"ޛvdžn`nV Pjys~H )!/ oY =޶&@y{mZnۨud>_./6<]?+qxCL<68 *_-4 \[ 7;K5Gm ~Z? fU;$H-FvY l WӻIxΝlρ*|v!/GPq?BXOF}ul-Yn5("LY׳G~)|% _M`F)xZmsew_`xm _9]yE>3H~~۴] [kn3m?~ϐl񮾪Mx1) U}*ސ,p6o;m]޶}nnMofŻxQ7^ I?RZ-^31\hz}7B4Td\ ގ?&m2].[/ 2ȑ7X-U;3&vŵtg4%;ܥŻ2Á'VfA ෥-^6{=x۠W5Z[5bxWv۴6.[͗R~T5?ZmB~ y׍bX3[ގl񷀗Kdw* p/R=Af;8׷;]<~>ϒ-jV#;?;r0JxWW`%U`{o/QA8ƻVXf཮I 7禮2v+Ν%N0o^V+7+6j7[B,=@3ch`x =պ?Ǯ d´|7_Gs>Vm>D{$OH&6O /\OO-^vۯ؂MA G&3Зq}4.):T';6CڦF_~A"z=]x56w5lۺk/rx /_`x^hDv4tʦ+ %BRީ)(;Ov-o>ԯq_k:g?ajmg8zvs;ᕷ^6 &M{ (;nKkE݄L ݎY14c&h8>~tǏ7gG⿮iU;o7}7p6 Zl޶FiA>l܆§$oH~[~ֺBo:h!zSZJ$ ?0=[ia<}#/#!Nx@"?~ߚvXIx۶e_oLz[u]gzixmmj cChih ga{׽@ %]{\,9O|P_ ,;m O _Fѭo~;C$Wl ov.ʢ57gEE&p]x;;j k0ɄEowRN"aQkH)ф@Jm=Ju.\ͱ} CWNoA_\18u._ytM}5[@OHkusrMo⚊ ?)xDmﭳBuLt:R }*Pcr !x,V44n$3='^suO/=_.#!<Hg V[W@5uWZ6k7 x;A-Ag4ͰBC`7,,W,\q `l0`)WhM|[4Ĉ}GXf/>T7']4H7DwMu86ږ:- c=Y5}~mok927`BpXMH>+:.^^o!4z~4-`u `ecF{>R{4/wZn{#VZ;r^G^nNؾݍpP|-ݮ$ojkOB3a/CXR d-vre&Jyy>:{f!TpHKGt96rjGf&_hu{vx'@n^X^ ûfHr+m^ u܂M HНMT%1۠xxĎ^b1<\J)/t$Oaf0 f}l"FQ܆ʢ氚.m%ސ߼:׀-xo5 H`se CDz j5b`s}LRXBChB7$0=ik[7qg/uZA5[:NE[Fl xa '`H*c:J)\\\i =#] Tz]G3Lt+bucfc)R)'Nևx$JshyxY޶ \V5`V\OWxko^+a CiJe|mu]uٿ{s2jߩCJZhZ4t}X.4C# fy 5zb;ѧᕭ6mFkte\޶suzۯ7 5\ s[EWkCC\j#3BlT*ҫUx"Sy>Ra49t4+!#CigYPǁD/1:ferYjxuF▦c  @6ڍe:8*^}4B 0b5D3h'ضn5`j M-*Ԯ-A c;MTmu7-xr[-`A+ .I2=WbrW-M6tz_D%ɡvo\΃B 'Zb8iz6$C ;wt۱-6WF[WY_ONv[3eA d\V u@0JrjQūEt+@N,MC6BPq<|O;SEzrq %?xݥ\k)Y ognB5S{-xoڤ0X+&B@p sjerZFhz ZBj GMHot:_x|_/W<#2 c-aKd]{<1ЄWz߹7oͫ,uM:<iYr+n+ Ud,r(h !۲7̙ DqK+4Vf iajr=xW-]޶aúʛn[-(# CRD* q\Xs=z\:`@?&B7z:?Z8~[̓a$RHSq7 otׂ )m׆r +ཫCic)W ɘAw6Jr^*x"~B9HCFHtCk5灃 gxi*Na '?[ZWf\WzuH ۬Qv=x6 6'H mw z)`ϐ/6(U;ƺJP*;8]FU:HC>9$xW|tz6yNaV!pHT:Mn eoXxSJ7˴+[5]PkxA<WPlaiː<64MR%N~ZwK9 2POoA^C|uz lBeoήeE~օwc> ގku  =- "%.As !j df@Ǯm=;9<ҭqr xSS ֶ5齉m/|jcZ'VMy]-E!`b &R(WRwQ@hu<8+U;>:x<Na ^ 2Psrbܞ:]3*uzk\Z4GӭPQBl`h]TA8 KئNڼUA:*Eۈ:Y_hl6=]{r=x#l{5 a+}{[7|?j'~uh{ADq&vs"g/-s)_P^gzQ).2Юωׄ7:q1Cfuz]}~t z4w5zl,05Ɂ=5f ̾M |=5oiky%` d2lkCގׁu(-M]jmY? B0Pdd `p=p!_4ՋSPP1M;>r(U#`Vm[y6ˍ[ Fˍvr#^ >eȢ0lKP g//Yx^@tZ(4= R326Crj n)W7jbS)Oo[#ߴ:k#;7]^X OZP޶d Io\Qŕך*sW?V=$nmM:&/er]x׈ۏMؚG+cIǙ[,}4K*isq\Qv,l}l^wUu]EbarN5WkWAy5^;L>t7?vA~ǶX6xm hW!.q-x;G+o=4 Bߓ5 vK1 -L'H#t#U)r}hV$="gϡkZT^믆tl3=uHai'mQu-x[].mͷ޵C̈́Hn{{M` 21Kg?ùK˔.c[& c l/Vz"iy`dk(B>`mׂ+owsn b[O`+ JCY<_r Ri꘦(}uaò(6;{901ȩ󋜻#QykR9ׄ- и&1_ ̺kVύFZ90E6P2:ՙ<QjKF"K T a= /#XBՏ<ˣ$7YG'l {:_x i CNŘ_*su%k惮7\ikz8Q.MRLmIR*bHB"R"/H^ Gfo7_Svg :A~k0U3a V(`0LLC Wr i %q[%=z6#Np4M@V30*}wuux[:\Ok<]Ѹ uaWZ^-M֚hkLv'(W]]5?1['pwUU7nejbXج ZAFqKIz5t­w]u_O,ڪCX;#z,x*zkλ]T#]α-U<> ?v]x?Ure3Z?-x;otk:ǿe7d+0'EH^ZVwUA7\?*XMΥN6x"nq9j vU!L[Ufh _e:ΎU= o6ێ'b߼V»viQ2K5"O5fͻ v"@ٕd篬 eMM ϗQz|, "N~1DHzs)tRaϛHcuPu"O_5 `3YxDds9KUn0t40- Ӳ W45B7- =TA<]_C^ցu7 /W$iv=>=xߚFK1k:;uەةyu:TjJ m A&iqx s e%paYW/^UQ{]?:&@㘙^b}c03=薂#0LPI!7}Fl{qCs%] ͌a$3,Ƕ D[*+w͛&ػ Ka=pr;ؿ3DӶ!;7C3ܳ'IPð~^pM{cvx%tWDcTy;>c<&уiAsd= ֚̅vՍ^0Z:YsmǿmǮ[ck91c d#AC)F; e +-M0T2fh 39zb WW²(+8RNHD6{`Ckk{"b 1l?c>{b7Bo,j[ |׳>vίUE)jfqbD_ 4 ܶRж+߈i膎%R' 2 o.c&izltM ]7kyAJ4 ]p]t-:vH||#(r.(0B-F =D5'3X\/U>f?+a! f뷴RcSW)ke՝HjpJj4墮~gX_}ju;cfaĒL~_NiCVhz># فѰك\+fcYVOsw=FC%v}z܎i[v{vvrB閁ejeMnxpp]Wŷjf&Mcba/T0M$.LӘz;nZt=_&Ĵ,D˶ )%nATZ.9ntx;ǴmuT؝zFF^u|σ;!n9tt[}54&ٽ\UU[ ѥǒB7tzS~M4q_ĭr;&l>`s`m3MD3{ф[/ej&t´0ilJi٨ڣbvBix6~}A]+ 4 ;f&`ضIyBJZPmӫTLO7tòVQp$M`c$)4D+#ǰ-C Fݡ^Ө7ݰ✦zX2I,Dziۤ{zMф@M=lL3r4@%iҩRy6M2!O%I$1LjjZJRR,Q.+ %֚_%xJٽl&2/yMXtYTq4Gz~[X-4jBC#Lz'apJ@lL/Z,IoK2aUU-,a [1R] zEM>Bz: Q!~qpѰwx »JLTxCئ ̅I}x puzCGa$3zx{66BqhTTX"NTw]=l6́=':>4\ϣT3y.\PbZN:GhCt*γv2Pg_xiB7T@rf l&ţû__z|D&C=={سclJ+ufV8u~g.3=9a~ՁwQ]_M%]Od0CU6U_Mz@&Ē!&m'7)V*YʸV7ˢDm5, КʡZ {xa&x 8}~q K41~xx١y^z^ V7] +~_LeTVCίmwOƻ'45o%dz{]|gvĆ }Y6uǾ(=u?ߓM_}xO-z{h?ODר4?CR$IH tށ>#|a;yL-owyR,(t a0-F fVrzDuCt#q|\YN(0A{YxQrNj%Tq>,sc'g:Nq4icY?8,AVV{YYTׄF9NN.Wlnf 24"V̦^$:V,F,`0cY9ѓi@O&HfC=ן}h隆ixRnvިiф8l-bG6٬Du$ekxg=A0A}ֹtdc[ `Y Y))W0Bԑ2`'˗ Y,;~C oȁt4MGJm &v·sl j'/M0Vr<8ȕS3FVƯUz~ GZZ.sE;rD+d߮QΟe0R"jL<>~@[pef3g(*lnv0ЛӳK&t͎c$U+LlzTl -bh#cYs#^w#*ݹdipE7_zhwU]r )U^xYJth@Qsuk5{ѨըW\:ETwx6^~D4q /reK$phXW H? H⇿?F R[2=DRt]'3<}w2T\(P}|2tC{A~�Jj\caaq1m:܄}Y3f\^+,5iNy;2NP1zy= <1l-Z#dJTx݃xy G.G{F9{quzW)P_4SXį)c?ɳwhC|𱋑buN]T87 /޶pUP zJ>W=g=,/blp g;g|lRkp%v<჻u;U>,|z4~5Lfzr.`FFd lс1;B ״e 1mTWGRNIq=:~K !dD:C fsY͗cvz r˶-.s4B@) w"r^ T+#Eb$& W7<\g)WydSGFX睏fQ{B"B(O<9y*~@cyP,]j<N!=LY=t/^WD)@D8ׅ5] /Eۊx-u|| gJ{wqU56j9)\kmMl#k]grz0LL6O?+?|p2ŕ\jTٱ8.Z huoޠce~ZIue%|^utH)yY`qfJziNŢT!/R+W/-S) |_-u%q4MVRTifKG&z,x R7J3ؗ f)+N^=tg|Rn,`>I &^yl' Key ~Hcy$NnZ"ȹlٷg\.;Ԑ~+ӫШz.2pC> 3S>NNZejfWx.a+o#Th@s+7 ɇ'/Q.W |òسsCѽ ɋo(,-27zpx؄Fd`s30 itpdR{.jJHX朦mabƷroKs,QXY7 =m͈2]%\Шש Up':mw^'z"̪JC 3Pa.NWpρa_Y Xsk uUjfذ:wGR8|s,Tj%ܜyssxBvo)ffs vwg/v }JJRıS"T&3C,NJh#]7s-?ʜ8{@N1 r@airS*PPQWRV.Spb6%i c'w$~.qurr RzE8b h}y vd觫d&e[ %N]Vu*nǣ+av+3],+瞔$l'u::vfEs!PQEfR+ 1ty,EdL#7 ^'Q|W2;G}#tw%0#Ӌ ,VׄDY^6 oQ~c^&/R>/`qUV+=Fqx25X"q/N3;D"ƞC ԅ)斨KeF yUU 5Y7=I2RFz}09ɤ܏>ӏ(c ;4H +ChZqǭ5 ¥9"#f[>H_o ?,.WUxu3-U4SH,@p|k VHcfzy|ЂZtQ8EceViگI"NX'Ç1l#cfzUH¸W6rmxk»kqAs]0;u]IRē0~YfiMl.=qJ<$mAA peV NQ]yCy9[`~1BcQJc9~~}{w1mQzIfҘV;nWѬf2RWɸɾ݃,*P8K<{ǘ/rB m8^5ޠH b jIoO{4䑞R,R_Qj&»vP!iP|S*!8<αnLwauabOM VhziY «,\ǡVP)%\l>@dB$q5|>>.8saFz` JosprA *DZ^Y-U7\*:ynRO %náR,R\N\`жL8_?SES], ⻁g@+=`h)N_X$}=)V%?0e$՚]RM8GTp[#JD7=ON<Ƈ(THlpK+JG\7jjn\Wݥ5*i3WNS,Ո ?B:@03}X]7*xYZ o~6 PC;WkԫU\b~)+щtD:E,g1RXg^:JY*`hSO0$d`t!jK*E@,yXjY71V%3]$1l%@b*+pp!ML>&$Bn\WP4JA4Hvau `uD+!4X'^k摾7\-%nFA\aiq&O={ʤIueIe3d kD\^}՜ۇ ޠ˓+.箮Y4~tNV))`(OfEoc\ʱ1ԟ!],ӕ69vr~4Ğ]dkmpT>hdz2훉b.3GvoW:h0YݧF*aR:ѨT7,^.`z=6hktuFBZCu¤FBCowVK&J5r9/J;qx}w`;Tk+'_Q+W# g<+= EBٷ9}a < 7JW6η~pV\U%@-ұm\Qq0RFIl?1b}#a"'}9qvޟ?k"e>Rcsmɵf3[ͪywpK3LN_ܓyqy&ǕAj# Y𶴽J|ߨhT+\NiYORq=lK=L` ӶB'PVsM4 )éթJT EEƏ<0NܗWw3ڝxu88F"ySIXX09[$M+Ü K*]\4;]WVTneXvr6;ʣq2/u+Cu{}1>ӶZ; |Az.B39zr]apַ پo2i EVXfWS}>8y(}}X1KiEDl0Hfcޮg..Q{t,ҩrGq}A nRw/Pw\N#ۈ%,v:bcnխw] lJ˓s\Z֓M1ԗaPO}|[.TC XJr>s?v,#Z*tIJ#Zk3xKTbnjogb(Ƈ˒J0t)L.s$g_eyq e #~{NU ˥LgzRAG%wJHP =HЬ+4 C'f*)op/}FWΫ\VK8EzM‡i%0cBt`fIVJ0d 9w1|d'+_{osuCMsMp|߽O w[x]K>3sVWW"9|ApޛekP_Xx RR/U^ަm ^8|tiTkTKeJSWgxvƲ-tM#hՍzFU'K*Q]_-@6s{Utyd2m P^S-WKԫU Ӡ^rܥ *4\V/ iP|ZBay`mX 2u i4:ZzBXR,Q\GnYÌr)Nx) R ]/`~LxRg:g c[|y Gƥ+:~%%k+=jQZ(]g5@͑iXNn3M{#k{_f7y%vh “Mp2?a^xj|G;VC:)UfVO/7\wj|P-эV`ԩUlfX0;zbϣ_V,iQfU4 AvGUn.HpGA8TJ%b t@3 j] OepÄyЙ3)g'B *]/GE( Bx!uAXR`i& |j&[{r7-1C)^j:f)TUR4Wɷ_>O3~&W+6Yׁ[: pFo ZW*^8m/I7?߶fO'oc;{n:Դ;:+m\|uv+=QY7I\JHN]X'cX\p"{wqqr^U aj;4Gh:1B5UY$kWZ3Vj$!t:Cx6Ws};6k^9[XTǧgH%msu/>d%WGӻG^M04SÏ'ٳFy9zƻx]$RIDgT%t ErRiMkS}⫔I tcX %nΓ̹nP@Wq)ּݭr)Y5%-RGIcU,4 *m(!Ocj`uceԊfu9&g Nn$噎DtZYA3lNU='ju܎;5JWUI\\Pq1 >eveD_+tw?bicۛw3nWSJzZW9@Xu;ײ5˽_@m0]+/Vz8tnk3rξ5j~X.Eo+L؜87pC{8saW>+U檋TOSY$L?).L摞zfk]`HlXDNJU!>4{*n9W#Ųɳ ؽgWs]^xj'_9LS^$0yQ4++o_Sܷ/= +LUgD0Np-~ 6HI2uukjr) 𥇶S*3؟PS,Q*="?3HӕS{z +5+E% [R˥ ex&v047/2\7cҝMpjFEZ: |Q#JDȒL{R* Ce <p9W-@{"  #5]) SM` E4DFd7xa:Y/\o&tLP(ְM_RR^

E_O ]ф')Vh;#[*2Zigspgdaw_HMePo*H:*^pKӑT}=tV=+ Ov2X⏿1-7jʉ Qëq9U%͕|trT`P{*FLcyQTޫcy ?8`Z%2-'1 x݌ u)AiE3|oս^ ܲϿE!8i: D9N_\ܕ|[Y`5T vjF%|=* yV Zih|?*vpCH>y!Z|bXd N;\3k!B7A\f/\-"dū/9tʎnNFnUS*ē||v?QN&39֥B0 E/<,zog(YQ@w$qWb /uKQqI&WαiGJV *uFs4=]1lM=a85+.SR7>Po|yG%38.y3uxWA< K9Z_ڂ kWʥr)]}$&is5,r%†!28/<=e|\.*/ ڶdN o:n|OElF %O=4 OO4<{xiUB}q%W0o բV{8g/.}Os"7O6niVRq*Sxˌt3:͗cd0kO1,aYx||sĖ=ZDnKEuRy'T'\Ű(ՏId1.M/JD]i5̤c|s 4;IRj5W9t?z 'VۚZ5_MtMГ1ye^|"S-X<_0t&&%jƅ>8Ǯ`Z3zUHwT|o /%koJlJHcZu>:IXC6*x:5C|Oaqj w P_ĉ2^bl'g~b).JPu_):։@[+d2\Mwcu `{fm#Y=8WWj*1FPu dܠ; VDt K%zY82W)P~fa`Y:\oL*!f=ng|$sOb?ͱ3ʅ%Uƪb }4PĎU^ZWx~Д[\ .MTx~'60Sj.\-*OTKn_uҼy~GGY\R{MyLx^3_}rsy4F,Dʛ7~7l۪\WF"QH-xh;ɸEZ~3]+*.:s gȤbu9Jf"IZa"0_4S"L 05Vq#4$V dò9wgۉahy @H8|Lˈ3R-] {kK.-r /ՙ] cr"JV #X1 =G]ˀ*P-t\KWY) 4sKT CBY@F9hµ-iMOn`$*+8V #h]x axKYKN0Л2vZcӬ-,ߩ9c |03hV$;ie7P$Vn Ll矚`x0CLDtu { ;1DӴ0Ke1)*f3zRR+RWoN \\ _^J*=8{wq`O_тٳ*Je!T10:bA5ưR\rR 9Y~"LW`fH7CiV ʍ͌qO%?c*ʶg6_~bFRRS8^ЊWd`6N[,9ZspcJ? Xg%(.WKQ@U\w%5L/VwkZ˾]K5RP](`ɮm\ȶ^˪fZ(2 j XNi+ L;0ē Y>>;8Ւzh,VԊi|ZY#8,B+?F$rOc$3҈1]qJ%&voeho+[ǯW ZP95Z+BGR7RJ|q;8R_ՙҭ<SHd[a{:Cڧ:na SC+١'/95ƒ,`9?מ=Ğ=M.-Ta̴R+ P庎f*{ڽm]:7o_&W"5%h Kx5U$PBVgMMcx Fnx*E*xz)K Lua$8Zؽc#<;CW&# !fĕ:̅2g.Еm=3Çze(UTEwM7X*6x:5dzGvz>.lN&p\z:{.C})C JSN63 he)ʗ2З3J:Aۖ5)./hm̈́xmVgЕL%kg^o񛵆ǥɜJRՊYufYz_+cbg325Je'ũ*Uf _cfvm݌ wGW_( ;rEJj [hk|am{b7ifkHcvs+ǮxgrϾ!2q JP[q4Ì:?$FXsx NC;8~fpAvm'_З{ɦc| W1q^_ͩ"~~h!,buSQ) #s[/+j9 `i&,=[ZVpY)|Xh܋K3؟bbg?g/\RK+*?{~}s~f,jxrGZFK;fx =RuOoWVpN4q+ad|)ȕ\.\Yyrٔ݃t4AOW 0,XBq4eB}Lԅe?v8w]=T>Y1f Ztͳ?Lccezr,8/<Ǖ| H]Tjo|0Ig047/p"D !8%._UqNnFnAf Hd&=Ɖ+7O#h3=5[\8]-:k{`+ۇKכdA.\YaaQ:bo7}̓C8^|0 EXhL-UX90)?{'g~w'Wꮮ9!`0`r ) N=g[t֞omﮤ^[^rNkrڳEr33swuTu)JP^& С\pHfT[[*~c6^V2#I4-*gXZq]!D\R/a:4GYQ)Vlg a|$z+7V)* wr`pHchb \&&⻈k{|v+5Z$ 3zO2>[.29.Xgc‹OL06ɻ,u| }G;N5r94c7KYdP= eВ]mH=3Mo&FX4ĝk%z3Jss 3!8ĭ~?KjCp8NgG}S=/*.+`|0}`:[SD"6TC1l3M|> 1qe!~WBBܼCff>GT߱J9ê;HB:t̯,I9GrY6 +oǢ4Q4_}&!>4_|.W^ɓ<|8o_Xdi . =d(VqS0A5<OMgݘȁ-h'&9٥_ƭĎ-྾5GJ]8782+k ocW_y ^orYX- %fMLl#$`<.b"cziq`MS].{>cQ3xk55FUu&]H(I4 'hKob;W&ՙM*`oMhbE>:"lV[>+%VrcG3JbQ!dh/fWo)[9-d0ҽ2zsx/pk6DZ=l 3 9^93K.o`s{(8r(B&-OKEfJ]JH""DK uObx3|,ق JLIrRV*n7~[FBY39*K+ysp5`#[ev~^z3 zeCLEsp۬w#%TwsܸC̭V:maD7Sc|qU٥wllh tMbT/3V;s9f򜻼)|xᵳ 4]7O''e yCڶ*mTK?-y@Y% SF0tf5@($Qwf(xu!DgfW+yd=$l9U Yc.Bor';W<6Lans' IHx m~/.Ary9Mq98yt'V3kndk3&4@Ow.,VO-?kwttمSTp m7W8qdd=Cl=ϞDVd<7J j`~sn]瞚f7zw?\n6q9٥ ,F1|KzSFj#K6;6ēR]n1;UrELjF "[hpw~Y.nZO7wiK%I29r'd5p4uFSFtLJy8}[6kN6 5;pEfP⇐+<Q돼v+yd6'rp[3Y*V5Bp-Z²n]bLN[~cciq%8VFCKklxϞe7 {XXl|O~`Sb..`gd?ܙ R5col Di28ElI' ~tNfyR4y,~v w3?FhS[,WFWDv񜶵ao|Z"&&{ f '.fY;8^ڙرhprFW%tXk Z .حWpՠW=TM@zɲ̖7aX]]16ܼf!1ȕL,O)5]P"xA Av, =)F%g'OL{<{nDtnr X Ur~([$8zh۳۬l8v<>,Kvfb6MhVۖ@xFO;QBQzǝ%!ZbrDċLCQD؃GS(=H{Y|c/3FyA|܇qvn/~`(fFTcl !KhJ$\'u]]W؋V 8"Z4I$q{f0؟bfS1nҶ-e !⃫ˌd8q"kN#+ByQm֑ejVls5n=^zC7ov WVJo=ciLfVyw~7 K/v ˜FTT mi1޵1^毻s%Lyh7n!IJŸ3k #Wpl jS$CBZkܩQfkΰ^nD :Vq+ӑ4@X|hn+Qt1$Uu=tC'T{>9ޜ!_lwY^ძ 2'Ι m(HޖmQœ-9d;_IѢ-J وN9ylB+#my,)0]\4EeWm(EfRk4:R\_AK((8+P[ABak z_8.J~Jb7FҜ<:FmAKt+Y%n2ڝ,q [jRwD \Zm8N[N68^`%$ Vr]Sǒ]M"atCglK"KKoͰ] _DpPV}!$I<,˥ְTJO$ġ=ldͦe7,[}'* ujlXZ1=5,Sܦn|V@p;vnZ/S+c(8!#L{ Gx /w7;l||I9'XS-ë̓Q#벎{l]ֳK|pe${$0xqz✿J6WEDZruA,kej #=w'![Y\^hN$`l(=ܙb6/>5Mշg7םkk+ * ꄘ8}BΈJEx1Mvp2fsbE[^,fq%| Wz-iZ`WVqw-;Ѓߍn l`IᙔBOtv1z<|.Ls$Hf82fn-}f#AD/xdEU$nkkld+|)zeXy%E 0:{pU9'C%>wSHJ*S-b6~w}+쫨HFӴwS: .rIUPb%3aLb!Nws`?rWVXި!Z9w0nvj6Dyp=z}\cPGS(#5HCaRxCITUlۣi:TkڅB"`z,ͅk&H1T-T]=78d>3O~<ď0R=8x#w5YERTl)gg cY.g>\q`ĩq{-S#̥nx$ s`?KkEnXY/QC1!\߬u9Ko mzJ%֖iۢ8vpJդ;I2wg3_Daʁ|.y4k{6!t£:g.,rv {',>lDЃUI a2]1>9ÇY\-r{vz9@VZbmY]}* $w:AU%޿‡7Q^p]]q޿BwZ,㤒a޿Wh> =[صxQB#+ 20q$ `D:sȪFwM (4N"TDZ<ض)E_صTKmڲ#j$I&glVV2mrNHኮ YѐTMШ&lTVWRO ju iddPmؔ ,x?vFi9A5I0IT]`y5Bo/cX-Dݫ[E #$-I݌VE6~noE޽qa~Z6kEr+BYmWrW#C;#ϭ-޾$lrڀ%P"bcgUJ,FͱY J\}DB*]1u %2KLYQ۟E4LQHz(-$EV4">4@#zRrfaઈ EųzRi?GoYo `R֚ zk/p,]&vRQ_e"]$&gk8Oh>lTq=tq]V=З, \CWT1LDLBQdb<;MaSo:],7(WM5zƴ[ۭLkUy yq,evGOfP0t)(-HF{KK< 2Л wy7Kw.:.4zNkr#~bl`b{z7O){׷7+m8Y^C11I6TV=USPH=  &^i*$|x1aߟXDn{#ѓ"ۭܥz~Of N>!V$aID FӴyyڠXnJ$v%/њ|1PTY5* T@gMh@&hsYN1؛`+[j|#ۨ໾lPܞ, 0$,$() ]mǽ۹V++;ͽ.Q@(\a %VBk` *a(g>X< ŗ^v qX˂rK5k%MU'|ɫa Yoˑ%ju7A}m> W~KJ'f;4J$FSci.Xgs xVS+K2YhE3CaIP* ?ه8~x3Gఞ Kb`ģ}%y<# R1-E&5X1Ť<ڶ+.L_[Ez&y?i]Y-LK|.טYG:X=::WfiD򷒕[;^Fg?Dh)XjV0@g@dMeh3y|'x0{'3,x96qv9Y©{BwW|Af*thyqk3ZYWUQp`6Au!U@dMxP_1./VƱĀ]Ήc&. ~@|{ >xcm*Aؔ*2$a ǐ5Mj>mW5pEwb*WnCNtݮI3ҵ)ojBCKv67nr` +EnD%X4,z ŝ~[B>4UBF"SDdGnl0gT]L]qo>G6W#(fw.eI- ;h[%^}wb6 kV#@'3 u{c<{jrEU>l%T7)D x{ kN/ 4[#lJuKOOuct .;Sa6yyLSFz eA8M-|ܝ]w0-$[_4?4%IMπ'}+nǩW\ә[fz۹r o[t_{o^i @7KG_.Yꕷ6(Mh5V[/'^Ү+-PIQX(]c)~(O>4fRY 8\[ .Uĥjx-+kV/4My@O܃xܾȤaR!hΤنZAmS|:' rݝ{x r8(TE|ԙ]Ggb4{'zyH?^6E7][0h?e-9w >Pt: OMI8wq ϗإms-jQtu^Ij; lEetMƫ 7yC5bdkۂFY4B;ՎG5ǻ)WL 8B0,jYĩ_t so~7α]*Bṷc"*0FRut]'3)ed.DZ#I%#!Cee-OoB$W謙̯c%z 0AVF"wm}iOdkE֦j_R±N-]ɇ Nϻ0*{O\|cn1hߓ +EaQ~Ⱦ4rKQ۫\ OЎFsιـR:H"2*4vc;Sa{N6R>'ȴjjN :[5N02flhBb{ y6+X"xԳͶx"C25$y{&6wk5]Y5P UW7كʘÅ++Laux_ 5@@Y%LFkK7$Ib dF1"q$ 17. ?,v[Fspы ~<$S-L̯3wTι}d%24]زfnYP{xF մ6]cm(϶Dl-ĭ{;<oQX 5®m$k e~z)xj^Y8i}`GɌPhM ^P$x;Ç0m`󃟷Q DTV6QTu$f5<\Sӌdh4-ZVx̠;BT)|C* tL:J&Ⱦ^V7ܙͱZ\m(kZ9 >$IXnp*{H D?`rߟ,eʘM1 -:PK[80C8vRh5_ C} x.ԛ]!TMѰDmքl v=q ,˝w<YplO7_n8+:ީ>p...2TbD YƇ3 $E %}A$Q}_ lIyxU3~{C9G8/aAsTI1,pD?u| <4S Qu)ƙg#?cClj] -쁖yVjUC&]T(/~a|dvz{S/+4C *N;h~/^S{L u(lYd]SX Rq bѰ#y@^Jk V պvp03\lU- LwgN32zŵln[[L5A&i|I] u#uR[eh:;#/156suhrN. Xk(¹AQ.ck$+罝 hnkcaK_ޫWRCO;| o>[OOHAQ+1:7Foy,\-Ԋ5f7裯;R)YA}#/ %bkX\+s"'.\Pc=Į] $+]_~?#PqT#1XšjI ,Sf]|בv||%L#F|NƆ:YZ+ 4)(&l۟M[t8 o5Kbţ!j6lnM35!WlRb$KbU={WaY@2 v튪az2K9 C&dd Lkì[@C+6]13IPEQd!]0x> ҕa6퀩0ЛT}]Lt2ԛ ejAe9wd" hZnۑR &x-Atc=8 @L ShNpP4¾n|{z(yvȕmqfEdJ1ЗM5+xF 7RBSFOe2u߇XVBC1"K[ o-f_,:8R[k`|A`5$Q\!aiZkw'dnH.Y#lPSO$3R ?E"M!- !HP$J$%HLՕ;I_NƆN*yZw~ʓ_3?GYۿwneF'>c_,ǀڃ8v]/cc,,+r{B2oB;2^ .Z1:8.a29Z-a KxҭKo7<{b=ͽ/y=w䋯!1:zD0\b!˵_L_ `'3ؑ8_^cuӧya~n{8'L[WQײ @@:!=>$o&48}bif ܛߦ#aȒrWb3Wwi$b:"h+m)㪅lқIP(7my==ʽ,\]e}ά$fKB[@$Y 0 2 tU8qH MFUt]Aćh(P"2_^-$!/^\}s;Ez/*1px񙃼-f~b?xP= Z2rw.U90͍{[T(j>x2}dT1^nbbdCӽ/}.V7sޓg$IyY|E3^?PVw +nֆhN"}DRaz|׿rzLmͭ%Qr RKm 5$5HPGܓyo?ҍEbVd]zE}"hwJ[O FȞ38~dx9uq͖~= %u=S]<$|pmFYP2RAU$NgxA&\]TKkEJePHa7A4zP Ћ" WyEi+gB>BqRthZ6)vhek|}dqM۳.,RƱA< `D܆;5)bĶL`v^{َ (D$љ hw5_./)m+w$c:4c=p k$&9~dxwg6phlclRoil;ert?J8zgo|ŬॖB Ա pj;wr;E$|4'9oE,1ukMw!|ZRK_ *J;[4UB\FLM"IUjuKdD]ASQ*B\nj'vtr+>F9wiȈIg4t11 Qj(j[wY; bP 6g}Hd7E2feMMDTԉI*7r0lnK5kA csH<6_ 3Lt39M2R[*pw>z&V5 ]arf WV<'P APT\F(w>ZF|,j qIo'ܦ,K_%~=WSedBiϻ FbelDQ1I6(l˖ z2-c3I[Z (f x)ܥ@_A(K0&fjX/ w] 쓌ɣۃiDT`x8p˳ӴTZeiLk**JQpHrH<}T6%;ت[SsOM"IP*9~xֹ9>&̧[ZJ\X`/6XިK:˛MxÝTCֳU[)Q*7$5ޥ3A:} !j,*jgϐkOMՈGu:vE銒 KP4z<ߑPo{T[لeglⅎO>ԫc?FI$5ho?c\3B>@'`yf;%Tw,VVdTUAUd'(*^\q UUE(rܭo]W8"lӴI%dlmW=E3-ƴueض88Km:4$K} + 5|NaTUްWn^`Xv/,^Rgu/} E TR4H!]ZKB4?p@*$ӓgVVY/QdKzlpr?~ o]\ r^ P4!h&d:$+Fg*L" rIH%"]e _mnKomk23Kx}㗾&k JK,mT_fz4ͥl嫨21J^\.~z@jP O'Exqx:㴲KiR*;*neAVLaf8+KgB/"]45IAFz;2@G 5 vBtj,(R7M*gNBzj~9ͱ[MQ.Eh'1-6ҳB>ECxjfek/.r@?'!\~g+(T\aL:FPaIڥ$+;}VB!MXvFӛёa R*7[̲YfcJ._#&9`_\Ϋgqk6Gi,1:dT/'ӨZS&H%B#LˡPqk[e*&M lk#R`"_d>OS^^+Ow+xgݲe;N;υ_8qt|ʭu_^ a+eV)62TŚ{fp/;kbJd=6)*wwD̂) ۖfqW^po̰Y{_ITZ2|+ɊMЀRu"aϜg7zwIQ1tx<,I w^g}i VNY .eob'(8q4@MuY5L[yz:C~z|]7y^OK}}=[#xa@i1NK ۑGݝ># >#t}O "K>M7q{v1+۫\+ 124#n־X%8T"n$YaT=wb⋯e+[­dc3&zpj$K{P ZP$g^޿̙-t<(#iZD#&8vx`~)DžBˠhX' ӛ#B,*2՚V2|tn:RQ:8D*B* ӹ?Ej6uǩt/ؕk='WxgdX^u/+qku޻|s%ߵǜzo`fW0^x^-'KA?W df@Y7PC,m 1FL[;Z֮URzksc ^t3FÝ p=!$DKtB:6̣G3kyp`/=6H㍳sgȊ*7{zij.ⵑ0(*=gvջY|GdwcP{{q\FTq88qG pdoC} 4E"]^7I&¼qv7ͱ*wf6n4&;90@`.y.Rϻ9xӞe^5Õ\ o~ҏO+{$|*}]kW xfS8--v[[N,8'buana~̼-_s]eG/6kXo5Ʌ+'y/3CuVFͺ] >?:r0 +^}wEy0Vds#\b1Waeȓ'gMU5Y<=TU63ڟC}(W=.d.ebSa"a ToXL|ɤZ3y(=)ֲu\O|*5kwܚݦ;-^=ZiF2wu^L]n>G[>2o0ڿe?87xmTn6M}ggj i<ŠXot"+poBA% h<MK[ J~YHJz2&qxY٨"`IE ·(Ս9g$7 k|)F\9vx׃WQV-+ b+`aixT=}I5R$y8RSy00Kyo?8ɣ+﹬ovk/-eͲQTs\a.CC8χְv bLR[[E ]a|8]Ky?9dU j4_l-|wN:[g_fUe2ݏ ٪%!W ;b{;.+J_[mzȒǡ}XfI]f&@ %![sھDh!V6J*i*+[5K"4d#ݏэlŧ,^z]ig.,K=0x/0\f]`}YUJ3>3[4 F2t&tO{'2v 'Gtp]Wnr./sk%i l8I&Uy0nl6jXu[˘ Z)7^[wo~mY#v?~lODC/Pug5޺˂|m&lr;#VIو+TM21auB&JMh oF}ALn{$I2:˫9:!Gb-[YC1BO%:0)>Gd x83\P_gOOR⽋+`wm.IVpQX^+-){@S!pX|M\Z7Wy"p^2riʥYŭ Q֊/ 8kwXY+*fvqYlToUTﻲw?Hؙ'ߗ=ǝ~~ $;0cCd5n=-".,Iv SVA:hq^~6r rʪ;w!*)f4$Loog+$R9}|9wyvu@E-l;Cq7tIr}oC uJU r _4úfY^7%H$E'`k*VAVQiTqU(RTK(ܶUYOޭooy~4-{.[go-E7A5';NBήŪ$ \#'88W-xo $xv ݮj~FBU7n/<0q:N|ib[68e{OKmڕ/ɠhF8ё ӛ'!k<_0uXLEn5(TlEAR@‘rfm IQJ |PeS[M\L6.wݦX/y݋g79o'(~[T QXX'1K׍n^#G.TdkwD޴@4^˖Q`h}ݔ[gd/EX;IPy_]vi'y"#ѰF_&4C?`HC:,hX,ECm헯 E} wRo:K$c4«*U*"p$5HePc)]sֆ0/f1 8c_gs τ2W~{>['b| @G݁Ԟ\r{7lz2zC0| ٦`8-t`Q~$ˢ\v9ƇLuQ79~.| 1(|nA}PegNқs ,qA@!S(Ը5XMWgE];hc\rmJ*[)d uAGk#dm=sGZBB6"ՠ%;уCܞfa9'ڊ&VaQ=?Kg%Iq\fo'}Ļ ^CMeuw[;V (I2nO/V)sE;e[!;P{lD%ψPi8lmbttoߝ+oͲ-5U&80CHDo&Α} %T\[68y5- 29MTssBfԣȲ™ K@^*%nqUr1% jk.'EAd#žp`Fv[HZS);j?|_ǣ*0:'}Glqy@oJ$&l>pA! Q" 7pw0|0L. 9 UFHDB=$*Zm}!e.D5Ntїt|ߧXUf#[%_jRkEOrc)&omjM!0c7ŕ[l߂JVo}:8yb^޿¥4$CՑCBuvptEIh:uwW%IeW6k#?:^Ep0"js/{.aPqBF\;??̉#z)LSݸͱg XyNWd)p7ýb %$iJ("|G3=ۆYC(&&aC:7Ǎ{[4-k!$$; e.ܠP!_Q]|nm:-rQBQL/ًKnx4O=:JO3,7mh1۬#k*dt$mn]~AC{Nsln~>zgWYm/eDY0ݰ&(R0{'{JfPJ  ,DUmVms-V:{1:0:{Rh4JdǤG/[IPMﳰ5~v E`Ӗ(ItNcuǙYwqN$B9Sy8U.\Yiְ Y*V)o5EO3 [xer4e{+b!=ESNd#JWgcVLOǬ;M}}ҏwwegu#I% ^ v1# rw1ɩcCoU(Ө 2TEnZHĩz 0D({Q@ʢXjUs]\m΅eHHS؎ڎi(Osl]^+r򒀊n)_2531& ΅%Lm԰K4We/iT1z*^{w"Oŧ&Mp2 Ix3jE5Uُ_O%#gp> ή@_FI$Jg5E06lqgDZ XHۨ #,6-cEU;؞}1zb$!4Mh8K6g?6E5Ltph|zVȯ*_QoLFCܠU]I2&oBJAq*f^ QcIn]G&8g-bn)A‘$-$ ^}׳>_*]E%12-bIjjX _ 5LeX۪R9 ai=@2hNh%C G'bu3-vG8~hɑN1JZMUD J_}.onRY8#вFMCgv|->Q~tdžPU,ڻ,,pLsG̾mTA*# yhw3+U@إ߬4mm:]FT HW#!gyS] 1灲:"UIQ?^IQBrc: gG~8 ioIJD":0]ԝ(Di+Ԙ]̲Ya3WE:RjuWpg>׬9xΙ0 mWά` 6,K\·VʉL5j-tl 50ۖ$ֹo*ܙMozs]< `u @$Y,{7N_4~YO)>7qvyОmW;A-ЋtV|xu'&8WVQ5hX#H@aq{f ۅպ BHgOߓi:yn yp@adUG ,Wf}Oġ==;"$Ky.\]aic;VV)+ͺP&5dΤpKk%MvwzJ)0m (%uكOW_|?'oZW.;OoiܫLF➤E(}Z'H4&{> \ΞN#LtF5Z3Wvk : (ϡ.;>B"f`.g>X,fM=pzgCmK=]T/q;9,'Ym`6+\.`/,35&ҹ=eZks7Z}B. l=|~HuB';pf^)Z_ M-)ʟFMLebtEIÄBXUd}zB(OjۼHyD SGxh// kd% v1$@r*JDP5 Wda*Xub׬>j8.+Q8 !WZZ%n#(Zb_]WcU?si?S7='*|_?cğ*]*c#\kx~;E?,Ow90/T"O2> 79w9;@U5$U U+V7qgאpg̩Qlݭc ~ْ hJ*CI✽Lij8v(OKqg[;/F'X2[\IϧXq[/eE#+!^ɗ^J sSXݹ-P =P̖k$j$ؖ(.e[hS73UH_}]I~~'Χkߣί~?WUy3_ ;5d5KR -OIBS${ubQ<;GEl뀼g`z,>ĵ;ٶf*05hlAj!lX}R0zG GtxqW\ۨc6ꨜ Ԗm$I^߮is?v"QBrMMLt u (H$:ϜClnYY3h 8 X3ޝOp*o?]k?™k+׺q%3&!}j4na*]b |d {b<•ey (;F;ֱ 8ͪ`0aR^SeLYfej%'5xպ1uRv)g6%IM^Xߖi;/qV+hXue QYF7OOx+E>=z=6rMAٙ48ut3M^=3c]lViڠ6IR4:yo3ۨ`70sص"c>,D~؀O;q>b}^Rk?|`_CZuzCe^z5$Y s/iO,uZ[?o;?"O$&oƽ,,(L%8ܞ9,n q`  zTBܥe CX(7OagKإfq, YV}cp%އ>,4?sl/?3oMۿKՕvn'ӑ'SNt\] ) dN4]]Z4-z9(8 ^B\;` EE!Kyt]ÇW˰Qrݷ}6ݗ>4?4/o\aN΅>NGݹ WVX۪ K חe@\맯'[ت2}P:;" QqF :$IƙC JDsyfn IVyy|!4 Nm3,DW^=3MGqu{Uy81d>jae0AiU!-""hY-%hIm(E YV%54nj3yϽ3gAq[^p>=6IDAT}*PDF.%.bptrDD嶺I 1k5gfɫV! !چH 5`7#,F)~m{<.Gfټַ3fě80@6 ua)F%'69{DŽ04]؅:)oC+pHy%tEXtdate:create2023-03-08T14:00:34+00:00zD%tEXtdate:modify2023-03-08T14:00:34+00:00tEXtSoftwareAdobe ImageReadyqe<IENDB`vctrs/man/figures/lifecycle-archived.svg0000644000176200001440000000170713505146267020113 0ustar liggesusers lifecyclelifecyclearchivedarchived vctrs/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172613505146267021400 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated vctrs/man/figures/lifecycle-questioning.svg0000644000176200001440000000171413505146267020671 0ustar liggesuserslifecyclelifecyclequestioningquestioning vctrs/man/figures/types.graffle0000644000176200001440000003254013370406002016325 0ustar liggesusers}isG_}ૃGG{>tG^|YvG>|aT4G?̦g,Nv@Dl]fqЧt}?|:\.ŰǏx6xEIz2EWqb?R-{# C?tkˋN9Mf^^oI]en*[ѣ {k}ge }/N\7͒gI:z*6b,}OC/>>fxFwp9SqVtO(y壕;/r:K5ctu" pO^}t뫗'/?y翏'?_i~{'?O^i>?9z71dM/ikF39s:xjJPN>Sw6;p Ázؖt]m27;wӳ8G M?mȐ.Ra?%WQqyvy'ϳxn‘Vy:JwǬ2[^1.oZNMo@dw>r_ovO+s=e0ى(4?B#Ϗ{߇1ORw>&ʑ`KΓ|x:9%#xx2KIh|п?&blGm|d%<8LҭIS˩ֆ<+~[-oao^&.͚ lԻ8I?Oi;X=ͽT{?uS[OZ{ۙSoɁ0r[p utfv^G>trPgN{;s긪⾪*}lfYG#kA>9)(悫B7y)4c [F^˷ {!e(#%V+p[8 (A РE\O`l`p aڰ- `Q681>`l`l-lXOnFv/sӶW%Bf$]gEwޯx0PpKF+pCCCGnlJja@ 8c{R6Ko$(\\#.28ܴ~5s08p2{L908p]bA^OZ28pBEL[nn^p{AЖ;ڤW7NrIܮ/ e1(ܷ}+5Щ&vq]Gmu]no ])v (Uن 5{,or݅65GrƅlrD}meƚ7fD`DMymre^Smc}vLҏO|g19`rХiyօkk=##BoGmzm=V.}إA@]xzޤ7Myzޤ{J[٢^tczqz kpP[mQNFz5P ߇|$7@_P>  BhʂC mӶTA5Ua`"yUJÎC^Rұ

:7>k;Ce9.|vrTxTP?R&Jl%{7X,Pլ1LLA@ic9pZsc#'He"SD5@H#K9=uZ"MTW*큘2iE6Jsb"ҒǂB sY...2eLsvU忙V5BNaz l!JGd͂qo~LEKEpNqpRƬC=a|*Be-u[<-sl؜t6XpxB/ƠAg-r(В~L}kz4W AIBein(Ra1$ p W^vR4P,)/3g 'ijϦ_|Mu8G,1u'99'{p;ovL6_g)7 L™}G`f]D9,y!jJ}spח{V)A(AVڒ'/; ge~P:%5D2(RF{Jh2,_+P Jh@u7Z-HusmepBt7ZY^# LQ ޥ퀣G=v¢18SE't߽ Zˈl)b<x5(2zGkºŽ-6C芞Q̍-ɸ@Փj o>j;5HvݕF W:Q(+(K K,i( p p.&P;Eҧj ҃}F"\lv\lv=xQ_aD RZ1e040VWi4S/УxZ!̥0E="exYMs,0m6z!<k(EqA)",z^Q( 0% b6¨^R*'r*g*,=DFKYV9#xJ9d"'mh'(w^edi Ę3g P*.)_}#;1gt>n}M1_}x=PK_=Ĵ젾eJո{߲V(h:SS~;Ut* y7s*OGs>`Yf\`#% 81(P.5b_.92G"Cӕ^''S֛d>~Y oi21 ZS4x[nFySW [ &4yk62"R^a[KJYxS VZkuqnU`DBX DFOI aKuJMҔi6>h}XiZDKB5kZdV zTCxO `jZv{I @ +@pnuPQK"!ԀX Arb "EQDHi@)e<@H` `^6$:b (w^[J-@h)X` XdK 4Ew( R>w W+QЗ@lP g\=+[qIr.i")iJͽW '@ ^VK[Iv#Ei ,'r PQ (sECa[ևNW{X^RV ,d}$`O?5\lT~~Kl#gMi^ڈPQK R".L\l:ɎBH,xm/#EQ`` \$HrcQVAɯ! 冏,@% z!@X,bB[n ,@ɮ NE ڄ%>>AF9A冏nXϨ _V$D*H,v +@"AI-ڔˀ*cD0V Y`V Xdi<OR]O irД oAabJ}HBK:r@x]P,X~ @0.Ҕ+۠@kqXN`9:EDȴ|ε"'p=٫~ uVKފ3o.~F (=W{+2g\>ss}#%†2}{GsF HCXd_2gd^^n|N%z&PgOd>}&L3ZF *(V"-o{6MTFDBZ @BqE ?Mɮ}'n\=|ΆZSJJ:T0.%)5ürッ][㚶_a?8ᄶDN\w`G"hpD_@*a:-zF-E)O!tdh҇V#\wEkW>- Z:Z"q 1@y㱻bUl Bs +ٺ-hbQas]-(>ha|- DZ"Dqrh1RQkDe ӳ`)DIᳰSqE(/DI+-pP= ̂qBYT@3T @ ދ Я`BxAg9XYg"cv"X`Ϫg{,v)Xp,1] =+~ϒE@iCIQCtx1 QAoFSzh!-'؝h-G =;-t{j˥ˍrF/Uw/hѣajkl]V[in E?VZH[gj%[;+X#b4XBw7XV[-N ^'̎VBJMo"KP{)'I{~O%^(:h ZP ^J{Q@=xO2ޓ$c$s$<r_zp1x]7%=[)+{@Dd z}fa]O֏F`waai_Y@O_H<0Ac<,tceqg=ԝlT5ىB|tW8g|035[X";IwǬ_^m͓ /I/i0MY2"r{x2KIh|п?&(-燣8glu}ϒq<̦3x 1 H3p?kY{?6N\O$D=PMYS(o$~ZxSiBu{FT-8 ogt[hl4$Mejc٘|nz6&[Iڔhf+nМ`?۳ApYÎg ٧Dv5m0pҜ4?ڋA 44O1֗ϫTd2"JH3K/"εqOI\,Ʊu8 HDBk= D̖ؒ-[^DV{Prz^Zc=Mю|m󉴞;9 ;9;y'\-qoݓqz2)[Wmcγ) ;yֈr.LSu{P]OY`J2oXLuL ))Z|܅lCG$لQFcccT FMi0] RF|ASH R8K4Lb*T\Jߓaaaj LuaSD*tؔH齛ړ:C u= (;2PPPm9h SoKB((ƨx)KL(Dg JE; R R RmUq@Jc9sBqʰRso랣>۝ATX6t*$@"Z<:dG*G@ v~}؄]£%lZuhV_o=("N;aɱq`8RF"e#rHJ͢THc#!#ql$:, '5*G/(HH,IHbWFbHJ]IHh$ 62FR0еRb"-* ]&Y2q?[J7+ݽR;ՆKRL.wi4>]r)u`k,RUL6HUU2F/og#adDHUU2HjF"UU,sl$d$VCau}FBF"HiP`Emi2pPd*.k}y[rݝig);C_;KƝ^NwZTД<-^!{٣{^ON rH)Kȣ^􎑇wӉ:5ξAAWӕ搦 w׼aaWә~!ב20 vQzL-'e Gww3]C҄ (;J}$ ^/žzwG۸\6b9b^vqXmWn\+{EFximXu /9}e#굍po"֚{3#lFXGؤ#IG`aa6:&muM5+2, (؈}6Ik꽍pF}6H#{ca{aa{aa{aaFXGب#FXG8wюu{ca{aa{aa{aa Bb͊雍IG`aaS=BmDzMZؤX#YbZkM&֚6iMl#5mԚPFXkڨ5FXkڨ5F$kMkMlGzo#H#G}s7)bMFsL7ľÍ&phM2;bH+uiMm&~{a~Dn⇽7懛"{o"!-x6g˯,-p{6tF}K>-?Zһ6G\)R/F!>I ϮHwӷø dU\7 FnxO2k Óqz2hx (0IKd]~'L-Xv(z0KHr~5% }rMy^4ڳ};lqyW-\p!aD JR c;U?&2]lε[srVZ: [=hǾvt˷,Q6xu]UΙl:N⛋#vOB3BjNqq" 8:X~] 㙿.ڼDSH%3:K+Qy Ep hCyFC`_t$,E#[EODP8/W5nG֐fR#l-7Ѷ,2ebtE~g@h<hz,8B0Ļ2HO}HGܠ۩ڃZqڰV[cWp.[>kp -A{X-ghnEԿo-#m׫Z X2"WMAY'T/'Z:4:BRзVx+ -`kW̧ɿ ')~V]K9PP!JBYAS#WgA@+8EWg 'ijϦ_|Mu8G,'99'{p;ovL6_g).787Nnrfd&W_ղջ #L$|J}Rg Rin0/1_b12ﵹ֤5>~7ԑ5 xdAWaAXP DHt XPhL˂ ,(&.(h(E+ A߱IQW,#0Rzg3F^Q((VXQ`E"0<:%\ŦV㺛e^ 4>}-pg BށdaS{,U}r' ,TІP0ty t<44;$<`2d0.9OC9%#⋇'$/ a,r<}~8JJxpxF_> g,l:+98`giR/_ׯDP%OYAAH| r75"ޫ_ۛM^1 tG0pX]^Y!I膝*Z㚾WN{{rMJ:34 cG<^gT$-R;ƫu*)mck)ÁjT MlTn[i+wϻ (-QE"2GAZ[\^ս/& s^QFFJ }}ksI6dsI6dsrS+MƁ7bQ$@ʀ#>e-/ ]X}K:MTm\,4O4Y2K;'/)`yP]ƲSSvMy.NƭE$S(+m>+׆~y7s7 ݘt-缷usI;L1Jb& yM -ۖUwyțdneT7vvDL@Yڏg؎zR/PC4<ǟݯa"CJ!:c@u \u \@I ZΩzR\ǀu \u \u zHj Luch6({{S*2v:0N>3R;AO9NyXd[&){{Fa)R NaB' eSeDȀ"duNz4,Zhɢ%]--[n/!\S C;)*A|N {Vb;Ktl\_4A9ߙ3g{ǚ舢a±_`: bIeDEXm(%y3ge[8+%nk X 3;-CHe[;"KG.#RswꝲJmoNg-Xn[YE40h^1[S|*aⓤ`+H)]>IrnvQq<'V^ǟY&$9W>4i_r >Ț/8&o4#<[oO&i% zc!%ot,X&zՈqBq~/&'FӬ&IM_IiB2څs\fӳtc~,M&`lexO`kCמ eqo%IK^1~:*I1X1;i;Y='d$0Lp't1>B̩ܫBx8K+0&]acgՔ1[&|qڜӽy[ iN=g;joi lifecyclelifecyclesupersededsuperseded vctrs/man/figures/sizes-recycling.png0000644000176200001440000002012313532250523017450 0ustar liggesusersPNG  IHDRyyajsRGB pHYs.#.#x?v iTXtXML:com.adobe.xmp 2 5 1 2 Ү$IDATx pT M0X"B[S*L”2m(С 86mymZ[*@I0E*K20aBi)`,"CbxIxK]͒=g#gcfwIjjjRn(***//wݸ{ [=/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/.aT[lihh;vlvvC zdM,}>dʕ.]~׬Y"Cd!h_etXgqkG6F nȚjիWW^d}_g Y_ްa#<5))ɻ+w3fHMMxu:ujܹu ]yMbN#G 2G2!@P[sСaK\m6zӦMׯߺunzɜe˖eee566]vGL4I敞.k;wtfZPP!JΝ7nܜ9sNbD@aa=n]Jp)7sL#SL6l=߿ᄏ`z^HuOs*۫wխNHiV:_'563I 5[ Bf&uz 8D5"$.@Xo'FD$-!@ֈh3DYc"mfX k@@Y#Ldp kDI"`]Nd63I 5[ Bf&uz 8D5"$.@Xo'FD$-!@ֈh3DYc"mfX k@@Y#Ldp kDI"`]NI"&go=l   D$5!x& L )#$EEEkeee 8IY~m֛jH kvy#`V1M5 5R;ϼ0+@֘Rg kzS d3o 5fTFj7fTC@Y#dYo! Uy捀YƬ7*@H+))YrK|~5kD>82 D .+VxgwB 2< ON\zuK,gɌD}YNlذG\H~bf̘꟮SN͝;7;9;d: fN2G2dHv.nLܶPN:th#Ӿ۶m9ӦMׯߺunzɜe˖eee566]vGL4IPd;wtUPP;JΝ7nܜ9sNbqֲ)6nꫯ~뭷˗/gϞ=zT&… =.\8qC\ַ#F۷or.a S."7x @/^?~4 T?q8p@;4bp &7cǎ͚5K?5)Zϟ~x] d$tYe -G'I%$vΎk(Nݻs۷,Ns\C9XR_ޙeYȏD_"EEE΂²1ƳL'&GRQ:0麀k^-xL何5Ñ+>P)PFh6C@Y#Ldap! Txa08*@m4zO~z/%kb TngRum Ro.pYm}&  ߩw~|R-m{TS$'5:YӶ/G =Rb:{:0"]3՝̎TC ds|Z+wبԴuVbwlYm۶-ɓK,3fm6}C!ظf͚}kzwQXXX[[kCtMdˆ]Pe_{֍w_qbE̙3L2eذa߿ᄏ`zz5Ȅq' w7^1567+[X˚O ܪ[i{eۅ5)@'J8vνq]#Ɔ:5򚣭/ܣTpҟWz#kSh \V, lAJ~e^Q\1NNAb!UT{oc־݅kԺgݱ?D jZ|;" _էނOD /OyHx#~7~Nt^=mk5䩋@>H{ީ!*Kؑ(wƟ[#w3hYSLTC =dzFoPݳLgM޽ `K߾}Woܼ/233L.!ה~~\i_RR`lj5o~Gh3 STTT^^WVVfVd%nKœUȄ>8apl2} ekEd]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#얧:[jvw˼jW;fSߝf0.@'`zR/ޮzzJ~k[ޝGtA:vjXqd&0G-B'ݠѫ۰ԒYc ~_ztwz)U_ԛ >-uV=oB1L(~?='Ҷ\sZ&GyRӖ}z 8N =R[XΞ_ٻ˽;ysT` kEgnT=nxZ+wبԴuVbw$`P߹sPPPJΝ7nܜ9sNOV1?dnNPm^>y^TsUgՑӿF|I-æ}񚚚,o71bٳg]sB49Ŀ1p^*or+VӕM s~9p&| U]]=a„CZ|| ;6k,R!ag/}Pp>}uw"6&77ѣ10f̘kرcѢEΣݯO|eժU}{?;xq<\CŧNUy`lJʹc16(aÆoʤI篥};ciG&\VrھU7,z'n6Fٳ?*͛,v~ x͟n_2yac9k<"uuލK27m_/ M&XY3|o~j IwSc*Yx]燍IOOٳSv!W XL&WnU^v5 L7 (xkťYm/9L=jNuwn k-TGW.+jaWOB$`Ym:*u]m=yU5jݳ?Inp..Ԫ?~:AwD'~?˒|L#ּYcޜ]xD~M»YaJB/WeO{kĉY'4n&GjݍzB/+ϫcGnvD krSwm5 jO݅ v; kS՟?qwIc?|BenVY_}ܯ_WUrtة)TŹIo7Wԣ8qGaZ"qř;;)++ Z,$@j6SEYc k5"`QOi 5T(@Xħ4AfX k,SAdf3U, 5) FP*F@Y#LdE|J# HlE">$@j6SEYc k5"`QOi 5T(@Xħ4AfX k,SAdf3U, 5) FP*F@Y#LdE|J# HlE">$`3V5FIENDB`vctrs/man/figures/coerce.png0000644000176200001440000002226213347722504015613 0ustar liggesusersPNG  IHDRm3sRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$"@IDATxyUǕMҠ9BI" 2D3=J%JHIA(*Q%Ji4@za$s[ny-Z0b~&Md̘RJŋg(DoFd٫Wp 8w\IM ^x$e˖_|1Nz'% h ϟFTAd/C̙suwȑCf}Ν{ΐ T!C (SOs:0JH*\0U,}'H@%\B6ܹC=$bjժUuAu"ɓtM._<=n 9(0ł\s5v̙Č[z &$ |.]-eCLPjXb^gy:1Ǡ~@y1n:ն1d9Q vIٳ0wBjcGQ@I#Hl!p0Ukn߾aWBd<&`+ke7o22ۆ =z[]Ug%/ch"z/q+iX՛oYf͚I_~ɖ-۰aøQLt9YPr١^+#F]tEC_E (/ŪW_-o~Ie"̨mۚR p#l&LB0E`ѢEH6|F FAH5jILTv']vfB"҅8Xƨ~AO?[F$ӧO6Tu3/RDás{m都|gba~tY߾} Dr١^k8n'N7: yf~b_|IbpI !0ӚsӦM4(*X'Nʔ)c6T"F1&&Z.kuv0'78Åd̢SKr[re/M*[7x+5Ʉ XK1IX3 \.f-aX$[]"sz(-~m6B WX1A2X@Goɴ/ͨ2B"t.r!ra0oƖEAZ l`UV?&L{N]H5kpk𹕝 R#.n#YkF\l!Ot#0B<vڕ&L41p=pvc";DoH. ;AH@*QVQǜS>S[2ͭOMwj&=/RKf ÛA^<ް Wqgȑ# UL:l F^yf>i8'Iu9[Y}*$^Qps@|bN I ?nɁXxa;n}6IY3 ȇ1eﺳ69 Ɂ=;A\^xygGqPo%JA͛7K0&BGyL`GEt:|G*&s~8d9xW}լY ~ʝ;7͆Wei*~gML&#a_~x6Abɇnkw?>h4|H$ Qipl('1ŕQ9L3 I9T9kW2=^A8/%ܔ2ʫNW9"'GoT!?fW̒+@_|ӧts=tP!?P2ʟZql>UF#҂|Cqm a8ߺ)eT\{?} ǦyPqSSF*ک}ڵ|6NTt[oRpC/ ›x7gFwEn+" ijbȧǃ'ϓr->tSʨtx::jԨ͛:?,v馔Qlz{#Fy e˖mԨIIQF%4R5ITPC%q"EU2*2) 4Ta4h+7 &q>|le<͊MDۑ[N2^ۆZ+yæXnQB>-Իo߾/Onc;h )eTrCj߽bmFr$W ]^HIN:Ż<'qSTr;5uT u|#5 :GlٲÞإK^H"OqW!]({cƌ ;ٳdɒgիWwIl{֭v-_qp(HEs )J~/[s5ok9sT\ܹs0'^c/^nj6{%Jp'܉hB"+V Q L2@hr:u9(׃ EL` xhǎ  H?3s+`|sɼhѢ;Ңj֭͛ǁ^3fFš5k6oތͥd0>ӦM9r$ >+hDܹwE+߬YLH*v7}t Ġ':r̝;R4loׯX̙3S:˗<X  U@9Qq7K) d IEyozj:uDT0I3f(= ;,I}I[UhQ\Ӻui! 6B 5kLXl8Dnk֬CIءۿP2W\XeT:vX/_" ^^eիc&pMM81G ƬgΜ1p*9c>s:c#z''p(sfT޽M>}Zj*32L0{ŋKC,FիbŊF8Nnժ XMYSƸ0S/($ls2ʡF荒/ ڹKİ2t♏;FNom# Fr >q6v)(B]&bAqY>bQƇ ^(YȲΌ.\00İ:$PTO 8S 0OLFyNV("!<[!Z V(fZ%{2ʁ >[(3+k~V~'baAGA]`j$ؑ3V퀊(,$9xgF2;m[۠a3ƍC1k6%3>T-eI 'm'EY5J+%g2z:7301جڵk? ,L 56aO&.'XMT°!GF-J  0[)o4dfq2rpGlQbM2q~a ȔbOt/]r$6DL6 ,"{U_rUp42b,_|a#Gf!lv1XV̠Ef۰KjɹcċcZ6'Ico RJIq@ w8ni<]ulْ "vqA6ڃס\ӦM?[ZLcXֳJ1Md|Ν̞C 1( u`4Yu$|6'I~a>L{ؒA'6]yloЏƔrJLyOCۇL3:`?@1aPN19X V} ό].v'OL%Qx26uoj3H9c䷮إ{dWo^ [ٖr~k"_f l%p봐mwc{pr!]~r*XwX$8gN#eW#܆jff]Gcќ0# cЙ^`eǜ~d$1FъdwnzL1\ YqhYG1MW8+^ b 7=6GdHÒ O;d he}1rD[iA)RLXs 3„K#!̤U|eצΔ+}dcnL+tR7ul\҉% 3[ oELX0acc9C;&;Gd&>QY2Tc_5,.A @ N5v!>l2TPNʲ-ɒ/@O`8' 'oЛ |̗V D3J̘8ͱ85O* F 9Jp#:E ax{0"E(;[!J>S2Ͻʨ3$Qg T7E{Ώ )xآ'zE$G0q?QPHxȈBxQ@g}9YC@/ͭ8#rGSw(᥹gQh"e;4"2MU!rVPF9㣩;Q܊3(g|4Up2^[pF@匏*PFKs+(TE(wxinEe3>C@/ͭ8#rGSw(᥹gQh"e;4"2MU!rVPF9㣩;Q܊3(g|4Up2^[pF@匏*.{ׯmQ- a*A*1 Ӣ@ʨH4BeT iQE eT$Ā2*"2*Pb@@xZT@ RF?~]D"aÇ?~4" "eO?=gΜX4EŽ;vzXв@HSHdڴiEM@ E Dɨ,Y_~TR͚53fݻww߾}jբN:si*sUVeΜnݺJ2LfϞ}С 6n8o޼&I@ wdW"E  y'N#G… WZJԫWIҘ1c2et饗VP!_|mڴ! IFɓ'[l!CC /pРAgʔ)Ydɟ?:us%$AQd'"[.P3Çm۶"V58n޼PB[&i&xb7k,|W ^~eٳg{A]vqf͚f˖mذaQD"x!ǫoHrz=yG7Xb0DD5jԨAbGy}TwرΝ;}ѣ*EO`(Ac"zu֊+f͚4z꘎۷㾮*뚊y*<URDʕ]vr[vm6HZ]&?#Qٳgb *kk;f% ߸YN#G4`ݺueʔL95׳zݺuș^b]TlY/_1$3gl`̛7įXqƹsٳ͉͛3L6 ()kFu}ׯ<,X%IM6eڶl2: D':u! ޽{&M؅ϙ3'!![liժI/@5>^똼Ѱܹs׏YaK]tgV$ ^c{CpQê 4Q$ϤItO0ԿG s&$lĕ(Q T'N(P\-χ~hXw~XM 򤅮iưEaMv`曏?;4ElŋDbJ,i(v ۊ"CQ(z7٣c ~zǎyf\)ƤUd`ă|k׮eO:}46mW(gT4^HDJh("'QNh"e[4"2 MS"rWPF9i[Qn('t4Mp2-b_pB࿶iR IENDB`vctrs/man/figures/lifecycle-stable.svg0000644000176200001440000000167413505146267017603 0ustar liggesuserslifecyclelifecyclestablestable vctrs/man/figures/lifecycle-experimental.svg0000644000176200001440000000171613505146267021023 0ustar liggesuserslifecyclelifecycleexperimentalexperimental vctrs/man/figures/combined.png0000644000176200001440000013327413347722504016141 0ustar liggesusersPNG  IHDRN^ sRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$@IDATx ~՜8_a #!)d _ Z6[ڣˤm$UeS٩f̘ cf,3>>w>z:Ϲgs>糟.w}7@`Eݿ/Oxi\ Xtou]a=G_M|`~lX\?|Bvo@ÿ,~v@ 4 ;qr=A}WZii2 袋2>pF>Ͻ59gaR2+J ,,3- 67r-?Oq=9yWzچ:%;d<9]߷?^xa!{A +$~Ї.&я'?mOCzAK_Q<p@T n@ӰBg|y!A'gv^BcpF/rmiaM6.+_fmo?g=YD@/zыN:$/}K)gk =p!\r%CЇ={I~w11Ur(^Շb.OxŔLj%Gy+ Wyٯ_%N̍7xQG/xwBÏ3J$/3^39Oh` !4Jq@|tCb @Ayy=)O!~#Ŋ~vX(zPn׿5O(=oڭK _(CvyjG\Ұ%$J;?o9JZv{页o~Be:B Ud.e;12Mo|~-^}տv:l& Ho2=\yH7D|e]bwşCbLC-)pX{J{Ygu`CS׽6Iovg0씜DiS<0i[}Fob-fka6!ƌNa.ͤv7A{ ZH 1gAm?ZCvp /.pDv'\oNźERwd [s=ӟMb%~;8AqP> /ӴV^QE{-k%W/&*KvmQ_qkEE\>|o2>SOŧ1lLm5iv7ߜPIF8Zk:sĬ)(jqcJF@5v^WOx4~ 07t~=IObZh1tʳR~hdѭݺV[3շm.=VOݝ_2Y^וc4[m_#skL*aChq@6\?S%O,dNqKh Ә')iqvװ Ao^2En?tʭZ;ޛL]srnQG ɤOq/n:ec|4vO~buKc̲G}- wxџY-6lc+Nx XzR0Ӟ4Nb/X~T7S>|k_ФzG /d8yBַ̻ %[x}bZR83ԧ~#=q84I[z/I4ib]T7)|'?I]! w7`3=ѰY3o!!훈FL^WN}U>oKlan֝CHlMGs[K `}g|饗TROu42.?A c7#F{y{`ҬL~ń]ON%:k}Xv%3ECK0c=hm0PBQ_o}&b7Y//Eߟ9@?+68<  m%r7{YOI|KA3b=1˾|TADv lܽaPkNC-.9UHM>"j\mj[4Yn4 W?C0V)3h!@i 0eMr$kY]P@I##j2>7EUxU?J<5Oz)\vŷv$ =~$qp++ܿ'M;BRq&`#Jb/Mj}sPHsT+' b"%=@GKxÀ=yD?xyh}"`7onO#8¹n=>__Q `Y;Ӱ1>}% 1fV{7 YDX~u]׮10(V% ъ"ZGbozj!`Mg䐓BSoo\\[o5k*h%chF`Xq}1My˱~dD1F83\X,-y+Ex: Ue:}%" "hXބfYV@,^z>#9CL~f1b"dЇ>+ Oxc쉰B82Vy|~!aN(hyRъ}c^uodDψg+L[ &)9rsk]**%Պӯ5 MM$SLs]ggg]҃G67hQHYV`TFD?묠?d+%&:O~rNЋ]?S`ҟ4Ԝ?OY59m_EXFKA3-9KL?:]> ǪsbTnKldkcWs}'X,<h$'`H9hL^Fo}x0BF(q5X^xE?XXVµ(|rAo:ޫdN 3)GaG\ƮȾad[(ʺNCy`~beІmx =ӏκɘcO9v}b LӖ8榛nb_͔;O!5ꚅgf:렧Nmlu ؔp[]T? \? u) _FGf |3"oAvVQd%03KQkZ C)a2#$Zi</B]tIx3ԙ*ҭtkeIzEÖwfvoVD@x%f[nȑ b˄}C11y( ʮy [FgJӃ#Dr\ph_#W;}oxh6%Bc~=Z8v^Vow"T?=fl̼3t8A[ijtQ3X°wgZvT)}1=ϵY#/E┻N2=tN[,|$i/%-^xLůd= dY@zm[kzqyNׁV4ǣjmFiR+F,ov%b\p1b*Zv9do2Ϙ~4"[EaqNlN3;{^1BcA!ybN!AVKm2ܺQX-V/Ń[ 8&|d~5icz[ J.쏠pT`Pz*I4nsd0JxduE+)E&L#NzTz vV"ğM=B/ ptW^f{yM0Z_>w H|a!rXS(E:$׸`kRoTi}3N;C9-ж(b b/K)xxߢP5X?/6N&RIڢ~U ҫ׿ Jsj% ?^cϳrwOx&Ní!b6XodҡmVxjx5LW@'"谹q&mظ-H!G% b Oʡ0R:`Nԧ>5zM%Q}Զ&Տgvǥ-𒣿~ "HXȽ]thA;ǎ^DdG.[)h(i~t?qxa-miy#m`~, KBiEZ VP+91)yq鷮,`E/"xC bD?<$atiդu}^pZlM%08;=CG=–h ڦ@iS+VH6It( vdnGi@ؤL~za .z(`UW]Ey,(BkM [N{s MEfF#3QDMӤ\C]L$-VhR`_ve@&&l#Ϧ"܋#hQH ~맗>}f<[]wyD@;k ,l rh}/!g'Bկ~u~i\B@)jNl8ۖthֵ.Gؑ :cHdd=`ooQBʏ 7u1~m?{HނȈ'(CJQt8+9?>ܵn`{{' rf|?%]n bmMLh▛I/$ 2 򝎮t#Z:ؐw+lf.] o~4Rz&j_L?Kn{{%Djlb?;k}Aj1 ͩTv$ cb-HDw$}AOz:uf(wyˇ?☮qoD~USydUeUʐ ڌoFB,LqOCN׭RCBn.J\8~YA-zhYtڋ\x߶PӖlZ8D+ )##j%ZHc>32[ nT\a^v[[S@>Uօx!qug.;9I^dfM5~2H PWWD%/ނ1J6K@}D|@sM: fۤJ+c@sG+T׏n V& 6z~n5 5aP]D_^K?@ҫX {mx%-0ݏ H/YG<ҁ4)F+@_&fƧmLFjNBGj;D(i,S/tnrQC/ٜwicy'~/m̯ Y0} BL,g0^.$ax6NcIB<Ǚ]!@Vg.ҏ\%SW\An?wGt}H3'J.NUSFvD-x!{`̥^:0]avm -rĜWM[jIS 7Kr KnI\ ZAiu"9x,TTy#Dbf3%]^W4cIɏl2-fL1,"orƲO\TS S0fCbq9[MozΆ㤉|>m 5kN[襷bx Jac`Ϥ2'JgKc{`]%NS^ͺćmLd=ct)L5s$Hǁn#c49o<-_\9K;{`aQK;9?Ci{a.,^`Cgb 3[e8X'heUz  0sg 7q!0@ 0Xgx)z3eWwEXgx+C>+Jβ|@ i(۸ΜƛnǏ~ܒv=~Ox/뺣B <Xt_(컝Bo勽tv_ҧ{{X9-;"?R.wq%ns}w)|M0QIHš(Ȯ{3}&D9l^֤^b ȿ ]8*?#6ZocDqu WhO|Fo} 7%,4\ފufi/ c HXhg1G%.kAnH|!iwaT7^yLي:4[g~]CxH,%"{&eq@蝿o{F;&~lO3>mo{- n֋?Frq^Gz2^Լ}i{L,ʃ< 11^zyjPQ u^*pPZ*gxh~E[oԧ>[{*P[oPpwk&DL$^ x~뮻|G No2R$4?!yHpz]in _ƙ^pN6s`[p?W U;iQy뭷[4mf5(_ѯb~v8{F^~*^]x!g@.Ds[CmBq3x^`>%2?k -S-f{ H {}[2dޭۢ} ꫻ DP55n-kx3/'Cin r!Zn#}?߳l9v @PҎ;XiMƒ{ Mʫn-੡<OSbfyc{5d66ɡcEK;$%\ !1_waJV[ bg=Wf&=9^Dzp_2]?/B GL3xU3`h:il<Bro|#fHΘ?$d ϥ{ţ|*ICytwI{PBn/K,#ScfP ?կ~5%9Z+S01ĄRK8r6X`H My%9͌()>9/.cñ%G 0p 䴌n&ܞ*fz)}ةbXaNGdkKb7$h*c>91V.s!pN13&p܄L^b"@E \IUi#믿]ZD>\e97ewWtI#AHp A$~x &NQ7mCu&Tz *=!KZ&fCy_ m2p,0$!)дLK&s,J^Z} +0tAOg`+@yDr^(!%gW|U&Z Mz7ZY]k @kONv/ZȥMuRhR,:Dњ3% ,㳘"aHhy8e283PPO&lC; 20B Vz%g<A^ysBXW@ B/dV8[(BXjz[ڼL#8-+OcAx{m#h]'l׽Y tK^(1<xo۩87l|0хUq2kg[ԟ@Z$-9 +fZDN[hם`Af!-A9jF[k.L]Py$t e AF|)bJDH%򶷽M_+NbEj(n.N?LNW2CXc\6w9Sq^LѽoR{ ܲ%;ƈ]֊~)gJiQia); O`G`rc_,0;  .,X*mD@ kQzQt!!`'S*O' >Z@Mk2Z$pDdaXdO:뮻 rb1z@|{,^;N@<Rχ #leǤ&,k:0Ze4'c(3Z`Yd(4 IG>'p*d ~, B YR~S ɍC_N:Ci-8]|_yH~p=B1g9s4~[*49e]A"!Qг۵Gf3P kby_ew2gwhojupkFoYqꩧ> ,86uG!Ӕ$qh(9pwu i}CzD-8U1(П Q1b/Ozғp:TJ,1(PEbN+*SD @{{kT#_΋_%bWt_'GB>k_ZX;85Sn.BD|p: >9N9ig1ĞEB1~D_h&l*hRMi>t  vX͘Ik$]DaVQB Jʃ*wƿvbO1aL0eq&^J0ƒ3`jqPLuY FVC +M=gk69@ ;;>&/IŐ^WIUD[յ+t$Jf I2[2*Ճ'ќV*][4%&ڽɒQ~ ѕ"`c;Cb @@?P/;r!@.XldR{5;X hiK#ݢ@TڌvɒԞ0^ DHPk\^Idotn!GEg\b ?siXƄԘ~c+?@qdGiI"*W^0I6!Ԁ|p1:{p_EW*Vi/:ݓR~PF *}чxk 3 ¶7sEL"eE.AnɢLW1(j^\r<^}h"4];#1GȳBRb侇>߭j br=t[D,iGzP],?:!g@ k֑y ^R i4BiE^*-PcDb>6Ӿ2vEm.AҢLC琼:뤈- Gd_;>r}i=A,"iHdH^c_}(cӈ2$@@̐ P &fH*/4Bd{)j6/fE,/V$"l*ff %W22._'w Z >?}}֦5blLo'Cb((g2u\{̧џ;;i/TNh Jm+fUNAP46Jۛ]?; *ة]̉ S1AC6Cb(}r-S1a2:]v*};H{Ï$#' e* }(dr1;و?GNJQ3L:;2,2}dA,R1p^L:\*LO[$|?u_hi4s*/fNI2 ]݃rvLo@%⧀Bgvoc\wode,J/,8Zç=?hbGf *hV4+j' Lv< [n99;2QHHD^)B|] u/rDHڱ#'$ T|+ t՞2حaI*-;Z[]8s9#Wg&a0a( vygĒ7/d Bz}z;ށGZy2(Ga<5:Ւv ;ny"Eh@@=]ʌA)d2 LC8%#.{071,{gy):10 ˄YAmȠ*0O)"Yiv}w/vaa;Suܢ°!"~04?>43$L%.qhIOzy+dVØUʗ-/9 },02b#AYa -_gcrQµ vQ ҄u诞RahលlAhLjX[;Y;&䠪x[Nz_}'cƇ b%_B>;γÌ5LmM~?UG!ϔ oqe HtL؟pyYeUx}ӋSCzKhI;F˴0WRvջcQ"t QmeTLli _y;,o솉 _Q 4:v"7[\! 'k8ng$ªR^X?w0,E{rLJf&`@F;#)^5R9"fkdM i:8``@赨M2w7Ͻ^jUϯA1'ʰ wͪPW}vCb!..]r/LI|&rT.=#gGv/B ""uu09a̰Q:5 Oh8 L!))5I-)jyȟK =M<ϔ.j.#"Jhl_S$y36B&hXcl6Wq%C] yr 6Z=Ղ*3d/J>]2L@ EAJnM 4*8舛fٙ.ZK/%Ցz/U9k8/e;&P[^6A w'/DŽS 7pԘ^®XbA龍6膛~oV}`_QHf5ePc |j72J2a>{Yus  VXW[u?vlJĤ'*?O8Gþ}N;>ݳ*p-^RO%In:J@:|ZZ,4[Zзۻ1iG XZ oif4Ӄ2&X8!ٹzS^#BR{`SXѺm@ _- ?zx';\A2ྲ j LQP'!D'98`4/ދy.9Q$f`1C C& iUW]5N9W2uϩ f_q\~Gs}Ap.|=]BV< ?\ں+p]^l0` ~?:4W!> ;PVzh[׭ev~I.|'Ӌ6ԉ/`Z=R?>6a|PO&d40 秉z P->"~ RM`)6њւbWp7ch=t]n|ukN/Tַ 6mi^/ųT@2etZPi1bv?tmeog.1DM u@v-f/]ώv?V bql\/84ƀڟ3*Ŏsq3#jyX+Mx>*oh._-,Tں*_pX4S3 ve@V׋0pqc-21~_pD|w3ŷ_tQ|W^yZa g4^ MKRKq*s嬷N ]E[bŠTD:M!6 ~%-PVd2GɊZ-29n nfG <{Pt659嬳=/ iu=MVJ_2zByTE-Cwy:Gq@IDATJ`~y`,}G # #%:vS(IbC@|L]lvs]}v[k~ПN w@u֏U1A >( e%S, Q (U57 *s±7Fnw{z{u3T8h 䈘{".3KN;`,ɝYwn֫ ?INm}OnLh";fʇ)ui9s f"^ ?ȋ8uT;)@Wм:s=c wז*HO~q#-(يsmaF3  <'b]<:WUwacKf [uW?{;!DJ,-'6kA яV"#찴p+ ) XSGzMUOPzI(X$RM5r-EzjhJKhRo]0v#S42V:ŗ6*:Y]>9DigqF1AL¶>C[y <&dt!)U|U ׎Rװ2UiOJEnhB.՛͔1knĵgV ˲z.,p?T4%- -FLJʔ2Tźصh.nd^L1@UZ[ 1&% {,O?Xl4A;f%Q[ou$"^D@bx4M"" 1sh\y#QJ(?SQy>ZuWKQDDxtluj>h{@1{5KWt_) YBk]M\7v"V` HKBG0şUS.m8J%et))k3q-s뢎 i;g1u)Y}lusEg8>d?CPd, ,xĐ餡!jXizȾ1;: ;C(8IN &IU(bjH1;'ƅ+ql>v*1DnnSſ7 D;tsX[YWQK9aoNy0y_c‼8hAOn)$5$N-[) l蒰$v$}{n~'u:c Pf Y >=(} sWK{ի^JTp[ll]1c[N hĭisyqit\&jj fk.=~;邏Nb@i͖ za fNr:R_KVcᢀF Mᅺav)hOVE4Đb;B9TwLWNPKZ,7 e[qD]6fÃag&\*&;gui"SֈL{;ށ^lc+^C9eArf;.%<+bGe/ f "RLɞ UsH0ʿ2@{6)ӯx S3fnnF O6\N[~RrvK ϒdeCL{$aA]؈$e~ -g \d(sG|gZ @xNl|I/E??\cQ}%/\$ G`gsZ\Ҡ,B1s)e0]+K l K|]V]@ M3~inZ'wZq3e*8'8 b=b-F1jw(|1!^bŘ9 O\`}ԍe{ꩧUc@6JG$!%Σ]{-QŹMTQ#cŲ-6@bǺບLG %<͍VB'w!D旌+ˆ^'O^*rчp* ADkULC ];se[,ai!wi~*0SLM?c(1P(7~B\AOl?$ LÉ]Qǚg~8|}Iꐇ;a`|g+cvioxum .B ^{A}6ۤ{OozӛHɰKTIAC0f?G?\3mꖶcU&ZbiuBE.dm9:\%4lm49(>~JԱpn&84 \p@$BgF3C= _`9U_I3ĦFI_x "/ڏ'Xq4@8# ҜL$ccHYЩ8ɑ\6䞙bW/+$QVr`%|bɱbJ)D,fG;"9v#Zmpsm3։h1(jkyzV -؊ty+MoV,9O ]~?N<%`DVx@0[@5D(=r V8,.zZS*!2g1A '.DffҌ]vK {KAGlJĈ*B aR 4Y]eHH?Kc.ڻ +ΥHfLXILtIW:S`xb6H%Lfq_̥wŠY2=H3(zF\BNH1!/ \6h9=&Lf Gꇧ EeZ+Fk(Exdr`vկ )YOY}`2vuq^"L@Iĉօ( ɨ Շ,h&RW +䃿 1>rADq~.kV_ed܁4VЋI5ПtjۈL(-b`캗 /Gs!M5̿S/I}^Wx]ɮjΆ>^11]>p(z';xC>0@ *PAqA E,(7K|EVZe(WB;,*Rd7xq݇ԧ6X4;mlI ^ҝ }ĸLdð`6UXf@Ɩ,/8k|Wb Bmz[zS=a5ax8)GԚ/P9sR rzY2ϣD{d0UNfH}%]bV/˒ᾱ -T̖bw8)>{^,=CkXMjk)X4Ґ~ gܙӉK#:>GWo$=BIZ뛻B/"VT1+5h@mQl͒0Ԋ?0}#qQ"Mo4ۧuBX$qwZ[c{1K3 EbP@/%ЋFO/h5ޙp0;ϴ|f]RTq?%M ^ Hc7ia">2i#GI8 RC2!!WRq2i1 nnz\r GCMco,JE IX!i3{+kr6x@7e yVĕ=* 嗴@ᭇB^ kq< ?#5ǟOKt c -r4:Q<.KwB[K"F5Ejԝ*. xMZ b9ibq_:&i2T/ܤ:Y-= >1+B?V5QDH?6b\;M(IX- +2"A窸*/"O* [/Eྊ@_b"ŐعFA,[vDb7WwM~k %(6~;?>>]*fϰۭdIq ȼOKڋI_4cY:EdQӉ(=ǣR|9b*OUsSGerϫri w}?z\ReZRLXRm4+va2)$N%àgl` GBmfqݡ_Z"8vFWQ0t&bS[G1nVJp̓F(8^^ge=-miRWB *X ,FT+ l Py[UP 11| 8 #DqqqU\ ;RNS4Uwk"] TFXڸ@ hQ Ze!74Q~sԷ*t!Ue80ؠj2:}z`sXz ߆nh vqG}R[VkZ5?_u|{=pKh]OAeu@C!_F6Pb@]Tn[oŊs !Rii^nJl54n}^a. ڄݝCleN ϺxQG,4Ǟu6Ey%/i\X3lښ7i9Ϻt>K{4tMa3,.>HfVB /{?YF{!Ok} ~blV MS}d,NYvr?{̖msb5 &pnL=[=]Xی >G/#ė{@]w]^Bq8mzFN)A8uUԙeq/}.a*\; &#Y |'rp͓zׄFQtCcbP],(lb+Y.Bvtzg$u=$L(>b М 6eLv=lg(mݶ#G/΄.i~yZ/@_Ҷ<RO;,d0m+;kiD? DiC\E%!! 7MjOnhJ%UWP`HZqv2񴻹 O[0fvsQ9i~p_ 9I#U\z;Ӥj\[ni, QrwarG8`Ebۯ)ZoI_(-]00ZW8m\6.w\8 /,IE UMPepԟ믿~~#$P'2~"15c"$\zQe \(ͬte 6+EL nvyKJXB[ s!CkOpGL|S?Glυ1@|$үzMWDfY̰baQT>w] 6W@`ߟvia1q"&"$`:as[8Ƒ D^.h<`$ cJ ֚Lܮmd:(4֥Od>V3Eo4B;եۜb gi>뭷bԟ1_.*HsKWa뼦Qo45qXYIue)ql>Ca7.\pA76;F&'|2܋.dA|D"ȡj#T ?c# pa@Ymի[Пc3 /K!X}pe \5@>Z-`ՋNgp^*8 GC'tTmKѾAve;G(j`G-"9uL:T#;]1?'maʻbOSEX&blQ/KPyx?< erd!Tv'r{.T*z%~{lg_7) /LWrÔ.j̽SN&^Nk45U@dJjXg1h$~\2^:.=]ܪL_yx(eԐ 4˝bsOg~tgDjM7t؀r79BA"u'_uUiv3@'pBiA_:vQlӚDiu45AirjAZ=iQ}14D3!NDLvܗtnL%yEg&di0X!{Q˱x :)Y: {K֘'pdm1"˟Ͻޛ/})Sqf JoCr'D]51h55)ĔwDЏ5b>lH(*R2}+pK|Σ!UY2'<Ƞ-F*a(j?  =.J/)F*x4 ٽ8'.܌~kV'< jvz]kʼtk>0na*,71&?!S!F2ffPZ2RfIqd 5`h>ф/t4W\qx (}UQRЈ!U4b82Yy^_T@Rc^nc׌m]T>׾Vc5gCW~ N1 \Ux w*ы:t>-% A>R)c{͟#-*?v~R&y{^C8^$[.4agfϸɭ;ә6 T$ZdWS/" 㾢 M @H,??svjoؓBTa0-+J얭HaDQPņң"뮻N}'@x}OI\|]\4K|-Ӗժp!D13JI\q۷w/*OmFGF-\%LT: O?֥tWcobkwk1 bLFŀr} s!ˀr..1b"{`6 ne_ʰY* `ݪ5\uc]yKUʔC_H>}bj=6ྉ`>HJa{a|nS+N,qlza2='F7D'4Mt *"^̋'W+ BVbaEȬqdkǬ~oJB/b>nH> X/ {'.Gզˠ$]1Zgu8]ZQ=ep%BJqu`BcI- S1 %;o3аhE!l"2w3A)ԕ!C,&%ЈpU^4G;ܐWqүc" }}8ڑ@ _#Pg9sU؀¿,U(G"r 1O^QQR1JO?6;:Fa2'9$BP Z2/3C-7}|Ӧ?s`RJ_lm<7fM0H2nN(ėimV[6\3_s7>rSiYJBIV?TT6 K3J~=ִӋЊ{l}%0OT>LAESPpD*"Д'9 ^B>_r&dW!Qe&crd2؟, &VBh c!(ĄG~( =#s=2WT r'}vTD瞓BVf^'`*Aغwt&Xݴ/r4s4 [~ dg[`ȌIS1Ჴh"U\$$[2S @l~.צnZc2Qx~pl]m;?m?^4s4 / -̢JcQL>ϑ_2φ%֚z#')LA (r^{̻H3p._6aҮռrõUyjh$\%W3jyMy駏fK='#%\|yyJR\{ C5 h ͷ^xaS'2=>vҺL1tW mj ``E$La\OFXH%kg5-!m.+iާ+oNNIGVcNHїK+S IYEJtZr4LX&d*j.w6B}5vf4f =A(] -Y}7})% 4:,`c#XZmP|UDp"h. dk CQMDlh/槷oUeY)pJbUR*bP҈OvQ8W@ +zd=8e`~sNr'hSkh~!wQB~Q/w}o}[_p$^9v(˯]vldCLDlHf,8- sMw;b;1f2_pq '@H4˜O<5{GjbwN9lJӴ`F|mދ5J+'#av3Թ }(0K+oqKV=y򕯼UPqX5q"""pqrڨAbs ; tv;S ߱k8∴h)$qxTyR8rXTzw5&Kad[7L.*_S/,酲*zX+AeoXvʖ~[;q f3!} CF 6 6:s=&T>mY 92F UT*:ɘ8~_9paa|]?b58%8DGHVBAd}bډ&ӀR[*д|Q:E_W+;y^AXzRBaA(/ȑ(:%Q ʘ^p$ φZHr[_bڨD`r..]IױXbVGS ?asf7r^d qg/K]Q i"xޫ,h=v'eIҚ'H&~r AҺ;O!؍$4'BGȐ ڨtmw9}`9Gg.~Do|c h&9%5iv6}AT4LK-(H/)C)G(;Zt9ty4֊r L>_}c|m喣 zO D;@Gmo ufaXPJ=-Ƀq% Yc8-1HnlΤݍmp ؔ`*To:$}ZNyu S]6 5[tR([z[p_ha*)* ߪ5cK ܉V_}u:\l!N>0,73|yV[Sx 4%_VunZa΀}#.b9!8zj1SEI814L44#q& P#+~sb p2AHU>w2YQU^94|^y۱ +J<^ %SW iV#qBG_W^yDۤbj%gtAR]p_B!-<+îvђ]+c*@ޛ1jmp+K̬j`JaeC1';tIJ#\pF8b h{@y]§(mYQ4ؤPu%OPྴ!=D4ڪv435Z1rPEK-V11N ȗ󡎙^35 i&) Rb@٪We]6>:V 5ƝZ^ ZWWۋ UZ@ؐ hl)lD**Һ)3fmb5# 8|o|_a^~Ҿ&J/8r ,(`R+s@Jdw001W\qjD=暴؝6.% f0D.ϴr8\hH7SH|Жi'U.tyH2kh MTETpʦyY*`Tޟ#1fg#.KW'-U$Lj8\l<-=zF0) OK1i ,u Zh]خ+d|, xaOzgbң K~2ի4 NxI"3IpvXrџwՇܦVߝ^eTh%؀[2 b;i-Dp;o׫a?Qt]p&D*4W.+fW3_ Hog2Hst> &m>VbN^k3Ҋ+9d:1;ʖW2WÙ jȘazuUW-|%'^JzZָ,^k%zcz9ꨣ`y֖lOa_vͷG/࿒4*ްacC/7RXnn 㥷J_{?i&{_Jw3 `?xu/+Oh'.Bunz9kOß@odY@~zYv-VZ{yba-J}u6uq]_sȝww1>p=`:lq}CluGV!;|_?uu 7)9&7yݓ~#Glޚsr'p7}p_{Z #g~ccn<9:fɺS~ZLٖ诡|:Z.[0Vly.U΀R@`i|$Eμ_`=n8&(tSu #n6BC|i SgQys@x!Q'Zlj>[o2+/&UܗC!TF <"҉qC'Z1-wha=s bLD>] `!wohUXLg񄩅uD?DB /t?6G B ?/n;[ Ҏ& b >PI0  -~\1b!>-@R"@xBy6-#9'zLD"!1i h^"plPMC ^{5W\DK/pacL"nTZ㉰t磰YQ煍fBa`ԥtTE: xҿU|QAf AWHzE";16bQɐibƟ!&%iHep%ʻ`ؖ<\3to-r~ՑE' NwR9ؙ9`vX8.21IS&M5?'v/pXуK]Ġ?»cXo3{(W+}JIHwtiPxPO[!t)2htjq CS@#s>\2BtQH[&A48D$L$a >8mƶAٺfMA b 1jM.(I+ՈјFAv[*j!Iֶ#Ü9|_o~!Ι33s< #S X..$}Ɖ:eK[{B .`Z sq42IDATT tR`'ƝVrCqEӟT,]j(X@ؠ]`HGi7acz햫ƾ jMvba{/C(b~fŌ%0kf7cΡgLm\hDǗ s b[{`mdo<'GlRJ_Xw93K=q!,;[TcgB(ܭJޜӧρƠDr ǫM8Հ^'* EbK8xM>fc >ȱ$ـ;'nJk0x?|I8?'*!_~_j`b%"l Y̤gMjs&bP)Y((7,M':@t Θ!X7 hnY:G˼.)gЅ@0 dƦA{}ݢ m3KW]u-,^ dq]2ZQ꿕ˣ'?9G}?C Hւkǩ;+%OUtFyt9\:g:?,lX6G&Ʊ[ՕW^C!bZtGKe E>OjSiğ8Xj\ '{X?쓝Gc_HCJO)Y~ 9w@-n$wډ~v%/mT;t:kR`P`q4s5#:;9٠5&1Psڔq+ S78*kTl 1u&}lII5z5z:;qza&HQ3ؿxW-T r|"=dL%cS~_ge@C5ȕL^xER'2I\o(޽3S7hr)(t/a hN>O\wu=nd}7 7'|nJFs~jM&da&k.(ek0XgW})|5[ n-2Lq)pGwǢ5 6Yj gPv\ fb&GsW\}M7N`t]^}{_5WV 3ɧKh@S@['vh5LKf٠^J0#jJ^Ζ}z1!~'85$+G咜6a$hF:/7!$7;y_-\WVZL<㮐]jOؙ6XEWNmZv#֫2 %þECh%'Y}a@##Ox#X.xb|'B& p )|d|E-)S^]d,~i4NQUr\2}7_p)"q˯k)n}}=JJR`Ig UxdĆq nqUwqGRbk r R6 ^o1pT+!fmS)8S`1Gf d&XBʁ/t(IʰI4qf^7:6IETgFa;cʹ]V/U T FR%I"AN|8 _ѿOSQabʨ24WPu eSQQ4gsO3n+ԨJcx%0G~ ">b, Q HbqF<`'xi;$/Mg=^<ςdS]YMZS)"Ttce<]p1f3R93W t!xXK}5IV l@?q.AYl5y/C*>0;Qʉ(vߓv<'޿g/yS5ql|nY(W =);v1o9×\G.5"=)0o"~%ap谞hz6[cj}Ϟ=twg9RNŭDko97thr Lh)m0T /~'^rk^p6gë&Y}TR`-( ZLhLb&yQu8#+yQk+VNvau;κrH Ag,\% ܹԈ֗dbQD6 3Ĩ5: [++N6 VZ =)Dh\ʉWLP3Jf,=\JAD?+SV ,8>L%ѹ4/ o_$L!} S+A}I#>.3ڔ1+7wb_8R)ȓ:%kX2H\KGԍ ȟVfʼe<3HKQP3xq)IQ K]5v$RweßsHRg!`ڞ3iڱ%g%P.Ufgyf;"cH!mW:*ERBr)@c|s;mpШؖXpK!ɋ쭝O'%~5pZ9ǗvUocѺ,/jR`R@vF9Ǩ2V>}K?YIe2%g8=F0?X.@K5yhrϳy¹61uzhVV ?|'PܡRV},<˳8"ggm2jat8Tʧ/~% #/qm\.9SU~l]R4%-kJur98}$߬qu$||%ܨҠcTӟ4ERyY0~GHV/}<ofsY2H'g/gidrꒈ*^ИoBGVjͤRw3(t\) 0RشQ=+] q-OhK0?f8DE+_qnD&|GE#vzA]}~p QSNC3xE1icw޴;jқf*HPT{lè~ZS)b ŜT'c㠋?`Fo:8h%Dw BQ4[Ql>v6fzSĦ"s<1.Ww #e]Fz}q׃bhymsυꅼW <. x2M'Qe6ظ`!E7X}5822y.k¾;ubУqu^Eo OApٿ?\ hU2_0(wg<ZsiK\ɵoPD?ß 9\tP_YK6ή.&F /n)JYaD;GawBPyc ͱ Z6^Jʧ(ohoZ+ 8ho߾FMygr(Hǒy.N~nq㨟ڹ[s:e~;<0|_PsBpEڸR`e傠3gPG^ +C=+ς%kS)-}(:h믿wF,&|)3l!=_ v HlVcd lJǑ0kY1[LO)bso`n8>杣H  ״~_ .m{.B#KUjN-6SO=Ԭ1יN;Mh/־U -B|UtO$߆1eZ']d{E{!ϛB:FY> |GOLFþ̿gu6ГbȚCwUW1Z%Q@mE5yüo:D՚JuG3 d,>> z}.ǩv,B6:Cyo˘7HD\Ԑ}!:ڼ9'uŹ8*h^Sxo1@\s'#5aKڕBT)S̙p13Jp|xJڽ@䬃b}6 6V_r`0ξrhų76j艧Y T ,l lQߗ% ATm5SjT >mgPt%4p NiR+tmV));˂*jIy2PpK_z1 6`TWqTn[&;gB%r>Hj߈w^䜰v Pcb%wtž6jMr)?Վtzrdue'u_ fȭz㗥yd; Aw \osfRv8V;plf $oBF;;@dneBK͑iƣŁ 5W}*3AXp$\TCmנpY)" 9ZN/uhuQ>&fIv%YY0_OO@d2ȔQ5 8!Q ꅱ+MyJEQO?¬IF d]o!sGkPX_}*"9_iK>yž-ZCU ,gRpS.`UujUC?'x饗ҧ,u{(DRI;|Qc[Jrtl O̒M\7(. `[ϕlB%QI(v_{p{w̴֒ (h'q";v#1Z}ٳ'&^WG)Vص:x/,ؖ6,{ bQއ9裏޳Ly.ھR`;RsmGž3W L@4}l ۾2oɻTwH(E|5ycJ{Qf~GMks > UQ33Ku<}WWCt\VG!(ԅP6Ԍ%Ÿt~np>ی9m쫩M֢gv0HpO$3Mo'L> OCMC@2d0W?>:AuHj('u[|P˂$1B;d~[HhE+5+ YxX8[MRnVLމ|ܛMńEK2]l)WCt".ٌ'LNx[!.;_kiaA@)Jm uBBfX],M:*ey ]JۅpW:>NW2ZذU5盃KK^')JS I:I;LIж^mIlh*\6;?m kh:qXQIhANc39(Vw Qګqnuc%?-frГ7.+ CAY?p6%1jt`A=- Cp-0˯|+O8h>b_BZʠB7\ؿDbrO Ֆ_rnh716wz_uXxax1oP|RqiF@-,)]2w\6b>ϥ'm&W؋~絯}*FK"CL}'$VCXmA餓NFKlkgYC@I$- `+QPf )α3dL7Rn8s0\- (Dh)|>~_ZKbQw>5?FhL3e}x(3%q'8~}j*,I*ttҫҋq/6z[:EGiD5D4\Yy}n)/.&F:2QzjZIEvaA]%_=q6 K,6f'9b(Hr-SQQAgbNF]•}Ӟָuu>uC1s`!o3-s:Lٽ/0ӆ!űNeJ/9 W  '/W3U[E፱XP•M޼2u] ϠCO% ':J2<3+=NEh6ܷo_g}tg 7l'!lNqjj,fɄ-ư(Xce(}V(z=t`!"g>±8jy[}5]-:JkB10e V/}ט؟qZ}Eq4}3ϴ-NE|GQg7R3zPAAx`?PoQ:At qJϢQg>@H9V`XvR$j׫1H٠25)3+51PnLh?/SoRTqgi%Ӛrܟ=sмMcfѪ*CE_oQp &^jK.uvN:eꪫ\|IS>v%a#_GYO’PQ} :GiLka1wB|:`ov :d#TgqFw Ѥ^|7tSpܻw/5*?ѱ8s=8A\_˾q/8º>믿 : Gt|d|Xq%_Sɽi33H"3el9=}ZGis(fVdkC=iOrl:oF!-d9tZo Ѵ e^4݀}m[z`;bUE,;+ y26ڎ48PDiáJ}-Ʈhg_;j:Vz`\?5uSAz[D+<}a%y([LIOL1e&k!ѷi̇?Z=Xd6`n 63U:]bw'ܹ[+{k7N¥R+h1i&+UR3ߢBܸA|2mCҠޚP qoLrO+0wyW_L |^a}GA~1Q쓳y晇z(PtNkk7M?n4p>CtxWa3ߛ2b`w!^GipM(F0ycdS?3>FsI@(wn~nW-oyKv NmgJCk Y=yF[T 2ڽ{9眃#,3}ī Q:˴,v9 XT'X ˨3ȳ>CPN,m{Y%Ba}(fqbg'm.*%n~jOט5Lnt8: 3d#QviLߏ` ="0l]"qDB(YfZf 11̀݌ VyC<"s8L'TՃx5HBB v(13kQ"ևb&ףzz*u}NF}Hl>fZ;N᳥"c(smΧ)C# 8SXk SdO?\{ |ːӜXk"< l\G0t.Fs6˷W.鐪Xv a詘MiH!=RKt*vŃH1:=u@umfMRY˔I3ohN ]1O)sz}攴ey9(J0!%dɽ5~K&nGjgc-E yƻ]8v'FDZ}$EEi"s]TXqCjԦ37m+c?uc9F4{!Eʟ"^eeMP:h١;rHlXO<1Ze60L|@yIt><y/b[->K8΃$[J@(OM3 ݙlFŇQ֓b2|5Rb, Nm@D7(X{ㄬX`tE'0J qIGl'ʒ[Or O?;F[G_mU5LpsX9:c:bc!}`'a棼3t26WJd$iY 4+,IKFw"O2W&coؘg+,<&2!K=/.,_)X|V cH!B7L[35jr q9*;=oZΨM T Ѱuze9 ~bV^FVt,*C,1keF4 .j>;zy ,upy( ^i ǃI,=$dئx*NW}T륏U d;%&l]t&9=av2+(yb֒WEhA( ѭ=m숓7\*Pc_E4@VQHe3a> q1߅|/HAƲx = F'B:SN`_o=#Ӎv.7(4bs>fadl ~, D/ *4 Zr$OLCv1H(21|/*(MYo]@vbeX?ޞ~6Yd^z)R-Eur"o)O>Y3ЉXe\ L~?~ov_uo>g]J=`[hF34^ֲQ^Q׊(ߣO_ƶnkFo5?῍wB}Ϟ=H6yz=w]eb߾eO|O/~i&t=,-e5.o}[}/Ң*Aߋ.=_ß5Fyow ۣx1QePY)P)0o)}TXVv(AB(8؜h|/O{B z[Ww}ɞ&6%ƃ+]r/t"p=^E,۞եy K[bmeA:{yՄE!W%0 $Ʌ b_Y${l;$PլE/ LTJc!Z"⮻$2(i~Q:ɒJte5muV-Z \l[-\8A ]$EK~֣-|/_: LN h~^|/y/%7L-N*f٤# a,:;PdإZc/1w:G@IPF : Αsft(LmdQR⬌bV^Gw*cJCx/zig~/pV~9Zc_|x.H|_cM@w@Y[®Ŭ1uX5 MkҲ/P 2/WQ49uhepˏU~r';ﵿj0'&0-os`Z3q 9r˾}΂u3$& XevK- AO_ZXobBzk:J  jFJ3[8# R䅹D&vkowT3IuY(%%C8 Qu>M}=zuFYGiSoP hS`T wh\pȨ>;翗[b|XA38?S8qa,K~WyRھL^H'6߸ol;P3.d k-y_h YT= ̹>{كFtNYm 9cM46o!/ҼENMԦLnF$vB.|@W⹞+4|:(AVuI[bpd![8Md5^"? &=o>z<.UW|_ʴ @iǻ0.nYG# 2#D ȕ ū,Udʴsc?6]v1}-:Ӆ8)!9}JCQ%~2mv6ue3--:|GdGnCʓ{+DTRWsdnwI80w;fXo@Q"Ba[QڐYA?A ^|}s  <|yu~#zqN>d0>d %h6K¶=oFN|FC/_E*?^H^[:@]3t\fل\2v;/;E':J$vGh_1amfx_6}5\_k}T~8!8Pw;[B>AtpK+_D;2TV1:XxC?Uen2u(E|״UfžǞY-a{R\2:W$+1nۯzի #umK1 D?LU2mX<ʥ%AR/I[*XIǂg4 rގrZK@Hr݃o>L>LeR2Nzk!G7!-<4"d1X}/BmU"VeԐ[1ԧ1TI o`ɩiE+M+^AhÔ9 }g\TE.((P)ѕWULo,?I-CLBh{ J@%mwo|/,0T kB|h]OD'WQXx@D ]ˏh"XnV)?0TC<>̝8\%e(lV<Ψb$In)=}/4{a[Ë^ؗ#7/AV'uV=E::Jw]26_I|!,0v\7geعs[vDo2 Produced by OmniGraffle 6.6.2 2020-05-22 13:14:22 +0000Canvas 1Layer 1vec_count()vec_proxy_equal()vec_slice()vec_order()vec_proxy()vec_restore()vec_proxy_compare() vctrs/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171213505146267020422 0ustar liggesuserslifecyclelifecycledeprecateddeprecated vctrs/man/figures/vec-count-deps.graffle0000644000176200001440000000545514276722575020050 0ustar liggesusers\[WH~~u:0Ȓ`Ι# K$H}囮`X2LXRu}_WK6~^Mj {^0eu{͕mt?#ߋ^X]oڣZmp{l@Uc"IFZ+R`:‘}l^[a&́=M6W~ڸT7m7}FE{AO]oYhD T6ZÙ N]Lz+9Q胟6$K px(8z9Fk*\DpIFd5zb8 ۽?q2jR`х?mnǷoh@k7*I$S^3@&L q{[wrT3'bW LrњioG¬a总n-&է0..'72stJ=x}o,eSq|Bܦ3Q$%c "huB L_!ƿ|JN¨|ucz3,*m{wnFp/kW7*ʦ! "$%ONC=ּTACfnϾ L/" G]Q X)2ϥn:Y|L4vӫ7! q.}fJ*(iNe=7p(&!6, knb eS$)-cM(Rf̖XH) ̀9# Verqn3%Tm٦m[r!% Z\E_8*@5Ց_v&Y7PEyyTdKA3Q6jrbQQe3DTh~*=uQ 9lv&d0f8MNq/앇V$38Iǎ^ "u7fZ?ʟT>?1- IbCaQfbjF' 7DmYǀ6as@:Df_l N!l'O{<>ɣO,9` %}dZ1 8ۄܟXW3W6/?iiwwW<͟17ŨAc=tzS.L"0XܙS\dvZfs,g?0EGf6w!=7mrk?&3BA6Z6GHF /?e~"L[A!p17u)CHQ#TKlҴ- YJ.m"5ӽpFW='v ^^aSSG?a*cp<}rhC0*!lJn"Ŷ9/XbI{@^z?FQ#ӗ{nyIEI9AF h +_ jhrf;I?Yr?룳~{DJa | 4aAHcgH@cߩ? z7g#'%׌#1I`2/42TTǩ IƑ}v\$*;he% #rEn8ir^~Y7!u(,HğIC 7PKHĤ M,jIդNm3} X `|I3Ka YuܐzC 7't")=*fLX8NL>ɃK2tHm| S tnI&˔.7Fņ~DF 6IFf%֟h!B{9nbS,M 9:B$Xl-+X6 5칛=zF>T KHTud5DYX7S9μ.^oxIT_LV(&zٷ2tk, Yln;0T7EAxkR ;Rnp |7cнTb:3'^읗K&I_0_}Y}V<wWvv{A!aOy@[ko#-nض~msѾ8>?}뻃|.wO6wv1y{;iw?knU?_ܣ)+B7*Qna0zUiuƯ1mH>-A6vƼ mv[g#\G:1|G(@iɜ;y(Z(.U 4]LᓊTk1Aj.9IP'yOSe8!>W!V;AEwdT `w\uQH=YiTl4v'.daOiQ{,. wt5Cm#ƪk?$3t|IR꺾s4 o]A~XLjGP+[09q`WA?U ^ERJ̱NkaoZ4[vctrs/man/figures/vec-count-deps.png0000644000176200001440000004331614276722575017224 0ustar liggesusersPNG  IHDRbKGD pHYs B(xtIME lj IDATxwXT (E4 "("6ƄDKMQ؍MĨ` **X@ĀB& 3?+Ig̙s^5}81B!4 B@!P%BB!T@ !*BH&I! ]ͫWp P0kE޽1m4(((P@i.c!]IJJ )) sss H%$$@QQo߆*:u*q9HKKS@ #FСCg !cKOų0c DEEQ0J R ށ,***(P%BB!T@ !*ݻ(//=t*SN!&&ߏ?^\5w\SJHuVzذa.^333"##qyl۶GPB= ŋE---,[7xO> "00BpM̞=[۷c݈;-[uuu3HLL9#GG:_^`mm7obҤIu.Z(%%eee@KK ֆ]]]hkkJJJt (!]!q!44wFHHqǥK[nbŊr3gNm[~pttѣ>>>|2PlXd `ذaHOOÁ4ˡ^c֥gϞRRRb?b hѯ_?DEEEEE(..FQQ QXX|˗}sssE={W^OEEP%Ś5kwww(++1SSSxzz<==addJ2]AB<JJJD)l@ z\^^ 991p8%%k.򐗗^ן˗tٳgz*PRRӃ%,,,DׯMbOyzzb…@XX 0331"###b޽Xp!á5j6mڄDѣppp )) )))ƕ+W0sEFFqqqӧЧOXZZ }c-Aƨx1c\|)))@~`eec%0JH%$|||pM m6@UU\..^iӦaʕO?$8[_5 %%Z `ff3f PSSCuu58NQۏWZooo0ưk.ZZZ0aك~!!!HKKɓۭաAx IIIdffBQQ666ӧOR;v`ɒ%Յ Ə:P%wD{ fff&deeaee[[[\+WD߾})`@bUg]1{lфiiiGhh(,X `ĉ?~<)A;]éƒ%999ڊؠ_~56(_g؅w' XZZbԩ6m%4%Wff&"""p-DEE᯿Buu5!C>= Фl 8::v½{?ׯ|||C[[FǸu"""TƍƍaggUU&\I HUTT@^^KÁ7 )) AAAؽ{7V\c?G9.\ʄB!puܼyn˗/ahh''' 6 ÇG~Z3f //ϟC#F#k׮noƑ#Gp1`ʔ)Xp!)1P6>}WիG^^,,,GGG899W^vzz:jXZZv |>}O<2dɒTRRĉ!''G ѣG駟/@^^ .Ă X)JG ݻp.^8puumbA߿?rssiN ك |/`jjJJHqe?x1c֖Q B9s7oFll,OUV_~*O||<Ο? . ** ***pwwMMMM nʕ+q L<6l!!66W9ǏcGnn.~7T}0h 9sf“'OoNNNt?FB+WHJJB>}l2Pp8څMݽ{N: ٳ'.ܮ̙3/K[ ,I8r>OQT4 (@T@I ޽{a`޽9r$]]\EEN8///য়~¤IƍXt)OBZ-[pssLuN(ڵk ???"""iiiؼy3LLL(P2 @TT,--qa Pc;233w^ddd~СC)H_X|9ϟOOOS`#,\X~=O >w 3_|X:t h' qxyy 4BWWDHdii7ooŗ_~ 777deeQ`V\\-[Ǐ6߿|tθt hرc8z(^~Q`!bرcXt)&L` +WFGaԩBHp8|Wظq#O_~,6p5̝;\.W\3,+V2|}}Q\\  @>իW^^^Iyg ,Çcv-ϟ?ǬYӧOcܸqBH5k0sLHIIav~!!!0`deeOœ*&M?k׮Ν;) T@;/֭[S>Chh((0V3~x:t8x  .\14=˗q L4IigFQQLgSm?k׮E^7jLxxxQQQ BԢEPTT9s@UUnnn*m/^zR,]哒0f <<DBHXj [nʊҊ远:u %%%˗{xtt4`eeP*vyf7^^^ɡPm;;wP(Blܸ/=ݻwq!88< p8HKKS0Hp88x tuu1n8SPZ+֌*z^RRSNÇ!))X5 &MO?.~#GDYYe455q!PHΆq&IF@߲w^HKKxɓڵk ooo߿'QRR߿~r\ <'i8{,cF[W\\Ç* !!@O>?ORy扽:c sΥ 6cmmǏ *_uSBB\.6668}41e['ĉ!'''vD0|p i3111Err2 hDOIIIp\Ӹw(HAҘ={vdddGǡH ~ $"ȑ#!!!P|W1beih 2OOjjͥKh 6lnܸH*`nn?2.T&v҄ pxzzN()c[^^3g`ƌؿ?ἣ>?~||ӧ}6x<B!`lleZUUsa i H^`_a~*vcP^]Ԃ:zԩ`C\\Ҿ2v؁Dx.....PQQ=ĶcܸqPVV/D{zzb˖-B=D<}4,,,@BbȐ!(**P(ĸqL|^Zl'Odrrr,&&edd0e떖LGG%$$v٥KXrr2bc1fϞͶlƎ+CII07o۱cG.n2{ gϏ]p 6L_?~|x<̎?.zmlƌ5OHH` Ă5ƫbΙ=3|ήQˊ]qkr2Ƙvl֭,66~e1@~6qDsڮ6#Mٳ';tM'}Owo17n@FFx<?~\Ϟ= I&&nHHH[n59wǏYfa̙֚[\ۿPPP3gn/ھy?z̜9b^^^b?iŚ5kwwwdٌ1S4|722Bzzzs8ɉ+**2{X Ըϯ1Ղ A k.]T,2>guBEEakk ___XZZgϞd0;dL+w<{ip1 :'OGӏiPPP6k IUUU\}Kgu>٨FPP]~ԨQr QQQGf?fϞ-xaѢEipŭߨQp5S㟵5 f...L(9oԶߖcllٲe5^j#??x;yC9yB (itpjkw^dgg7xPs,X.0n6ݻp8ܿtԙ#IyB(O[X,5k-Q<8`Q$  {g}uuu|I  '':::mHVV;fEEEO?YfV6~~~PPPMڑ@~~>VX!vжGGV;[;Bdd$^zաgqq1n:u甆ѣۮU@*l޼̓fKII%? &4۷/~F֐]]]$&&‚" @FFͥ`wb 2Yx}+I'Ptttwr`۶m\RP 9 h~~>4 c 9fΜ{>Mܶmo .!:4>O!T@;& Ф D4)vލ[n Pxadd M555Œ%Kg:*G}^zQ H-5iBc;w (!GkMXJJJذakdffRB'Mh?Xz5u !T@;j\~BA r{n9rQQQ1PXB!n߾$ iIkС>}:/^L?ڱȠ>٤ Me֯_OIH+pرc 3g sqygc\rcƌ`eeEJjرc3r ЖQPPSLA߾}) ]c 066yyy%8::Ν;u9?~Lj$ix*]̛I:zB3nnnԩNsEtt4j&##___ @[wp(]Dg4>Xp!JEԩ u^Ur?@WW::'ݘ7o)څnΝ;@'R0:e˖ŋHHHT}8qK.EAAx<^mmmNB=z?#zNYYt-@ƠA uIח#@ 'Oy扊4>C*4m[wŵkKw2SL˗/qƍN/;uB322```@Mꔖѡ֖C#ж3dĉunHt`۶mv[[:O㠤>z=ܜ'vs88q"B!( c 9fΜigzCKK ׯ_nj3 !!!z^JJ (iS8Z,͛WcF:֜4'Ad &ZqN0p!2 ƣmHJMt4LP^]{xUQܽ]uq::m"P9xciI2r1=hҡ(-.Wܣ1`<tDR8_5Fl9BIv,ף)~eus V]]bhhhP0:m۶oӧO[FiU9UYv[Ǥ$xxx 77%%%]~s1HFы^lh?BV]/?@x󖎁xiILX8]'qqq9rd*emMF&EQd`GT! h+p84iLMM믿ҎVX!C`„  B;XZ I&!**یz:7&ܽ{AhgCA4iBwp].]D!-S-WT@ہP(ЛI+ F3:u 111"33;w.H+* //?#"""~a͚5GϞ=) ʹuV?lذ/^wIwNhѼ1uTDDD 88ΛڒwߡV= ŋE---,[LL\\\\\{{Fͷ-[ =z޽{mprrƎ4@bb" O?}4,,,@f xdÆ 8rLLLDիXr%;e5CCC1~x7EYYY2d  1n8!$$'O… 1cF/n=j{ڵ2dTUU駟˖-w@GG6mݠy%.Z4i7YYYlΝdEEEVPPٞ={ڤJ>sf/. cÇ>|󙝝;{,+..fzzzoa^bfWnXZZ2RRRؙ3gĶcFFFڵkL [XcBUUU1CCCϪc%&&2yyyv%̬ؾ}6c%$$0yyy\Az0f}JaSΙ(np٦c}xI&''bbbXFFspp`[lۆϏ͞=mٲ;1XQQSRRb)))1͛vQﺋ[qmϟ?~ϟ3--z׏1Ǝ?^|x<svvfǏoV^5Sͫsa7ScER/ÇS0Z?377 (cyzzݻwJϊءC.g֖-Z]x0@ФbiiY+ĵc| e˗/gqqq>O]]edd7]xlҤIj1Μ9ŮϪU۽6cMÓ'O2;;;㠠 *6 GYYݻ7SSSчgWf<ikkz١CةSؔ)SXii)STTwc,;;CC{3i;CXf eee0`jj OOO*###78y޽6`Ϟ=6m;Cɓ222G544׬@EE`9od;K/ekJr8ɉ+**Xll Ȁ磲R̂ ASc/;;%z @Ьj(Z"hpjt"cFOaoozȓ&xzz"??Xt) III033䠣aÆ{GXXXφFnn.6m۷nnnHHH^!##III5j\DTTTѣppphroXZZVoc4㑝jnz-N}f?fϞ-:FFFXh|||\oq\o`aa )))TxW_}%:Eo߾0æLRSSk-smrYttt~}hذavG={2cccc7n`:::L^^'O6VVVΝ;tԘ);v,+,,͛73~gc;v` LUUӔ|}}k/88.ܦ_SԩSԔ2MMMrrrꍭdZZZ,;; ֮]+zFmh=jϏ>|:uM:U +//gVVVlݝ 66;TS򊎁v2Bݽ{mܸqe&--$%%ْ%KثWD888ٳgkJuVVV:ߓɄB'|tttjAFii)+..x=o6c>Lܱcǘyf1mڜw`„Babۘc1zjlٲFT}ضA˗/ozוSM+*Tvv6ۿ?ۼy3Fc?iiivN>deevJꖓ˗/9KJJj{۲75-Zۗ ::˩7Ź)yU_:0̛7QQQ믿`ccӭ㑔SDj^AAApzE ikk޽{PPPحb WWVmc޽nġκ~r 455[,v<;HNNLK.u]VeTy*]̵k>}`]f$z-XXX@]]:t[YYYt?޽{cС066*]Hyy9v >. kkkxyyQ`"=zTCSSBPDGGnp: /8< `Hr%OFdLaС].t 5'dcȐ!޳~xx`MAlhLZk? j<|111B޽aooO (iMu>˗KK}p;}\(jⷸK侈Bɝ;wp )S1*!&&4woSNAVV&&GJEU(,kl ygrrr;v,,--]MܼyBbw0#;6-xÇq…Y[[cTEM8ԍO%RW8,, /l22qqqpqq Uo?~<ƍeee -[ =z޽{OTUU(:866C AQQB!ƍ0`ɢ6/\3fx=ˆ pQ^uU\͎aUU=z`l޼ nݺjJիW8uoߎ}A~(8F~G?l555aȑHMMɓqΝ3<<-Nχc֫W/X[[͛4iR6)))x1RSSEf}r"999JB  33eee۷y>iiiTtB!T@I #Gؽ{7BBB64.]\.VX.9s4+++ z2O>D@@`֭_ؼy31zh._ ???;`mm?>(++c̙8p֭[gbƍgϞRRRu~7wwwDDD_@ WAhh(?~BHJJoRSSϟ@ @=jP---Jk֬ݡ LMM V###7hLQQŢq@ qvw Ȁ磲R A^_^,%㐔L+p@ZZ pjϺRrBb0CIDATXԿ<!!!PUUEϞ=1l0уP%- .D@@nnn {… 555 <6㑝 ^첣F¦Mccc=z׬Y333={6"##!)) aѢEc`ii$$$ʪV{)Sf?m>pm$$$劝X^^<{ )))BVV*++!++ ???駟RH_WA߾}acc000m`cc^z>@ZZpE|WPQQӑ`xjYF4!C믿!j*[믿b߾}6m$zk„ gϞZm -- H Ӄ7.]ֹt`󔖖86ի߻w999ǸqdXBT< iitCnߎGĂc*v9@Pk) BzOVVtuuE͖,Y'N*6سg^%njCm[~~>444عsg 777l۶ O?'NSdijLh|Q}RP(DYYj\Kyǃ@UUpss B@#!!Q(JFFFlӫxϞ=bGycn \_k׮AMM u0b7qwVSAA#G}-gggCVVRRR4TUU;D())Aee%x<x<a``P2Xǣ\1www :T\\\233eee(++CII "Fۓn O5 fcgr8055˗/QQQQ Ǐݻ@ =#F#::Ң*)) X[[(d==lmmkL8TWWڭs[n޽{o R$Ɛ< // c8p !*III=(ԁ1*Zg؈vaWVV.244Fo344P($zQc9www8::BZZ>LMMajjJMBk D D~Ѭ +.A -l@+ KVp+mfTt թ$&L(TUU?YRҷuww^,5wgcۋ0~qu 9S'1Lbe~P24&.nfp{-dn8>xHΉE.,~m(m%yf;~*(&"PPPPP@@@@@@@@@@,eDyL7"IENDB`vctrs/man/howto-faq-fix-scalar-type-error.Rd0000644000176200001440000000642414417765645020550 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-fix-scalar-type-error} \alias{howto-faq-fix-scalar-type-error} \title{FAQ - Why isn't my class treated as a vector?} \description{ The tidyverse is a bit stricter than base R regarding what kind of objects are considered as vectors (see the \link[=faq-error-scalar-type]{user FAQ} about this topic). Sometimes vctrs won’t treat your class as a vector when it should. \subsection{Why isn’t my list class considered a vector?}{ By default, S3 lists are not considered to be vectors by vctrs: \if{html}{\out{
}}\preformatted{my_list <- structure(list(), class = "my_class") vctrs::vec_is(my_list) #> [1] FALSE }\if{html}{\out{
}} To be treated as a vector, the class must either inherit from \code{"list"} explicitly: \if{html}{\out{
}}\preformatted{my_explicit_list <- structure(list(), class = c("my_class", "list")) vctrs::vec_is(my_explicit_list) #> [1] TRUE }\if{html}{\out{
}} Or it should implement a \code{vec_proxy()} method that returns its input if explicit inheritance is not possible or troublesome: \if{html}{\out{
}}\preformatted{#' @export vec_proxy.my_class <- function(x, ...) x vctrs::vec_is(my_list) #> [1] FALSE }\if{html}{\out{
}} Note that explicit inheritance is the preferred way because this makes it possible for your class to dispatch on \code{list} methods of S3 generics: \if{html}{\out{
}}\preformatted{my_generic <- function(x) UseMethod("my_generic") my_generic.list <- function(x) "dispatched!" my_generic(my_list) #> Error in UseMethod("my_generic"): no applicable method for 'my_generic' applied to an object of class "my_class" my_generic(my_explicit_list) #> [1] "dispatched!" }\if{html}{\out{
}} } \subsection{Why isn’t my data frame class considered a vector?}{ The most likely explanation is that the data frame has not been properly constructed. However, if you get an “Input must be a vector” error with a data frame subclass, it probably means that the data frame has not been properly constructed. The main cause of these errors are data frames whose \emph{base class} is not \code{"data.frame"}: \if{html}{\out{
}}\preformatted{my_df <- data.frame(x = 1) class(my_df) <- c("data.frame", "my_class") vctrs::obj_check_vector(my_df) #> Error: #> ! `my_df` must be a vector, not a object. }\if{html}{\out{
}} This is problematic as many tidyverse functions won’t work properly: \if{html}{\out{
}}\preformatted{dplyr::slice(my_df, 1) #> Error in `vec_slice()`: #> ! `x` must be a vector, not a object. }\if{html}{\out{
}} It is generally not appropriate to declare your class to be a superclass of another class. We generally consider this undefined behaviour (UB). To fix these errors, you can simply change the construction of your data frame class so that \code{"data.frame"} is a base class, i.e. it should come last in the class vector: \if{html}{\out{
}}\preformatted{class(my_df) <- c("my_class", "data.frame") vctrs::obj_check_vector(my_df) dplyr::slice(my_df, 1) #> x #> 1 1 }\if{html}{\out{
}} } } vctrs/man/partial_frame.Rd0000644000176200001440000000142314276722575015304 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/partial-frame.R \name{partial_frame} \alias{partial_frame} \title{Partially specify columns of a data frame} \usage{ partial_frame(...) } \arguments{ \item{...}{Attributes of subclass} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This special class can be passed to \code{.ptype} in order to specify the types of only some of the columns in a data frame. } \examples{ pf <- partial_frame(x = double()) pf vec_rbind( data.frame(x = 1L, y = "a"), data.frame(x = FALSE, z = 10), .ptype = partial_frame(x = double(), a = character()) ) } \keyword{internal} vctrs/man/vec_as_names.Rd0000644000176200001440000001551714362266120015113 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_as_names} \alias{vec_as_names} \title{Retrieve and repair names} \usage{ vec_as_names( names, ..., repair = c("minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), repair_arg = NULL, quiet = FALSE, call = caller_env() ) } \arguments{ \item{names}{A character vector.} \item{...}{These dots are for future extensions and must be empty.} \item{repair}{Either a string or a function. If a string, it must be one of \code{"check_unique"}, \code{"minimal"}, \code{"unique"}, \code{"universal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. If a function, it is invoked with a vector of minimal names and must return minimal names, otherwise an error is thrown. \itemize{ \item Minimal names are never \code{NULL} or \code{NA}. When an element doesn't have a name, its minimal name is an empty string. \item Unique names are unique. A suffix is appended to duplicate names to make them unique. \item Universal names are unique and syntactic, meaning that you can safely use the names as variables without causing a syntax error. } The \code{"check_unique"} option doesn't perform any name repair. Instead, an error is raised if the names don't suit the \code{"unique"} criteria. The options \code{"unique_quiet"} and \code{"universal_quiet"} are here to help the user who calls this function indirectly, via another function which exposes \code{repair} but not \code{quiet}. Specifying \code{repair = "unique_quiet"} is like specifying \verb{repair = "unique", quiet = TRUE}. When the \code{"*_quiet"} options are used, any setting of \code{quiet} is silently overridden.} \item{repair_arg}{If specified and \code{repair = "check_unique"}, any errors will include a hint to set the \code{repair_arg}.} \item{quiet}{By default, the user is informed of any renaming caused by repairing the names. This only concerns unique and universal repairing. Set \code{quiet} to \code{TRUE} to silence the messages. Users can silence the name repair messages by setting the \code{"rlib_name_repair_verbosity"} global option to \code{"quiet"}.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{vec_as_names()} takes a character vector of names and repairs it according to the \code{repair} argument. It is the r-lib and tidyverse equivalent of \code{\link[base:make.names]{base::make.names()}}. vctrs deals with a few levels of name repair: \itemize{ \item \code{minimal} names exist. The \code{names} attribute is not \code{NULL}. The name of an unnamed element is \code{""} and never \code{NA}. For instance, \code{vec_as_names()} always returns minimal names and data frames created by the tibble package have names that are, at least, \code{minimal}. \item \code{unique} names are \code{minimal}, have no duplicates, and can be used where a variable name is expected. Empty names, \code{...}, and \code{..} followed by a sequence of digits are banned. \itemize{ \item All columns can be accessed by name via \code{df[["name"]]} and \code{df$`name` } and \code{with(df, `name`)}. } \item \code{universal} names are \code{unique} and syntactic (see Details for more). \itemize{ \item Names work everywhere, without quoting: \code{df$name} and \code{with(df, name)} and \code{lm(name1 ~ name2, data = df)} and \code{dplyr::select(df, name)} all work. } } \code{universal} implies \code{unique}, \code{unique} implies \code{minimal}. These levels are nested. } \section{\code{minimal} names}{ \code{minimal} names exist. The \code{names} attribute is not \code{NULL}. The name of an unnamed element is \code{""} and never \code{NA}. Examples: \if{html}{\out{
}}\preformatted{Original names of a vector with length 3: NULL minimal names: "" "" "" Original names: "x" NA minimal names: "x" "" }\if{html}{\out{
}} } \section{\code{unique} names}{ \code{unique} names are \code{minimal}, have no duplicates, and can be used (possibly with backticks) in contexts where a variable is expected. Empty names, \code{...}, and \code{..} followed by a sequence of digits are banned. If a data frame has \code{unique} names, you can index it by name, and also access the columns by name. In particular, \code{df[["name"]]} and \code{df$`name`} and also \code{with(df, `name`)} always work. There are many ways to make names \code{unique}. We append a suffix of the form \code{...j} to any name that is \code{""} or a duplicate, where \code{j} is the position. We also change \code{..#} and \code{...} to \code{...#}. Example: \if{html}{\out{
}}\preformatted{Original names: "" "x" "" "y" "x" "..2" "..." unique names: "...1" "x...2" "...3" "y" "x...5" "...6" "...7" }\if{html}{\out{
}} Pre-existing suffixes of the form \code{...j} are always stripped, prior to making names \code{unique}, i.e. reconstructing the suffixes. If this interacts poorly with your names, you should take control of name repair. } \section{\code{universal} names}{ \code{universal} names are \code{unique} and syntactic, meaning they: \itemize{ \item Are never empty (inherited from \code{unique}). \item Have no duplicates (inherited from \code{unique}). \item Are not \code{...}. Do not have the form \code{..i}, where \code{i} is a number (inherited from \code{unique}). \item Consist of letters, numbers, and the dot \code{.} or underscore \verb{_} characters. \item Start with a letter or start with the dot \code{.} not followed by a number. \item Are not a \link{reserved} word, e.g., \code{if} or \code{function} or \code{TRUE}. } If a vector has \code{universal} names, variable names can be used "as is" in code. They work well with nonstandard evaluation, e.g., \code{df$name} works. vctrs has a different method of making names syntactic than \code{\link[base:make.names]{base::make.names()}}. In general, vctrs prepends one or more dots \code{.} until the name is syntactic. Examples: \if{html}{\out{
}}\preformatted{ Original names: "" "x" NA "x" universal names: "...1" "x...2" "...3" "x...4" Original names: "(y)" "_z" ".2fa" "FALSE" universal names: ".y." "._z" "..2fa" ".FALSE" }\if{html}{\out{
}} } \examples{ # By default, `vec_as_names()` returns minimal names: vec_as_names(c(NA, NA, "foo")) # You can make them unique: vec_as_names(c(NA, NA, "foo"), repair = "unique") # Universal repairing fixes any non-syntactic name: vec_as_names(c("_foo", "+"), repair = "universal") } \seealso{ \code{\link[rlang:names2]{rlang::names2()}} returns the names of an object, after making them \code{minimal}. } vctrs/man/vec_count.Rd0000644000176200001440000000340614315060307014444 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_count} \alias{vec_count} \title{Count unique values in a vector} \usage{ vec_count(x, sort = c("count", "key", "location", "none")) } \arguments{ \item{x}{A vector (including a data frame).} \item{sort}{One of "count", "key", "location", or "none". \itemize{ \item "count", the default, puts most frequent values at top \item "key", orders by the output key column (i.e. unique values of \code{x}) \item "location", orders by location where key first seen. This is useful if you want to match the counts up to other unique/duplicated functions. \item "none", leaves unordered. This is not guaranteed to produce the same ordering across R sessions, but is the fastest method. }} } \value{ A data frame with columns \code{key} (same type as \code{x}) and \code{count} (an integer vector). } \description{ Count the number of unique values in a vector. \code{vec_count()} has two important differences to \code{table()}: it returns a data frame, and when given multiple inputs (as a data frame), it only counts combinations that appear in the input. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_order]{vec_order()}} } } \examples{ vec_count(mtcars$vs) vec_count(iris$Species) # If you count a data frame you'll get a data frame # column in the output str(vec_count(mtcars[c("vs", "am")])) # Sorting --------------------------------------- x <- letters[rpois(100, 6)] # default is to sort by frequency vec_count(x) # by can sort by key vec_count(x, sort = "key") # or location of first value vec_count(x, sort = "location") head(x) # or not at all vec_count(x, sort = "none") } vctrs/man/vec_bind.Rd0000644000176200001440000001566214511524374014246 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind.R \name{vec_bind} \alias{vec_bind} \alias{vec_rbind} \alias{vec_cbind} \title{Combine many data frames into one data frame} \usage{ vec_rbind( ..., .ptype = NULL, .names_to = rlang::zap(), .name_repair = c("unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), .name_spec = NULL, .error_call = current_env() ) vec_cbind( ..., .ptype = NULL, .size = NULL, .name_repair = c("unique", "universal", "check_unique", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env() ) } \arguments{ \item{...}{Data frames or vectors. When the inputs are named: \itemize{ \item \code{vec_rbind()} assigns names to row names unless \code{.names_to} is supplied. In that case the names are assigned in the column defined by \code{.names_to}. \item \code{vec_cbind()} creates packed data frame columns with named inputs. } \code{NULL} inputs are silently ignored. Empty (e.g. zero row) inputs will not appear in the output, but will affect the derived \code{.ptype}.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.names_to}{This controls what to do with input names supplied in \code{...}. \itemize{ \item By default, input names are \link[rlang:zap]{zapped}. \item If a string, specifies a column where the input names will be copied. These names are often useful to identify rows with their original input. If a column name is supplied and \code{...} is not named, an integer column is used instead. \item If \code{NULL}, the input names are used as row names. }} \item{.name_repair}{One of \code{"unique"}, \code{"universal"}, \code{"check_unique"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options. With \code{vec_rbind()}, the repair function is applied to all inputs separately. This is because \code{vec_rbind()} needs to align their columns before binding the rows, and thus needs all inputs to have unique names. On the other hand, \code{vec_cbind()} applies the repair function after all inputs have been concatenated together in a final data frame. Hence \code{vec_cbind()} allows the more permissive minimal names repair.} \item{.name_spec}{A name specification (as documented in \code{\link[=vec_c]{vec_c()}}) for combining the outer inputs names in \code{...} and the inner row names of the inputs. This only has an effect when \code{.names_to} is set to \code{NULL}, which causes the input names to be assigned as row names.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.size}{If, \code{NULL}, the default, will determine the number of rows in \code{vec_cbind()} output by using the tidyverse \link[=theory-faq-recycling]{recycling rules}. Alternatively, specify the desired number of rows, and any inputs of length 1 will be recycled appropriately.} } \value{ A data frame, or subclass of data frame. If \code{...} is a mix of different data frame subclasses, \code{vec_ptype2()} will be used to determine the output type. For \code{vec_rbind()}, this will determine the type of the container and the type of each column; for \code{vec_cbind()} it only determines the type of the output container. If there are no non-\code{NULL} inputs, the result will be \code{data.frame()}. } \description{ This pair of functions binds together data frames (and vectors), either row-wise or column-wise. Row-binding creates a data frame with common type across all arguments. Column-binding creates a data frame with common length across all arguments. } \section{Invariants}{ All inputs are first converted to a data frame. The conversion for 1d vectors depends on the direction of binding: \itemize{ \item For \code{vec_rbind()}, each element of the vector becomes a column in a single row. \item For \code{vec_cbind()}, each element of the vector becomes a row in a single column. } Once the inputs have all become data frames, the following invariants are observed for row-binding: \itemize{ \item \code{vec_size(vec_rbind(x, y)) == vec_size(x) + vec_size(y)} \item \code{vec_ptype(vec_rbind(x, y)) = vec_ptype_common(x, y)} } Note that if an input is an empty vector, it is first converted to a 1-row data frame with 0 columns. Despite being empty, its effective size for the total number of rows is 1. For column-binding, the following invariants apply: \itemize{ \item \code{vec_size(vec_cbind(x, y)) == vec_size_common(x, y)} \item \code{vec_ptype(vec_cbind(x, y)) == vec_cbind(vec_ptype(x), vec_ptype(x))} } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_init]{vec_init()}} \item \code{\link[=vec_assign]{vec_assign()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies of \code{vec_rbind()}}{ \itemize{ \item \code{\link[base:c]{base::c()}} } If columns to combine inherit from a common class, \code{vec_rbind()} falls back to \code{base::c()} if there exists a \code{c()} method implemented for this class hierarchy. } } \examples{ # row binding ----------------------------------------- # common columns are coerced to common class vec_rbind( data.frame(x = 1), data.frame(x = FALSE) ) # unique columns are filled with NAs vec_rbind( data.frame(x = 1), data.frame(y = "x") ) # null inputs are ignored vec_rbind( data.frame(x = 1), NULL, data.frame(x = 2) ) # bare vectors are treated as rows vec_rbind( c(x = 1, y = 2), c(x = 3) ) # default names will be supplied if arguments are not named vec_rbind( 1:2, 1:3, 1:4 ) # column binding -------------------------------------- # each input is recycled to have common length vec_cbind( data.frame(x = 1), data.frame(y = 1:3) ) # bare vectors are treated as columns vec_cbind( data.frame(x = 1), y = letters[1:3] ) # if you supply a named data frame, it is packed in a single column data <- vec_cbind( x = data.frame(a = 1, b = 2), y = 1 ) data # Packed data frames are nested in a single column. This makes it # possible to access it through a single name: data$x # since the base print method is suboptimal with packed data # frames, it is recommended to use tibble to work with these: if (rlang::is_installed("tibble")) { vec_cbind(x = tibble::tibble(a = 1, b = 2), y = 1) } # duplicate names are flagged vec_cbind(x = 1, x = 2) } \seealso{ \code{\link[=vec_c]{vec_c()}} for combining 1d vectors. } vctrs/man/vec_size.Rd0000644000176200001440000001014414511524374014272 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{vec_size} \alias{vec_size} \alias{vec_size_common} \alias{list_sizes} \alias{vec_is_empty} \title{Number of observations} \usage{ vec_size(x) vec_size_common( ..., .size = NULL, .absent = 0L, .arg = "", .call = caller_env() ) list_sizes(x) vec_is_empty(x) } \arguments{ \item{x, ...}{Vector inputs or \code{NULL}.} \item{.size}{If \code{NULL}, the default, the output size is determined by recycling the lengths of all elements of \code{...}. Alternatively, you can supply \code{.size} to force a known size; in this case, \code{x} and \code{...} are ignored.} \item{.absent}{The size used when no input is provided, or when all input is \code{NULL}. If left as \code{NULL} when no input is supplied, an error is thrown.} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{.call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ An integer (or double for long vectors). \code{vec_size_common()} returns \code{.absent} if all inputs are \code{NULL} or absent, \code{0L} by default. } \description{ \code{vec_size(x)} returns the size of a vector. \code{vec_is_empty()} returns \code{TRUE} if the size is zero, \code{FALSE} otherwise. The size is distinct from the \code{\link[=length]{length()}} of a vector because it generalises to the "number of observations" for 2d structures, i.e. it's the number of rows in matrix or a data frame. This definition has the important property that every column of a data frame (even data frame and matrix columns) have the same size. \code{vec_size_common(...)} returns the common size of multiple vectors. \code{list_sizes()} returns an integer vector containing the size of each element of a list. It is nearly equivalent to, but faster than, \code{map_int(x, vec_size)}, with the exception that \code{list_sizes()} will error on non-list inputs, as defined by \code{\link[=obj_is_list]{obj_is_list()}}. \code{list_sizes()} is to \code{vec_size()} as \code{\link[=lengths]{lengths()}} is to \code{\link[=length]{length()}}. } \details{ There is no vctrs helper that retrieves the number of columns: as this is a property of the \link[=vec_ptype_show]{type}. \code{vec_size()} is equivalent to \code{NROW()} but has a name that is easier to pronounce, and throws an error when passed non-vector inputs. } \section{Invariants}{ \itemize{ \item \code{vec_size(dataframe)} == \code{vec_size(dataframe[[i]])} \item \code{vec_size(matrix)} == \code{vec_size(matrix[, i, drop = FALSE])} \item \code{vec_size(vec_c(x, y))} == \code{vec_size(x)} + \code{vec_size(y)} } } \section{The size of NULL}{ The size of \code{NULL} is hard-coded to \code{0L} in \code{vec_size()}. \code{vec_size_common()} returns \code{.absent} when all inputs are \code{NULL} (if only some inputs are \code{NULL}, they are simply ignored). A default size of 0 makes sense because sizes are most often queried in order to compute a total size while assembling a collection of vectors. Since we treat \code{NULL} as an absent input by principle, we return the identity of sizes under addition to reflect that an absent input doesn't take up any size. Note that other defaults might make sense under different circumstances. For instance, a default size of 1 makes sense for finding the common size because 1 is the identity of the recycling rules. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} } } \examples{ vec_size(1:100) vec_size(mtcars) vec_size(array(dim = c(3, 5, 10))) vec_size_common(1:10, 1:10) vec_size_common(1:10, 1) vec_size_common(integer(), 1) list_sizes(list("a", 1:5, letters)) } \seealso{ \code{\link[=vec_slice]{vec_slice()}} for a variation of \code{[} compatible with \code{vec_size()}, and \code{\link[=vec_recycle]{vec_recycle()}} to \link[=theory-faq-recycling]{recycle} vectors to common length. } vctrs/man/vec_assert.Rd0000644000176200001440000000764414401377400014626 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{vec_assert} \alias{vec_assert} \alias{vec_is} \title{Assert an argument has known prototype and/or size} \usage{ vec_assert( x, ptype = NULL, size = NULL, arg = caller_arg(x), call = caller_env() ) vec_is(x, ptype = NULL, size = NULL) } \arguments{ \item{x}{A vector argument to check.} \item{ptype}{Prototype to compare against. If the prototype has a class, its \code{\link[=vec_ptype]{vec_ptype()}} is compared to that of \code{x} with \code{identical()}. Otherwise, its \code{\link[=typeof]{typeof()}} is compared to that of \code{x} with \code{==}.} \item{size}{A single integer size against which to compare.} \item{arg}{Name of argument being checked. This is used in error messages. The label of the expression passed as \code{x} is taken as default.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ \code{vec_is()} returns \code{TRUE} or \code{FALSE}. \code{vec_assert()} either throws a typed error (see section on error types) or returns \code{x}, invisibly. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#questioning}{\figure{lifecycle-questioning.svg}{options: alt='[Questioning]'}}}{\strong{[Questioning]}} \itemize{ \item \code{vec_is()} is a predicate that checks if its input is a vector that conforms to a prototype and/or a size. \item \code{vec_assert()} throws an error when the input is not a vector or doesn't conform. } } \section{Error types}{ \code{vec_is()} never throws. \code{vec_assert()} throws the following errors: \itemize{ \item If the input is not a vector, an error of class \code{"vctrs_error_scalar_type"} is raised. \item If the prototype doesn't match, an error of class \code{"vctrs_error_assert_ptype"} is raised. \item If the size doesn't match, an error of class \code{"vctrs_error_assert_size"} is raised. } Both errors inherit from \code{"vctrs_error_assert"}. } \section{Lifecycle}{ Both \code{vec_is()} and \code{vec_assert()} are questioning because their \code{ptype} arguments have semantics that are challenging to define clearly and are rarely useful. \itemize{ \item Use \code{\link[=obj_is_vector]{obj_is_vector()}} or \code{\link[=obj_check_vector]{obj_check_vector()}} for vector checks \item Use \code{\link[=vec_check_size]{vec_check_size()}} for size checks \item Use \code{\link[=vec_cast]{vec_cast()}}, \code{\link[=inherits]{inherits()}}, or simple type predicates like \code{\link[rlang:type-predicates]{rlang::is_logical()}} for specific type checks } } \section{Vectors and scalars}{ Informally, a vector is a collection that makes sense to use as column in a data frame. The following rules define whether or not \code{x} is considered a vector. If no \code{\link[=vec_proxy]{vec_proxy()}} method has been registered, \code{x} is a vector if: \itemize{ \item The \link[=typeof]{base type} of the object is atomic: \code{"logical"}, \code{"integer"}, \code{"double"}, \code{"complex"}, \code{"character"}, or \code{"raw"}. \item \code{x} is a list, as defined by \code{\link[=obj_is_list]{obj_is_list()}}. \item \code{x} is a \link{data.frame}. } If a \code{vec_proxy()} method has been registered, \code{x} is a vector if: \itemize{ \item The proxy satisfies one of the above conditions. \item The base type of the proxy is \code{"list"}, regardless of its class. S3 lists are thus treated as scalars unless they implement a \code{vec_proxy()} method. } Otherwise an object is treated as scalar and cannot be used as a vector. In particular: \itemize{ \item \code{NULL} is not a vector. \item S3 lists like \code{lm} objects are treated as scalars by default. \item Objects of type \link{expression} are not treated as vectors. } } \keyword{internal} vctrs/man/vec_as_index.Rd0000644000176200001440000000226414276722575015131 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_as_index} \alias{vec_as_index} \title{Convert to an index vector} \usage{ vec_as_index(i, n, names = NULL) } \arguments{ \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{n}{A single integer representing the total size of the object that \code{i} is meant to index into.} \item{names}{If \code{i} is a character vector, \code{names} should be a character vector that \code{i} will be matched against to construct the index. Otherwise, not used. The default value of \code{NULL} will result in an error if \code{i} is a character vector.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_as_index()} has been renamed to \code{\link[=vec_as_location]{vec_as_location()}} and is deprecated as of vctrs 0.2.2. } \keyword{internal} vctrs/man/vec_locate_matches.Rd0000644000176200001440000003416514376174332016310 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match.R \name{vec_locate_matches} \alias{vec_locate_matches} \title{Locate observations matching specified conditions} \usage{ vec_locate_matches( needles, haystack, ..., condition = "==", filter = "none", incomplete = "compare", no_match = NA_integer_, remaining = "drop", multiple = "all", relationship = "none", nan_distinct = FALSE, chr_proxy_collate = NULL, needles_arg = "needles", haystack_arg = "haystack", error_call = current_env() ) } \arguments{ \item{needles, haystack}{Vectors used for matching. \itemize{ \item \code{needles} represents the vector to search for. \item \code{haystack} represents the vector to search in. } Prior to comparison, \code{needles} and \code{haystack} are coerced to the same type.} \item{...}{These dots are for future extensions and must be empty.} \item{condition}{Condition controlling how \code{needles} should be compared against \code{haystack} to identify a successful match. \itemize{ \item One of: \code{"=="}, \code{">"}, \code{">="}, \code{"<"}, or \code{"<="}. \item For data frames, a length \code{1} or \code{ncol(needles)} character vector containing only the above options, specifying how matching is determined for each column. }} \item{filter}{Filter to be applied to the matched results. \itemize{ \item \code{"none"} doesn't apply any filter. \item \code{"min"} returns only the minimum haystack value matching the current needle. \item \code{"max"} returns only the maximum haystack value matching the current needle. \item For data frames, a length \code{1} or \code{ncol(needles)} character vector containing only the above options, specifying a filter to apply to each column. } Filters don't have any effect on \code{"=="} conditions, but are useful for computing "rolling" matches with other conditions. A filter can return multiple haystack matches for a particular needle if the maximum or minimum haystack value is duplicated in \code{haystack}. These can be further controlled with \code{multiple}.} \item{incomplete}{Handling of missing and \link[=vec_detect_complete]{incomplete} values in \code{needles}. \itemize{ \item \code{"compare"} uses \code{condition} to determine whether or not a missing value in \code{needles} matches a missing value in \code{haystack}. If \code{condition} is \code{==}, \code{>=}, or \code{<=}, then missing values will match. \item \code{"match"} always allows missing values in \code{needles} to match missing values in \code{haystack}, regardless of the \code{condition}. \item \code{"drop"} drops incomplete values in \code{needles} from the result. \item \code{"error"} throws an error if any \code{needles} are incomplete. \item If a single integer is provided, this represents the value returned in the \code{haystack} column for values of \code{needles} that are incomplete. If \code{no_match = NA}, setting \code{incomplete = NA} forces incomplete values in \code{needles} to be treated like unmatched values. } \code{nan_distinct} determines whether a \code{NA} is allowed to match a \code{NaN}.} \item{no_match}{Handling of \code{needles} without a match. \itemize{ \item \code{"drop"} drops \code{needles} with zero matches from the result. \item \code{"error"} throws an error if any \code{needles} have zero matches. \item If a single integer is provided, this represents the value returned in the \code{haystack} column for values of \code{needles} that have zero matches. The default represents an unmatched needle with \code{NA}. }} \item{remaining}{Handling of \code{haystack} values that \code{needles} never matched. \itemize{ \item \code{"drop"} drops remaining \code{haystack} values from the result. Typically, this is the desired behavior if you only care when \code{needles} has a match. \item \code{"error"} throws an error if there are any remaining \code{haystack} values. \item If a single integer is provided (often \code{NA}), this represents the value returned in the \code{needles} column for the remaining \code{haystack} values that \code{needles} never matched. Remaining \code{haystack} values are always returned at the end of the result. }} \item{multiple}{Handling of \code{needles} with multiple matches. For each needle: \itemize{ \item \code{"all"} returns all matches detected in \code{haystack}. \item \code{"any"} returns any match detected in \code{haystack} with no guarantees on which match will be returned. It is often faster than \code{"first"} and \code{"last"} if you just need to detect if there is at least one match. \item \code{"first"} returns the first match detected in \code{haystack}. \item \code{"last"} returns the last match detected in \code{haystack}. }} \item{relationship}{Handling of the expected relationship between \code{needles} and \code{haystack}. If the expectations chosen from the list below are invalidated, an error is thrown. \itemize{ \item \code{"none"} doesn't perform any relationship checks. \item \code{"one-to-one"} expects: \itemize{ \item Each value in \code{needles} matches at most 1 value in \code{haystack}. \item Each value in \code{haystack} matches at most 1 value in \code{needles}. } \item \code{"one-to-many"} expects: \itemize{ \item Each value in \code{needles} matches any number of values in \code{haystack}. \item Each value in \code{haystack} matches at most 1 value in \code{needles}. } \item \code{"many-to-one"} expects: \itemize{ \item Each value in \code{needles} matches at most 1 value in \code{haystack}. \item Each value in \code{haystack} matches any number of values in \code{needles}. } \item \code{"many-to-many"} expects: \itemize{ \item Each value in \code{needles} matches any number of values in \code{haystack}. \item Each value in \code{haystack} matches any number of values in \code{needles}. } This performs no checks, and is identical to \code{"none"}, but is provided to allow you to be explicit about this relationship if you know it exists. \item \code{"warn-many-to-many"} doesn't assume there is any known relationship, but will warn if \code{needles} and \code{haystack} have a many-to-many relationship (which is typically unexpected), encouraging you to either take a closer look at your inputs or make this relationship explicit by specifying \code{"many-to-many"}. } \code{relationship} is applied after \code{filter} and \code{multiple} to allow potential multiple matches to be filtered out first. \code{relationship} doesn't handle cases where there are zero matches. For that, see \code{no_match} and \code{remaining}.} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} \item{needles_arg, haystack_arg}{Argument tags for \code{needles} and \code{haystack} used in error messages.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A two column data frame containing the locations of the matches. \itemize{ \item \code{needles} is an integer vector containing the location of the needle currently being matched. \item \code{haystack} is an integer vector containing the location of the corresponding match in the haystack for the current needle. } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_locate_matches()} is a more flexible version of \code{\link[=vec_match]{vec_match()}} used to identify locations where each value of \code{needles} matches one or multiple values in \code{haystack}. Unlike \code{vec_match()}, \code{vec_locate_matches()} returns all matches by default, and can match on binary conditions other than equality, such as \code{>}, \code{>=}, \code{<}, and \code{<=}. } \details{ \code{\link[=vec_match]{vec_match()}} is identical to (but often slightly faster than): \if{html}{\out{
}}\preformatted{vec_locate_matches( needles, haystack, condition = "==", multiple = "first", nan_distinct = TRUE ) }\if{html}{\out{
}} \code{vec_locate_matches()} is extremely similar to a SQL join between \code{needles} and \code{haystack}, with the default being most similar to a left join. Be very careful when specifying match \code{condition}s. If a condition is misspecified, it is very easy to accidentally generate an exponentially large number of matches. } \section{Dependencies of \code{vec_locate_matches()}}{ \itemize{ \item \code{\link[=vec_order_radix]{vec_order_radix()}} \item \code{\link[=vec_detect_complete]{vec_detect_complete()}} } } \examples{ x <- c(1, 2, NA, 3, NaN) y <- c(2, 1, 4, NA, 1, 2, NaN) # By default, for each value of `x`, all matching locations in `y` are # returned matches <- vec_locate_matches(x, y) matches # The result can be used to slice the inputs to align them data_frame( x = vec_slice(x, matches$needles), y = vec_slice(y, matches$haystack) ) # If multiple matches are present, control which is returned with `multiple` vec_locate_matches(x, y, multiple = "first") vec_locate_matches(x, y, multiple = "last") vec_locate_matches(x, y, multiple = "any") # Use `relationship` to add constraints and error on multiple matches if # they aren't expected try(vec_locate_matches(x, y, relationship = "one-to-one")) # In this case, the `NA` in `y` matches two rows in `x` try(vec_locate_matches(x, y, relationship = "one-to-many")) # By default, `NA` is treated as being identical to `NaN`. # Using `nan_distinct = TRUE` treats `NA` and `NaN` as different values, so # `NA` can only match `NA`, and `NaN` can only match `NaN`. vec_locate_matches(x, y, nan_distinct = TRUE) # If you never want missing values to match, set `incomplete = NA` to return # `NA` in the `haystack` column anytime there was an incomplete value # in `needles`. vec_locate_matches(x, y, incomplete = NA) # Using `incomplete = NA` allows us to enforce the one-to-many relationship # that we couldn't before vec_locate_matches(x, y, relationship = "one-to-many", incomplete = NA) # `no_match` allows you to specify the returned value for a needle with # zero matches. Note that this is different from an incomplete value, # so specifying `no_match` allows you to differentiate between incomplete # values and unmatched values. vec_locate_matches(x, y, incomplete = NA, no_match = 0L) # If you want to require that every `needle` has at least 1 match, set # `no_match` to `"error"`: try(vec_locate_matches(x, y, incomplete = NA, no_match = "error")) # By default, `vec_locate_matches()` detects equality between `needles` and # `haystack`. Using `condition`, you can detect where an inequality holds # true instead. For example, to find every location where `x[[i]] >= y`: matches <- vec_locate_matches(x, y, condition = ">=") data_frame( x = vec_slice(x, matches$needles), y = vec_slice(y, matches$haystack) ) # You can limit which matches are returned with a `filter`. For example, # with the above example you can filter the matches returned by `x[[i]] >= y` # down to only the ones containing the maximum `y` value of those matches. matches <- vec_locate_matches(x, y, condition = ">=", filter = "max") # Here, the matches for the `3` needle value have been filtered down to # only include the maximum haystack value of those matches, `2`. This is # often referred to as a rolling join. data_frame( x = vec_slice(x, matches$needles), y = vec_slice(y, matches$haystack) ) # In the very rare case that you need to generate locations for a # cross match, where every value of `x` is forced to match every # value of `y` regardless of what the actual values are, you can # replace `x` and `y` with integer vectors of the same size that contain # a single value and match on those instead. x_proxy <- vec_rep(1L, vec_size(x)) y_proxy <- vec_rep(1L, vec_size(y)) nrow(vec_locate_matches(x_proxy, y_proxy)) vec_size(x) * vec_size(y) # By default, missing values will match other missing values when using # `==`, `>=`, or `<=` conditions, but not when using `>` or `<` conditions. # This is similar to how `vec_compare(x, y, na_equal = TRUE)` works. x <- c(1, NA) y <- c(NA, 2) vec_locate_matches(x, y, condition = "<=") vec_locate_matches(x, y, condition = "<") # You can force missing values to match regardless of the `condition` # by using `incomplete = "match"` vec_locate_matches(x, y, condition = "<", incomplete = "match") # You can also use data frames for `needles` and `haystack`. The # `condition` will be recycled to the number of columns in `needles`, or # you can specify varying conditions per column. In this example, we take # a vector of date `values` and find all locations where each value is # between lower and upper bounds specified by the `haystack`. values <- as.Date("2019-01-01") + 0:9 needles <- data_frame(lower = values, upper = values) set.seed(123) lower <- as.Date("2019-01-01") + sample(10, 10, replace = TRUE) upper <- lower + sample(3, 10, replace = TRUE) haystack <- data_frame(lower = lower, upper = upper) # (values >= lower) & (values <= upper) matches <- vec_locate_matches(needles, haystack, condition = c(">=", "<=")) data_frame( lower = vec_slice(lower, matches$haystack), value = vec_slice(values, matches$needle), upper = vec_slice(upper, matches$haystack) ) } vctrs/man/vec_default_ptype2.Rd0000644000176200001440000000431314315060307016241 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cast.R, R/type2.R \name{vec_default_cast} \alias{vec_default_cast} \alias{vec_default_ptype2} \title{Default cast and ptype2 methods} \usage{ vec_default_cast(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) vec_default_ptype2(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) } \arguments{ \item{x}{Vectors to cast.} \item{to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} \item{...}{For \code{vec_cast_common()}, vectors to cast. For \code{vec_cast()}, \code{vec_cast_default()}, and \code{vec_restore()}, these dots are only for future extensions and should be empty.} \item{x_arg}{Argument name for \code{x}, used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ These functions are automatically called when no \code{\link[=vec_ptype2]{vec_ptype2()}} or \code{\link[=vec_cast]{vec_cast()}} method is implemented for a pair of types. \itemize{ \item They apply special handling if one of the inputs is of type \code{AsIs} or \code{sfc}. \item They attempt a number of fallbacks in cases where it would be too inconvenient to be strict: \itemize{ \item If the class and attributes are the same they are considered compatible. \code{vec_default_cast()} returns \code{x} in this case. \item In case of incompatible data frame classes, they fall back to \code{data.frame}. If an incompatible subclass of tibble is involved, they fall back to \code{tbl_df}. } \item Otherwise, an error is thrown with \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} or \code{\link[=stop_incompatible_cast]{stop_incompatible_cast()}}. } } \keyword{internal} vctrs/man/vec_match.Rd0000644000176200001440000000466214276722575014437 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_match} \alias{vec_match} \alias{vec_in} \title{Find matching observations across vectors} \usage{ vec_match( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) vec_in( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) } \arguments{ \item{needles, haystack}{Vector of \code{needles} to search for in vector haystack. \code{haystack} should usually be unique; if not \code{vec_match()} will only return the location of the first match. \code{needles} and \code{haystack} are coerced to the same type prior to comparison.} \item{...}{These dots are for future extensions and must be empty.} \item{na_equal}{If \code{TRUE}, missing values in \code{needles} can be matched to missing values in \code{haystack}. If \code{FALSE}, they propagate, missing values in \code{needles} are represented as \code{NA} in the return value.} \item{needles_arg, haystack_arg}{Argument tags for \code{needles} and \code{haystack} used in error messages.} } \value{ A vector the same length as \code{needles}. \code{vec_in()} returns a logical vector; \code{vec_match()} returns an integer vector. } \description{ \code{vec_in()} returns a logical vector based on whether \code{needle} is found in haystack. \code{vec_match()} returns an integer vector giving location of \code{needle} in \code{haystack}, or \code{NA} if it's not found. } \details{ \code{vec_in()} is equivalent to \link{\%in\%}; \code{vec_match()} is equivalent to \code{match()}. } \section{Missing values}{ In most cases places in R, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. The exception is in matching functions like \code{\link[=match]{match()}} and \code{\link[=merge]{merge()}}, where an \code{NA} will match another \code{NA}. By \code{vec_match()} and \code{vec_in()} will match \code{NA}s; but you can control this behaviour with the \code{na_equal} argument. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ hadley <- strsplit("hadley", "")[[1]] vec_match(hadley, letters) vowels <- c("a", "e", "i", "o", "u") vec_match(hadley, vowels) vec_in(hadley, vowels) # Only the first index of duplicates is returned vec_match(c("a", "b"), c("a", "b", "a", "b")) } vctrs/man/vec_slice.Rd0000644000176200001440000000753614373205357014435 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{vec_slice} \alias{vec_slice} \alias{vec_slice<-} \alias{vec_assign} \title{Get or set observations in a vector} \usage{ vec_slice(x, i, ..., error_call = current_env()) vec_slice(x, i) <- value vec_assign(x, i, value, ..., x_arg = "", value_arg = "") } \arguments{ \item{x}{A vector} \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{...}{These dots are for future extensions and must be empty.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{value}{Replacement values. \code{value} is cast to the type of \code{x}, but only if they have a common type. See below for examples of this rule.} \item{x_arg, value_arg}{Argument names for \code{x} and \code{value}. These are used in error messages to inform the user about the locations of incompatible types and sizes (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} and \code{\link[=stop_incompatible_size]{stop_incompatible_size()}}).} } \value{ A vector of the same type as \code{x}. } \description{ This provides a common interface to extracting and modifying observations for all vector types, regardless of dimensionality. It is an analog to \code{[} that matches \code{\link[=vec_size]{vec_size()}} instead of \code{length()}. } \section{Genericity}{ Support for S3 objects depends on whether the object implements a \code{\link[=vec_proxy]{vec_proxy()}} method. \itemize{ \item When a \code{vec_proxy()} method exists, the proxy is sliced and \code{vec_restore()} is called on the result. \item Otherwise \code{vec_slice()} falls back to the base generic \code{[}. } Note that S3 lists are treated as scalars by default, and will cause an error if they don't implement a \code{\link[=vec_proxy]{vec_proxy()}} method. } \section{Differences with base R subsetting}{ \itemize{ \item \code{vec_slice()} only slices along one dimension. For two-dimensional types, the first dimension is subsetted. \item \code{vec_slice()} preserves attributes by default. \item \verb{vec_slice<-()} is type-stable and always returns the same type as the LHS. } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies}{ \itemize{ \item \code{base::`[`} } If a non-data-frame vector class doesn't have a \code{\link[=vec_proxy]{vec_proxy()}} method, the vector is sliced with \code{[} instead. } } \examples{ x <- sample(10) x vec_slice(x, 1:3) # You can assign with the infix variant: vec_slice(x, 2) <- 100 x # Or with the regular variant that doesn't modify the original input: y <- vec_assign(x, 3, 500) y x # Slicing objects of higher dimension: vec_slice(mtcars, 1:3) # Type stability -------------------------------------------------- # The assign variant is type stable. It always returns the same # type as the input. x <- 1:5 vec_slice(x, 2) <- 20.0 # `x` is still an integer vector because the RHS was cast to the # type of the LHS: vec_ptype(x) # Compare to `[<-`: x[2] <- 20.0 vec_ptype(x) # Note that the types must be coercible for the cast to happen. # For instance, you can cast a double vector of whole numbers to an # integer vector: vec_cast(1, integer()) # But not fractional doubles: try(vec_cast(1.5, integer())) # For this reason you can't assign fractional values in an integer # vector: x <- 1:3 try(vec_slice(x, 2) <- 1.5) } \keyword{internal} vctrs/man/s3_register.Rd0000644000176200001440000000507414315060307014713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/register-s3.R \name{s3_register} \alias{s3_register} \title{Register a method for a suggested dependency} \arguments{ \item{generic}{Name of the generic in the form \code{pkg::generic}.} \item{class}{Name of the class} \item{method}{Optionally, the implementation of the method. By default, this will be found by looking for a function called \code{generic.class} in the package environment. Note that providing \code{method} can be dangerous if you use devtools. When the namespace of the method is reloaded by \code{devtools::load_all()}, the function will keep inheriting from the old namespace. This might cause crashes because of dangling \code{.Call()} pointers.} } \description{ Generally, the recommend way to register an S3 method is to use the \code{S3Method()} namespace directive (often generated automatically by the \verb{@export} roxygen2 tag). However, this technique requires that the generic be in an imported package, and sometimes you want to suggest a package, and only provide a method when that package is loaded. \code{s3_register()} can be called from your package's \code{.onLoad()} to dynamically register a method only if the generic's package is loaded. } \details{ For R 3.5.0 and later, \code{s3_register()} is also useful when demonstrating class creation in a vignette, since method lookup no longer always involves the lexical scope. For R 3.6.0 and later, you can achieve a similar effect by using "delayed method registration", i.e. placing the following in your \code{NAMESPACE} file: \if{html}{\out{
}}\preformatted{if (getRversion() >= "3.6.0") \{ S3method(package::generic, class) \} }\if{html}{\out{
}} } \section{Usage in other packages}{ To avoid taking a dependency on vctrs, you copy the source of \href{https://github.com/r-lib/vctrs/blob/main/R/register-s3.R}{\code{s3_register()}} into your own package. It is licensed under the permissive \href{https://choosealicense.com/licenses/unlicense/}{unlicense} to make it crystal clear that we're happy for you to do this. There's no need to include the license or even credit us when using this function. } \examples{ # A typical use case is to dynamically register tibble/pillar methods # for your class. That way you avoid creating a hard dependency on packages # that are not essential, while still providing finer control over # printing when they are used. .onLoad <- function(...) { s3_register("pillar::pillar_shaft", "vctrs_vctr") s3_register("tibble::type_sum", "vctrs_vctr") } } \keyword{internal} vctrs/man/partial_factor.Rd0000644000176200001440000000137014376223321015453 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/partial-factor.R \name{partial_factor} \alias{partial_factor} \title{Partially specify a factor} \usage{ partial_factor(levels = character()) } \arguments{ \item{levels}{Character vector of labels.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This special class can be passed as a \code{ptype} in order to specify that the result should be a factor that contains at least the specified levels. } \examples{ pf <- partial_factor(levels = c("x", "y")) pf vec_ptype_common(factor("v"), factor("w"), .ptype = pf) } \keyword{internal} vctrs/man/new_vctr.Rd0000644000176200001440000000704214401377400014307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-vctr.R \name{new_vctr} \alias{new_vctr} \alias{vctr} \title{vctr (vector) S3 class} \usage{ new_vctr(.data, ..., class = character(), inherit_base_type = NULL) } \arguments{ \item{.data}{Foundation of class. Must be a vector} \item{...}{Name-value pairs defining attributes} \item{class}{Name of subclass.} \item{inherit_base_type}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A single logical, or \code{NULL}. Does this class extend the base type of \code{.data}? i.e. does the resulting object extend the behaviour of the underlying type? Defaults to \code{FALSE} for all types except lists, which are required to inherit from the base type.} } \description{ This abstract class provides a set of useful default methods that makes it considerably easier to get started with a new S3 vector class. See \code{vignette("s3-vector")} to learn how to use it to create your own S3 vector classes. } \details{ List vctrs are special cases. When created through \code{new_vctr()}, the resulting list vctr should always be recognized as a list by \code{obj_is_list()}. Because of this, if \code{inherit_base_type} is \code{FALSE} an error is thrown. } \section{Base methods}{ The vctr class provides methods for many base generics using a smaller set of generics defined by this package. Generally, you should think carefully before overriding any of the methods that vctrs implements for you as they've been carefully planned to be internally consistent. \itemize{ \item \code{[[} and \code{[} use \code{NextMethod()} dispatch to the underlying base function, then restore attributes with \code{vec_restore()}. \code{rep()} and \verb{length<-} work similarly. \item \verb{[[<-} and \verb{[<-} cast \code{value} to same type as \code{x}, then call \code{NextMethod()}. \item \code{as.logical()}, \code{as.integer()}, \code{as.numeric()}, \code{as.character()}, \code{as.Date()} and \code{as.POSIXct()} methods call \code{vec_cast()}. The \code{as.list()} method calls \code{[[} repeatedly, and the \code{as.data.frame()} method uses a standard technique to wrap a vector in a data frame. \item \code{as.factor()}, \code{as.ordered()} and \code{as.difftime()} are not generic functions in base R, but have been reimplemented as generics in the \code{generics} package. \code{vctrs} extends these and calls \code{vec_cast()}. To inherit this behaviour in a package, import and re-export the generic of interest from \code{generics}. \item \code{==}, \code{!=}, \code{unique()}, \code{anyDuplicated()}, and \code{is.na()} use \code{\link[=vec_proxy]{vec_proxy()}}. \item \code{<}, \code{<=}, \code{>=}, \code{>}, \code{min()}, \code{max()}, \code{range()}, \code{median()}, \code{quantile()}, and \code{xtfrm()} methods use \code{\link[=vec_proxy_compare]{vec_proxy_compare()}}. \item \code{+}, \code{-}, \code{/}, \code{*}, \code{^}, \code{\%\%}, \code{\%/\%}, \code{!}, \code{&}, and \code{|} operators use \code{\link[=vec_arith]{vec_arith()}}. \item Mathematical operations including the Summary group generics (\code{prod()}, \code{sum()}, \code{any()}, \code{all()}), the Math group generics (\code{abs()}, \code{sign()}, etc), \code{mean()}, \code{is.nan()}, \code{is.finite()}, and \code{is.infinite()} use \code{\link[=vec_math]{vec_math()}}. \item \code{dims()}, \verb{dims<-}, \code{dimnames()}, \verb{dimnames<-}, \code{levels()}, and \verb{levels<-} methods throw errors. } } \keyword{internal} vctrs/man/vec_names.Rd0000644000176200001440000000675414362266120014433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_names} \alias{vec_names} \alias{vec_names2} \alias{vec_set_names} \title{Get or set the names of a vector} \usage{ vec_names2( x, ..., repair = c("minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), quiet = FALSE ) vec_names(x) vec_set_names(x, names) } \arguments{ \item{x}{A vector with names} \item{...}{These dots are for future extensions and must be empty.} \item{repair}{Either a string or a function. If a string, it must be one of \code{"check_unique"}, \code{"minimal"}, \code{"unique"}, \code{"universal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. If a function, it is invoked with a vector of minimal names and must return minimal names, otherwise an error is thrown. \itemize{ \item Minimal names are never \code{NULL} or \code{NA}. When an element doesn't have a name, its minimal name is an empty string. \item Unique names are unique. A suffix is appended to duplicate names to make them unique. \item Universal names are unique and syntactic, meaning that you can safely use the names as variables without causing a syntax error. } The \code{"check_unique"} option doesn't perform any name repair. Instead, an error is raised if the names don't suit the \code{"unique"} criteria. The options \code{"unique_quiet"} and \code{"universal_quiet"} are here to help the user who calls this function indirectly, via another function which exposes \code{repair} but not \code{quiet}. Specifying \code{repair = "unique_quiet"} is like specifying \verb{repair = "unique", quiet = TRUE}. When the \code{"*_quiet"} options are used, any setting of \code{quiet} is silently overridden.} \item{quiet}{By default, the user is informed of any renaming caused by repairing the names. This only concerns unique and universal repairing. Set \code{quiet} to \code{TRUE} to silence the messages. Users can silence the name repair messages by setting the \code{"rlib_name_repair_verbosity"} global option to \code{"quiet"}.} \item{names}{A character vector, or \code{NULL}.} } \value{ \code{vec_names2()} returns the names of \code{x}, repaired. \code{vec_names()} returns the names of \code{x} or \code{NULL} if unnamed. \code{vec_set_names()} returns \code{x} with names updated. } \description{ These functions work like \code{\link[rlang:names2]{rlang::names2()}}, \code{\link[=names]{names()}} and \code{\link[=names<-]{names<-()}}, except that they return or modify the the rowwise names of the vector. These are: \itemize{ \item The usual \code{names()} for atomic vectors and lists \item The row names for data frames and matrices \item The names of the first dimension for arrays Rowwise names are size consistent: the length of the names always equals \code{\link[=vec_size]{vec_size()}}. } \code{vec_names2()} returns the repaired names from a vector, even if it is unnamed. See \code{\link[=vec_as_names]{vec_as_names()}} for details on name repair. \code{vec_names()} is a bare-bones version that returns \code{NULL} if the vector is unnamed. \code{vec_set_names()} sets the names or removes them. } \examples{ vec_names2(1:3) vec_names2(1:3, repair = "unique") vec_names2(c(a = 1, b = 2)) # `vec_names()` consistently returns the rowwise names of data frames and arrays: vec_names(data.frame(a = 1, b = 2)) names(data.frame(a = 1, b = 2)) vec_names(mtcars) names(mtcars) vec_names(Titanic) names(Titanic) vec_set_names(1:3, letters[1:3]) vec_set_names(data.frame(a = 1:3), letters[1:3]) } vctrs/man/vec_as_location.Rd0000644000176200001440000001172714315060307015614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/subscript-loc.R \name{vec_as_location} \alias{vec_as_location} \alias{num_as_location} \alias{vec_as_location2} \alias{num_as_location2} \title{Create a vector of locations} \usage{ vec_as_location( i, n, names = NULL, ..., missing = c("propagate", "remove", "error"), arg = caller_arg(i), call = caller_env() ) num_as_location( i, n, ..., missing = c("propagate", "remove", "error"), negative = c("invert", "error", "ignore"), oob = c("error", "remove", "extend"), zero = c("remove", "error", "ignore"), arg = caller_arg(i), call = caller_env() ) vec_as_location2( i, n, names = NULL, ..., missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env() ) num_as_location2( i, n, ..., negative = c("error", "ignore"), missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env() ) } \arguments{ \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{n}{A single integer representing the total size of the object that \code{i} is meant to index into.} \item{names}{If \code{i} is a character vector, \code{names} should be a character vector that \code{i} will be matched against to construct the index. Otherwise, not used. The default value of \code{NULL} will result in an error if \code{i} is a character vector.} \item{...}{These dots are for future extensions and must be empty.} \item{missing}{How should missing \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"propagate"} returns them as is. \item \code{"remove"} removes them. } By default, vector subscripts propagate missing values but scalar subscripts error on them. Propagated missing values can't be combined with negative indices when \code{negative = "invert"}, because they can't be meaningfully inverted.} \item{arg}{The argument name to be displayed in error messages.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{negative}{How should negative \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"ignore"} returns them as is. \item \code{"invert"} returns the positive location generated by inverting the negative location. When inverting, positive and negative locations can't be mixed. This option is only applicable for \code{num_as_location()}. }} \item{oob}{How should out-of-bounds \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"remove"} removes both positive and negative out-of-bounds locations. \item \code{"extend"} allows positive out-of-bounds locations if they directly follow the end of a vector. This can be used to implement extendable vectors, like \code{letters[1:30]}. }} \item{zero}{How should zero \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"remove"} removes them. \item \code{"ignore"} returns them as is. }} } \value{ \itemize{ \item \code{vec_as_location()} and \code{num_as_location()} return an integer vector that can be used as an index in a subsetting operation. \item \code{vec_as_location2()} and \code{num_as_location2()} return an integer of size 1 that can be used a scalar index for extracting an element. } } \description{ These helpers provide a means of standardizing common indexing methods such as integer, character or logical indexing. \itemize{ \item \code{vec_as_location()} accepts integer, character, or logical vectors of any size. The output is always an integer vector that is suitable for subsetting with \code{[} or \code{\link[=vec_slice]{vec_slice()}}. It might be a different size than the input because negative selections are transformed to positive ones and logical vectors are transformed to a vector of indices for the \code{TRUE} locations. \item \code{vec_as_location2()} accepts a single number or string. It returns a single location as a integer vector of size 1. This is suitable for extracting with \code{[[}. \item \code{num_as_location()} and \code{num_as_location2()} are specialized variants that have extra options for numeric indices. } } \examples{ x <- array(1:6, c(2, 3)) dimnames(x) <- list(c("r1", "r2"), c("c1", "c2", "c3")) # The most common use case validates row indices vec_as_location(1, vec_size(x)) # Negative indices can be used to index from the back vec_as_location(-1, vec_size(x)) # Character vectors can be used if `names` are provided vec_as_location("r2", vec_size(x), rownames(x)) # You can also construct an index for dimensions other than the first vec_as_location(c("c2", "c1"), ncol(x), colnames(x)) } \keyword{internal} vctrs/man/order-radix.Rd0000644000176200001440000001350514315060307014700 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order.R \name{order-radix} \alias{order-radix} \alias{vec_order_radix} \alias{vec_sort_radix} \title{Order and sort vectors} \usage{ vec_order_radix( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) vec_sort_radix( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{direction}{Direction to sort in. \itemize{ \item A single \code{"asc"} or \code{"desc"} for ascending or descending order respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"asc"} or \code{"desc"}, specifying the direction for each column. }} \item{na_value}{Ordering of missing values. \itemize{ \item A single \code{"largest"} or \code{"smallest"} for ordering missing values as the largest or smallest values respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"largest"} or \code{"smallest"}, specifying how missing values should be ordered within each column. }} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} } \value{ \itemize{ \item \code{vec_order_radix()} an integer vector the same size as \code{x}. \item \code{vec_sort_radix()} a vector with the same size and type as \code{x}. } } \description{ \code{vec_order_radix()} computes the order of \code{x}. For data frames, the order is computed along the rows by computing the order of the first column and using subsequent columns to break ties. \code{vec_sort_radix()} sorts \code{x}. It is equivalent to \code{vec_slice(x, vec_order_radix(x))}. } \section{Differences with \code{order()}}{ Unlike the \code{na.last} argument of \code{order()} which decides the positions of missing values irrespective of the \code{decreasing} argument, the \code{na_value} argument of \code{vec_order_radix()} interacts with \code{direction}. If missing values are considered the largest value, they will appear last in ascending order, and first in descending order. Character vectors are ordered in the C-locale. This is different from \code{base::order()}, which respects \code{base::Sys.setlocale()}. Sorting in a consistent locale can produce more reproducible results between different sessions and platforms, however, the results of sorting in the C-locale can be surprising. For example, capital letters sort before lower case letters. Sorting \code{c("b", "C", "a")} with \code{vec_sort_radix()} will return \code{c("C", "a", "b")}, but with \code{base::order()} will return \code{c("a", "b", "C")} unless \code{base::order(method = "radix")} is explicitly set, which also uses the C-locale. While sorting with the C-locale can be useful for algorithmic efficiency, in many real world uses it can be the cause of data analysis mistakes. To balance these trade-offs, you can supply a \code{chr_proxy_collate} function to transform character vectors into an alternative representation that orders in the C-locale in a less surprising way. For example, providing \code{\link[base:chartr]{base::tolower()}} as a transform will order the original vector in a case-insensitive manner. Locale-aware ordering can be achieved by providing \code{stringi::stri_sort_key()} as a transform, setting the collation options as appropriate for your locale. Character vectors are always translated to UTF-8 before ordering, and before any transform is applied by \code{chr_proxy_collate}. For complex vectors, if either the real or imaginary component is \code{NA} or \code{NaN}, then the entire observation is considered missing. } \section{Dependencies of \code{vec_order_radix()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} } } \section{Dependencies of \code{vec_sort_radix()}}{ \itemize{ \item \code{\link[=vec_order_radix]{vec_order_radix()}} \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ if (FALSE) { x <- round(sample(runif(5), 9, replace = TRUE), 3) x <- c(x, NA) vec_order_radix(x) vec_sort_radix(x) vec_sort_radix(x, direction = "desc") # Can also handle data frames df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) vec_order_radix(df) vec_sort_radix(df) vec_sort_radix(df, direction = "desc") # For data frames, `direction` and `na_value` are allowed to be vectors # with length equal to the number of columns in the data frame vec_sort_radix( df, direction = c("desc", "asc"), na_value = c("largest", "smallest") ) # Character vectors are ordered in the C locale, which orders capital letters # below lowercase ones y <- c("B", "A", "a") vec_sort_radix(y) # To order in a case-insensitive manner, provide a `chr_proxy_collate` # function that transforms the strings to all lowercase vec_sort_radix(y, chr_proxy_collate = tolower) } } \keyword{internal} vctrs/man/table.Rd0000644000176200001440000000044614276722575013571 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-table.R \name{table} \alias{table} \title{Table S3 class} \description{ These functions help the base table class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/obj_is_list.Rd0000644000176200001440000000445414401377400014764 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{obj_is_list} \alias{obj_is_list} \alias{obj_check_list} \alias{list_all_vectors} \alias{list_check_all_vectors} \alias{list_all_size} \alias{list_check_all_size} \title{List checks} \usage{ obj_is_list(x) obj_check_list(x, ..., arg = caller_arg(x), call = caller_env()) list_all_vectors(x) list_check_all_vectors(x, ..., arg = caller_arg(x), call = caller_env()) list_all_size(x, size) list_check_all_size(x, size, ..., arg = caller_arg(x), call = caller_env()) } \arguments{ \item{x}{For \verb{vec_*()} functions, an object. For \verb{list_*()} functions, a list.} \item{...}{These dots are for future extensions and must be empty.} \item{arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{size}{The size to check each element for.} } \description{ \itemize{ \item \code{obj_is_list()} tests if \code{x} is considered a list in the vctrs sense. It returns \code{TRUE} if: \itemize{ \item \code{x} is a bare list with no class. \item \code{x} is a list explicitly inheriting from \code{"list"}. } \item \code{list_all_vectors()} takes a list and returns \code{TRUE} if all elements of that list are vectors. \item \code{list_all_size()} takes a list and returns \code{TRUE} if all elements of that list have the same \code{size}. \item \code{obj_check_list()}, \code{list_check_all_vectors()}, and \code{list_check_all_size()} use the above functions, but throw a standardized and informative error if they return \code{FALSE}. } } \details{ Notably, data frames and S3 record style classes like POSIXlt are not considered lists. } \examples{ obj_is_list(list()) obj_is_list(list_of(1)) obj_is_list(data.frame()) list_all_vectors(list(1, mtcars)) list_all_vectors(list(1, environment())) list_all_size(list(1:2, 2:3), 2) list_all_size(list(1:2, 2:4), 2) # `list_`-prefixed functions assume a list: try(list_all_vectors(environment())) } \seealso{ \code{\link[=list_sizes]{list_sizes()}} } vctrs/man/vec_proxy.Rd0000644000176200001440000001357414315060307014504 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/proxy.R \name{vec_proxy} \alias{vec_proxy} \alias{vec_restore} \title{Proxy and restore} \usage{ vec_proxy(x, ...) vec_restore(x, to, ...) } \arguments{ \item{x}{A vector.} \item{...}{These dots are for future extensions and must be empty.} \item{to}{The original vector to restore to.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_proxy()} returns the data structure containing the values of a vector. This data structure is usually the vector itself. In this case the proxy is the \link[base:identity]{identity function}, which is the default \code{vec_proxy()} method. Only experts should implement special \code{vec_proxy()} methods, for these cases: \itemize{ \item A vector has vectorised attributes, i.e. metadata for each element of the vector. These \emph{record types} are implemented in vctrs by returning a data frame in the proxy method. If you're starting your class from scratch, consider deriving from the \code{\link[=new_rcrd]{rcrd}} class. It implements the appropriate data frame proxy and is generally the preferred way to create a record class. \item When you're implementing a vector on top of a non-vector type, like an environment or an S4 object. This is currently only partially supported. \item S3 lists are considered scalars by default. This is the safe choice for list objects such as returned by \code{stats::lm()}. To declare that your S3 list class is a vector, you normally add \code{"list"} to the right of your class vector. Explicit inheritance from list is generally the preferred way to declare an S3 list in R, for instance it makes it possible to dispatch on \code{generic.list} S3 methods. If you can't modify your class vector, you can implement an identity proxy (i.e. a proxy method that just returns its input) to let vctrs know this is a vector list and not a scalar. } \code{vec_restore()} is the inverse operation of \code{vec_proxy()}. It should only be called on vector proxies. \itemize{ \item It undoes the transformations of \code{vec_proxy()}. \item It restores attributes and classes. These may be lost when the memory values are manipulated. For example slicing a subset of a vector's proxy causes a new proxy to be allocated. } By default vctrs restores all attributes and classes automatically. You only need to implement a \code{vec_restore()} method if your class has attributes that depend on the data. } \section{Proxying}{ You should only implement \code{vec_proxy()} when your type is designed around a non-vector class. I.e. anything that is not either: \itemize{ \item An atomic vector \item A bare list \item A data frame } In this case, implement \code{vec_proxy()} to return such a vector class. The vctrs operations such as \code{\link[=vec_slice]{vec_slice()}} are applied on the proxy and \code{vec_restore()} is called to restore the original representation of your type. The most common case where you need to implement \code{vec_proxy()} is for S3 lists. In vctrs, S3 lists are treated as scalars by default. This way we don't treat objects like model fits as vectors. To prevent vctrs from treating your S3 list as a scalar, unclass it in the \code{vec_proxy()} method. For instance, here is the definition for \code{list_of}: \if{html}{\out{
}}\preformatted{vec_proxy.vctrs_list_of <- function(x) \{ unclass(x) \} }\if{html}{\out{
}} Another case where you need to implement a proxy is \link[=new_rcrd]{record types}. Record types should return a data frame, as in the \code{POSIXlt} method: \if{html}{\out{
}}\preformatted{vec_proxy.POSIXlt <- function(x) \{ new_data_frame(unclass(x)) \} }\if{html}{\out{
}} Note that you don't need to implement \code{vec_proxy()} when your class inherits from \code{vctrs_vctr} or \code{vctrs_rcrd}. } \section{Restoring}{ A restore is a specialised type of cast, primarily used in conjunction with \code{NextMethod()} or a C-level function that works on the underlying data structure. A \code{vec_restore()} method can make the following assumptions about \code{x}: \itemize{ \item It has the correct type. \item It has the correct names. \item It has the correct \code{dim} and \code{dimnames} attributes. \item It is unclassed. This way you can call vctrs generics with \code{x} without triggering an infinite loop of restoration. } The length may be different (for example after \code{\link[=vec_slice]{vec_slice()}} has been called), and all other attributes may have been lost. The method should restore all attributes so that after restoration, \code{vec_restore(vec_data(x), x)} yields \code{x}. To understand the difference between \code{vec_cast()} and \code{vec_restore()} think about factors: it doesn't make sense to cast an integer to a factor, but if \code{NextMethod()} or another low-level function has stripped attributes, you still need to be able to restore them. The default method copies across all attributes so you only need to provide your own method if your attributes require special care (i.e. they are dependent on the data in some way). When implementing your own method, bear in mind that many R users add attributes to track additional metadata that is important to them, so you should preserve any attributes that don't require special handling for your class. } \section{Dependencies}{ \itemize{ \item \code{x} must be a vector in the vctrs sense (see \code{\link[=vec_is]{vec_is()}}) \item By default the underlying data is returned as is (identity proxy) } All vector classes have a proxy, even those who don't implement any vctrs methods. The exception is S3 lists that don't inherit from \code{"list"} explicitly. These might have to implement an identity proxy for compatibility with vctrs (see discussion above). } \keyword{internal} vctrs/man/vec_recycle.Rd0000644000176200001440000000402214511524374014744 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recycle.R \name{vec_recycle} \alias{vec_recycle} \alias{vec_recycle_common} \title{Vector recycling} \usage{ vec_recycle(x, size, ..., x_arg = "", call = caller_env()) vec_recycle_common(..., .size = NULL, .arg = "", .call = caller_env()) } \arguments{ \item{x}{A vector to recycle.} \item{size}{Desired output size.} \item{...}{Depending on the function used: \itemize{ \item For \code{vec_recycle_common()}, vectors to recycle. \item For \code{vec_recycle()}, these dots should be empty. }} \item{x_arg}{Argument name for \code{x}. These are used in error messages to inform the user about which argument has an incompatible size.} \item{call, .call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.size}{Desired output size. If omitted, will use the common size from \code{\link[=vec_size_common]{vec_size_common()}}.} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} } \description{ \code{vec_recycle(x, size)} recycles a single vector to a given size. \code{vec_recycle_common(...)} recycles multiple vectors to their common size. All functions obey the \link[=theory-faq-recycling]{vctrs recycling rules}, and will throw an error if recycling is not possible. See \code{\link[=vec_size]{vec_size()}} for the precise definition of size. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ # Inputs with 1 observation are recycled vec_recycle_common(1:5, 5) vec_recycle_common(integer(), 5) \dontrun{ vec_recycle_common(1:5, 1:2) } # Data frames and matrices are recycled along their rows vec_recycle_common(data.frame(x = 1), 1:5) vec_recycle_common(array(1:2, c(1, 2)), 1:5) vec_recycle_common(array(1:3, c(1, 3, 1)), 1:5) } vctrs/man/vec_detect_complete.Rd0000644000176200001440000000267214315060307016460 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/complete.R \name{vec_detect_complete} \alias{vec_detect_complete} \title{Complete} \usage{ vec_detect_complete(x) } \arguments{ \item{x}{A vector} } \value{ A logical vector with the same size as \code{x}. } \description{ \code{vec_detect_complete()} detects "complete" observations. An observation is considered complete if it is non-missing. For most vectors, this implies that \code{vec_detect_complete(x) == !vec_detect_missing(x)}. For data frames and matrices, a row is only considered complete if all elements of that row are non-missing. To compare, \code{!vec_detect_missing(x)} detects rows that are partially complete (they have at least one non-missing value). } \details{ A \link[=new_rcrd]{record} type vector is similar to a data frame, and is only considered complete if all fields are non-missing. } \examples{ x <- c(1, 2, NA, 4, NA) # For most vectors, this is identical to `!vec_detect_missing(x)` vec_detect_complete(x) !vec_detect_missing(x) df <- data_frame( x = x, y = c("a", "b", NA, "d", "e") ) # This returns `TRUE` where all elements of the row are non-missing. # Compare that with `!vec_detect_missing()`, which detects rows that have at # least one non-missing value. df2 <- df df2$all_non_missing <- vec_detect_complete(df) df2$any_non_missing <- !vec_detect_missing(df) df2 } \seealso{ \code{\link[stats:complete.cases]{stats::complete.cases()}} } vctrs/man/new_factor.Rd0000644000176200001440000000257314276722575014634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-factor.R \name{new_factor} \alias{new_factor} \alias{new_ordered} \alias{vec_ptype2.factor} \alias{vec_ptype2.ordered} \alias{vec_cast.factor} \alias{vec_cast.ordered} \title{Factor/ordered factor S3 class} \usage{ new_factor(x = integer(), levels = character(), ..., class = character()) new_ordered(x = integer(), levels = character()) \method{vec_ptype2}{factor}(x, y, ...) \method{vec_ptype2}{ordered}(x, y, ...) \method{vec_cast}{factor}(x, to, ...) \method{vec_cast}{ordered}(x, to, ...) } \arguments{ \item{x}{Integer values which index in to \code{levels}.} \item{levels}{Character vector of labels.} \item{..., class}{Used to for subclasses.} } \description{ A \link{factor} is an integer with attribute \code{levels}, a character vector. There should be one level for each integer between 1 and \code{max(x)}. An \link{ordered} factor has the same properties as a factor, but possesses an extra class that marks levels as having a total ordering. } \details{ These functions help the base factor and ordered factor classes fit in to the vctrs type system by providing constructors, coercion functions, and casting functions. \code{new_factor()} and \code{new_ordered()} are low-level constructors - they only check that types, but not values, are valid, so are for expert use only. } \keyword{internal} vctrs/man/vec_split.Rd0000644000176200001440000000240614276722575014470 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/split.R \name{vec_split} \alias{vec_split} \title{Split a vector into groups} \usage{ vec_split(x, by) } \arguments{ \item{x}{Vector to divide into groups.} \item{by}{Vector whose unique values defines the groups.} } \value{ A data frame with two columns and size equal to \code{vec_size(vec_unique(by))}. The \code{key} column has the same type as \code{by}, and the \code{val} column is a list containing elements of type \code{vec_ptype(x)}. Note for complex types, the default \code{data.frame} print method will be suboptimal, and you will want to coerce into a tibble to better understand the output. } \description{ This is a generalisation of \code{\link[=split]{split()}} that can split by any type of vector, not just factors. Instead of returning the keys in the character names, the are returned in a separate parallel vector. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_group_loc]{vec_group_loc()}} \item \code{\link[=vec_chop]{vec_chop()}} } } \examples{ vec_split(mtcars$cyl, mtcars$vs) vec_split(mtcars$cyl, mtcars[c("vs", "am")]) if (require("tibble")) { as_tibble(vec_split(mtcars$cyl, mtcars[c("vs", "am")])) as_tibble(vec_split(mtcars, mtcars[c("vs", "am")])) } } vctrs/man/int64.Rd0000644000176200001440000000134413532250523013423 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-integer64.R \name{vec_ptype_full.integer64} \alias{vec_ptype_full.integer64} \alias{vec_ptype_abbr.integer64} \alias{vec_ptype2.integer64} \alias{vec_cast.integer64} \title{64 bit integers} \usage{ \method{vec_ptype_full}{integer64}(x, ...) \method{vec_ptype_abbr}{integer64}(x, ...) \method{vec_ptype2}{integer64}(x, y, ...) \method{vec_cast}{integer64}(x, to, ...) } \description{ A \code{integer64} is a 64 bits integer vector, implemented in the \code{bit64} package. } \details{ These functions help the \code{integer64} class from \code{bit64} in to the vctrs type system by providing coercion functions and casting functions. } \keyword{internal} vctrs/man/vec_locate_sorted_groups.Rd0000644000176200001440000000675414341667017017565 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order.R \name{vec_locate_sorted_groups} \alias{vec_locate_sorted_groups} \title{Locate sorted groups} \usage{ vec_locate_sorted_groups( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{direction}{Direction to sort in. \itemize{ \item A single \code{"asc"} or \code{"desc"} for ascending or descending order respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"asc"} or \code{"desc"}, specifying the direction for each column. }} \item{na_value}{Ordering of missing values. \itemize{ \item A single \code{"largest"} or \code{"smallest"} for ordering missing values as the largest or smallest values respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"largest"} or \code{"smallest"}, specifying how missing values should be ordered within each column. }} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} } \value{ A two column data frame with size equal to \code{vec_size(vec_unique(x))}. \itemize{ \item A \code{key} column of type \code{vec_ptype(x)}. \item A \code{loc} column of type list, with elements of type integer. } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_locate_sorted_groups()} returns a data frame containing a \code{key} column with sorted unique groups, and a \code{loc} column with the locations of each group in \code{x}. It is similar to \code{\link[=vec_group_loc]{vec_group_loc()}}, except the groups are returned sorted rather than by first appearance. } \details{ \code{vec_locate_sorted_groups(x)} is equivalent to, but faster than: \if{html}{\out{
}}\preformatted{info <- vec_group_loc(x) vec_slice(info, vec_order(info$key)) }\if{html}{\out{
}} } \section{Dependencies of \code{vec_locate_sorted_groups()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} } } \examples{ df <- data.frame( g = sample(2, 10, replace = TRUE), x = c(NA, sample(5, 9, replace = TRUE)) ) # `vec_locate_sorted_groups()` is similar to `vec_group_loc()`, except keys # are returned ordered rather than by first appearance. vec_locate_sorted_groups(df) vec_group_loc(df) } \keyword{internal} vctrs/man/vec_expand_grid.Rd0000644000176200001440000000462514362266120015607 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/expand.R \name{vec_expand_grid} \alias{vec_expand_grid} \title{Create a data frame from all combinations of the inputs} \usage{ vec_expand_grid( ..., .vary = "slowest", .name_repair = "check_unique", .error_call = current_env() ) } \arguments{ \item{...}{Name-value pairs. The name will become the column name in the resulting data frame.} \item{.vary}{One of: \itemize{ \item \code{"slowest"} to vary the first column slowest. This produces sorted output and is generally the most useful. \item \code{"fastest"} to vary the first column fastest. This matches the behavior of \code{\link[=expand.grid]{expand.grid()}}. }} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"}, \code{"minimal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A data frame with as many columns as there are inputs in \code{...} and as many rows as the \code{\link[=prod]{prod()}} of the sizes of the inputs. } \description{ \code{vec_expand_grid()} creates a new data frame by creating a grid of all possible combinations of the input vectors. It is inspired by \code{\link[=expand.grid]{expand.grid()}}. Compared with \code{expand.grid()}, it: \itemize{ \item Produces sorted output by default by varying the first column the slowest, rather than the fastest. Control this with \code{.vary}. \item Never converts strings to factors. \item Does not add additional attributes. \item Drops \code{NULL} inputs. \item Can expand any vector type, including data frames and \link[=new_rcrd]{records}. } } \details{ If any input is empty (i.e. size 0), then the result will have 0 rows. If no inputs are provided, the result is a 1 row data frame with 0 columns. This is consistent with the fact that \code{prod()} with no inputs returns \code{1}. } \examples{ vec_expand_grid(x = 1:2, y = 1:3) # Use `.vary` to match `expand.grid()`: vec_expand_grid(x = 1:2, y = 1:3, .vary = "fastest") # Can also expand data frames vec_expand_grid( x = data_frame(a = 1:2, b = 3:4), y = 1:4 ) } vctrs/man/vec_as_subscript.Rd0000644000176200001440000000435014512002264016011 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/subscript.R \name{vec_as_subscript} \alias{vec_as_subscript} \alias{vec_as_subscript2} \title{Convert to a base subscript type} \usage{ vec_as_subscript( i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env() ) vec_as_subscript2( i, ..., numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env() ) } \arguments{ \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{...}{These dots are for future extensions and must be empty.} \item{logical, numeric, character}{How to handle logical, numeric, and character subscripts. If \code{"cast"} and the subscript is not one of the three base types (logical, integer or character), the subscript is \link[=vec_cast]{cast} to the relevant base type, e.g. factors are coerced to character. \code{NULL} is treated as an empty integer vector, and is thus coercible depending on the setting of \code{numeric}. Symbols are treated as character vectors and thus coercible depending on the setting of \code{character}. If \code{"error"}, the subscript type is disallowed and triggers an informative error.} \item{arg}{The argument name to be displayed in error messages.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Convert \code{i} to the base type expected by \code{\link[=vec_as_location]{vec_as_location()}} or \code{\link[=vec_as_location2]{vec_as_location2()}}. The values of the subscript type are not checked in any way (length, missingness, negative elements). } \keyword{internal} vctrs/man/vec_unique.Rd0000644000176200001440000000343414276722575014645 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_unique} \alias{vec_unique} \alias{vec_unique_loc} \alias{vec_unique_count} \title{Find and count unique values} \usage{ vec_unique(x) vec_unique_loc(x) vec_unique_count(x) } \arguments{ \item{x}{A vector (including a data frame).} } \value{ \itemize{ \item \code{vec_unique()}: a vector the same type as \code{x} containing only unique values. \item \code{vec_unique_loc()}: an integer vector, giving locations of unique values. \item \code{vec_unique_count()}: an integer vector of length 1, giving the number of unique values. } } \description{ \itemize{ \item \code{vec_unique()}: the unique values. Equivalent to \code{\link[=unique]{unique()}}. \item \code{vec_unique_loc()}: the locations of the unique values. \item \code{vec_unique_count()}: the number of unique values. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \section{Missing values}{ In most cases, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. This behaviour would be unappealing here, so these functions consider all \code{NAs} to be equal. (Similarly, all \code{NaN} are also considered to be equal.) } \examples{ x <- rpois(100, 8) vec_unique(x) vec_unique_loc(x) vec_unique_count(x) # `vec_unique()` returns values in the order that encounters them # use sort = "location" to match to the result of `vec_count()` head(vec_unique(x)) head(vec_count(x, sort = "location")) # Normally missing values are not considered to be equal NA == NA # But they are for the purposes of considering uniqueness vec_unique(c(NA, NA, NA, NA, 1, 2, 1)) } \seealso{ \link{vec_duplicate} for functions that work with the dual of unique values: duplicated values. } vctrs/man/vec_compare.Rd0000644000176200001440000000254214315060307014742 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare.R \name{vec_compare} \alias{vec_compare} \title{Compare two vectors} \usage{ vec_compare(x, y, na_equal = FALSE, .ptype = NULL) } \arguments{ \item{x, y}{Vectors with compatible types and lengths.} \item{na_equal}{Should \code{NA} values be considered equal?} \item{.ptype}{Override to optionally specify common type} } \value{ An integer vector with values -1 for \code{x < y}, 0 if \code{x == y}, and 1 if \code{x > y}. If \code{na_equal} is \code{FALSE}, the result will be \code{NA} if either \code{x} or \code{y} is \code{NA}. } \description{ Compare two vectors } \section{S3 dispatch}{ \code{vec_compare()} is not generic for performance; instead it uses \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} to create a proxy that is used in the comparison. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_recycle_common]{vec_recycle_common()}} \item \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} } } \examples{ vec_compare(c(TRUE, FALSE, NA), FALSE) vec_compare(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) vec_compare(1:10, 5) vec_compare(runif(10), 0.5) vec_compare(letters[1:10], "d") df <- data.frame(x = c(1, 1, 1, 2), y = c(0, 1, 2, 1)) vec_compare(df, data.frame(x = 1, y = 1)) } vctrs/DESCRIPTION0000644000176200001440000000331614532470452013132 0ustar liggesusersPackage: vctrs Title: Vector Helpers Version: 0.6.5 Authors@R: c( person("Hadley", "Wickham", , "hadley@posit.co", role = "aut"), person("Lionel", "Henry", , "lionel@posit.co", role = "aut"), person("Davis", "Vaughan", , "davis@posit.co", role = c("aut", "cre")), person("data.table team", role = "cph", comment = "Radix sort based on data.table's forder() and their contribution to R's order()"), person("Posit Software, PBC", role = c("cph", "fnd")) ) Description: Defines new notions of prototype and size that are used to provide tools for consistent and well-founded type-coercion and size-recycling, and are in turn connected to ideas of type- and size-stability useful for analysing function interfaces. License: MIT + file LICENSE URL: https://vctrs.r-lib.org/, https://github.com/r-lib/vctrs BugReports: https://github.com/r-lib/vctrs/issues Depends: R (>= 3.5.0) Imports: cli (>= 3.4.0), glue, lifecycle (>= 1.0.3), rlang (>= 1.1.0) Suggests: bit64, covr, crayon, dplyr (>= 0.8.5), generics, knitr, pillar (>= 1.4.4), pkgdown (>= 2.0.1), rmarkdown, testthat (>= 3.0.0), tibble (>= 3.1.3), waldo (>= 0.2.0), withr, xml2, zeallot VignetteBuilder: knitr Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 Encoding: UTF-8 Language: en-GB RoxygenNote: 7.2.3 NeedsCompilation: yes Packaged: 2023-12-01 16:27:12 UTC; davis Author: Hadley Wickham [aut], Lionel Henry [aut], Davis Vaughan [aut, cre], data.table team [cph] (Radix sort based on data.table's forder() and their contribution to R's order()), Posit Software, PBC [cph, fnd] Maintainer: Davis Vaughan Repository: CRAN Date/Publication: 2023-12-01 23:50:02 UTC vctrs/build/0000755000176200001440000000000014532404540012513 5ustar liggesusersvctrs/build/vignette.rds0000644000176200001440000000045714532404540015060 0ustar liggesusersAO0afLf>. .^@&q܈W`ھ1!dF0 9wpGp-ak!%sY82'm5eB یmj1p*jP$f9 Ze,e9<@2UNzo>Օ:s̙Öҡۡ ZO F00DqAOϼhKମ+HF{!9&A`;^sUY A and a > B res <- vec_locate_matches(x, y, condition = "<") expect_identical(res$needles, c(1L, 2L, 2L, 2L)) expect_identical(res$haystack, c(3L, 1L, 3L, 4L)) }) test_that("`chr_proxy_collate` can affect the matching process", { x <- c("a", "A") y <- c("a", "A") res <- vec_locate_matches(x, y, condition = "==") expect_identical(res$needles, 1:2) expect_identical(res$haystack, 1:2) res <- vec_locate_matches(x, y, condition = "==", chr_proxy_collate = tolower) expect_identical(res$needles, c(1L, 1L, 2L, 2L)) expect_identical(res$haystack, c(1L, 2L, 1L, 2L)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - lists test_that("lists can be matched", { x <- list(1, 2, 1, NULL) y <- list(1, 1, 3, NULL) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(1L, 2L, NA, 1L, 2L, 4L)) }) test_that("list incompleteness is detected", { res <- vec_locate_matches(list(NULL), list(NULL), incomplete = NA) expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("list ordering is by first appearance in `needles` (so non-equi joins don't make much sense)", { x <- list(3, 2, 1, NULL) y <- list(1, 3, 1, 3) res <- vec_locate_matches(x, y, condition = ">") # x[1] appears first, so it isn't greater than anything # x[2] is greater than x[1] (when x[1] is in y) # and so on... # NULL still doesn't match anything expect_identical(res$needles, c(1L, 2L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(NA, 2L, 4L, 2L, 4L, NA)) # With data frame columns containing list-columns df1 <- data_frame(col = data_frame(x = x)) df2 <- data_frame(col = data_frame(x = y)) expect_identical(vec_locate_matches(x, y, condition = ">"), res) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - data frame test_that("can match with 1 column data frames", { df1 <- data_frame(x = c(1L, 3L, 1L, 3L)) df2 <- data_frame(x = c(1L, 3L, 1L)) expect_identical( vec_locate_matches(df1, df2), vec_locate_matches(df1$x, df2$x) ) }) test_that("can match with >1 column data frames", { df1 <- data_frame(x = c(1L, 3L, 1L, 3L), y = c(1L, 4L, 1L, 2L)) df2 <- data_frame(x = c(1L, 3L, 1L), y = c(1L, 2L, 1L)) res <- vec_locate_matches(df1, df2, condition = c("==", "==")) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(1L, 3L, NA, 1L, 3L, 2L)) }) test_that("can match with df-cols of varying types", { y <- c(1L, 1L) expect_needles <- c(1L, 2L) expect_haystack <- c(NA, 1L) df1 <- data_frame(x = data_frame(x = c(2L, 1L), y = y)) df2 <- data_frame(x = data_frame(x = c(1L, 3L), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame(x = data_frame(x = c(2, 1), y = y)) df2 <- data_frame(x = data_frame(x = c(1, 3), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame(x = data_frame(x = c(TRUE, FALSE), y = y)) df2 <- data_frame(x = data_frame(x = c(FALSE, NA), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame(x = data_frame(x = c("x", "y"), y = y)) df2 <- data_frame(x = data_frame(x = c("y", "z"), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame(x = data_frame(x = complex(real = c(1, 2), imaginary = c(2, 1)), y = y)) df2 <- data_frame(x = data_frame(x = complex(real = c(2, 3), imaginary = c(1, 1)), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) }) test_that("ensure that matching works if outer runs are present (i.e. `==` comes before non-equi condition)", { df1 <- data_frame(x = c(1, 2, 1, 1), y = c(2, 2, 3, 2)) df2 <- data_frame(x = c(1, 1), y = c(2, 3)) res <- vec_locate_matches(df1, df2, condition = c("==", "<=")) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 4L, 4L)) expect_identical(res$haystack, c(1L, 2L, NA, 2L, 1L, 2L)) df1$z <- c(1L, 2L, 1L, 3L) df2$z <- c(5L, 2L) res <- vec_locate_matches(df1, df2, condition = c("==", "==", "<")) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(1L, NA, 2L, 1L)) res <- vec_locate_matches(df1, df2, condition = c("==", ">=", "<")) expect_identical(res$needles, c(1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(1L, NA, 1L, 2L, 1L)) }) test_that("df-cols propagate an NA if any columns are incomplete", { df <- data_frame(x = 1, y = data_frame(x = c(1, 1, NA), y = c(1, NA, 2))) res <- vec_locate_matches(df, df, incomplete = "compare") expect_identical(res$needles, 1:3) expect_identical(res$haystack, 1:3) res <- vec_locate_matches(df, df, incomplete = "match") expect_identical(res$needles, 1:3) expect_identical(res$haystack, 1:3) # 2nd and 3rd rows aren't fully complete res <- vec_locate_matches(df, df, incomplete = NA) expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(1L, NA, NA)) res <- vec_locate_matches(df, df, incomplete = "drop") expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) }) test_that("df-cols aren't flattened, so `condition` is applied jointly on the df-col columns", { x <- data_frame(a = 1L, b = data_frame(x = 3L, y = 4L)) y <- data_frame(a = 1L, b = data_frame(x = 2L, y = 5L)) # In particular `x$b[1,] > y$b[1,]` because `3 > 4` and that breaks the tie # before any values of the `x$b$y` column are checked res <- vec_locate_matches(x, y, condition = c("==", ">")) expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) }) test_that("must have at least 1 column to match", { expect_snapshot(error = TRUE, { vec_locate_matches(data_frame(), data_frame()) }) expect_snapshot(error = TRUE, { vec_locate_matches(data_frame(), data_frame(), error_call = call("foo")) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - rcrd test_that("rcrd types can be matched", { x <- new_rcrd(list(x = c(1L, 3L), y = c(1L, 4L))) y <- new_rcrd(list(x = c(1L, 2L), y = c(1L, 5L))) res <- vec_locate_matches(x, y, condition = "<=") expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 2L, NA)) # In particular: `(3, 4) > (2, 5)` since the first elt breaks the tie res <- vec_locate_matches(x, y, condition = ">") expect_identical(res$needles, c(1L, 2L, 2L)) expect_identical(res$haystack, c(NA, 1L, 2L)) }) test_that("rcrd type matching works with rcrd-cols", { x <- data_frame(a = c(1L, 1L), b = new_rcrd(list(x = c(1L, 3L), y = c(1L, 4L)))) y <- data_frame(a = c(1L, 1L), b = new_rcrd(list(x = c(1L, 2L), y = c(1L, 5L)))) res <- vec_locate_matches(x, y, condition = c("==", "<=")) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 2L, NA)) res <- vec_locate_matches(x, y, condition = c("==", ">")) expect_identical(res$needles, c(1L, 2L, 2L)) expect_identical(res$haystack, c(NA, 1L, 2L)) }) test_that("rcrd type incompleteness is handled correctly", { x <- new_rcrd(list(x = c(1L, NA), y = c(NA_integer_, NA_integer_))) y <- new_rcrd(list(x = c(1L, 2L, NA), y = c(NA, 5L, NA))) # When `incomplete = "compare"`, the types of incompleteness still must # match exactly to have a match. i.e. (x=1L, y=NA) doesn't match (x=NA, y=1L). # This is the same as the rule for data frames. res <- vec_locate_matches(x, y, condition = "==", incomplete = "compare") expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(1L, 3L)) res <- vec_locate_matches(x, y, condition = "==", incomplete = "match") expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(1L, 3L)) # If any field contains NA, the entire observation is incomplete. res <- vec_locate_matches(x, y, condition = "==", incomplete = NA) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(NA_integer_, NA_integer_)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - S3 test_that("S3 types with order proxies that depend on the data are combined before the proxy is taken", { # i.e. `bignum:::vec_proxy_order.bignum_biginteger()` x <- structure(c(5L, 1L), class = "foo") y <- structure(c(8L, 5L), class = "foo") local_methods( vec_proxy_order.foo = function(x, ...) { rank(unclass(x)) } ) # Can't take the order proxies separately because they are the same! expect_identical(vec_proxy_order(x), vec_proxy_order(y)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, NA)) x_df <- data_frame(a = x, b = x) y_df <- data_frame(a = y, b = y) res <- vec_locate_matches(x_df, y_df) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, NA)) }) test_that("Works with base R S3 types we support natively", { x <- new_factor(c(1L, 2L), levels = c("x", "y")) y <- new_factor(c(3L, 1L, 1L), levels = c("x", "y", "z")) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) x <- new_ordered(c(1L, 2L), levels = c("x", "y")) y <- new_ordered(c(2L, 1L, 1L), levels = c("x", "y")) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, 1L)) x <- new_date(c(1, 2)) y <- new_date(c(3, 1, 1)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) x <- new_datetime(c(1, 2)) y <- new_datetime(c(3, 1, 1)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) x <- as.POSIXlt(new_datetime(c(1, 2))) y <- as.POSIXlt(new_datetime(c(3, 1, 1))) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) }) test_that("Works with classed data frame columns", { x_col <- new_data_frame(list(a = c(1L, 2L), b = c(2, 3)), class = "foo") y_col <- new_data_frame(list(a = c(1L, 1L, 1L), b = c(2, 4, 2)), class = "foo") x <- new_data_frame(list(c = c(1L, 1L), d = x_col)) y <- new_data_frame(list(c = c(1L, 1L, 1L), d = y_col)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 3L, NA)) }) test_that("AsIs types are combined before order proxies are taken (#1557)", { x <- I(list(5, 1)) y <- I(list(8, 5, 5)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - ptype2 / casting test_that("common type of `needles` and `haystack` is taken", { x <- 1 y <- "a" expect_snapshot(error = TRUE, { vec_locate_matches(x, y) }) expect_snapshot(error = TRUE, { vec_locate_matches(x, y, needles_arg = "x", error_call = call("foo")) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - missing values test_that("integer missing values match with `==`, `>=`, and `<=` when `incomplete = 'compare'", { res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "==") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<=") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">=") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("integer missing values can match with any condition when `incomplete = 'match'`", { res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "==", incomplete = "match") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<=", incomplete = "match") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">=", incomplete = "match") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<", incomplete = "match") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">", incomplete = "match") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) }) test_that("integer missing values report all matches even with a `filter`", { res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<=", filter = "min") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<", filter = "min", incomplete = "match") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">=", filter = "max") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">", filter = "max", incomplete = "match") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) }) test_that("integer missing value matches can be limited by `multiple`", { res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<=", multiple = "first") expect_identical(res$needles, 1L) expect_identical(res$haystack, 2L) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<=", multiple = "last") expect_identical(res$needles, 1L) expect_identical(res$haystack, 4L) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<=", multiple = "any") expect_identical(res$needles, 1L) expect_identical(res$haystack, 2L) }) test_that("missing values match within columns", { df1 <- data_frame(x = c(1L, 2L, 1L), y = rep(NA_integer_, 3)) df2 <- data_frame(x = c(2L, 1L, 1L), y = c(1L, NA, NA)) res <- vec_locate_matches(df1, df2, condition = c("==", "==")) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA, 2L, 3L)) expect_identical( vec_locate_matches(df1, df2, condition = c("<=", ">=")), vec_locate_matches(df1, df2, condition = c("==", "==")) ) res <- vec_locate_matches(df1, df2, condition = c("<", ">")) expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) res <- vec_locate_matches(df1, df2, condition = c("<=", ">"), incomplete = "compare") expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) res <- vec_locate_matches(df1, df2, condition = c("<=", ">"), incomplete = "match") expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA, 2L, 3L)) }) test_that("missing values being 'match'ed hands off correctly to next column", { df1 <- data_frame(x = c(NA, NA, 1L, 2L, NA), y = c(2, 3, 0, 1, NA)) df2 <- data_frame(x = c(NA, NA, NA, 3L), y = c(2, 1, NA, 0)) res <- vec_locate_matches(df1, df2, condition = c("<", ">"), incomplete = "match") expect_identical(res$needles, c(1L, 2L, 2L, 3L, 4L, 5L)) expect_identical(res$haystack, c(2L, 1L, 2L, NA, 4L, 3L)) }) test_that("integer needles can't match NAs in the haystack", { # At the C level, 1L > NA_integer_ (INT_MIN), # but we are careful to work around this res <- vec_locate_matches(1L, c(1L, NA, 2L, NA), condition = ">=") expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) res <- vec_locate_matches(1L, c(1L, NA, 2L, NA), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("double needles can't match NAs or NaNs in the haystack", { # At the C level, our helpers assumg NA and NaN are the smallest values, # so we are careful to avoid including them with >= and > conditions res <- vec_locate_matches(1, c(1, NA, 2, NaN), condition = ">=") expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) res <- vec_locate_matches(1, c(1, NA, 2, NaN), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("NA and NaN match correctly with non-equi conditions and `nan_distinct`", { res <- vec_locate_matches(c(NA, NaN), c(1L, NA, 2L, NaN), condition = "<=", nan_distinct = TRUE) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(c(NA, NaN), c(1L, NA, 2L, NaN), condition = "<", nan_distinct = TRUE) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(NA_integer_, NA_integer_)) res <- vec_locate_matches(c(NA, NaN), c(1L, NA, 2L, NaN), condition = "<", nan_distinct = TRUE, incomplete = "match") expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(c(NA, NaN), c(1L, NA, 2L, NaN), condition = ">=", nan_distinct = FALSE) expect_identical(res$needles, c(1L, 1L, 2L, 2L)) expect_identical(res$haystack, c(2L, 4L, 2L, 4L)) res <- vec_locate_matches(c(NA, NaN), c(1L, NA, 2L, NaN), condition = ">", nan_distinct = FALSE, incomplete = "match") expect_identical(res$needles, c(1L, 1L, 2L, 2L)) expect_identical(res$haystack, c(2L, 4L, 2L, 4L)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `incomplete` test_that("can handle incomplete needles with `incomplete = `", { x <- c(1L, NA, 2L) y <- c(NA, 1L, 1L) res <- vec_locate_matches(x, y, condition = "==", incomplete = NA) expect_identical(res$needles, c(1L, 1L, 2L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA, NA)) res <- vec_locate_matches(x, y, condition = "<=", incomplete = 0L) expect_identical(res$needles, c(1L, 1L, 2L, 3L)) expect_identical(res$haystack, c(2L, 3L, 0L, NA)) res <- vec_locate_matches(x, y, condition = ">=", incomplete = -1L) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L)) expect_identical(res$haystack, c(2L, 3L, -1L, 2L, 3L)) }) test_that("can drop incomplete needle rows with `incomplete = 'drop'", { x <- c(1L, NA, 2L) y <- c(NA, 1L, 1L) res <- vec_locate_matches(x, y, condition = "==", incomplete = "drop") expect_identical(res$needles, c(1L, 1L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA)) }) test_that("if `incomplete = `, an NA in any column results in the value", { df1 <- data_frame(x = c(1L, NA, 2L, 1L, 1L), y = c(2L, 2L, NA, 1L, 1L)) df2 <- data_frame(x = c(1L, 1L, 2L), y = c(1L, 1L, NA)) res <- vec_locate_matches(df1, df2, condition = c("==", "=="), incomplete = NA) expect_identical(res$needles, c(1L, 2L, 3L, 4L, 4L, 5L, 5L)) expect_identical(res$haystack, c(NA, NA, NA, 1L, 2L, 1L, 2L)) res <- vec_locate_matches(df1, df2, condition = c(">=", ">="), incomplete = NA) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 4L, 4L, 5L, 5L)) expect_identical(res$haystack, c(1L, 2L, NA, NA, 1L, 2L, 1L, 2L)) }) test_that("`incomplete = / 'drop'` still handles NAs in future columns when an earlier column has no matches", { df1 <- data_frame(x = c(1, 1, 2, 3), y = c(1, NA, NA, 4)) df2 <- data_frame(x = c(1, 3), y = c(1, 5)) # The 2 in row 3 of df1 has no match, but the NA in the 2nd column still propagates res <- vec_locate_matches(df1, df2, incomplete = NA, no_match = -1L) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(1L, NA, NA, -1L)) res <- vec_locate_matches(df1, df2, incomplete = "drop", no_match = -1L) expect_identical(res$needles, c(1L, 4L)) expect_identical(res$haystack, c(1L, -1L)) # The 1 in row 1 and 2 of df1 have no match, but the NA in row 2 of the 2nd column propagates res <- vec_locate_matches(df1, df2, incomplete = NA, no_match = -1L, condition = ">") expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(-1L, NA, NA, 1L)) res <- vec_locate_matches(df1, df2, incomplete = "drop", no_match = -1L, condition = ">") expect_identical(res$needles, c(1L, 4L)) expect_identical(res$haystack, c(-1L, 1L)) }) test_that("`incomplete` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(NA, 1, incomplete = "error"))) (expect_error(vec_locate_matches(NA, 1, incomplete = "error", needles_arg = "foo"))) (expect_error(vec_locate_matches(NA, 1, incomplete = "error", needles_arg = "foo", error_call = call("fn")))) }) }) test_that("`incomplete` error is classed", { expect_error(vec_locate_matches(NA, 1, incomplete = "error"), class = "vctrs_error_matches_incomplete") }) test_that("`incomplete` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, incomplete = 1.5))) (expect_error(vec_locate_matches(1, 2, incomplete = c("match", "drop")))) (expect_error(vec_locate_matches(1, 2, incomplete = "x"))) # Uses internal call (expect_error(vec_locate_matches(1, 2, incomplete = "x", error_call = call("fn")))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `condition` test_that("multiple matches from a non-equi condition are returned in first appearance order", { res <- vec_locate_matches(0L, c(1L, 0L, -1L, 0L), condition = "<=") expect_identical(res$needles, rep(1L, 3)) expect_identical(res$haystack, c(1L, 2L, 4L)) # Checking equi for good measure res <- vec_locate_matches(0L, c(1L, 0L, -1L, 0L), condition = "==") expect_identical(res$needles, rep(1L, 2)) expect_identical(res$haystack, c(2L, 4L)) }) test_that("multiple matches from a non-equi condition are returned in first appearance order when the matches are in different nesting containers", { df <- data_frame(x = 0, y = 0) df2 <- data_frame(x = 2:1, y = 1:2) res <- vec_locate_matches(df, df2, condition = c("<=", "<=")) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(1L, 2L)) }) test_that("`condition` is validated", { expect_error(vec_locate_matches(1, 2, condition = 1), "`condition` must be a character vector") expect_error(vec_locate_matches(1, 2, condition = "x"), 'must only contain "==", ">", ">=", "<", or "<="') expect_error(vec_locate_matches(1, 2, condition = c("==", "==")), "must be length 1, or the same length as the number of columns of the input") }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `multiple` test_that("can get all matches", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "all") expect_identical(x$needles, c(1L, 1L, 2L, 2L)) expect_identical(x$haystack, c(1L, 3L, 2L, 4L)) }) test_that("can get first match", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "first") expect_identical(x$needles, 1:2) expect_identical(x$haystack, 1:2) }) test_that("can get last match", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "last") expect_identical(x$needles, 1:2) expect_identical(x$haystack, 3:4) }) test_that("can get any match", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "any") expect_identical(x$needles, 1:2) expect_identical(x$haystack, 1:2) }) test_that("duplicate needles match the same haystack locations", { x <- vec_locate_matches(c(1L, 3L, 1L, 3L), c(1L, 3L, 1L), multiple = "all") expect_identical(x$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(x$haystack, c(1L, 3L, 2L, 1L, 3L, 2L)) }) test_that("correctly gets all matches when they come from different nesting containers", { needles <- data_frame( a = c(1, 8), b = c(2, 9) ) haystack <- data_frame( a = c(6, 5), b = c(6, 7) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "all"), data_frame(needles = c(1L, 1L, 2L), haystack = c(1L, 2L, NA)) ) }) test_that("correctly gets first/last/any match when they come from different nesting containers", { needles <- data_frame( a = c(1, 8), b = c(2, 9) ) haystack <- data_frame( a = c(6, 5, 0), b = c(6, 7, 1) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "first"), data_frame(needles = c(1L, 2L), haystack = c(1L, NA)) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "last"), data_frame(needles = c(1L, 2L), haystack = c(2L, NA)) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "any"), data_frame(needles = c(1L, 2L), haystack = c(2L, NA)) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "first", remaining = NA_integer_), data_frame(needles = c(1L, 2L, NA, NA), haystack = c(1L, NA, 2L, 3L)) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "last", remaining = NA_integer_), data_frame(needles = c(1L, 2L, NA, NA), haystack = c(2L, NA, 1L, 3L)) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "any", remaining = NA_integer_), data_frame(needles = c(1L, 2L, NA, NA), haystack = c(2L, NA, 1L, 3L)) ) }) test_that("`multiple` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, multiple = 1.5))) (expect_error(vec_locate_matches(1, 2, multiple = c("first", "last")))) (expect_error(vec_locate_matches(1, 2, multiple = "x"))) # Uses internal error (expect_error(vec_locate_matches(1, 2, multiple = "x", error_call = call("fn")))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `multiple` (deprecated) test_that("`multiple` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error"))) (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo"))) (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo", error_call = call("fn")))) (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo", haystack_arg = "bar"))) }) }) test_that("`multiple` can warn informatively", { expect_snapshot({ (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning"))) (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo"))) (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", error_call = call("fn")))) (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", haystack_arg = "bar"))) }) }) test_that("warning falls back to 'all'", { expect_warning( result <- vec_locate_matches(c(1L, 3L, 1L, 3L), c(1L, 3L, 1L), multiple = "warning"), class = "vctrs_warning_matches_multiple" ) expect_identical( result, vec_locate_matches(c(1L, 3L, 1L, 3L), c(1L, 3L, 1L), multiple = "all") ) }) test_that("errors on multiple matches that come from different nesting containers", { df <- data_frame(x = 0, y = 0) df2 <- data_frame(x = 1:2, y = 2:1) expect_snapshot(error = TRUE, { vec_locate_matches(df, df2, condition = c("<=", "<="), multiple = "error") }) }) test_that("errors when a match from a different nesting container is processed early on", { # Row 1 has 2 matches # Row 2 has 0 matches needles <- data_frame( a = c(1, 8), b = c(2, 9) ) # Rows 1 and 2 end up in different nesting containers haystack <- data_frame( a = c(5, 6), b = c(7, 6) ) # needles[1,] records the haystack[1,] match first, which is in the 1st # value of `loc_first_match_o_haystack`, then records the haystack[3,] match # which is in the 3rd value of `loc_first_match_o_haystack` even though it # is processed 2nd (i.e. we need to use `loc` rather than `i` when detecting # multiple matches) expect_snapshot(error = TRUE, { vec_locate_matches(needles, haystack, condition = "<", multiple = "error") }) }) test_that("`multiple = 'error'` doesn't error errneously on the last observation", { expect_error(res <- vec_locate_matches(1:2, 1:2, multiple = "error"), NA) expect_identical(res$needles, 1:2) expect_identical(res$haystack, 1:2) }) test_that("`multiple = 'error' / 'warning'` throw correctly when combined with `relationship`", { x <- c(1, 2, 2) y <- c(2, 1, 2) # `multiple` error technically fires first expect_snapshot({ (expect_error(vec_locate_matches(x, y, relationship = "one-to-one", multiple = "error"))) }) # Works when warning is also requested expect_snapshot({ (expect_error(vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "error"))) }) # Both warnings are thrown if applicable expect_snapshot({ vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "warning") }) # Both warning and error are thrown if applicable expect_snapshot(error = TRUE, { vec_locate_matches(x, y, relationship = "one-to-one", multiple = "warning") }) x <- c(1, 2) y <- c(2, 1, 2) expect_snapshot({ (expect_error(vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "error"))) }) # Only `multiple` warning is applicable here expect_snapshot({ vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "warning") }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `relationship` test_that("`relationship` handles one-to-one case", { # No error expect_identical( vec_locate_matches(1:2, 2:1, relationship = "one-to-one"), vec_locate_matches(1:2, 2:1) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "one-to-one"), vec_locate_matches(1:2, 3:4) ) expect_snapshot({ (expect_error(vec_locate_matches(c(2, 1), c(1, 1), relationship = "one-to-one"))) (expect_error(vec_locate_matches(c(1, 1), c(1, 2), relationship = "one-to-one"))) }) }) test_that("`relationship` handles one-to-many case", { # No error expect_identical( vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "one-to-many"), vec_locate_matches(c(1, 2), c(1, 2, 2)) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "one-to-many"), vec_locate_matches(1:2, 3:4) ) expect_snapshot({ (expect_error(vec_locate_matches(c(1, 2, 2), c(2, 1), relationship = "one-to-many"))) }) }) test_that("`relationship` handles many-to-one case", { # No error expect_identical( vec_locate_matches(c(1, 2, 2), c(1, 2), relationship = "many-to-one"), vec_locate_matches(c(1, 2, 2), c(1, 2)) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "many-to-one"), vec_locate_matches(1:2, 3:4) ) expect_snapshot({ (expect_error(vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "many-to-one"))) }) }) test_that("`relationship` handles many-to-many case", { # No error expect_identical( vec_locate_matches(c(1, 2, 2), c(1, 2), relationship = "many-to-many"), vec_locate_matches(c(1, 2, 2), c(1, 2)) ) # No error expect_identical( vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "many-to-many"), vec_locate_matches(c(1, 2), c(1, 2, 2)) ) # No error expect_identical( vec_locate_matches(c(1, 1, 2), c(1, 2, 2), relationship = "many-to-many"), vec_locate_matches(c(1, 1, 2), c(1, 2, 2)) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "many-to-many"), vec_locate_matches(1:2, 3:4) ) }) test_that("`relationship` handles warn-many-to-many case", { # No warning expect_identical( expect_silent( vec_locate_matches(c(1, 2, 2), c(1, 2), relationship = "warn-many-to-many") ), vec_locate_matches(c(1, 2, 2), c(1, 2)) ) # No warning expect_identical( expect_silent( vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "warn-many-to-many") ), vec_locate_matches(c(1, 2), c(1, 2, 2)) ) # Doesn't care about the zero match case expect_identical( expect_silent( vec_locate_matches(1:2, 3:4, relationship = "warn-many-to-many") ), vec_locate_matches(1:2, 3:4) ) # Specifically designed to ensure we test both: # - Finding multiple `needles` matches before multiple `haystack` matches # - Finding multiple `haystack` matches before multiple `needles` matches expect_snapshot({ (expect_warning(vec_locate_matches(c(1, 2, 1), c(1, 2, 2), relationship = "warn-many-to-many"))) (expect_warning(vec_locate_matches(c(1, 1, 2), c(2, 2, 1), relationship = "warn-many-to-many"))) }) }) test_that("`relationship` considers `incomplete` matches as possible multiple matches", { x <- c(1, NA, NaN) y <- c(NA, 1) expect_snapshot({ (expect_error(vec_locate_matches(x, y, relationship = "one-to-many"))) }) # No error expect_identical( vec_locate_matches(x, y, relationship = "one-to-many", incomplete = NA), vec_locate_matches(x, y, incomplete = NA) ) # No error expect_identical( vec_locate_matches(x, y, relationship = "one-to-many", nan_distinct = TRUE), vec_locate_matches(x, y, nan_distinct = TRUE) ) }) test_that("`relationship` errors on multiple matches that come from different nesting containers", { df <- data_frame(x = 0, y = 0) df2 <- data_frame(x = 1:2, y = 2:1) expect_snapshot({ (expect_error(vec_locate_matches(df, df2, condition = c("<=", "<="), relationship = "many-to-one"))) }) }) test_that("`relationship` errors when a match from a different nesting container is processed early on", { # Row 1 has 2 matches # Row 2 has 0 matches needles <- data_frame( a = c(1, 8), b = c(2, 9) ) # Rows 1 and 2 end up in different nesting containers haystack <- data_frame( a = c(5, 6), b = c(7, 6) ) # needles[1,] records the haystack[1,] match first, which is in the 1st # value of `loc_first_match_o_haystack`, then records the haystack[3,] match # which is in the 3rd value of `loc_first_match_o_haystack` even though it # is processed 2nd (i.e. we need to use `loc` rather than `i` when detecting # multiple matches) expect_snapshot({ (expect_error(vec_locate_matches(needles, haystack, condition = "<", relationship = "many-to-one"))) }) }) test_that("`relationship` doesn't error errneously on the last observation", { expect_error(res <- vec_locate_matches(1:2, 1:2, relationship = "many-to-one"), NA) expect_identical(res$needles, 1:2) expect_identical(res$haystack, 1:2) }) test_that("`relationship` doesn't error if `multiple` removes multiple matches", { out <- vec_locate_matches(c(1, 2), c(1, 1), multiple = "any", relationship = "one-to-one") expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(1L, NA)) out <- vec_locate_matches(c(1, 2), c(1, 1), multiple = "first", relationship = "one-to-one") expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(1L, NA)) out <- vec_locate_matches(c(1, 2), c(1, 1), multiple = "last", relationship = "one-to-one") expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(2L, NA)) }) test_that("`relationship` can still detect problematic `haystack` relationships when `multiple = first/last` are used", { expect_snapshot({ (expect_error(vec_locate_matches(c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-one"))) (expect_error(vec_locate_matches(c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-many"))) }) }) test_that("`relationship` and `remaining` work properly together", { expect_snapshot({ out <- vec_locate_matches( c(1, 2, 2), c(2, 3, 1, 1, 4), relationship = "warn-many-to-many", remaining = NA_integer_ ) }) expect_identical(out$needles, c(1L, 1L, 2L, 3L, NA, NA)) expect_identical(out$haystack, c(3L, 4L, 1L, 1L, 2L, 5L)) }) test_that("`relationship` errors if `condition` creates multiple matches", { expect_snapshot({ (expect_error(vec_locate_matches(1, c(1, 2), condition = "<=", relationship = "many-to-one"))) }) }) test_that("`relationship` doesn't error if `filter` removes multiple matches", { out <- vec_locate_matches(1, c(1, 2), condition = "<=", filter = "min", relationship = "many-to-one") expect_identical(out$needles, 1L) expect_identical(out$haystack, 1L) out <- vec_locate_matches(1, c(1, 2), condition = "<=", filter = "max", relationship = "many-to-one") expect_identical(out$needles, 1L) expect_identical(out$haystack, 2L) }) test_that("`relationship` still errors if `filter` hasn't removed all multiple matches", { expect_snapshot({ (expect_error(vec_locate_matches(1, c(1, 2, 1), condition = "<=", filter = "min", relationship = "many-to-one"))) }) # But not here out <- vec_locate_matches(c(1, 1), c(1, 2, 1), condition = "<=", filter = "max", relationship = "many-to-one") expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(2L, 2L)) }) test_that("`relationship` errors when we have >1 size 1 matches across containers (tidyverse/dplyr#6835)", { # Carefully designed to ensure we get 2 nested containment groups that split # up the rows of `y`, but each of the nested containment groups contain exactly # 1 match, so `size_match` in `expand_compact_indices()` won't ever be >1 x <- data_frame(a = 1L, b = 5L) y <- data_frame(a = c(1L, 2L), b = c(4L, 3L)) expect_snapshot(error = TRUE, { vec_locate_matches( x, y, condition = c("<=", ">="), filter = c("none", "none"), relationship = "one-to-one" ) }) }) test_that("`relationship` doesn't error when the first match from a different container gets filtered out (tidyverse/dplyr#6835)", { # Carefully designed to ensure we get 2 nested containment groups that split # up the rows of `y`. Row 1 (processed first) doesn't hold the minimum `b` # value, so it gets filtered out. Row 2 is in the "extra" matches section # but is actually the first (and only) real match, so we don't want to error # on it. x <- data_frame(a = 1L, b = 5L) y <- data_frame(a = c(1L, 2L), b = c(4L, 3L)) out <- vec_locate_matches( x, y, condition = c("<=", ">="), filter = c("none", "min"), relationship = "one-to-one" ) expect_identical(out$needles, 1L) expect_identical(out$haystack, 2L) # Similar to the above example, but with a `max` filter. Row 1 doesn't hold # the max `c` value so it is filtered out even though it is a `>=` match. x <- data_frame(a = 1L, b = 5L, c = 3L) y <- data_frame(a = c(1L, 2L), b = c(4L, 3L), c = c(1L, 2L)) out <- vec_locate_matches( x, y, condition = c("<=", ">=", ">="), filter = c("none", "none", "max"), relationship = "one-to-one" ) expect_identical(out$needles, 1L) expect_identical(out$haystack, 2L) }) test_that("`relationship` errors respect argument tags and error call", { expect_snapshot({ (expect_error(vec_locate_matches(1L, c(1L, 1L), relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) (expect_error(vec_locate_matches(c(1L, 1L), 1L, relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) (expect_error(vec_locate_matches(c(1L, 1L), 1L, relationship = "one-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) (expect_error(vec_locate_matches(1L, c(1L, 1L), relationship = "many-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) }) }) test_that("`relationship` warnings respect argument tags and error call", { expect_snapshot({ (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", error_call = call("fn")))) (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", haystack_arg = "bar", error_call = call("fn")))) }) }) test_that("`relationship` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, relationship = 1.5))) (expect_error(vec_locate_matches(1, 2, relationship = c("one-to-one", "one-to-many")))) (expect_error(vec_locate_matches(1, 2, relationship = "x"))) # Uses internal error (expect_error(vec_locate_matches(1, 2, relationship = "x", error_call = call("fn")))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `no_match` test_that("can control `no_match`", { x <- vec_locate_matches(1:3, 1L) expect_identical(x$haystack, c(1L, NA, NA)) x <- vec_locate_matches(1:3, 1L, no_match = 0L) expect_identical(x$haystack, c(1L, 0L, 0L)) }) test_that("can drop unmatched needles", { x <- vec_locate_matches(1:3, 2L, no_match = "drop") expect_identical(x$needles, 2L) expect_identical(x$haystack, 1L) }) test_that("can drop unmatched missings when `incomplete = 'match'`", { x <- vec_locate_matches(c(NaN, 2, NA), 2, no_match = "drop") expect_identical(x$needles, 2L) expect_identical(x$haystack, 1L) x <- vec_locate_matches(c(NaN, 2, NA), NA, no_match = "drop", nan_distinct = FALSE) expect_identical(x$needles, c(1L, 3L)) expect_identical(x$haystack, c(1L, 1L)) x <- vec_locate_matches(c(NaN, 2, NA), NA, no_match = "drop", nan_distinct = TRUE) expect_identical(x$needles, 3L) expect_identical(x$haystack, 1L) }) test_that("can differentiate between `no_match` and `incomplete`", { res <- vec_locate_matches(c(1, NA), 2, incomplete = NA, no_match = -1L) expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(-1L, NA)) }) test_that("`no_match` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, no_match = "error"))) (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo"))) (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo", error_call = call("fn")))) (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo", haystack_arg = "bar"))) }) }) test_that("`no_match = 'error'` doesn't error on handled incomplete values", { res <- vec_locate_matches(c(NA, NaN, NA, 1), c(NA, 1), incomplete = NA, no_match = "error") expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(rep(NA, 3), 2L)) }) test_that("`no_match = 'drop'` doesn't drop handled incomplete values", { res <- vec_locate_matches(c(NA, NaN, NA, 1), c(NA, 1), incomplete = NA, no_match = "drop") expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(rep(NA, 3), 2L)) }) test_that("errors with the right location on unmatched needles when different nesting containers are present", { df <- data_frame(x = 2:1, y = 2:1) df2 <- data_frame(x = 1:2, y = 2:1) # i.e. should be location 2 expect_snapshot( (expect_error(vec_locate_matches(df, df2, condition = ">=", no_match = "error"))) ) }) test_that("`no_match` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, no_match = 1.5))) (expect_error(vec_locate_matches(1, 2, no_match = c(1L, 2L)))) (expect_error(vec_locate_matches(1, 2, no_match = "x"))) # Uses internal call (expect_error(vec_locate_matches(1, 2, no_match = "x", error_call = call("fn")))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `remaining` test_that("`remaining` can retain `haystack` values that `needles` didn't match", { res <- vec_locate_matches(1, 0:2, remaining = NA) expect_identical(res$needles, c(1L, NA, NA)) expect_identical(res$haystack, c(2L, 1L, 3L)) res <- vec_locate_matches(1, 0:2, remaining = NA, condition = ">=") expect_identical(res$needles, c(1L, 1L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) res <- vec_locate_matches(1, 0:2, remaining = NA, condition = "<") expect_identical(res$needles, c(1L, NA, NA)) expect_identical(res$haystack, c(3L, 1L, 2L)) }) test_that("`incomplete` affects `needles` but not `haystack`", { # Matches NA to NA, so nothing remaining res <- vec_locate_matches(c(1, NA), c(NA, 1), incomplete = "compare", remaining = NA) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 1L)) # Matches NA to NA, so nothing remaining res <- vec_locate_matches(c(1, NA), c(NA, 1), incomplete = "match", remaining = NA) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 1L)) # Doesn't match NA to NA, so `haystack` is left with remaining values res <- vec_locate_matches(c(1, NA), c(NA, 1), condition = "<", incomplete = "compare", remaining = NA) expect_identical(res$needles, c(1L, 2L, NA, NA)) expect_identical(res$haystack, c(NA, NA, 1L, 2L)) # Matches NA to NA, so only remaining value is for `1` res <- vec_locate_matches(c(1, NA), c(NA, 1), condition = "<", incomplete = "match", remaining = NA) expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(NA, 1L, 2L)) # `needles` NA value is propagated, so `haystack` is left with a remaining value res <- vec_locate_matches(c(1, NA), c(NA, 1), incomplete = NA, remaining = NA) expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(2L, NA, 1L)) # `needles` NA value is dropped, so `haystack` is left with a remaining value res <- vec_locate_matches(c(1, NA), c(NA, 1), incomplete = "drop", remaining = NA) expect_identical(res$needles, c(1L, NA)) expect_identical(res$haystack, c(2L, 1L)) }) test_that("`remaining` combined with `multiple = 'first/last'` treats non-first/last matches as remaining", { x <- c(1, 2) y <- c(1, 2, 2) res <- vec_locate_matches(x, y, remaining = NA, multiple = "first") expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) res <- vec_locate_matches(x, y, remaining = NA, multiple = "last") expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(1L, 3L, 2L)) res <- vec_locate_matches(x, y, remaining = NA, multiple = "any") expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) }) test_that("`remaining` combined with the haystack reordering retains appearance order", { x <- data_frame(a = 1, b = 4) y <- data_frame(a = c(2, 1, 0), b = c(2, 1, 0)) # Appearance order for the haystack locations res <- vec_locate_matches(x, y, condition = c("<=", ">=")) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(1L, 2L)) # Retain that appearance order of the matches, with remaining values appended res <- vec_locate_matches(x, y, condition = c("<=", ">="), remaining = NA) expect_identical(res$needles, c(1L, 1L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) }) test_that("`remaining` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, remaining = "error"))) (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo"))) (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo", error_call = call("fn")))) (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo", haystack_arg = "bar"))) }) }) test_that("`remaining` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, remaining = 1.5))) (expect_error(vec_locate_matches(1, 2, remaining = c(1L, 2L)))) (expect_error(vec_locate_matches(1, 2, remaining = "x"))) # Uses internal call (expect_error(vec_locate_matches(1, 2, remaining = "x", error_call = call("fn")))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - filter test_that("simple `filter`s work", { needles <- c(1, 2, 4) haystack <- c(2, 1, 3, 0) res <- vec_locate_matches(needles, haystack, condition = "<", filter = "max") expect_identical(res$haystack, c(3L, 3L, NA)) res <- vec_locate_matches(needles, haystack, condition = "<", filter = "min") expect_identical(res$haystack, c(1L, 3L, NA)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max") expect_identical(res$haystack, c(2L, 1L, 3L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min") expect_identical(res$haystack, c(4L, 4L, 4L)) }) test_that("haystack duplicates are preserved", { needles <- c(1, 2, 4) haystack <- c(2, 1, 2, 3, 0, 1, 0) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max") expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L)) expect_identical(res$haystack, c(2L, 6L, 1L, 3L, 4L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min") expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L, 3L)) expect_identical(res$haystack, c(5L, 7L, 5L, 7L, 5L, 7L)) }) test_that("haystack duplicates can be controlled by `multiple`", { needles <- c(1, 2, 4) haystack <- c(2, 1, 2, 3, 0, 1, 0) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max", multiple = "first") expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(2L, 1L, 4L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max", multiple = "last") expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(6L, 3L, 4L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max", multiple = "any") expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(2L, 1L, 4L)) }) test_that("`filter` works when valid matches are in different nesting containers", { needles <- data_frame(x = 0L, y = 1L, z = 2L) haystack <- data_frame(x = c(1L, 2L, 1L, 0L), y = c(2L, 1L, 2L, 3L), z = c(3L, 3L, 2L, 2L)) info <- compute_nesting_container_info(haystack, c("<=", "<=", "<=")) haystack_order <- info[[1]] container_ids <- info[[2]] # Rows 1 and 2 of haystack are in different nesting containers, but # both have the "max" filter value of `z=3` so both should be in the result. # Row 4 is in its own container, so it will be considered the "max" # of its group, but it is less than rows 1 and 2 so it will ultimately be # filtered out. expect_identical(container_ids, c(1L, 2L, 1L, 0L)) expect_identical(haystack_order, c(4L, 3L, 1L, 2L)) res <- vec_locate_matches(needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max")) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(1L, 2L)) res <- vec_locate_matches(needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max"), multiple = "first") expect_identical(res$haystack, 1L) res <- vec_locate_matches(needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max"), multiple = "last") expect_identical(res$haystack, 2L) res <- vec_locate_matches(needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max"), multiple = "any") expect_identical(res$haystack, 1L) }) test_that("single filter is applied to all columns", { needles <- data_frame(x = 5L, y = 8L, z = 4L) haystack <- data_frame(x = c(1L, 3L, 2L, 2L), y = c(1L, 3L, 2L, 3L), z = c(1L, 2L, 3L, 3L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max") expect_identical(res$haystack, 2L) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min") expect_identical(res$haystack, 1L) }) test_that("different `filter`s can be used per column", { needles <- data_frame(x = c(0, 2, 1, 1), y = c(2, 0, 0, 4)) haystack <- data_frame(x = c(2, 2, 2, 1, 1), y = c(1, 1, 2, 2, 1)) res <- vec_locate_matches(needles, haystack, condition = c(">=", "<"), filter = c("max", "min")) expect_identical(res$needles, c(1L, 2L, 2L, 3L, 4L)) expect_identical(res$haystack, c(NA, 1L, 2L, 5L, NA)) }) test_that("`filter` works with incomplete values", { needles <- c(1, NA, 4, NA) haystack <- c(NA, 1, NA, 1, 3) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max", incomplete = "compare") expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L, 4L, 4L)) expect_identical(res$haystack, c(2L, 4L, 1L, 3L, 5L, 1L, 3L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max", incomplete = "compare", multiple = "first") expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(2L, 1L, 5L, 1L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max", incomplete = "compare", multiple = "any") expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(2L, 1L, 5L, 1L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max", incomplete = NA) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 4L)) expect_identical(res$haystack, c(2L, 4L, NA, 5L, NA)) }) test_that("`filter` works with mixed NA and NaN", { needles <- c(1, NA, 4, NaN) haystack <- c(NA, 1, NaN, 1, 3) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min", incomplete = "compare", nan_distinct = FALSE) expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L)) expect_identical(res$haystack, c(2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min", incomplete = "compare", nan_distinct = TRUE) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(2L, 4L, 1L, 2L, 4L, 3L)) }) test_that("`filter` is validated", { expect_error(vec_locate_matches(1, 2, filter = 1.5), "character vector") expect_error(vec_locate_matches(1, 2, filter = "x"), 'one of "none", "min", or "max"') expect_error(vec_locate_matches(1, 2, filter = c("min", "max")), "length 1, or the same length as") }) # ------------------------------------------------------------------------------ # vec_locate_matches() - edge cases test_that("zero row `needles` results in zero row data frame output", { res <- vec_locate_matches(integer(), 1:3) expect_identical(res$needles, integer()) expect_identical(res$haystack, integer()) res <- vec_locate_matches(integer(), 1:3, condition = "<") expect_identical(res$needles, integer()) expect_identical(res$haystack, integer()) }) test_that("zero row `haystack` results in no-matches for all needles", { res <- vec_locate_matches(1:3, integer()) expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) res <- vec_locate_matches(1:3, integer(), condition = "<") expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) }) test_that("zero row `haystack` still allows needle incomplete handling", { res <- vec_locate_matches(c(1, NA), integer(), incomplete = NA, no_match = 0L) expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(0L, NA)) res <- vec_locate_matches(c(1, NA), integer(), incomplete = NA, no_match = 0L, condition = "<") expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(0L, NA)) }) test_that("zero column data frames are not allowed", { expect_error( vec_locate_matches(data_frame(.size = 2L), data_frame(.size = 2L)), "at least 1 column" ) }) test_that("zero column input still checks `condition` correctness", { x <- data_frame(.size = 2) y <- data_frame(.size = 3) expect_error( vec_locate_matches(x, y, condition = c("==", "<=")), "length 1, or the same length as the number of columns" ) }) test_that("`multiple = 'first'/'last'` returns the first/last by appearance", { x <- c(1, 2, 3) y <- c(2, 1, 0) res <- vec_locate_matches(x, y, condition = ">=", multiple = "first") expect_identical(res$haystack, c(2L, 1L, 1L)) res <- vec_locate_matches(x, y, condition = ">=", multiple = "last") expect_identical(res$haystack, c(3L, 3L, 3L)) }) test_that("NA adjustment of `>` and `>=` conditions is protected from empty haystack", { res <- vec_locate_matches(1L, integer(), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("potential overflow on large output size is caught informatively", { # Windows 32-bit doesn't support long vectors of this size, and the # intermediate `r_ssize` will be too large skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") expect_snapshot({ (expect_error(vec_locate_matches(1:1e7, 1:1e7, condition = ">="))) }) }) vctrs/tests/testthat/test-vctrs.R0000644000176200001440000000155514276722575016705 0ustar liggesusers test_that("generics are extensible", { expect_error(vec_cast(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_restore(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_equal(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_compare(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype2(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_abbr(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_full(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_arith(NA, NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_finalise(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_assign(NA, NA, NA, NA), class = "rlib_error_dots_nonempty") }) vctrs/tests/testthat/test-complete.R0000644000176200001440000001037414315060310017323 0ustar liggesusers# vec_slice_complete ----------------------------------------------------------- test_that("can slice complete", { df <- data_frame(x = c(1, NA, 3), y = c(1, 2, NA)) expect_identical(vec_slice_complete(df), vec_slice(df, 1)) }) test_that("vec_slice_complete() works with size 0 input", { expect_identical(vec_slice_complete(integer()), integer()) expect_identical(vec_slice_complete(data.frame()), data.frame()) }) # vec_locate_complete ---------------------------------------------------------- test_that("vec_locate_complete() can locate complete", { df <- data_frame(x = c(1, NA, 3), y = c(1, 2, NA)) expect_identical(vec_locate_complete(df), 1L) }) test_that("vec_locate_complete() works with size 0 input", { expect_identical(vec_locate_complete(logical()), integer()) expect_identical(vec_locate_complete(data.frame()), integer()) }) # vec_detect_complete ---------------------------------------------------------- test_that("works with size zero input", { expect_identical(vec_detect_complete(integer()), logical()) expect_identical(vec_detect_complete(data.frame()), logical()) }) test_that("NA_real_ and NaN are both missing", { expect_identical(vec_detect_complete(c(NA_real_, NaN)), c(FALSE, FALSE)) }) test_that("works rowwise", { df <- data_frame(x = c(NA, 1, NA, 2), y = c(NA, NA, 2, 3)) expect <- c(FALSE, FALSE, FALSE, TRUE) expect_identical(vec_detect_complete(df), expect) df <- data_frame(x = c(1, 1), y = c(2, 2), z = c(1, NA)) expect <- c(TRUE, FALSE) expect_identical(vec_detect_complete(df), expect) }) test_that("works with data frames with rows but no columns", { expect_identical(vec_detect_complete(new_data_frame(n = 5L)), rep(TRUE, 5)) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, NA, 2, 2), b = c(1, 2, NA, 3)) df <- data_frame(x = rep(1, 4), y = col) expect <- c(TRUE, FALSE, FALSE, TRUE) expect_identical(vec_detect_complete(df), expect) }) test_that("works with various types", { expect <- c(TRUE, TRUE, FALSE, TRUE, FALSE) expect_identical(vec_detect_complete(c(TRUE, TRUE, NA, FALSE, NA)), expect) expect_identical(vec_detect_complete(c(1L, 1L, NA, 2L, NA)), expect) expect_identical(vec_detect_complete(c(1, 1, NA, 2, NA)), expect) expect_identical(vec_detect_complete(complex(real = c(1, 1, NA, 2, 2), imaginary = c(1, 1, 2, 2, NA))), expect) expect_identical(vec_detect_complete(c("a", "a", NA, "b", NA)), expect) expect_identical(vec_detect_complete(list(1, 1, NULL, 2, NULL)), expect) # No missing raw value expect_identical(vec_detect_complete(as.raw(c(1, 1, 2, 2, 3))), rep(TRUE, 5)) }) test_that("takes the equality proxy", { x <- as.POSIXlt(c(NA, 0), origin = "1970-01-01") df <- data_frame(a = 1:2, x = x) expect <- c(FALSE, TRUE) expect_identical(vec_detect_complete(x), expect) expect_identical(vec_detect_complete(df), expect) }) test_that("columns with a data frame proxy are incomplete if any columns of the proxy are incomplete (#1404)", { df <- data_frame( x = c(NA, 0, 1, 2, 3), y = new_rcrd(list(a = c(1, 1, 1, NA, NA), b = c(2, 2, 2, 2, NA))), z = new_rcrd(list(a = c(1, 1, NA, 1, 1), b = c(2, 2, NA, NA, 1))) ) expect_identical(vec_detect_complete(df), c(FALSE, TRUE, FALSE, FALSE, FALSE)) }) test_that("can have rcrd fields of all types", { make_rcrd <- function(x) { new_rcrd(list(x = x)) } expect <- c(TRUE, TRUE, FALSE, TRUE, FALSE) expect_identical(vec_detect_complete(make_rcrd(c(TRUE, TRUE, NA, FALSE, NA))), expect) expect_identical(vec_detect_complete(make_rcrd(c(1L, 1L, NA, 2L, NA))), expect) expect_identical(vec_detect_complete(make_rcrd(c(1, 1, NA, 2, NA))), expect) expect_identical(vec_detect_complete(make_rcrd(complex(real = c(1, 1, NA, 2, 2), imaginary = c(1, 1, 2, 2, NA)))), expect) expect_identical(vec_detect_complete(make_rcrd(c("a", "a", NA, "b", NA))), expect) expect_identical(vec_detect_complete(make_rcrd(list(1, 1, NULL, 2, NULL))), expect) # No missing raw value expect_identical(vec_detect_complete(make_rcrd(as.raw(c(1, 1, 2, 2, 3)))), rep(TRUE, 5)) }) test_that("works with arrays", { x <- array(c(1, 2, 3, NA), c(2, 2)) y <- array(c(1:3, NA, 5:8), c(2, 2, 2)) expect_identical(vec_detect_complete(x), c(TRUE, FALSE)) expect_identical(vec_detect_complete(y), c(TRUE, FALSE)) }) vctrs/tests/testthat/test-type-rational.R0000644000176200001440000000126514315060310020302 0ustar liggesusers # These tests check the rational type from the S3 vignette test_that("equality proxy is taken (#375)", { local_rational_class() x <- rational(c(1, 2, 1, 2, 6), c(1, 1, 2, 2, 2)) expect_identical(x == rational(3, 1), c(FALSE, FALSE, FALSE, FALSE, TRUE)) expect_identical(unique(x), rational(c(1, 2, 1, 6), c(1, 1, 2, 2))) }) test_that("order proxy is taken", { local_rational_class() x <- rational(c(1, 2, 1, 2, 6), c(1, 1, 2, 2, 2)) expect_identical(sort(x), rational(c(1, 1, 2, 2, 6), c(2, 1, 2, 1, 2))) }) test_that("can find common type and cast to rational", { local_rational_class() x <- rational(1:2, 2:1) expect_identical(vec_cast_common(x, x), list(x, x)) }) vctrs/tests/testthat/test-fill.R0000644000176200001440000000763514276722575016477 0ustar liggesuserstest_that("works with empty input", { x <- integer() expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("works with data frames with rows but no columns", { x <- new_data_frame(n = 2L) expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("vectors with all missing values are left unchanged", { x <- c(NA, NA, NA) expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("`NA_real_` and `NaN` are both considered missing", { expect_identical( vec_fill_missing(c(1, NA_real_, NaN)), c(1, 1, 1) ) }) test_that("missings are filled correctly", { x <- c(NA, 1, NA, 2, NA, NA) expect_identical(vec_fill_missing(x, "down"), c(NA, 1, 1, 2, 2, 2)) expect_identical(vec_fill_missing(x, "up"), c(1, 1, 2, 2, NA, NA)) expect_identical(vec_fill_missing(x, "downup"), c(1, 1, 1, 2, 2, 2)) expect_identical(vec_fill_missing(x, "updown"), c(1, 1, 2, 2, 2, 2)) }) test_that("`max_fill` limits the sequential fill amount", { x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) expect_identical(vec_fill_missing(x, "down", max_fill = 1), c(NA, NA, 1, 1, NA, NA, 3, 3, NA)) expect_identical(vec_fill_missing(x, "downup", max_fill = 1), c(NA, 1, 1, 1, NA, NA, 3, 3, NA)) expect_identical(vec_fill_missing(x, "down", max_fill = 2), c(NA, NA, 1, 1, 1, NA, 3, 3, 3)) expect_identical(vec_fill_missing(x, "downup", max_fill = 2), c(1, 1, 1, 1, 1, NA, 3, 3, 3)) expect_identical(vec_fill_missing(x, "up", max_fill = 1), c(NA, 1, 1, NA, NA, 3, 3, NA, NA)) expect_identical(vec_fill_missing(x, "updown", max_fill = 1), c(NA, 1, 1, NA, NA, 3, 3, 3, NA)) expect_identical(vec_fill_missing(x, "up", max_fill = 2), c(1, 1, 1, NA, 3, 3, 3, NA, NA)) expect_identical(vec_fill_missing(x, "updown", max_fill = 2), c(1, 1, 1, NA, 3, 3, 3, 3, 3)) }) test_that("fills data frames", { df <- data_frame(x = c(NA, NA, NA, 2), y = c(NA, 1, NA, 3)) expect_identical(vec_fill_missing(df, "down"), vec_slice(df, c(1, 2, 2, 4))) expect_identical(vec_fill_missing(df, "up"), vec_slice(df, c(2, 2, 4, 4))) }) test_that("can fill rcrd types", { x <- new_rcrd(list(x = c(1, NA, NA), y = c(1, 2, NA))) expect_identical(vec_fill_missing(x, "down"), vec_slice(x, c(1, 2, 2))) expect_identical(vec_fill_missing(x, "up"), vec_slice(x, c(1, 2, 3))) expect_identical(vec_fill_missing(x, "updown"), vec_slice(x, c(1, 2, 2))) }) test_that("validates `direction`", { expect_error(vec_fill_missing(1, 1), "`direction` must be one of") expect_error(vec_fill_missing(1, "foo"), "`direction` must be one of") }) test_that("validates `max_fill`", { expect_error(vec_fill_missing(1, max_fill = -1), "`max_fill` must be") expect_error(vec_fill_missing(1, max_fill = c(1L, 2L)), "`max_fill` must be") expect_error(vec_fill_missing(1, max_fill = NA_integer_), "`max_fill` must be") expect_error(vec_fill_missing(1, max_fill = "x"), class = "vctrs_error_incompatible_type") }) vctrs/tests/testthat/test-ptype-abbr-full.R0000644000176200001440000000602214315060310020513 0ustar liggesusers test_that("input must be a vector", { expect_error(vec_ptype_abbr(sum), "Not a vector") expect_error(vec_ptype_full(sum), "Not a vector") }) test_that("NULL has method", { expect_equal(vec_ptype_abbr(NULL), "NULL") expect_equal(vec_ptype_full(NULL), "NULL") }) test_that("non objects default to type + shape", { expect_equal(vec_ptype_abbr(ones(10)), "dbl[1d]") expect_equal(vec_ptype_abbr(ones(0, 10)), "dbl[,10]") expect_equal(vec_ptype_abbr(ones(10, 0)), "dbl[,0]") expect_equal(vec_ptype_full(ones(10)), "double[1d]") expect_equal(vec_ptype_full(ones(0, 10)), "double[,10]") expect_equal(vec_ptype_full(ones(10, 0)), "double[,0]") }) test_that("non objects can omit shape", { expect_equal(vec_ptype_abbr(ones(10), suffix_shape = FALSE), "dbl") expect_equal(vec_ptype_abbr(ones(0, 10), suffix_shape = FALSE), "dbl") expect_equal(vec_ptype_abbr(ones(10, 0), suffix_shape = FALSE), "dbl") }) test_that("objects default to first class", { x <- structure(1, class = c("foofy", "goofy")) expect_equal(vec_ptype_full(x), "foofy") expect_equal(vec_ptype_abbr(x), "foofy") }) test_that("atomic vectors and arrays as expected", { expect_equal(vec_ptype_full(1:5), "integer") dbl_mat <- array(double(), c(0, 3)) expect_equal(vec_ptype_full(dbl_mat), "double[,3]") }) test_that("complex and factor as expected (#323)", { expect_equal(vec_ptype_abbr(0i), "cpl") expect_equal(vec_ptype_abbr(factor()), "fct") }) test_that("named lists are always tagged (#322)", { expect_identical(vec_ptype_abbr(list(x = 1, y = 2)), "named list") expect_identical(vec_ptype_abbr(list(x = 1, y = 2), prefix_named = TRUE), "named list") }) test_that("named atomics are tagged optionally (#781)", { expect_identical(vec_ptype_abbr(c(x = 1, y = 2), prefix_named = TRUE), "named dbl") expect_identical(vec_ptype_abbr(c(x = 1L, y = 2L), prefix_named = TRUE), "named int") }) test_that("vec_ptype_abbr() adds named tag in case of row names", { expect_equal( vec_ptype_abbr(mtcars, prefix_named = TRUE), "named df[,11]" ) mat <- matrix(1:4, 2) rownames(mat) <- c("foo", "bar") expect_equal( vec_ptype_abbr(mat, prefix_named = TRUE), "named int[,2]" ) }) test_that("vec_ptype_abbr() and vec_ptype_full() are not inherited (#1549)", { foobar <- foobar(class = c("vctrs_bar", "vctrs_foo")) local_methods( vec_ptype_abbr.vctrs_foo = function(...) "foo_abbr", vec_ptype_full.vctrs_foo = function(...) "foo_full" ) expect_equal( vec_ptype_abbr(foobar), vec_ptype_abbr.default(foobar) ) expect_equal( vec_ptype_full(foobar), "vctrs_bar" ) local_methods( vec_ptype_abbr.vctrs_bar = function(...) "bar_abbr", vec_ptype_full.vctrs_bar = function(...) "bar_full" ) expect_equal( vec_ptype_abbr(foobar), "bar_abbr" ) expect_equal( vec_ptype_full(foobar), "bar_full" ) }) test_that("data.frames have good default abbr and full methods", { expect_snapshot({ df <- foobar(data.frame(x = 1, y = "", z = TRUE)) vec_ptype_abbr(df) vec_ptype_full(df) }) }) vctrs/tests/testthat/test-type-data-frame.R0000644000176200001440000004462414511320527020507 0ustar liggesusers # printing ---------------------------------------------------------------- test_that("data frames print nicely", { expect_equal(vec_ptype_abbr(mtcars), "df[,11]") expect_snapshot(vec_ptype_show(mtcars)) expect_snapshot(vec_ptype_show(iris)) }) test_that("embedded data frames print nicely", { df <- data.frame(x = 1:3) df$a <- data.frame(a = 1:3, b = letters[1:3]) df$b <- list_of(1, 2, 3) df$c <- as_list_of(split(data.frame(x = 1:3, y = letters[1:3]), 1:3)) expect_snapshot(vec_ptype_show(df)) }) # coercing ---------------------------------------------------------------- test_that("data frame only combines with other data frames or NULL", { dt <- data.frame(x = 1) expect_equal(vec_ptype_common(dt, NULL), vec_ptype(dt)) expect_error(vec_ptype_common(dt, 1:10), class = "vctrs_error_incompatible_type") }) test_that("data frame takes max of individual variables", { dt1 <- data.frame(x = FALSE, y = 1L) dt2 <- data.frame(x = 1.5, y = 1.5) expect_equal(vec_ptype_common(dt1, dt2), vec_ptype_common(dt2)) }) test_that("data frame combines variables", { dt1 <- data.frame(x = 1) dt2 <- data.frame(y = 1) dt3 <- max(dt1, dt2) expect_equal( vec_ptype_common(dt1, dt2), vec_ptype_common(data.frame(x = double(), y = double())) ) }) test_that("empty data frame still has names", { df <- data.frame() out <- vec_ptype_common(df, df) expect_equal(names(out), character()) }) test_that("combining data frames with foreign classes uses fallback", { foo <- foobar(data.frame()) df <- data.frame() expect_identical(vec_ptype_common(foo, foo, foo), foo) expect_identical(vec_ptype_common(foo, foo, df, foo), df) expect_identical(vec_ptype2(foo, df), data.frame()) expect_identical(vec_ptype2(df, foo), data.frame()) expect_identical(vec_ptype_common(foo, df), df) expect_identical(vec_ptype_common(df, foo), df) cnds <- list() withCallingHandlers( warning = function(cnd) { cnds <<- append(cnds, list(cnd)) invokeRestart("muffleWarning") }, expect_identical( vec_ptype_common(foo, df, foo, foo), df ) ) # There are no longer any warnings expect_length(cnds, 0) expect_equal( vec_cbind(foobar(data.frame(x = 1)), data.frame(y = 2)), data.frame(x = 1, y = 2) ) expect_equal( vec_rbind(foo, data.frame(), foo), df ) }) # casting ----------------------------------------------------------------- test_that("safe casts work as expected", { df <- data.frame(x = 1, y = 0) expect_equal(vec_cast(NULL, df), NULL) expect_equal(vec_cast(df, df), df) expect_equal(vec_cast(data.frame(x = TRUE, y = FALSE), df), df) }) test_that("warn about lossy coercions", { df1 <- data.frame(x = factor("foo"), y = 1) df2 <- data.frame(x = factor("bar")) expect_lossy(vec_cast(df1, df1[1]), df1[1], x = df1, to = df1[1]) expect_lossy( vec_cast(df1[1], df2), data.frame(x = factor(NA, levels = "bar")), x = factor("foo"), to = factor("bar") ) out <- allow_lossy_cast( allow_lossy_cast( vec_cast(df1, df2), factor("foo"), factor("bar") ), df1, df2 ) expect_identical(out, data.frame(x = factor(NA, levels = "bar"))) }) test_that("invalid cast generates error", { expect_error(vec_cast(1L, data.frame()), class = "vctrs_error_incompatible_type") }) test_that("column order matches type", { df1 <- data.frame(x = 1, y = "a") df2 <- data.frame(x = TRUE, z = 3) df3 <- vec_cast(df2, vec_ptype_common(df1, df2)) expect_named(df3, c("x", "y", "z")) }) test_that("restore generates correct row/col names", { df1 <- data.frame(x = NA, y = 1:4, z = 1:4) df1$x <- data.frame(a = 1:4, b = 1:4) df2 <- vec_restore(lapply(df1[1:3], vec_slice, 1:2), df1) expect_named(df2, c("x", "y", "z")) expect_equal(.row_names_info(df2), -2) }) test_that("restore keeps automatic row/col names", { df1 <- data.frame(x = NA, y = 1:4, z = 1:4) df1$x <- data.frame(a = 1:4, b = 1:4) df2 <- vec_restore(df1, df1) expect_named(df2, c("x", "y", "z")) expect_equal(.row_names_info(df2), -4) }) test_that("cast to empty data frame preserves number of rows", { out <- vec_cast(new_data_frame(n = 10L), new_data_frame()) expect_equal(nrow(out), 10L) }) test_that("can cast unspecified to data frame", { df <- data.frame(x = 1, y = 2L) expect_identical(vec_cast(unspecified(3), df), vec_init(df, 3)) }) test_that("cannot cast list to data frame", { df <- data.frame(x = 1, y = 2L) expect_error(vec_cast(list(df, df), df), class = "vctrs_error_incompatible_type") }) test_that("can restore lists with empty names", { expect_identical(vec_restore(list(), data.frame()), data.frame()) }) test_that("can restore subclasses of data frames", { expect_identical(vec_restore(list(), subclass(data.frame())), subclass(data.frame())) local_methods( vec_restore.vctrs_foobar = function(x, to, ..., i) "dispatched" ) expect_identical(vec_restore(list(), subclass(data.frame())), "dispatched") }) test_that("df_cast() checks for names", { x <- new_data_frame(list(1)) y <- new_data_frame(list(2)) expect_error(vec_cast_common(x, y), "must have names") }) test_that("casting to and from data frame preserves row names", { out <- vec_cast(mtcars, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) out <- vec_cast(out, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) }) test_that("df_cast() evaluates arg lazily", { expect_silent(df_cast(data_frame(), data_frame(), x_arg = print("oof"))) expect_silent(df_cast(data_frame(), data_frame(), to_arg = print("oof"))) }) # df_ptype2 --------------------------------------------------------------- test_that("df_ptype2() evaluates arg lazily", { expect_silent(df_ptype2(data_frame(), data_frame(), x_arg = print("oof"))) expect_silent(df_ptype2(data_frame(), data_frame(), y_arg = print("oof"))) }) # new_data_frame ---------------------------------------------------------- test_that("can construct an empty data frame", { expect_identical(new_data_frame(), data.frame()) }) test_that("can validly set the number of rows when there are no columns", { expect <- structure( list(), class = "data.frame", row.names = .set_row_names(2L), names = character() ) expect_identical(new_data_frame(n = 2L), expect) }) test_that("can add additional classes", { expect_s3_class(new_data_frame(class = "foobar"), "foobar") expect_s3_class(new_data_frame(class = c("foo", "bar")), c("foo", "bar")) }) test_that("can add additional attributes", { expect <- data.frame() attr(expect, "foo") <- "bar" attr(expect, "a") <- "b" expect_identical(new_data_frame(foo = "bar", a = "b"), expect) }) test_that("size is pulled from first column if not supplied", { x <- new_data_frame(list(x = 1:5, y = 1:6)) expect_identical(.row_names_info(x, type = 1), -5L) }) test_that("can construct a data frame without column names", { expect_named(new_data_frame(list(1, 2)), NULL) }) test_that("the names on an empty data frame are an empty character vector", { expect_identical(names(new_data_frame()), character()) }) test_that("class attribute", { expect_identical( class(new_data_frame(list(a = 1))), "data.frame" ) expect_identical( class(new_data_frame(list(a = 1), class = "tbl_df")), c("tbl_df", "data.frame") ) expect_identical( class(new_data_frame(list(a = 1), class = c("tbl_df", "tbl", "data.frame"))), c("tbl_df", "tbl", "data.frame", "data.frame") ) expect_identical( class(new_data_frame(list(a = 1), class = "foo_frame")), c("foo_frame", "data.frame") ) expect_identical( class(exec(new_data_frame, list(a = 1), !!!attributes(new_data_frame(list(), class = "tbl_df")))), c("tbl_df", "data.frame", "data.frame") ) expect_identical( class(exec(new_data_frame, list(a = 1), !!!attributes(new_data_frame(list(b = 1), class = "tbl_df")))), c("tbl_df", "data.frame", "data.frame") ) }) test_that("attributes with special names are merged", { expect_identical( names(new_data_frame(list(a = 1))), "a" ) expect_identical( names(new_data_frame(list(a = 1), names = "name")), "name" ) expect_identical( names(new_data_frame(list(1), names = "name")), "name" ) expect_identical( attr(new_data_frame(list()), "row.names"), integer() ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L)), -3L ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L, row.names = 1:3)), 3L ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L, row.names = c(NA, -3L))), -3L ) expect_identical( attr(new_data_frame(list(), n = 1L, row.names = "rowname"), "row.names"), "rowname" ) }) test_that("n and row.names (#894)", { # Can omit n if row.names attribute is given expect_identical( row.names(new_data_frame(list(), row.names = "rowname")), "rowname" ) expect_identical( attr(new_data_frame(list(), row.names = 2L), "row.names"), 2L ) expect_identical( row.names(new_data_frame(list(), row.names = chr())), chr() ) }) test_that("`row.names` completely overrides `n` and the implied size of `x`, even if incompatible (tidyverse/dplyr#6596)", { row_names <- c(NA, -3L) df <- new_data_frame(list(), n = 2L, row.names = row_names) expect_identical(.row_names_info(df, type = 0L), row_names) df <- new_data_frame(list(x = 1:2), row.names = row_names) expect_identical(.row_names_info(df, type = 0L), row_names) }) test_that("ALTREP `row.names` are not materialized by `new_data_frame()` (tidyverse/dplyr#6596)", { skip_if(getRversion() <= "3.5.0") # We are careful in `new_data_frame()` to not call the `Dataptr()` or # `Length()` ALTREP methods, both of which would materialize our lazy # character here row_names <- new_lazy_character(~ c("a", "b")) x <- new_data_frame(list(), row.names = row_names) expect_false(lazy_character_is_materialized(.row_names_info(x, type = 0L))) x <- new_data_frame(list(x = 1:2), row.names = row_names) expect_false(lazy_character_is_materialized(.row_names_info(x, type = 0L))) x <- new_data_frame(list(), n = 2L, row.names = row_names) expect_false(lazy_character_is_materialized(.row_names_info(x, type = 0L))) }) test_that("`x` must be a list", { expect_snapshot((expect_error( new_data_frame(1), "`x` must be a list" ))) }) test_that("if supplied, `n` must be an integer of size 1", { expect_snapshot({ (expect_error(new_data_frame(n = c(1L, 2L)), "must be an integer of size 1")) (expect_error(new_data_frame(n = "x"), "must be an integer of size 1")) }) }) test_that("if supplied, `n` can't be negative or missing (#1477)", { expect_snapshot({ (expect_error(new_data_frame(n = -1L))) (expect_error(new_data_frame(n = NA_integer_))) }) }) test_that("`class` must be a character vector", { expect_snapshot((expect_error( new_data_frame(class = 1), "must be NULL or a character vector" ))) }) test_that("flatten info is computed", { df_flatten_info <- function(x) { .Call(ffi_df_flatten_info, x) } expect_identical(df_flatten_info(mtcars), list(FALSE, ncol(mtcars))) df <- tibble(x = 1, y = tibble(x = 2, y = tibble(x = 3), z = 4), z = 5) expect_identical(df_flatten_info(df), list(TRUE, 5L)) }) test_that("can flatten data frames", { df_flatten <- function(x) { .Call(ffi_df_flatten, x) } expect_identical(df_flatten(mtcars), mtcars) df <- tibble(x = 1, y = tibble(x = 2, y = tibble(x = 3), z = 4), z = 5) expect_identical(df_flatten(df), new_data_frame(list(x = 1, x = 2, x = 3, z = 4, z = 5))) }) test_that("can flatten data frames with rcrd columns (#1318)", { col <- new_rcrd(list(a = 1)) df <- data_frame(col = col, y = 1) expect_identical(vec_proxy_equal(df), data_frame(col = 1, y = 1)) col <- new_rcrd(list(a = 1, b = 2)) df <- data_frame(col = col, y = 1) expect_identical(vec_proxy_equal(df), data_frame(a = 1, b = 2, y = 1)) }) test_that("new_data_frame() zaps existing attributes", { struct <- structure(list(), foo = 1) expect_identical( attributes(new_data_frame(struct)), attributes(new_data_frame(list())), ) expect_identical( attributes(new_data_frame(struct, bar = 2)), attributes(new_data_frame(list(), bar = 2)), ) }) # data_frame -------------------------------------------------------------- test_that("data_frame() and df_list() report error context", { expect_snapshot({ (expect_error(data_frame(a = 1, a = 1))) (expect_error(data_frame(a = 1, a = 1, .error_call = call("foo")))) (expect_error(data_frame(a = 1:2, b = int()))) (expect_error(data_frame(a = 1:2, b = int(), .error_call = call("foo")))) (expect_error(df_list(a = 1, a = 1))) (expect_error(df_list(a = 1, a = 1, .error_call = call("foo")))) (expect_error(df_list(a = 1:2, b = int()))) (expect_error(df_list(a = 1:2, b = int(), .error_call = call("foo")))) }) }) test_that("can construct data frames with empty input", { expect_identical(data_frame(), new_data_frame()) expect_named(data_frame(), character()) }) test_that("input is tidy recycled", { expect_identical( data_frame(x = 1, y = 1:3), data_frame(x = c(1, 1, 1), y = 1:3) ) expect_identical( data_frame(x = 1, y = integer()), data_frame(x = double(), y = integer()) ) expect_snapshot({ expect_error(data_frame(1:2, 1:3), class = "vctrs_error_incompatible_size") }) }) test_that("dots are dynamic", { list_2_data_frame <- function(x) data_frame(!!!x) expect_identical( list_2_data_frame(list(x = 1, y = 2)), data_frame(x = 1, y = 2) ) }) test_that("unnamed input is auto named with empty strings", { expect_named(data_frame(1, 2, .name_repair = "minimal"), c("", "")) }) test_that("unnamed data frames are auto unpacked", { expect_identical( data_frame(w = 1, data_frame(x = 2, y = 3), z = 4), data_frame(w = 1, x = 2, y = 3, z = 4) ) }) test_that("named data frames are not unpacked", { df_col <- data_frame(x = 2, y = 3) df <- data_frame(w = 1, col = data_frame(x = 2, y = 3), z = 4) expect_identical(df$col, df_col) }) test_that("unpacked data frames without names are caught", { df_col <- new_data_frame(list(1)) expect_error(data_frame(df_col), "corrupt data frame") }) test_that("unpacking in `df_list()` can be disabled with `.unpack = FALSE`", { out <- df_list( w = 1, data_frame(x = 2, y = 3), z = 4, .unpack = FALSE, .name_repair = "minimal" ) expect <- list( w = 1, data_frame(x = 2, y = 3), z = 4 ) expect_identical(out, expect) }) test_that("`.unpack` is validated", { expect_snapshot(error = TRUE, { df_list(.unpack = 1) }) expect_snapshot(error = TRUE, { df_list(.unpack = c(TRUE, FALSE)) }) }) test_that("`NULL` inputs are dropped", { expect_identical(data_frame(NULL, x = 1, NULL), data_frame(x = 1)) }) test_that("`NULL` inputs are dropped before name repair", { expect_identical( data_frame(x = NULL, x = 1, .name_repair = "check_unique"), data_frame(x = 1) ) }) test_that("`.size` can force a desired size", { df <- data_frame(x = 1, .size = 5) expect_identical(df$x, rep(1, 5)) expect_size(data_frame(.size = 5), 5L) }) test_that("`.name_repair` repairs names", { expect_message(res <- data_frame(x = 1, x = 1, .name_repair = "unique")) expect_named(res, c("x...1", "x...2")) }) test_that("`.name_repair` happens after auto-naming with empty strings", { expect_message(res <- data_frame(1, 2, .name_repair = "unique")) expect_named(res, c("...1", "...2")) }) test_that("`.name_repair` happens after splicing", { expect_message(res <- data_frame(x = 1, data_frame(x = 2), .name_repair = "unique")) expect_named(res, c("x...1", "x...2")) }) test_that("`.name_repair` can be quiet", { local_name_repair_verbose() expect_snapshot({ dfl_unique <- df_list(1, 2, .name_repair = "unique_quiet") dfl_universal <- df_list("if" = 1, "in" = 2, .name_repair = "universal_quiet") df_unique <- data_frame(1, 2, .name_repair = "unique_quiet") df_universal <- data_frame("if" = 1, "in" = 2, .name_repair = "universal_quiet") }) expect_named(dfl_unique, c("...1", "...2")) expect_named(dfl_universal, c(".if", ".in")) expect_named(df_unique, c("...1", "...2")) expect_named(df_universal, c(".if", ".in")) }) # fallback ---------------------------------------------------------------- test_that("data frame fallback handles column types (#999)", { df1 <- foobar(data.frame(x = 1)) df2 <- foobar(data.frame(x = 1, y = 2)) df3 <- foobar(data.frame(x = "", y = 2)) common <- foobar(data.frame(x = dbl(), y = dbl())) expect_identical(vec_ptype2(df1, df2), common) expect_identical(vec_ptype2(df2, df1), common) exp <- foobar(data_frame(x = 1, y = na_dbl)) expect_identical(vec_cast(df1, df2), exp) expect_identical(vec_cast(set_tibble(df1), set_tibble(df2)), set_tibble(exp)) expect_snapshot({ local_error_call(call("my_function")) (expect_error( vec_ptype2(df1, df3), class = "vctrs_error_incompatible_type" )) (expect_error( vec_ptype2(df3, df1), class = "vctrs_error_incompatible_type" )) (expect_error( vec_cast(df2, df1), class = "vctrs_error_incompatible_type" )) }) expect_identical( vec_rbind(df1, df2), foobar(data.frame(x = c(1, 1), y = c(NA, 2))) ) # Attributes are not restored df1_attrib <- foobar(df1, foo = "foo") df2_attrib <- foobar(df2, bar = "bar") exp <- data.frame(x = c(1, 1), y = c(NA, 2)) expect_equal( vec_rbind(df1_attrib, df2_attrib), exp ) out <- with_methods( `[.vctrs_foobar` = function(x, i, ...) { new_data_frame( NextMethod(), dispatched = TRUE, class = "vctrs_foobar" ) }, vec_rbind(df1_attrib, df2_attrib) ) expect_identical(out, exp) }) test_that("falls back to tibble for tibble subclasses (#1025)", { foo <- foobar(tibble::as_tibble(mtcars)) expect_s3_class(vec_rbind(foo, mtcars), "tbl_df") expect_s3_class(vec_rbind(foo, mtcars, mtcars), "tbl_df") expect_s3_class(vec_rbind(foo, mtcars, foobar(mtcars)), "tbl_df") }) test_that("fallback is recursive", { df <- mtcars[1:3, 1, drop = FALSE] foo <- new_data_frame(list(x = foobar(df, foo = TRUE))) bar <- new_data_frame(list(x = foobar(df, bar = TRUE))) baz <- new_data_frame(list(y = 1:3, x = foobar(df, bar = TRUE))) exp <- new_data_frame(list(x = vec_rbind(df, df))) expect_equal(vec_rbind(foo, bar), exp) exp <- new_data_frame(list(x = vec_rbind(df, df), y = c(NA, NA, NA, 1:3))) expect_equal(vec_rbind(foo, baz), exp) }) vctrs/tests/testthat/test-type-table.R0000644000176200001440000001776414520724751017611 0ustar liggesusers # Print ------------------------------------------------------------------- test_that("ptype print methods are descriptive", { tab1 <- new_table() tab2 <- new_table(dim = c(0L, 1L, 2L, 1L)) expect_equal(vec_ptype_abbr(tab1), "table[1d]") expect_equal(vec_ptype_abbr(tab2), "table[,1,2,1]") expect_equal(vec_ptype_full(tab1), "table[1d]") expect_equal(vec_ptype_full(tab2), "table[,1,2,1]") }) # Coercion ---------------------------------------------------------------- test_that("can find a common type among tables with identical dimensions", { tab1 <- new_table() tab2 <- new_table(1:2, dim = c(1L, 2L, 1L)) expect_identical(vec_ptype2(tab1, tab1), new_table()) expect_identical(vec_ptype2(tab2, tab2), new_table(dim = c(0L, 2L, 1L))) }) test_that("size is not considered in the ptype", { x <- new_table(1:2, dim = 2L) y <- new_table(1:3, dim = 3L) expect_identical(vec_ptype2(x, y), new_table()) }) test_that("vec_ptype2() can broadcast table shapes", { x <- new_table(dim = c(0L, 1L)) y <- new_table(dim = c(0L, 2L)) expect_identical(vec_ptype2(x, y), new_table(dim = c(0L, 2L))) x <- new_table(dim = c(0L, 1L, 3L)) y <- new_table(dim = c(0L, 2L, 1L)) expect_identical(vec_ptype2(x, y), new_table(dim = c(0L, 2L, 3L))) }) test_that("vec_ptype2() never propagates dimnames", { x <- new_table(dim = c(0L, 1L), dimnames = list(character(), "x1")) y <- new_table(dim = c(0L, 2L), dimnames = list(character(), c("y1", "y2"))) expect_null(dimnames(vec_ptype2(x, x))) expect_null(dimnames(vec_ptype2(x, y))) }) test_that("implicit axes are broadcast", { x <- new_table(dim = c(0L, 2L)) y <- new_table(dim = c(0L, 1L, 3L)) expect_identical(vec_ptype2(x, y), new_table(dim = c(0L, 2L, 3L))) }) test_that("errors on non-broadcastable dimensions", { x <- new_table(dim = c(0L, 2L)) y <- new_table(dim = c(0L, 3L)) expect_error(vec_ptype2(x, y), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2() errors on non-tables", { expect_error(vec_ptype2(new_table(), 1), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(new_table(), 1L), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(new_table(), "1"), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1L, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2("1", new_table()), class = "vctrs_error_incompatible_type") }) test_that("common types have symmetry when mixed with unspecified input", { x <- new_table() expect_identical(vec_ptype2(x, NA), new_table()) expect_identical(vec_ptype2(NA, x), new_table()) x <- new_table(dim = c(0L, 2L)) expect_identical(vec_ptype2(x, NA), new_table(dim = c(0L, 2L))) expect_identical(vec_ptype2(NA, x), new_table(dim = c(0L, 2L))) }) test_that("`table` delegates coercion", { expect_identical( vec_ptype2(new_table(1), new_table(FALSE)), new_table(double()) ) expect_error( vec_ptype2(new_table(1), new_table("")), class = "vctrs_error_incompatible_type" ) }) # Casting ----------------------------------------------------------------- test_that("can cast to an identically shaped table", { x <- new_table(1:5, dim = 5L) y <- new_table(1:8, dim = c(2L, 2L, 2L)) expect_identical(vec_cast(x, x), x) expect_identical(vec_cast(y, y), y) }) test_that("vec_cast() can broadcast table shapes", { # We test only the dim here and not the class because on R 3.2 # the `[.table` method did not exist and `shape_broadcast()` # gives back a matrix, not a table. x <- new_table(dim = c(0L, 1L)) y <- new_table(dim = c(0L, 2L)) expect_identical(dim(vec_cast(x, y)), c(0L, 2L)) x <- new_table(dim = c(0L, 1L, 1L)) y <- new_table(dim = c(0L, 2L, 3L)) expect_identical(dim(vec_cast(x, y)), c(0L, 2L, 3L)) }) test_that("cannot decrease axis length", { x <- new_table(dim = c(0L, 3L)) y <- new_table(dim = c(0L, 1L)) expect_error(vec_cast(x, y), "Non-recyclable", class = "vctrs_error_incompatible_type") }) test_that("cannot decrease dimensionality", { x <- new_table(dim = c(0L, 1L, 1L)) y <- new_table(dim = c(0L, 1L)) expect_snapshot({ (expect_error(vec_cast(x, y), class = "vctrs_error_incompatible_type")) }) }) test_that("vec_cast() errors on non-tables", { expect_error(vec_cast(new_table(), 1), class = "vctrs_error_incompatible_type") expect_error(vec_cast(new_table(), 1L), class = "vctrs_error_incompatible_type") expect_error(vec_cast(new_table(), "1"), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1L, new_table()), class = "vctrs_error_incompatible_type") expect_error(vec_cast("1", new_table()), class = "vctrs_error_incompatible_type") }) test_that("can cast from, but not to, unspecified", { x <- new_table() expect_error(vec_cast(x, NA), class = "vctrs_error_incompatible_type") expect_identical(vec_cast(NA, x), new_table(NA_integer_, dim = 1L)) x <- new_table(dim = c(0L, 2L)) expect_error(vec_cast(x, NA), class = "vctrs_error_incompatible_type") expect_identical(vec_cast(NA, x), new_table(c(NA_integer_, NA_integer_), dim = c(1L, 2L))) }) test_that("`table` delegates casting", { expect_identical( vec_cast(new_table(1), new_table(FALSE)), new_table(TRUE) ) expect_error( vec_cast(new_table(1), new_table("")), class = "vctrs_error_incompatible_type" ) }) # Misc -------------------------------------------------------------------- test_that("`new_table()` validates input", { expect_error(new_table(1L, 1), "`dim` must be an integer vector") expect_error(new_table(1:2, 1L), "must match the length of `x`") }) test_that("ptype is correct", { tab1 <- new_table(1L, dim = 1L) tab2 <- new_table(1:2, dim = c(1L, 2L, 1L)) expect_identical(vec_ptype(tab1), new_table()) expect_identical(vec_ptype(tab2), new_table(dim = c(0L, 2L, 1L))) }) test_that("can use a table in `vec_c()`", { expect_identical(vec_c(new_table()), new_table()) expect_identical(vec_c(new_table(), new_table()), new_table()) x <- new_table(1:5, 5L) y <- new_table(1:4, dim = c(2L, 2L)) expect_identical(vec_c(x, x), new_table(c(1:5, 1:5), dim = 10L)) expect_identical(vec_c(y, y), new_table(c(1:2, 1:2, 3:4, 3:4), dim = c(4L, 2L))) expect_identical(vec_c(x, y), new_table(c(1:5, 1:2, 1:5, 3:4), dim = c(7L, 2L))) }) test_that("names of the first dimension are kept in `vec_c()`", { x <- new_table(1:4, c(2L, 2L)) dimnames(x) <- list(c("r1", "r2"), c("c1", "c2")) xx <- vec_c(x, x) expect_identical(dimnames(xx), list(c("r1", "r2", "r1", "r2"), NULL)) }) test_that("can use a table in `list_unchop()`", { x <- new_table(1:4, dim = c(2L, 2L)) expect_identical(list_unchop(list(x)), x) expect_identical(list_unchop(list(x, x), indices = list(1:2, 4:3)), vec_slice(x, c(1:2, 2:1))) }) test_that("can concatenate tables", { x <- table(1:2) out <- vec_c(x, x) exp <- new_table(rep(1L, 4), dimnames = list(c("1", "2", "1", "2"))) expect_identical(out, exp) out <- vec_rbind(x, x) exp <- data_frame(`1` = new_table(c(1L, 1L)), `2` = new_table(c(1L, 1L))) expect_identical(out, exp) y <- table(list(1:2, 3:4)) # FIXME out <- vec_c(y, y) exp <- new_table( matrix(int(1, 0, 1, 0, 0, 1, 0, 1), nrow = 4), dim = c(4L, 2L), dimnames = list(c("1", "2", "1", "2"), NULL) ) expect_identical(out, exp) out <- vec_rbind(y, y) exp <- new_data_frame(list( `3` = int(1, 0, 1, 0), `4` = int(0, 1, 0, 1) ), row.names = c("1...1", "2...2", "1...3", "2...4") ) expect_identical(out, exp) skip("FIXME: dimnames of matrices are not properly concatenated") }) test_that("can concatenate tables of type double (#1190)", { x <- table(c(1, 2)) / 2 out <- vec_c(x, x) exp <- new_table(c(0.5, 0.5, 0.5, 0.5), dimnames = list(c("1", "2", "1", "2"))) expect_identical(out, exp) out <- vec_rbind(x, x) exp <- data_frame(`1` = new_table(c(0.5, 0.5)), `2` = new_table(c(0.5, 0.5))) expect_identical(out, exp) }) vctrs/tests/testthat/test-recycle.R0000644000176200001440000001253514315060310017142 0ustar liggesuserstest_that("vec_recycle_common() reports error context", { my_function <- function(...) vec_recycle_common(...) expect_snapshot({ (expect_error(my_function(this_arg = 1:2, that_arg = int()))) (expect_error(my_function(this_arg = 1:2, that_arg = int(), .size = 2))) (expect_error(my_function(this_arg = 1:2, that_arg = int(), .arg = "my_arg"))) (expect_error(my_function(this_arg = 1:2, that_arg = int(), .size = 2, .arg = "my_arg"))) }) }) # vec_recycle() ------------------------------------------------------------- test_that("vec_recycle recycles size 1 to any other size", { x <- 1 x0 <- numeric() x2 <- c(x, x) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) }) test_that("vec_recycle(): incompatible lengths get error messages", { x2 <- c(1, 2) expect_snapshot({ (expect_error(vec_recycle(x2, 1), class = "vctrs_error_recycle_incompatible_size")) }) expect_error(vec_recycle(x2, 0), class = "vctrs_error_recycle_incompatible_size") expect_error(vec_recycle(x2, 3), class = "vctrs_error_recycle_incompatible_size") }) test_that("can recycle arrays", { x <- matrix(1:2, 1) x2 <- matrix(1:2, 2, 2, byrow = TRUE) x0 <- matrix(integer(), 0, 2) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) # List arrays data <- c(list(1), list(2)) x <- matrix(data, 1) x2 <- matrix(data, 2, 2, byrow = TRUE) x0 <- matrix(list(), 0, 2) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) }) test_that("vec_recycle() evaluates x_arg lazily", { expect_silent(vec_recycle(1L, 1L, x_arg = print("oof"))) }) # Empty ------------------------------------------------------------------- test_that("empty input returns empty list", { expect_equal(vec_recycle_common(), list()) }) # Vectors ----------------------------------------------------------------- test_that("NULL is idempotent", { expect_equal(vec_recycle_common(NULL, NULL), list(NULL, NULL)) expect_equal(vec_recycle_common(1:5, NULL), list(1:5, NULL)) expect_equal(vec_recycle_common(NULL, 1:5), list(NULL, 1:5)) }) test_that("equal lengths returned as is", { x <- 1:3 expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x[1], x[1]), list(x[1], x[1])) expect_equal(vec_recycle_common(x[0], x[0]), list(x[0], x[0])) }) test_that("vec_recycle_common recycles size 1 to any other size", { x1 <- 1 x3 <- rep(1, 3) x0 <- numeric() expect_equal(vec_recycle_common(x1, x3), list(x3, x3)) expect_equal(vec_recycle_common(x3, x1), list(x3, x3)) expect_equal(vec_recycle_common(x1, x0), list(x0, x0)) }) test_that("vec_recycle_common(): incompatible lengths get error messages", { expect_snapshot({ (expect_error(vec_recycle_common(1:2, 1:3), class = "vctrs_error_incompatible_size")) }) expect_error(vec_recycle_common(1:3, 1:2), class = "vctrs_error_incompatible_size") expect_error(vec_recycle_common(numeric(), 1:2), class = "vctrs_error_incompatible_size") }) # Matrices ---------------------------------------------------------------- test_that("can vec_recycle_common matrices", { x <- matrix(nrow = 4, ncol = 4) x1 <- x[1, , drop = FALSE] expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x1, x), list(x, x)) }) test_that("recycling matrices respects incompatible sizes", { x <- matrix(nrow = 4, ncol = 4) x2 <- x[1:2, , drop = FALSE] x0 <- x[0, , drop = FALSE] expect_snapshot({ (expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size")) }) expect_error(vec_recycle_common(x0, x), class = "vctrs_error_incompatible_size") }) test_that("can vec_recycle_common data frames", { x <- data.frame(a = rep(1, 3), b = rep(2, 3)) x1 <- vec_slice(x, 1L) expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x1, x), list(x, x)) }) test_that("recycling data frames respects incompatible sizes", { x <- data.frame(a = rep(1, 3), b = rep(2, 3)) x2 <- vec_slice(x, 1:2) x0 <- vec_slice(x, integer()) expect_snapshot({ (expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size")) }) expect_error(vec_recycle_common(x0, x), class = "vctrs_error_incompatible_size") }) test_that("can vec_recycle_common matrix and data frame", { mt <- matrix(nrow = 2, ncol = 2) df <- data.frame(x = c(1, 1), y = c(2, 2)) expect_equal( vec_recycle_common(vec_slice(mt, 1L), df), list(mt, df) ) expect_equal( vec_recycle_common(mt, vec_slice(df, 1L)), list(mt, df) ) }) test_that("recycling data frames with matrices respects incompatible sizes", { mt <- matrix(nrow = 2, ncol = 2) df <- data.frame(x = c(1, 1), y = c(2, 2)) expect_error( vec_recycle_common(vec_slice(mt, integer()), df), class = "vctrs_error_incompatible_size" ) expect_error( vec_recycle_common(mt, vec_slice(df, 0L)), class = "vctrs_error_incompatible_size" ) }) test_that("recycling to size 1 has informative error", { expect_snapshot({ (expect_error(vec_recycle(1:2, 1), class = "vctrs_error_recycle_incompatible_size")) }) }) test_that("incompatible recycling size has informative error", { expect_snapshot(error = TRUE, vec_recycle(1:2, 4)) expect_snapshot(error = TRUE, vec_recycle(1:2, 4, x_arg = "foo")) }) vctrs/tests/testthat/test-type-list-of.R0000644000176200001440000001475414373202700020062 0ustar liggesuserstest_that("list_of inherits from list", { x1 <- list_of(1, 1) expect_s3_class(x1, "list") }) test_that("list_of works like list", { x1 <- list_of(1, 1) expect_type(x1, "list") expect_s3_class(x1, "vctrs_list_of") expect_equal(attr(x1, "ptype"), double()) x2 <- list_of(1, 1, .ptype = integer()) expect_equal(attr(x1, "ptype"), integer()) x3 <- as_list_of(list(1, 1)) expect_equal(x3, x1) x4 <- list_of(a = 1, b = 2) expect_equal(x4$b, 2) expect_error(x4$c, "Invalid index", fixed = TRUE) }) test_that("list_of errors if it can't find common type", { expect_error(list_of(1, "a"), class = "vctrs_error_incompatible_type") expect_error(list_of(), "find common type") }) test_that("can use as_list_of to change type", { x1 <- list_of(1) expect_equal(as_list_of(x1), x1) x2 <- as_list_of(x1, .ptype = integer()) expect_identical(x2[[1]], 1L) }) test_that("is_list_of as expected", { expect_false(is_list_of(list(1))) expect_true(is_list_of(list_of(1))) }) test_that("print method gives human friendly output", { skip_on_cran() # Depends on tibble expect_snapshot(list_of(1, 2:3)) expect_snapshot(tibble::tibble(x = list_of(1, 2:3))) }) test_that("str method is reasonably correct", { x <- list_of(1, 2:3) expect_snapshot(str(x)) expect_snapshot(str(list(list(x, y = 2:1)))) expect_snapshot(str(x[0])) expect_snapshot(str(list(list(x[0], y = 2:1)))) }) test_that("constructor requires list input", { expect_error(new_list_of(1), "must be a list") expect_error(new_list_of(mtcars), "must be a list") }) test_that("constructor requires size 0 ptype", { expect_error(new_list_of(ptype = 1), "must have size 0") }) test_that("can combine a mix of named and unnamed list-ofs (#784)", { a <- new_list_of(list(x = 1L), ptype = integer()) b <- new_list_of(list(2L), ptype = integer()) expect <- new_list_of(list(x = 1L, 2L), ptype = integer()) expect_identical(vec_c(a, b), expect) }) # Subsetting -------------------------------------------------------------- test_that("[ preserves type", { x <- list_of(1) expect_equal(x[1], x) }) test_that("[<-, [[<- and $<- coerce their input", { x <- list_of(x = 1, y = 1, z = 1, w = 1) x[1] <- list(FALSE) expect_identical(x, list_of(x = 0, y = 1, z = 1, w = 1)) x[[2]] <- FALSE expect_identical(x, list_of(x = 0, y = 0, z = 1, w = 1)) x$z <- FALSE expect_identical(x, list_of(x = 0, y = 0, z = 0, w = 1)) x[3:4] <- c(TRUE, FALSE) expect_identical(x, list_of(x = 0, y = 0, z = 1, w = 0)) x[[2]] <- NULL expect_equal(x, list_of(x = 0, y = NULL, z = 1, w = 0)) expect_error(x[[2]] <- list(20), class = "vctrs_error_incompatible_type") expect_error(x$y <- list(20), class = "vctrs_error_incompatible_type") x[3:4] <- list(NULL) expect_equal(x, list_of(x = 0, y = NULL, z = NULL, w = NULL)) expect_equal(is.na(x), c(FALSE, TRUE, TRUE, TRUE)) }) test_that("assingment can increase size of vector", { x <- list_of(x = 1) x[[2]] <- 2 x$z <- 3 x[4:5] <- c(4,5) expect_length(x, 5) }) # Type system ------------------------------------------------------------- test_that("list_of() are vectors", { expect_true(obj_is_vector(list_of(1))) expect_true(vec_is(list_of(1))) }) test_that("list coercions are symmetric and unchanging", { types <- list( list(), list_of(.ptype = integer()), list_of(.ptype = double()), list_of(.ptype = character()) ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) local_options(width = 200) expect_snapshot(print(mat)) }) test_that("max, list_of> is list_of>", { r_int <- list_of(.ptype = integer()) r_dbl <- list_of(.ptype = double()) expect_equal(vec_ptype_common(r_int, r_int), r_int) expect_equal(vec_ptype_common(r_int, r_dbl), r_int) }) test_that("can cast to self type", { x <- list_of(1) expect_identical(vec_cast(x, x), x) }) test_that("can cast between different list_of types", { x <- list_of(1, 2) to <- list_of(.ptype = integer()) expect_identical(vec_cast(x, to), list_of(1L, 2L)) }) test_that("list_of casting retains outer names", { x <- list_of(x = 1, 2, z = 3) to <- list_of(.ptype = integer()) expect_named(vec_cast(x, to), c("x", "", "z")) }) test_that("safe casts work as expected", { x <- list_of(1) expect_equal(vec_cast(NULL, x), NULL) expect_equal(vec_cast(NA, x), list_of(NULL, .ptype = double())) expect_identical(vec_cast(list(1), x), list_of(1)) expect_identical(vec_cast(list(TRUE), x), list_of(1)) expect_identical(vec_cast(x, list()), list(1)) expect_identical(vec_cast(x, list()), list(1)) expect_error( vec_cast(list_of(1), list_of("")), class = "vctrs_error_incompatible_type" ) # These used to be allowed expect_error(vec_cast(1L, x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1, x), class = "vctrs_error_incompatible_type") }) test_that("error call is passed to inner cast methods", { fn1 <- function() vec_cast(list_of(1), list_of("")) fn2 <- function() vec_cast(list(1), list_of("")) expect_snapshot({ (expect_error(fn1())) (expect_error(fn2())) }) }) test_that("lossy casts generate warning (no longer the case)", { # This used to be a lossy cast warning expect_error( vec_cast(list(c(1.5, 1), 1L), to = list_of(1L)), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), list_of(1)), class = "vctrs_error_incompatible_type") }) test_that("list_of() has as.character() method (tidyverse/tidyr#654)", { exp <- rep(paste0("<", vec_ptype_abbr(mtcars), ">"), 2) expect_identical(as.character(list_of(mtcars, mtcars)), exp) }) test_that("vec_ptype2(>, NA) is symmetric (#687)", { lof <- list_of(1, 2, 3) expect_identical(vec_ptype2(lof, NA), vec_ptype(lof)) expect_identical(vec_ptype2(NA, lof), vec_ptype(lof)) }) test_that("list_of() coerces to list() and list_of() (#1701)", { expect_equal(vec_ptype_common(list_of(1), list()), list()) expect_equal(vec_cast_common(list_of(1), list()), list(list(1), list())) expect_equal(vec_ptype_common(list_of(1), list("")), list()) expect_equal(vec_cast_common(list_of(1), list("")), list(list(1), list(""))) expect_equal( vec_ptype_common(list_of(1), list_of("")), list() ) expect_equal( vec_ptype_common(list_of(1), list(), list_of("")), list() ) }) test_that("can concatenate list and list-of (#1161)", { expect_equal( vec_c(list(1), list_of(2)), list(1, 2) ) expect_equal( vec_c(list(""), list_of(2)), list("", 2) ) }) vctrs/tests/testthat/test-type-integer64.R0000644000176200001440000001667214402367170020323 0ustar liggesusers test_that("casting of integer64 works", { x <- bit64::as.integer64(1:10) expect_equal(vec_cast(x, bit64::integer64()), x) expect_equal(vec_cast(x, integer()), 1:10) expect_equal(vec_cast(1:10, bit64::integer64()), x) expect_equal(vec_cast(x, double()), as.double(x)) expect_equal(vec_cast(as.numeric(1:10), bit64::integer64()), x) expect_equal(vec_cast(x, logical()), rep(TRUE, 10L)) expect_equal(vec_cast(c(TRUE, FALSE), bit64::integer64()), bit64::as.integer64(c(1, 0))) expect_equal(vec_cast(NA, bit64::integer64()), bit64::as.integer64(NA)) expect_equal(vec_cast(unspecified(2), bit64::integer64()), bit64::as.integer64(c(NA, NA))) expect_error(vec_cast(x, factor()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(factor(), x), class = "vctrs_error_incompatible_type") # These used to be allowed expect_error(vec_cast(x, character()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(as.character(1:10), bit64::integer64()), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2 for integer64 works", { x <- bit64::as.integer64(1:10) expect_equal(vec_ptype2(x, x), bit64::integer64()) expect_equal(vec_ptype2(x, 1L), bit64::integer64()) expect_equal(vec_ptype2(1L, x), bit64::integer64()) expect_equal(vec_ptype2(x, TRUE), bit64::integer64()) expect_equal(vec_ptype2(TRUE, x), bit64::integer64()) expect_equal(vec_ptype2(x, NA), bit64::integer64()) expect_equal(vec_ptype2(NA, x), bit64::integer64()) expect_equal(vec_ptype2(unspecified(), x), bit64::integer64()) expect_equal(vec_ptype2(x, unspecified()), bit64::integer64()) expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, ""), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2("", x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(data.frame(), x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, data.frame()), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype_abbr.integer64", { expect_equal(vec_ptype_abbr(bit64::as.integer64(1:10)), "int64") expect_equal(vec_ptype_full(bit64::as.integer64(1:10)), "integer64") }) test_that("can sort integer64", { x <- bit64::as.integer64(c(-1, -3, -2, 1)) expect_identical(vec_order(x), int(2, 3, 1, 4)) expect_identical(x[vec_order(x)], bit64::as.integer64(c(-3, -2, -1, 1))) }) test_that("can slice integer64 objects of all dimensions", { x <- bit64::as.integer64(1:8) expect <- bit64::as.integer64(c(1, 3)) expect_identical(vec_slice(x, c(1, 3)), expect) dim(x) <- c(4, 2) expect <- bit64::as.integer64(c(1, 3, 5, 7)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, c(1, 3)), expect) dim(x) <- c(2, 2, 2) expect <- bit64::as.integer64(c(2, 4, 6, 8)) dim(expect) <- c(1, 2, 2) expect_identical(vec_slice(x, 2), expect) }) test_that("can slice integer64 objects with `NA_integer_`", { idx <- c(NA_integer_, 1) x <- bit64::as.integer64(1:8) expect <- bit64::as.integer64(c(NA, 1)) expect_identical(vec_slice(x, idx), expect) dim(x) <- c(4, 2) expect <- bit64::as.integer64(c(NA, 1, NA, 5)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, idx), expect) dim(x) <- c(2, 2, 2) expect <- bit64::as.integer64(c(NA, 1, NA, 3, NA, 5, NA, 7)) dim(expect) <- c(2, 2, 2) expect_identical(vec_slice(x, idx), expect) }) test_that("can init integer64 objects", { idx <- c(NA_integer_, NA_integer_) x <- bit64::as.integer64(1:8) expect_identical(vec_init(x, 2), vec_slice(x, idx)) dim(x) <- c(4, 2) expect_identical(vec_init(x, 2), vec_slice(x, idx)) dim(x) <- c(2, 2, 2) expect_identical(vec_init(x, 2), vec_slice(x, idx)) }) test_that("can chop integer64 objects with `NA_integer_` indices", { idx <- list(NA_integer_, 1) x <- bit64::as.integer64(1:8) expect <- list( bit64::as.integer64(NA), bit64::as.integer64(1) ) expect_identical(vec_chop(x, indices = idx), expect) dim(x) <- c(4, 2) expect <- list( bit64::as.integer64(c(NA, NA)), bit64::as.integer64(c(1, 5)) ) dim(expect[[1]]) <- c(1, 2) dim(expect[[2]]) <- c(1, 2) expect_identical(vec_chop(x, indices = idx), expect) dim(x) <- c(2, 2, 2) expect <- list( bit64::as.integer64(c(NA, NA, NA, NA)), bit64::as.integer64(c(1, 3, 5, 7)) ) dim(expect[[1]]) <- c(1, 2, 2) dim(expect[[2]]) <- c(1, 2, 2) expect_identical(vec_chop(x, indices = idx), expect) }) test_that("equality proxy converts atomic input to data frames of doubles", { x <- bit64::as.integer64(1) expect_identical( vec_proxy_equal(x), data_frame(left = 2147483648, right = 1) ) }) test_that("equality proxy works with 1-D arrays", { x <- bit64::as.integer64(1:6) y <- x dim(y) <- 6 expect_identical( vec_proxy_equal(x), vec_proxy_equal(y) ) }) test_that("equality proxy on >=2-D input converts to data frame and proxies each column", { x <- bit64::as.integer64(1:8) dim(x) <- c(2, 2, 2) proxy1 <- integer64_proxy(x[1:2, 1, 1]) proxy2 <- integer64_proxy(x[1:2, 2, 1]) proxy3 <- integer64_proxy(x[1:2, 1, 2]) proxy4 <- integer64_proxy(x[1:2, 2, 2]) expect_identical( vec_proxy_equal(x), vec_cbind(proxy1, proxy2, proxy3, proxy4, .name_repair = "minimal") ) }) test_that("can detect missing values with integer64 (#1304)", { x <- bit64::as.integer64(c(NA, NA, 2, NA, 2, 2)) expect_identical(vec_detect_missing(x), c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE)) dim(x) <- c(3, 2) expect_identical(vec_detect_missing(x), c(TRUE, FALSE, FALSE)) }) test_that("can fill missing values with integer64", { x <- bit64::as.integer64(c(NA, NA, 2, NA, 2, 2)) expect <- bit64::as.integer64(c(NA, NA, 2, 2, 2, 2)) expect_identical(vec_fill_missing(x, "down"), expect) dim(x) <- c(3, 2) expect <- bit64::as.integer64(c(NA, NA, 2, 2, 2, 2)) dim(expect) <- c(3, 2) expect_identical(vec_fill_missing(x, "up"), expect) }) test_that("can compare values with integer64", { x <- bit64::as.integer64(c(1, NA, 2)) y <- bit64::as.integer64(c(0, 2, 3)) expect_identical(vec_compare(x, y), c(1L, NA, -1L)) x <- bit64::as.integer64(1:8) y <- bit64::as.integer64(c(1, 2, 1, 5, 1, 5, 1, 5)) dim(x) <- c(2, 2, 2) dim(y) <- c(2, 2, 2) expect_identical(vec_compare(x, y), c(1L, -1L)) }) test_that("integer64 <-> data frame works as expected", { x <- bit64::as.integer64(c(-2, -1, 0, 1)) proxy <- integer64_proxy(x) expect_identical(proxy$left, c(2147483647, 2147483647, 2147483648, 2147483648)) expect_identical(proxy$right, c(4294967294, 4294967295, 0, 1)) expect_identical(integer64_restore(proxy), x) x <- bit64::as.integer64("9223372036854775807") + -1:0 proxy <- integer64_proxy(x) expect_identical(proxy$left, c(4294967295, 4294967295)) expect_identical(proxy$right, c(4294967294, 4294967295)) expect_identical(integer64_restore(proxy), x) x <- bit64::as.integer64("-9223372036854775807") + 0:1 proxy <- integer64_proxy(x) expect_identical(proxy$left, c(0, 0)) expect_identical(proxy$right, c(1, 2)) expect_identical(integer64_restore(proxy), x) x <- bit64::as.integer64(NA) proxy <- integer64_proxy(x) expect_identical(proxy$left, NA_real_) expect_identical(proxy$right, NA_real_) expect_identical(integer64_restore(proxy), x) }) test_that("`integer64_proxy()` doesn't allow arrays", { x <- bit64::as.integer64(1:6) dim(x) <- c(3, 2) expect_error(integer64_proxy(x), "should not have a `dim` attribute") }) vctrs/tests/testthat/test-type-data-table.R0000644000176200001440000000342014405105465020475 0ustar liggesusers# Never run on CRAN, even if they have data.table, because we don't regularly # check these on CI and we don't want a change in data.table to force a CRAN # failure for vctrs. skip_on_cran() # Avoids adding `data.table` to Suggests. # These tests are only run on the devs' machines. testthat_import_from("data.table", "data.table") test_that("common type of data.table and data.frame is data.table", { expect_identical( vec_ptype2(data.table(x = TRUE), data.table(y = 2)), data.table(x = lgl(), y = dbl()) ) expect_identical( vec_ptype2(data.table(x = TRUE), data.frame(y = 2)), data.table(x = lgl(), y = dbl()) ) expect_identical( vec_ptype2(data.frame(y = 2), data.table(x = TRUE)), data.table(y = dbl(), x = lgl()) ) expect_identical( vec_cast(data.table(y = 2), data.table(x = TRUE, y = 1L)), data.table(x = NA, y = 2L) ) expect_identical( vec_cast(data.frame(y = 2), data.table(x = TRUE, y = 1L)), data.table(x = NA, y = 2L) ) expect_identical( vec_cast(data.table(y = 2), data.frame(x = TRUE, y = 1L)), data.frame(x = NA, y = 2L) ) }) test_that("data.table and tibble do not have a common type", { expect_equal( vec_ptype_common(data.table(x = TRUE), tibble(y = 2)), tibble(x = lgl(), y = dbl()) ) expect_equal( vec_ptype_common(tibble(y = 2), data.table(x = TRUE)), tibble(y = dbl(), x = lgl()) ) expect_identical( vec_cast(data.table(y = 2), tibble(x = TRUE, y = 1L)), tibble(x = lgl(NA), y = 2L) ) expect_identical( vec_cast(tibble(y = 2), data.table(x = TRUE, y = 1L)), data_frame(x = lgl(NA), y = 2L) ) }) test_that("data table has formatting methods", { expect_snapshot({ dt <- data.table(x = 1, y = 2, z = 3) vec_ptype_abbr(dt) vec_ptype_full(dt) }) }) vctrs/tests/testthat/test-rank.R0000644000176200001440000001252714511320527016457 0ustar liggesuserstest_that("can rank with different types of `ties`", { x <- c(2L, 5L, 1L, 1L, 2L) expect_identical(vec_rank(x, ties = "min"), rank(x, ties.method = "min")) expect_identical(vec_rank(x, ties = "max"), rank(x, ties.method = "max")) expect_identical(vec_rank(x, ties = "sequential"), rank(x, ties.method = "first")) expect_identical(vec_rank(x, ties = "dense"), c(2L, 3L, 1L, 1L, 2L)) }) test_that("can rank in descending order", { x <- c(2L, 5L, 1L, 1L, 2L) expect_identical(vec_rank(x, ties = "min", direction = "desc"), rank(-x, ties.method = "min")) expect_identical(vec_rank(x, ties = "max", direction = "desc"), rank(-x, ties.method = "max")) expect_identical(vec_rank(x, ties = "sequential", direction = "desc"), rank(-x, ties.method = "first")) expect_identical(vec_rank(x, ties = "dense", direction = "desc"), c(2L, 1L, 3L, 3L, 2L)) }) test_that("can rank incomplete values with `NA`", { x <- c(2, NA, 4, NaN, 4, 2, NA) expect_identical(vec_rank(x, ties = "min", incomplete = "na"), rank(x, ties.method = "min", na.last = "keep")) expect_identical(vec_rank(x, ties = "max", incomplete = "na"), rank(x, ties.method = "max", na.last = "keep")) expect_identical(vec_rank(x, ties = "sequential", incomplete = "na"), rank(x, ties.method = "first", na.last = "keep")) expect_identical(vec_rank(x, ties = "dense", incomplete = "na"), c(1L, NA, 2L, NA, 2L, 1L, NA)) # NaN are treated as missing, regardless of whether or not they are distinct from NA_real_ expect_identical( vec_rank(x, ties = "min", incomplete = "na", nan_distinct = TRUE), vec_rank(x, ties = "min", incomplete = "na", nan_distinct = FALSE) ) }) test_that("works correctly when `incomplete = 'na'` with no missing values", { x <- c(1, 2, 1, 5, 2) expect_identical(vec_rank(x, incomplete = "na"), rank(x, ties.method = "min")) }) test_that("when ranking incomplete values, all NA (or NaN) values get the same rank", { # this is in contrast to rank(), which treats all NA (NaN) as different x <- c(1, NA, 3, NaN, NA, 1, NaN) expect_identical(vec_rank(x, na_value = "largest"), c(1L, 4L, 3L, 4L, 4L, 1L, 4L)) expect_identical(vec_rank(x, na_value = "smallest"), c(5L, 1L, 7L, 1L, 1L, 5L, 1L)) # If distinct, NaN are always ranked between real numbers and NA_real_ expect_identical(vec_rank(x, na_value = "largest", nan_distinct = TRUE), c(1L, 6L, 3L, 4L, 6L, 1L, 4L)) expect_identical(vec_rank(x, na_value = "smallest", nan_distinct = TRUE), c(5L, 1L, 7L, 3L, 1L, 5L, 3L)) }) test_that("ranks character vectors in the C locale", { x <- c("B", "b", "a") expect_identical(vec_rank(x), c(1L, 3L, 2L)) }) test_that("works with data frames", { df <- data_frame( x = c(1, 2, 1, 2, 2), y = c(2, 2, 1, 2, 5) ) expect_identical(vec_rank(df, ties = "min"), c(2L, 3L, 1L, 3L, 5L)) expect_identical(vec_rank(df, ties = "sequential"), c(2L, 3L, 1L, 4L, 5L)) }) test_that("works with data frames with 0 columns and >0 rows (#1863)", { # All rows are treated as being from the same group df <- data_frame(.size = 5) expect_identical(vec_rank(df, ties = "min"), c(1L, 1L, 1L, 1L, 1L)) expect_identical(vec_rank(df, ties = "sequential"), c(1L, 2L, 3L, 4L, 5L)) expect_identical(vec_rank(df, ties = "sequential", direction = "desc"), c(1L, 2L, 3L, 4L, 5L)) }) test_that("works with data frames with 0 columns and 0 rows (#1863)", { df <- data_frame(.size = 0) expect_identical(vec_rank(df, ties = "min"), integer()) expect_identical(vec_rank(df, ties = "sequential"), integer()) }) test_that("can control the direction per column", { df <- data_frame( x = c(1, 2, 1, 2, 2), y = c(2, 2, 1, 2, 5) ) df2 <- df df2$y <- -df2$y expect_identical( vec_rank(df, direction = c("asc", "desc")), vec_rank(df2, direction = "asc") ) }) test_that("incompleteness is respected in data frames and rcrds", { df <- data_frame( x = c(1, NA, NA, 1), y = c(NA, NA, 1, 1) ) expect_identical(vec_rank(df, incomplete = "na"), c(NA, NA, NA, 1L)) expect_identical(vec_rank(df, incomplete = "na", direction = "desc"), c(NA, NA, NA, 1L)) x <- new_rcrd(list( x = c(1, 1, NA, NA, 1), y = c(1, NA, 1, NA, 1) )) expect_identical(vec_rank(x, incomplete = "na"), c(1L, NA, NA, NA, 1L)) }) test_that("can control `na_value` per column", { df <- data_frame( x = c(1, 1, NA, NA, NA), y = c(3, NA, NA, 2, 1) ) expect_identical( vec_rank(df, na_value = c("largest", "smallest")), c(2L, 1L, 3L, 5L, 4L) ) expect_identical( vec_rank(df, na_value = c("largest", "smallest"), direction = "desc"), c(4L, 5L, 3L, 1L, 2L) ) # But `incomplete = "na"` overrules it expect_identical( vec_rank(df, na_value = c("largest", "smallest"), incomplete = "na"), c(1L, NA, NA, NA, NA) ) expect_identical( vec_rank(df, na_value = c("largest", "smallest"), incomplete = "na", direction = "desc"), c(1L, NA, NA, NA, NA) ) }) test_that("`x` must be a vector", { expect_error(vec_rank(identity), class = "vctrs_error_scalar_type") }) test_that("`ties` is validated", { expect_snapshot(error = TRUE, vec_rank(1, ties = "foo")) expect_snapshot(error = TRUE, vec_rank(1, ties = 1)) }) test_that("`incomplete` is validated", { expect_snapshot(error = TRUE, vec_rank(1, incomplete = NA)) expect_snapshot(error = TRUE, vec_rank(1, incomplete = c(TRUE, FALSE))) expect_snapshot(error = TRUE, vec_rank(1, incomplete = "foo")) }) vctrs/tests/testthat/test-group.R0000644000176200001440000001340514276722575016675 0ustar liggesusers # group id ---------------------------------------------------------------- test_that("vec_group_id detects groups in order of appearance", { x <- c(2, 4, 2, 1, 4) expect <- structure(c(1L, 2L, 1L, 3L, 2L), n = 3L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works for size 0 input", { expect <- structure(integer(), n = 0L) expect_equal(vec_group_id(NULL), expect) expect_equal(vec_group_id(numeric()), expect) }) test_that("vec_group_id works on base S3 objects", { x <- factor(c("x", "y", "x")) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) x <- new_date(c(0, 1, 0)) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works row wise on data frames", { df <- data.frame(x = c(1, 2, 1, 1), y = c(2, 3, 2, 3)) expect <- structure(c(1L, 2L, 1L, 3L), n = 3L) expect_equal(vec_group_id(df), expect) }) test_that("vec_group_id works row wise on arrays", { x <- array(c(1, 1, 1, 2, 4, 2), c(3, 2)) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works with different encodings", { expect <- structure(c(1L, 1L, 1L), n = 1L) expect_equal(vec_group_id(encodings()), expect) }) test_that("vec_group_id takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), c(1, 1, 1, 2)) # Compares on only the first field expect <- structure(c(1L, 2L, 1L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- structure(c(1L, 2L, 1L, 1L), n = 2L) expect_equal(vec_group_id(df), expect) }) # group rle --------------------------------------------------------------- test_that("vec_group_rle returns a `vctrs_group_rle` object", { expect_s3_class(vec_group_rle(1), "vctrs_group_rle") }) test_that("vec_group_rle works with size 0 input", { expect <- new_group_rle(integer(), integer(), 0L) expect_equal(vec_group_rle(integer()), expect) expect_equal(vec_group_rle(NULL), expect) }) test_that("vec_group_rle detects groups in order of appearance", { x <- c(2, 2, 3, 1, 1) expect <- new_group_rle(1:3, c(2L, 1L, 2L), 3L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle can refer to groups it has already seen", { x <- c(2, 3, 2) expect <- new_group_rle(c(1L, 2L, 1L), rep(1L, 3), 2L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle works on base S3 objects", { expect <- new_group_rle(c(1L, 2L, 1L, 3L), c(1L, 2L, 1L, 1L), 3L) x <- factor(c("x", "y", "y", "x", "z")) expect_equal(vec_group_rle(x), expect) x <- new_date(c(0, 1, 1, 0, 2)) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), c(1, 1, 1, 2)) # Compares on only the first field expect <- new_group_rle(c(1L, 2L, 1L), c(1L, 1L, 2L), 2L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle works row wise on data frames", { df <- data.frame(x = c(1, 1, 2, 1), y = c(2, 2, 3, 2)) expect <- new_group_rle(c(1L, 2L, 1L), c(2L, 1L, 1L), 2L) expect_equal(vec_group_rle(df), expect) }) test_that("vec_group_rle takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- new_group_rle(c(1L, 2L, 1L), c(1L, 1L, 2L), 2L) expect_equal(vec_group_rle(df), expect) }) test_that("can access fields", { x <- vec_group_rle(c(1, 1, 2)) expect_equal(fields(x), c("group", "length")) expect_identical(field(x, "group"), c(1L, 2L)) expect_identical(field(x, "length"), c(2L, 1L)) }) test_that("can access number of groups", { x <- vec_group_rle(c(1, 1, 2)) expect_identical(attr(x, "n"), 2L) }) test_that("print method is useful", { x <- new_group_rle(c(1L, 2L, 1L), c(3L, 2L, 1L), 2L) expect_snapshot(x) }) # group loc -------------------------------------------------------------- test_that("can locate unique groups of an empty vector", { out <- vec_group_loc(integer()) expect_s3_class(out, "data.frame") expect_equal(out$key, integer()) expect_equal(out$loc, list()) }) test_that("can locate unique groups of a data frame", { df <- data_frame(x = c(1, 1, 1, 2, 2), y = c("a", "a", "b", "a", "b")) out <- vec_group_loc(df) expect_equal(nrow(out), 4L) expect_equal(out$key, vec_unique(df)) }) test_that("can locate unique groups of a data frame with a list column", { df <- data_frame(x = list(1:2, 1:2, "a", 5.5, "a")) out <- vec_group_loc(df) expect_equal(nrow(out), 3L) expect_equal(out$key, vec_unique(df)) }) test_that("`x` must be a vector", { expect_error(vec_group_loc(environment()), class = "vctrs_error_scalar_type") }) test_that("`key` column retains full type information", { x <- factor(letters[c(1, 2, 1)], levels = letters[1:3]) out <- vec_group_loc(x) expect_equal(levels(out$key), levels(x)) }) test_that("vec_group_loc takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_equal(vec_group_loc(x)$key, x[1:2]) expect_equal(vec_group_loc(x)$loc, list(c(1L, 3L), 2L)) x <- as.POSIXlt(new_datetime(c(1, 2, 1))) expect_equal(vec_group_loc(x)$key, x[1:2]) expect_equal(vec_group_loc(x)$loc, list(c(1L, 3L), 2L)) }) test_that("vec_group_loc takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- data_frame(key = vec_slice(df, c(1, 2)), loc = list(c(1L, 3L, 4L), 2L)) expect_equal(vec_group_loc(df), expect) }) test_that("vec_group_loc works with different encodings", { encs <- encodings() expect_identical(nrow(vec_group_loc(encs)), 1L) }) vctrs/tests/testthat/test-error-call.R0000644000176200001440000001304514376223321017565 0ustar liggesuserstest_that("failing common type reports correct error call", { my_function <- function() vec_ptype2(2, chr()) expect_snapshot((expect_error(my_function()))) }) test_that("failing cast reports correct error call", { my_function <- function() vec_cast(2, chr()) expect_snapshot((expect_error(my_function()))) df1 <- data_frame(x = TRUE, y = TRUE) df2 <- data_frame(y = "1") my_function <- function(lhs, rhs) vec_cast(lhs, rhs) expect_snapshot((expect_error(my_function(df1, df2)))) df1 <- data_frame(y = TRUE) df2 <- data_frame(y = "1") expect_snapshot((expect_error(my_function(df1, df2)))) }) test_that("lossy cast reports correct error call", { my_function <- function() vec_cast(2, lgl()) expect_snapshot((expect_error(my_function()))) }) test_that("failing common size reports correct error call", { my_function <- function() vec_recycle(1:2, 10) expect_snapshot((expect_error(my_function()))) # FIXME my_function <- function() vec_size_common(1:2, 1:10) expect_snapshot((expect_error(my_function()))) }) test_that("unsupported error reports correct error call", { x <- new_vctr(1:2) my_function <- function() dim(x) <- 1:2 expect_snapshot((expect_error(my_function()))) my_function <- function() median(x) expect_snapshot((expect_error(my_function()))) }) test_that("scalar error reports correct error call", { my_function <- function() obj_check_vector(foobar()) expect_snapshot((expect_error(my_function()))) }) test_that("size error reports correct error call", { my_function <- function() vec_check_size(1:2, size = 1) expect_snapshot((expect_error(my_function()))) }) test_that("bare casts report correct error call", { my_function <- function() vec_cast(1.5, int()) expect_snapshot((expect_error(my_function()))) my_function <- function() vec_cast(1.5, lgl()) expect_snapshot((expect_error(my_function()))) my_function <- function() vec_cast(2L, lgl()) expect_snapshot((expect_error(my_function()))) # Passing call to `shape_broadcast()` my_function <- function() vec_cast(matrix(TRUE), dbl()) expect_snapshot((expect_error(my_function()))) }) test_that("base S3 casts report correct error call", { my_function <- function() vec_cast("a", factor("b")) expect_snapshot((expect_error(my_function()))) }) test_that("names validation reports correct error call", { my_function <- function() vec_as_names(c("x", "", "y"), repair = "check_unique") expect_snapshot((expect_error(my_function()))) my_function <- function() vec_as_names(c("x", "x"), repair = "check_unique", repair_arg = "repair") expect_snapshot((expect_error(my_function()))) my_function <- function() vec_as_names("...", repair = "check_unique", repair_arg = "repair") expect_snapshot((expect_error(my_function()))) }) test_that("subscript validation reports correct error calls", { my_function <- function() vctrs::num_as_location(1, 1L, missing = "bogus") expect_snapshot((expect_error(my_function()))) my_function <- function() vctrs::vec_as_location(10, 2) expect_snapshot((expect_error(my_function()))) my_function <- function(my_arg) vec_as_location(my_arg, 2) expect_snapshot((expect_error(my_function(1.5)))) my_function <- function(my_arg) vctrs::vec_as_subscript(my_arg) expect_snapshot((expect_error(my_function(1.5)))) my_function <- function(my_arg) vctrs::vec_as_location(my_arg, 2) expect_snapshot((expect_error(my_function(list())))) my_function <- function(my_arg) vec_as_location(1, my_arg) expect_snapshot((expect_error(my_function(1.5)))) my_function <- function(my_arg) vec_as_location(my_arg, 1, missing = "error") expect_snapshot((expect_error(my_function(NA)))) }) test_that("`vec_ptype()` reports correct error call", { my_function <- function(my_arg) vec_ptype(my_arg) expect_snapshot({ (expect_error(my_function(env()))) (expect_error(my_function(foobar(list())))) }) }) test_that("`vec_slice()` uses `error_call`", { my_function <- function(x, i) vec_slice(x, i, error_call = current_env()) expect_snapshot({ (expect_error(my_function(env(), 1))) (expect_error(my_function(1, 2))) }) }) test_that("vec_slice() reports self in error context", { expect_snapshot({ (expect_error(vec_slice(foobar(list()), 1))) (expect_error(vec_slice(list(), env()))) }) }) test_that("list_sizes() reports error context", { expect_snapshot({ (expect_error(list_sizes(foobar(list())))) (expect_error(list_sizes(list(env())))) (expect_error(list_sizes(list(1, 2, env())))) (expect_error(list_sizes(list(1, 2, foo = env())))) }) }) test_that("vec_size() reports error context", { expect_snapshot({ (expect_error(vec_size(env()))) }) }) test_that("vec_cast_common() reports error context", { my_function <- function(...) vec_cast_common(...) expect_snapshot((expect_error(my_function(my_arg = 1.5, .to = int())))) expect_snapshot((expect_error(my_function(my_arg = 1.5, .to = int(), .arg = "my_arg")))) expect_snapshot((expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg")))) expect_snapshot((expect_error(my_function(1, "foo", .arg = "my_arg")))) x <- data.frame(x = "a") y <- data.frame(x = 1, y = 2) expect_snapshot((expect_error(my_function(this_arg = x, that_arg = y)))) }) test_that("vec_ptype_common() reports error context", { my_function <- function(...) vec_ptype_common(...) expect_snapshot((expect_error(my_function(this_arg = 1, that_arg = "foo")))) expect_snapshot((expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg")))) expect_snapshot((expect_error(my_function(1, "foo", .arg = "my_arg")))) }) vctrs/tests/testthat/test-c.R0000644000176200001440000005007014437364137015755 0ustar liggesuserslocal_name_repair_quiet() test_that("zero length input returns NULL", { expect_equal(vec_c(), NULL) expect_equal(vec_c(NULL), NULL) expect_equal(vec_c(NULL,), NULL) expect_equal(vec_c(NULL, NULL), NULL) }) test_that("NULL is idempotent", { expect_equal(vec_c(NULL, 1L), 1L) expect_equal(vec_c(1L, NULL), 1L) }) test_that("NA is idempotent", { expect_equal(vec_c(NA, 1L), c(NA, 1L)) expect_equal(vec_c(NA, "x"), c(NA, "x")) expect_equal(vec_c(NA, factor("x")), factor(c(NA, "x"))) expect_equal(vec_c(NA, new_date(0)), new_date(c(NA, 0))) expect_equal(vec_c(NA, new_datetime(0)), new_datetime(c(NA, 0))) expect_equal(vec_c(NA, new_duration(0)), new_duration(c(NA, 0))) }) test_that("NA is logical if no other types intervene", { expect_equal(vec_c(logical()), logical()) expect_equal(vec_c(NA), NA) expect_equal(vec_c(NA, NA), c(NA, NA)) }) test_that("different types are coerced to common", { expect_equal(vec_c(TRUE, 1L, 1), c(1, 1, 1)) expect_equal(vec_c(TRUE, 2:4), 1:4) }) test_that("specified .ptypes do not allow more casts", { expect_error( vec_c(TRUE, .ptype = character()), class = "vctrs_error_incompatible_type" ) }) test_that("common type failure uses error call and error arg (#1641, #1692)", { expect_snapshot(error = TRUE, { vec_c("x", 1, .error_call = call("foo"), .error_arg = "arg") }) expect_snapshot(error = TRUE, { vec_c("x", .ptype = integer(), .error_call = call("foo"), .error_arg = "arg") }) }) test_that("common type failure uses positional errors", { expect_snapshot({ # Looking for `..1` and `a` (expect_error(vec_c(1, a = "x", 2))) # Directed cast should also produce positional errors (#1690) (expect_error(vec_c(1, a = "x", 2, .ptype = double(), .error_arg = "arg"))) # Lossy cast (expect_error(vec_c(1, a = 2.5, .ptype = integer()))) }) }) test_that("combines outer an inner names", { expect_equal(vec_c(x = 1), c(x = 1)) expect_equal(vec_c(c(x = 1)), c(x = 1)) expect_equal(vec_c(c(x = 1:2)), c(x1 = 1, x2 = 2)) expect_error(vec_c(y = c(x = 1)), "Please supply") }) test_that("can bind data.frame columns", { df <- data.frame(x = NA, y = 1:2) df$x <- data.frame(a = 1:2) expected <- data.frame(x = NA, y = c(1:2, 1:2)) expected$x <- data.frame(a = c(1:2, 1:2)) expect_equal(vec_c(df, df), expected) }) test_that("vec_c() handles matrices", { m <- matrix(1:4, nrow = 2) dimnames(m) <- list(c("foo", "bar"), c("baz", "quux")) # FIXME: `vec_ptype_common(m, m)` doesn't return dimension names exp <- matrix(c(1:2, 1:2, 3:4, 3:4), nrow = 4) rownames(exp) <- c("foo", "bar", "foo", "bar") expect_identical(vec_c(m, m), exp) expect_error(vec_c(outer = m), "Please supply") }) test_that("vec_c() includes index in argument tag", { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) expect_snapshot(error = TRUE, vec_c(df1, df2)) expect_snapshot(error = TRUE, vec_c(df1, df1, df2)) expect_snapshot(error = TRUE, vec_c(foo = df1, bar = df2)) }) test_that("vec_c() handles record classes", { local_rational_class() out <- vec_c(rational(1, 2), 1L, NA) expect_true(vec_is(out, rational(1, 2))) expect_size(out, 3) expect_identical(vec_proxy(out), data.frame(n = c(1L, 1L, NA), d = c(2L, 1L, NA))) }) test_that("can mix named and unnamed vectors (#271)", { expect_identical(vec_c(c(a = 1), 2), c(a = 1, 2)) expect_identical(vec_c(0, c(a = 1), 2, b = 3), c(0, a = 1, 2, b =3)) }) test_that("preserves names when inputs are cast to a common type (#1690)", { expect_named(vec_c(c(a = 1), .ptype = integer()), "a") expect_named(vec_c(foo = c(a = 1), .ptype = integer(), .name_spec = "{outer}_{inner}"), "foo_a") }) test_that("vec_c() repairs names", { local_name_repair_quiet() # Default minimal repair expect_named(vec_c(a = 1, a = 2, `_` = 3), c("a", "a", "_")) out <- vec_c(!!!set_names(1, NA)) expect_named(out, "") expect_named(vec_c(a = 1, a = 2, `_` = 3, .name_repair = "unique"), c("a...1", "a...2", "_")) expect_error(vec_c(a = 1, a = 2, `_` = 3, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") expect_named(vec_c(a = 1, a = 2, `_` = 3, .name_repair = "universal"), c("a...1", "a...2", "._")) expect_named(vec_c(a = 1, a = 2, .name_repair = ~ toupper(.)), c("A", "A")) }) test_that("vec_c() can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_c(x = TRUE, x = 0, .name_repair = "unique_quiet") res_universal <- vec_c("if" = TRUE, "in" = 0, .name_repair = "universal_quiet") }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in")) }) test_that("vec_c() doesn't use outer names for data frames (#524)", { x <- data.frame(inner = 1) expect_equal(vec_c(outer = x), x) a <- data.frame(x = 1L) b <- data.frame(x = 2L) expect_equal(vec_c(foo = a, bar = b), data.frame(x = 1:2)) }) test_that("vec_c() preserves row names and inner names", { x <- data.frame(a = 1, row.names = "r1") y <- data.frame(a = 2, row.names = "r2") expect_equal(rownames(vec_c(x, y)), c("r1", "r2")) expect_equal(rownames(vec_c(x, x)), c("r1...1", "r1...2")) vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) oo_x <- set_names(as.POSIXlt(c("2020-01-01", "2020-01-02", "2020-01-03")), letters[1:3]) oo_y <- as.POSIXlt(c(FOO = "2020-01-04")) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "d") mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("d")) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x, oo = oo_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y, oo = oo_y), row.names = c("quux") ) nested_out <- vec_c(nested_x, nested_y) expect_identical(row.names(nested_out), c("foo", "bar", "baz", "quux")) expect_identical(row.names(nested_out$df), c("a", "b", "c", "d")) expect_identical(row.names(nested_out$mat), c("a", "b", "c", "d")) expect_identical(names(nested_out$vec), c("a", "b", "c", "FOO")) expect_identical(names(nested_out$oo), c("a", "b", "c", "FOO")) }) test_that("vec_c() outer names work with proxied objects", { x <- as.POSIXlt(new_datetime(0)) exp <- set_names(x, "outer") expect_equal(vec_c(outer = x), exp) named_x <- set_names(x, "inner") exp <- set_names(named_x, "outer_inner") expect_error(vec_c(outer = named_x), "Please supply") expect_equal(vec_c(outer = named_x, .name_spec = "{outer}_{inner}"), exp) xs <- as.POSIXlt(new_datetime(c(0, 1))) exp <- set_names(xs, c("outer_1", "outer_2")) expect_error(vec_c(outer = xs), "Please supply") expect_equal(vec_c(outer = xs, .name_spec = "{outer}_{inner}"), exp) }) test_that("vec_c() works with simple homogeneous foreign S3 classes", { expect_identical(vec_c(foobar(1), foobar(2)), vec_c(foobar(c(1, 2)))) expect_identical(vec_c(NULL, foobar(1), foobar(2)), vec_c(foobar(c(1, 2)))) }) test_that("vec_c() works with simple homogeneous foreign S4 classes", { joe1 <- .Counts(c(1L, 2L), name = "Joe") joe2 <- .Counts(3L, name = "Joe") expect_identical(vec_c(joe1, joe2), .Counts(1:3, name = "Joe")) }) test_that("vec_c() fails with complex foreign S3 classes", { expect_snapshot({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type")) (expect_error(vec_c(x, y, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type")) }) }) test_that("vec_c() fails with complex foreign S4 classes", { expect_snapshot({ joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(vec_c(joe, jane), class = "vctrs_error_incompatible_type")) (expect_error(vec_c(joe, jane, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type")) }) }) test_that("vec_c() falls back to c() if S3 method is available", { # Check off-by-one error expect_error( vec_c(foobar(1), "", foobar(2)), class = "vctrs_error_incompatible_type" ) # Fallback when the class implements `c()` method <- function(...) rep_along(list(...), "dispatched") local_methods( c.vctrs_foobar = method ) expect_identical( vec_c(foobar(1), foobar(2, class = "foo")), c("dispatched", "dispatched") ) expect_identical( vec_c(NULL, foobar(1), NULL, foobar(2, class = "foo")), c("dispatched", "dispatched") ) # Registered fallback s3_register("base::c", "vctrs_c_fallback", method) expect_identical( vec_c( structure(1, class = "vctrs_c_fallback"), structure(2, class = "vctrs_c_fallback") ), c("dispatched", "dispatched") ) # Don't fallback for S3 lists which are treated as scalars by default expect_error( vec_c(foobar(list(1)), foobar(list(2))), class = "vctrs_error_scalar_type" ) }) test_that("c() fallback is consistent", { dispatched <- function(x) structure(x, class = "dispatched") c_method <- function(...) dispatched(NextMethod()) out <- with_methods( vec_ptype2.vctrs_foobaz.vctrs_foobaz = function(...) foobaz(df_ptype2(...)), vec_cast.vctrs_foobaz.vctrs_foobaz = function(...) foobaz(df_cast(...)), c.vctrs_foobar = c_method, list( direct = vec_c(foobar(1L), foobar(2L)), df = vec_c(data_frame(x = foobar(1L)), data_frame(x = foobar(2L))), tib = vec_c(tibble(x = foobar(1L)), tibble(x = foobar(2L))), foreign_df = vec_c(foobaz(data_frame(x = foobar(1L))), foobaz(data_frame(x = foobar(2L)))) ) ) expect_equal(out$direct, dispatched(1:2)) expect_equal(out$df$x, dispatched(1:2)) expect_equal(out$tib$x, dispatched(1:2)) expect_equal(out$foreign_df$x, dispatched(1:2)) # Hard case: generic record vectors my_rec_record <- function(x) { new_rcrd(list(x = x), class = "my_rec_record") } out <- with_methods( c.vctrs_foobar = c_method, vec_ptype2.my_rec_record.my_rec_record = function(x, y, ...) { my_rec_record(vec_ptype2(field(x, "x"), field(y, "x"), ...)) }, vec_cast.my_rec_record.my_rec_record = function(x, to, ...) { x }, vec_c( data_frame(x = my_rec_record(foobar(1L))), data_frame(x = my_rec_record(foobar(2L))) ) ) expect_equal(field(out$x, "x"), dispatched(1:2)) }) test_that("vec_c() falls back to c() if S4 method is available", { joe1 <- .Counts(c(1L, 2L), name = "Joe") joe2 <- .Counts(3L, name = "Joe") c_counts <- function(x, ...) { xs <- list(x, ...) xs_data <- lapply(xs, function(x) x@.Data) new_data <- do.call(c, xs_data) .Counts(new_data, name = x@name) } local_s4_method("c", methods::signature(x = "vctrs_Counts"), c_counts) expect_identical( vec_c(joe1, joe2), .Counts(c(1L, 2L, 3L), name = "Joe") ) expect_identical( vec_c(NULL, joe1, joe2), .Counts(c(1L, 2L, 3L), name = "Joe") ) }) test_that("vec_c() fallback doesn't support `name_spec` or `ptype`", { expect_snapshot({ (expect_error( with_c_foobar(vec_c(foobar(1), foobar(2), .name_spec = "{outer}_{inner}")), "name specification" )) # Used to be an error about `ptype` (expect_error( with_c_foobar(vec_c(foobar(1), foobar(2), .ptype = "")), class = "vctrs_error_incompatible_type" )) # Uses error call (#1641) (expect_error( with_c_foobar(vec_c( foobar(1), foobar(2), .error_call = call("foo"), .name_spec = "{outer}_{inner}" )) )) }) }) test_that("vec_c() doesn't fall back when ptype2 is implemented", { new_quux <- function(x) structure(x, class = "vctrs_quux") with_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) new_quux(int()), vec_cast.vctrs_quux.vctrs_foobar = function(x, to, ...) new_quux(x), vec_restore.vctrs_quux = function(x, ...) new_quux(x), c.vctrs_foobar = function(...) foobar(NextMethod()), { expect_s3_class(c(foobar(1:3), foobar(4L)), "vctrs_foobar") expect_s3_class(vec_c(foobar(1:3), foobar(4L)), "vctrs_quux") } ) }) test_that("vec_c() falls back even when ptype is supplied", { expect_foobar(vec_c(foobar(1), foobar(2), .ptype = foobar(dbl()))) with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), { expect_quux(vec_c(foobar(1), foobar(2), .ptype = foobar(dbl()))) expect_quux(vec_c(foobar(1, foo = TRUE), foobar(2, bar = TRUE), .ptype = foobar(dbl()))) } ) }) test_that("vec_implements_ptype2() is FALSE for scalars", { expect_false(vec_implements_ptype2(quote(foo))) }) test_that("vec_implements_ptype2() and vec_c() fallback are compatible with old registration", { foo <- structure(NA, class = "vctrs_implements_ptype2_false") expect_false(vec_implements_ptype2(foo)) vec_ptype2.vctrs_implements_ptype2_true <- function(...) NULL s3_register( "vctrs::vec_ptype2", "vctrs_implements_ptype2_true", vec_ptype2.vctrs_implements_ptype2_true ) bar <- structure(NA, class = "vctrs_implements_ptype2_true") expect_true(vec_implements_ptype2(bar)) local_methods( `c.vctrs_implements_ptype2_true` = function(...) stop("never called") ) expect_identical(vec_c(bar), bar) }) test_that("can ignore names in `vec_c()` by providing a `zap()` name-spec (#232)", { expect_error(vec_c(a = c(b = 1:2))) expect_identical(vec_c(a = c(b = 1:2), b = 3L, .name_spec = zap()), 1:3) expect_snapshot({ (expect_error( vec_c(a = c(b = letters), b = 1, .name_spec = zap()), class = "vctrs_error_incompatible_type" )) }) }) test_that("can concatenate subclasses of `vctrs_vctr` which don't have ptype2 methods", { x <- new_vctr(1, class = "vctrs_foo") expect_identical(vec_c(x, x), new_vctr(c(1, 1), class = "vctrs_foo")) }) test_that("base c() fallback handles unspecified chunks", { local_methods( c.vctrs_foobar = function(...) { x <- NextMethod() # Should not be passed any unspecified chunks if (anyNA(x)) { abort("tilt") } foobar(x) }, `[.vctrs_foobar` = function(x, i, ...) { # Return a quux to detect dispatch quux(NextMethod()) } ) out <- vec_c(foobar(1:2), rep(NA, 2)) expect_identical(out, quux(c(1:2, NA, NA))) out <- vec_c(rep(NA, 2), foobar(1:2), NA) expect_identical(out, quux(c(NA, NA, 1:2, NA))) }) test_that("can zap outer names from a name-spec (#1215)", { zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner expect_null( names(vec_c(a = 1:2, .name_spec = zap_outer_spec)) ) expect_identical( names(vec_c(a = 1:2, c(foo = 3L), .name_spec = zap_outer_spec)), c("", "", "foo") ) expect_null( names(list_unchop(list(a = 1:2), indices = list(1:2), name_spec = zap_outer_spec)) ) expect_identical( names(list_unchop(list(a = 1:2, c(foo = 3L)), indices = list(1:2, 3), name_spec = zap_outer_spec)), c("", "", "foo") ) }) test_that("named empty vectors force named output (#1263)", { x <- set_names(int(), chr()) expect_named(vec_c(x), chr()) expect_named(vec_c(x, x), chr()) expect_named(vec_c(x, 1L), "") expect_named(vec_c(x, 1), "") expect_named(list_unchop(list(x), indices = list(int())), chr()) expect_named(list_unchop(list(x, x), indices = list(int(), int())), chr()) expect_named(list_unchop(list(x, 1L), indices = list(int(), 1)), "") expect_named(list_unchop(list(x, 1), indices = list(int(), 1)), "") }) # Golden tests ------------------------------------------------------- test_that("concatenation performs expected allocations", { vec_c_list <- function(x, ptype = NULL) { vec_c(!!!x, .ptype = ptype) } expect_snapshot({ ints <- rep(list(1L), 1e2) dbls <- rep(list(1), 1e2) # Extra allocations from `list2()`, see r-lib/rlang#937 "# `vec_c()` " "Integers" with_memory_prof(vec_c_list(ints)) "Doubles" with_memory_prof(vec_c_list(dbls)) "Integers to integer" with_memory_prof(vec_c_list(ints, ptype = int())) "Doubles to integer" with_memory_prof(vec_c_list(dbls, ptype = int())) "# `list_unchop()` " "Integers" with_memory_prof(list_unchop(ints)) "Doubles" with_memory_prof(list_unchop(dbls)) "Integers to integer" with_memory_prof(list_unchop(ints, ptype = int())) "Doubles to integer" with_memory_prof(list_unchop(dbls, ptype = int())) "# Concatenation with names" "Named integers" ints <- rep(list(set_names(1:3, letters[1:3])), 1e2) with_memory_prof(list_unchop(ints)) "Named matrices" mat <- matrix(1:4, 2, dimnames = list(c("foo", "bar"))) mats <- rep(list(mat), 1e2) with_memory_prof(list_unchop(mats)) "Data frame with named columns" df <- data_frame( x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c("A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb"))) ) dfs <- rep(list(df), 1e2) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (non-repaired, non-recursive case)" df <- data_frame(x = 1:2) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (repaired, non-recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (non-repaired, recursive case) (#1217)" df <- data_frame( x = 1:2, y = data_frame(x = 1:2) ) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (repaired, recursive case) (#1217)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "list-ofs (#1496)" make_list_of <- function(n) { df <- tibble::tibble( x = new_list_of(vec_chop(1:n), ptype = integer()) ) vec_chop(df) } with_memory_prof(list_unchop(make_list_of(1e3))) with_memory_prof(list_unchop(make_list_of(2e3))) with_memory_prof(list_unchop(make_list_of(4e3))) }) }) test_that("can dispatch many times", { # This caused a crash when counters were not correctly protected foo <- structure( list(x.sorted = numeric(0), tp = numeric(0), fp = numeric(0)), row.names = integer(0), class = c("vctrs_foobar", "tbl_df", "tbl", "data.frame") ) x <- lapply(1:200, function(...) foo) expect_error(NA, object = vctrs::list_unchop(x)) }) test_that("dots splicing clones as appropriate", { x <- list(a = 1) vctrs::vec_cbind(!!!x) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_rbind(!!!x) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_c(!!!x) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_cbind(!!!x, 2) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_rbind(!!!x, 2) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_c(!!!x, 2) expect_equal(x, list(a = 1)) }) test_that("can combine records wrapped in data frames", { local_methods( vec_proxy.vctrs_foobar = function(x, ...) { data_frame(x = unclass(x), y = seq_along(x)) }, vec_restore.vctrs_foobar = function(x, to, ...) { foobar(x$x) } ) x <- foobar(1:2) y <- foobar(3:4) expect_equal( vec_c(x, y), foobar(1:4) ) expect_equal( list_unchop(list(x, y), indices = list(1:2, 3:4)), foobar(1:4) ) expect_equal( vec_rbind(data_frame(x = x), data_frame(x = y)), data_frame(x = foobar(1:4)) ) }) test_that("fallback works with subclasses of `vctrs_vctr`", { # Used to fail because of interaction between common class fallback # for `base::c()` and the `c()` method for `vctrs_vctr` that called # back into `vec_c()`. # Reprex for failure in the ricu package x <- new_rcrd(list(a = 1), class = "vctrs_foobar") expect_equal( vec_c(x, x, .name_spec = "{inner}"), new_rcrd(list(a = c(1, 1)), class = "vctrs_foobar") ) # Reprex for failure in the groupr package x <- new_rcrd(list(a = 1), class = "vctrs_foobar") df <- data_frame(x = x) expect_equal( vec_rbind(df, data.frame()), df ) expect_equal( vec_cast_common(df, data.frame()), list(df, data_frame(x = x[0])) ) }) vctrs/tests/testthat/test-missing.R0000644000176200001440000001245514315060310017166 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_detect_missing() test_that("can detect different types of NA", { expect_true(vec_detect_missing(NA)) expect_true(vec_detect_missing(NA_integer_)) expect_true(vec_detect_missing(NA_real_)) expect_true(vec_detect_missing(NA_complex_)) expect_true(vec_detect_missing(complex(real = NA, imaginary = 1))) expect_true(vec_detect_missing(NaN)) expect_true(vec_detect_missing(NA_character_)) expect_true(vec_detect_missing(list(NULL))) }) test_that("can detect different types of NA in data frames", { # using multiple columns to prevent proxy unwrapping expect_true(vec_detect_missing(data.frame(x = NA, y = NA))) expect_true(vec_detect_missing(data.frame(x = NA_integer_, y = NA_integer_))) expect_true(vec_detect_missing(data.frame(x = NA_real_, y = NaN))) expect_true(vec_detect_missing(data.frame(x = NA_complex_, y = NA_complex_))) expect_true(vec_detect_missing(data.frame(x = complex(real = NA, imaginary = 1), y = complex(real = 1, imaginary = NA)))) expect_true(vec_detect_missing(data.frame(x = NA_character_, y = NA_character_))) expect_true(vec_detect_missing(new_data_frame(list(x = list(NULL), y = list(NULL))))) }) test_that("raw vectors can never be NA", { expect_false(vec_detect_missing(raw(1))) expect_false(vec_detect_missing(data.frame(x = raw(1), y = raw(1)))) }) test_that("vectorised over rows of a data frame", { df <- data.frame(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA)) expect_equal(vec_detect_missing(df), c(FALSE, FALSE, FALSE, TRUE)) }) test_that("works recursively with data frame columns", { df <- data.frame(x = c(1, 1, NA, NA)) df$df <- data.frame(y = c(NA, 1, 1, NA), z = c(1, NA, 1, NA)) expect_equal(vec_detect_missing(df), c(FALSE, FALSE, FALSE, TRUE)) }) test_that("0 row, N col data frame always returns `logical()` (#1585)", { expect_identical(vec_detect_missing(data_frame()), logical()) expect_identical(vec_detect_missing(data_frame(x = integer(), y = double())), logical()) }) test_that(">0 row, 0 col data frame always returns `TRUE` for each row (#1585)", { # `vec_detect_missing()` returns `TRUE` for each row because it (in theory) does # `all()` on each row, and since there are 0 columns we get # `all(logical()) == TRUE` for each row. expect_identical( vec_detect_missing(data_frame(.size = 2L)), c(TRUE, TRUE) ) }) test_that("works with `NULL` input (#1494)", { expect_identical(vec_detect_missing(NULL), logical()) }) # ------------------------------------------------------------------------------ # vec_any_missing() test_that("can check for any missing with all base vector types", { expect_false(vec_any_missing(TRUE)) expect_false(vec_any_missing(1L)) expect_false(vec_any_missing(1)) expect_false(vec_any_missing(complex(real = 1, imaginary = 1))) expect_false(vec_any_missing("1")) expect_false(vec_any_missing(list(1))) expect_true(vec_any_missing(c(TRUE, NA))) expect_true(vec_any_missing(c(1L, NA_integer_))) expect_true(vec_any_missing(c(1, NA_real_))) expect_true(vec_any_missing(complex(real = c(1, NA), imaginary = c(1, NA)))) expect_true(vec_any_missing(c("1", NA_character_))) expect_true(vec_any_missing(list(1, NULL))) }) test_that("raw vectors can never be missing", { expect_false(vec_any_missing(raw(1))) expect_false(vec_any_missing(data.frame(x = raw(1), y = raw(1)))) }) test_that("works with empty vectors", { # Like `any(logical())` expect_false(vec_any_missing(integer())) }) test_that("correctly detects complex missingness", { expect_false(vec_any_missing(complex(real = 1, imaginary = 1))) expect_true(vec_any_missing(complex(real = 1, imaginary = NA))) expect_true(vec_any_missing(complex(real = NA, imaginary = 1))) expect_true(vec_any_missing(complex(real = NA, imaginary = NA))) }) test_that("treats NaN as missing", { expect_true(vec_any_missing(NaN)) }) test_that("works with `NULL` input", { expect_false(vec_any_missing(NULL)) }) test_that("entire row of a data frame must be missing", { df <- data.frame(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA)) expect_true(vec_any_missing(df)) expect_false(vec_any_missing(df[-4,])) }) test_that("works recursively with data frame columns", { df <- data.frame(x = c(1, 1, NA, NA)) df$df <- data.frame(y = c(NA, 1, 1, NA), z = c(1, NA, 1, NA)) expect_true(vec_any_missing(df)) expect_false(vec_any_missing(df[-4,])) }) test_that("0 row, N col data frame always returns `FALSE` (#1585)", { df <- data_frame() expect_false(vec_any_missing(df)) expect_false(vec_any_missing(data_frame(x = integer(), y = double()))) # This is consistent with `vec_detect_missing()` returning `logical()` for 0 row # data frames. Then `any(logical()) == FALSE` to get `vec_any_missing()`. expect_identical( vec_any_missing(df), any(vec_detect_missing(df)) ) }) test_that(">0 row, 0 col data frame always returns `TRUE` (#1585)", { df <- data_frame(.size = 2L) expect_true(vec_any_missing(df)) # This is consistent with `vec_detect_missing()` returning `TRUE` for each row # because it (in theory) does `all()` on each row, and since there are 0 # columns we get `all(logical()) == TRUE` for each row. # Then `any(c(TRUE, TRUE)) == TRUE` to get `vec_any_missing()`. expect_identical( vec_any_missing(df), any(vec_detect_missing(df)) ) }) vctrs/tests/testthat/test-bind.R0000644000176200001440000010724614375224663016457 0ustar liggesuserslocal_name_repair_quiet() # rows -------------------------------------------------------------------- test_that("empty inputs return an empty data frame", { expect_equal(vec_rbind(), data_frame()) expect_equal(vec_rbind(NULL, NULL), data_frame()) }) test_that("vec_rbind(): NULL is idempotent", { df <- data_frame(x = 1) expect_equal(vec_rbind(df, NULL), df) }) test_that("vec_rbind() output is tibble if any input is tibble", { df <- data_frame(x = 1) dt <- tibble::tibble(x = 1) expect_s3_class(vec_rbind(dt), "tbl_df") expect_s3_class(vec_rbind(dt, df), "tbl_df") expect_s3_class(vec_rbind(df, dt), "tbl_df") }) test_that("type of column is common type of individual columns", { x_int <- data_frame(x = 1L) x_dbl <- data_frame(x = 2.5) expect_equal(vec_rbind(x_int, x_int), data_frame(x = c(1L, 1L))) expect_equal(vec_rbind(x_int, x_dbl), data_frame(x = c(1, 2.5))) }) test_that("incompatible columns throws common type error", { x_int <- data_frame(x = 1L) x_chr <- data_frame(x = "a") expect_snapshot({ (expect_error( vec_rbind(x_int, x_chr), class = "vctrs_error_incompatible_type" )) (expect_error( vec_rbind(x_int, x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type" )) (expect_error( vec_rbind(x_int, x_chr, .ptype = x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type" )) }) }) test_that("result contains union of columns", { expect_named( vec_rbind( data_frame(x = 1), data_frame(y = 1) ), c("x" , "y") ) expect_named( vec_rbind( data_frame(y = 1, x = 1), data_frame(y = 1, z = 2) ), c("y", "x", "z") ) }) test_that("all inputs coerced to data frames", { expect_equal( vec_rbind(data_frame(x = 1L), c(x = 1.5)), data_frame(x = c(1, 1.5)) ) }) test_that("names are supplied if needed", { local_name_repair_verbose() expect_snapshot(out <- vec_rbind(data_frame(...1 = 1), 1)) expect_equal(out, data_frame(...1 = c(1, 1))) }) test_that("matrix becomes data frame and has names properly repaired", { x <- matrix(1:4, nrow = 2) expect_equal(vec_rbind(x), data.frame(...1 = 1:2, ...2 = 3:4)) }) test_that("can bind data.frame columns", { df <- data.frame(x = NA, y = 1:2) df$x <- data.frame(a = 1:2) expected <- data.frame(x = NA, y = c(1:2, 1:2)) expected$x <- data.frame(a = c(1:2, 1:2)) expect_equal(vec_rbind(df, df), expected) }) test_that("can rbind unspecified vectors", { expect_identical(vec_rbind(NA), data_frame(...1 = NA)) expect_identical(vec_rbind(NA, NA), data_frame(...1 = lgl(NA, NA))) df <- data.frame(x = 1) expect_identical(vec_rbind(NA, df), data.frame(x = c(NA, 1))) expect_identical(vec_rbind(df, NA), data.frame(x = c(1, NA))) expect_identical(vec_rbind(NA, df, NA), data.frame(x = c(NA, 1, NA))) expect_identical(vec_rbind(c(x = NA), data.frame(x = 1)), data.frame(x = c(NA, 1))) expect_identical(vec_rbind(c(y = NA), df), data.frame(y = c(NA, NA), x = c(NA, 1))) out <- suppressMessages(vec_rbind(c(x = NA, x = NA), df)) exp <- data.frame(x...1 = c(NA, NA), x...2 = c(NA, NA), x = c(NA, 1)) expect_identical(out, exp) }) test_that("as_df_row() tidies the names of unspecified vectors", { expect_identical(as_df_row(c(NA, NA)), c(NA, NA)) expect_identical(as_df_row(unspecified(2)), unspecified(2)) expect_identical(as_df_row(c(a = NA, a = NA), quiet = TRUE), data.frame(a...1 = NA, a...2 = NA)) expect_identical(as_df_row(c(a = TRUE, a = TRUE), quiet = TRUE), data.frame(a...1 = TRUE, a...2 = TRUE)) }) test_that("can rbind spliced lists", { data <- list(c(a = 1, b = 2), c(a = TRUE, b = FALSE)) expect_identical(vec_rbind(!!!data), data_frame(a = c(1, 1), b = c(2, 0))) }) test_that("can rbind list columns", { out <- vec_rbind(data_frame(x = list(1, 2)), data_frame(x = list(3))) expect_identical(out, data_frame(x = list(1, 2, 3))) }) test_that("can rbind lists", { out <- vec_rbind(list(x = 1:2)) expect_identical(out, data_frame(x = list(c(1L, 2L)))) out <- vec_rbind(list(x = 1:2, y = 3L)) expect_identical(out, data_frame(x = list(c(1L, 2L)), y = list(3L))) out <- vec_rbind(list(x = 1, y = 2), list(y = "string")) expect_identical(out, data_frame(x = list(1, NULL), y = list(2, "string"))) }) test_that("can rbind factors", { fctr <- factor(c("a", "b")) expect_equal(vec_rbind(fctr), data_frame(...1 = fctr[1], ...2 = fctr[2])) fctr_named <- set_names(fctr) expect_equal(vec_rbind(fctr_named), data_frame(a = fctr[1], b = fctr[2])) }) test_that("can rbind dates", { date <- new_date(c(0, 1)) expect_equal(vec_rbind(date), data_frame(...1 = date[1], ...2 = date[2])) date_named <- set_names(date, c("a", "b")) expect_equal(vec_rbind(date_named), data_frame(a = date[1], b = date[2])) }) test_that("can rbind POSIXlt objects into POSIXct objects", { datetime <- as.POSIXlt(new_datetime(0)) expect_s3_class(vec_rbind(datetime, datetime)[[1]], "POSIXct") datetime_named <- set_names(datetime, "col") expect_named(vec_rbind(datetime_named, datetime_named), "col") }) test_that("can rbind table objects (#913)", { x <- new_table(1:4, c(2L, 2L)) y <- x colnames <- c("c1", "c2") rownames <- c("r1", "r2", "r3", "r4") dimnames(x) <- list(rownames[1:2], colnames) dimnames(y) <- list(rownames[3:4], colnames) expect <- data.frame(c1 = c(1:2, 1:2), c2 = c(3:4, 3:4), row.names = rownames) expect_identical(vec_rbind(x, y), expect) }) test_that("can rbind missing vectors", { expect_identical(vec_rbind(c(x = na_int)), data_frame(x = na_int)) expect_identical(vec_rbind(c(x = na_int), c(x = na_int)), data_frame(x = int(na_int, na_int))) }) test_that("vec_rbind() respects size invariants (#286)", { expect_identical(vec_rbind(), new_data_frame(n = 0L)) expect_identical(vec_rbind(int(), int()), new_data_frame(n = 2L)) expect_identical(vec_rbind(c(x = int()), c(x = TRUE)), new_data_frame(list(x = lgl(NA, TRUE)))) expect_identical(vec_rbind(int(), new_data_frame(n = 2L), int()), new_data_frame(n = 4L)) }) test_that("can repair names in `vec_rbind()` (#229)", { expect_snapshot({ (expect_error(vec_rbind(.name_repair = "none"), "can't be `\"none\"`")) (expect_error(vec_rbind(.name_repair = "minimal"), "can't be `\"minimal\"`")) (expect_error(vec_rbind(list(a = 1, a = 2), .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) }) expect_named(vec_rbind(list(a = 1, a = 2), .name_repair = "unique"), c("a...1", "a...2")) expect_named(vec_rbind(list(`_` = 1)), "_") expect_named(vec_rbind(list(`_` = 1), .name_repair = "universal"), c("._")) expect_named(vec_rbind(list(a = 1, a = 2), .name_repair = ~ toupper(.)), c("A", "A")) }) test_that("can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_rbind(c(x = 1, x = 2), c(x = 3, x = 4), .name_repair = "unique_quiet") res_universal <- vec_rbind(c("if" = 1, "in" = 2), c("if" = 3, "for" = 4), .name_repair = "universal_quiet") }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in", ".for")) }) test_that("can construct an id column", { df <- data.frame(x = 1) expect_named(vec_rbind(df, df, .names_to = "id"), c("id", "x")) expect_equal(vec_rbind(df, df, .names_to = "id")$id, c(1L, 2L)) expect_equal(vec_rbind(a = df, b = df, .names_to = "id")$id, c("a", "b")) expect_equal(vec_rbind(a = df, df, .names_to = "id")$id, c("a", "")) }) test_that("vec_rbind() fails with arrays of dimensionality > 3", { expect_snapshot({ (expect_error(vec_rbind(array(NA, c(1, 1, 1))))) (expect_error(vec_rbind(array(NA, c(1, 1, 1)), .error_call = call("foo")))) }) }) test_that("row names are preserved by vec_rbind()", { df1 <- mtcars[1:3, ] df2 <- mtcars[4:5, ] expect_identical(vec_rbind(df1, df2), mtcars[1:5, ]) row.names(df2) <- NULL out <- mtcars[1:5, ] row.names(out) <- c(row.names(df1), "...4", "...5") expect_identical(vec_rbind(df1, df2), out) }) test_that("can assign row names in vec_rbind()", { df1 <- mtcars[1:3, ] df2 <- mtcars[4:5, ] expect_snapshot({ (expect_error( vec_rbind( foo = df1, df2, .names_to = NULL ), "specification" )) }) # Combination out <- vec_rbind( foo = df1, df2, .names_to = NULL, .name_spec = "{outer}_{inner}" ) exp <- mtcars[1:5, ] row.names(exp) <- c(paste0("foo_", row.names(df1)), row.names(df2)) expect_identical(out, exp) out <- vec_rbind(foo = df1, df2, .names_to = "id") exp <- mtcars[1:5, ] exp <- vec_cbind(id = c(rep("foo", 3), rep("", 2)), exp) expect_identical(out, exp) # Sequence out <- vec_rbind( foo = unrownames(df1), df2, bar = unrownames(mtcars[6, ]), .names_to = NULL, .name_spec = "{outer}_{inner}" ) exp <- mtcars[1:6, ] row.names(exp) <- c(paste0("foo_", 1:3), row.names(df2), "bar") expect_identical(out, exp) out <- vec_rbind( foo = unrownames(df1), df2, bar = unrownames(mtcars[6, ]), .names_to = "id" ) exp <- mtcars[1:6, ] exp <- vec_cbind(id = c(rep("foo", 3), rep("", 2), "bar"), exp) row.names(exp) <- c(paste0("...", 1:3), row.names(df2), "...6") expect_identical(out, exp) }) test_that("vec_rbind() takes the proxy and restores", { df <- foobar(data.frame(x = 1)) # This data frame subclass has an identity proxy and the restore # method falls back to a bare data frame if `$x` has any missing values. # In `vec_rbind()`, the `vec_init()` call will create a bare data frame, # but at the end it is `vec_restore()`d to the right class. local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) { x }, vec_proxy.vctrs_foobar = function(x, ...) { x }, vec_restore.vctrs_foobar = function(x, to, ...) { if (any(is.na(x$x))) { new_data_frame(x) } else { vec_restore_default(x, to) } } ) expect_identical( vec_rbind(df, df), foobar(data.frame(x = c(1, 1))) ) }) test_that("vec_rbind() proxies before initializing", { df <- foobar(data.frame(x = 1)) # This data frame subclass doesn't allow `NA`s in columns. # If initialization happened before proxying, it would try to # create `NA` rows with `vec_init()`. local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) { x }, vec_proxy.vctrs_foobar = function(x, ...) { new_data_frame(x) }, vec_restore.vctrs_foobar = function(x, to, ...) { if (any(is.na(x$x))) { abort("`x` can't have NA values.") } vec_restore_default(x, to) } ) expect_identical( vec_rbind(df, df), foobar(data.frame(x = c(1, 1))) ) }) test_that("vec_rbind() requires a data frame proxy for data frame ptypes", { df <- foobar(data.frame(x = 1)) local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) x, vec_proxy.vctrs_foobar = function(x, ...) 1 ) expect_error( vec_rbind(df, df), "Can't fill a data frame that doesn't have a data frame proxy" ) }) test_that("monitoring: name repair while rbinding doesn't modify in place", { df <- new_data_frame(list(x = 1, x = 1)) expect <- new_data_frame(list(x = 1, x = 1)) # Name repair occurs expect_named(vec_rbind(df), c("x...1", "x...2")) # No changes to `df` expect_identical(df, expect) }) test_that("performance: Row binding with S3 columns doesn't duplicate on every assignment (#1151)", { skip_if_not_testing_performance() x <- as.Date("2000-01-01") x <- rep(x, 100) df <- data.frame(x = x) lst <- rep_len(list(df), 10000) expect_time_lt(vec_rbind(!!!lst), 5) }) test_that("performance: Row binding with df-cols doesn't duplicate on every assignment (#1122)", { skip_if_not_testing_performance() df_col <- new_data_frame(list(x = 1:1000)) df <- new_data_frame(list(y = df_col)) lst <- rep_len(list(df), 10000) expect_time_lt(vec_rbind(!!!lst), 5) }) # cols -------------------------------------------------------------------- test_that("vec_cbind() reports error context", { expect_snapshot({ (expect_error(vec_cbind(foobar(list())))) (expect_error(vec_cbind(foobar(list()), .error_call = call("foo")))) (expect_error(vec_cbind(a = 1:2, b = int()))) (expect_error(vec_cbind(a = 1:2, b = int(), .error_call = call("foo")))) }) }) test_that("empty inputs give data frame", { expect_equal(vec_cbind(), data_frame()) expect_equal(vec_cbind(NULL), data_frame()) expect_equal(vec_cbind(data.frame(a = 1), NULL), data_frame(a = 1)) }) test_that("number of rows is preserved with zero column data frames (#1281)", { df <- new_data_frame(n = 2L) expect_size(vec_cbind(df, df), 2L) }) test_that("vec_cbind(): NULL is idempotent", { df <- data_frame(x = 1) expect_equal(vec_cbind(df, NULL), df) }) test_that("outer names are respected", { expect_named(vec_cbind(x = 1, y = 4), c("x", "y")) expect_named(vec_cbind(a = data.frame(x = 1)), "a") }) test_that("inner names are respected", { expect_named(vec_cbind(data_frame(x = 1), data_frame(y = 1)), c("x", "y")) }) test_that("nameless vectors get tidy defaults", { expect_named(vec_cbind(1:2, 1), c("...1", "...2")) }) test_that("matrix becomes data frame", { x <- matrix(1:4, nrow = 2) expect_equal(vec_cbind(x), data.frame(...1 = 1:2, ...2 = 3:4)) # Packed if named expect_equal(vec_cbind(x = x), data_frame(x = x)) }) test_that("duplicate names are de-deduplicated", { local_name_repair_verbose() expect_snapshot({ (expect_named(vec_cbind(x = 1, x = 1), c("x...1", "x...2"))) (expect_named(vec_cbind(data.frame(x = 1), data.frame(x = 1)), c("x...1", "x...2"))) }) }) test_that("rows recycled to longest", { df <- data.frame(x = 1:3) expect_dim(vec_cbind(df), c(3, 1)) expect_dim(vec_cbind(df, NULL), c(3, 1)) expect_dim(vec_cbind(df, y = 1), c(3, 2)) expect_dim(vec_cbind(data.frame(x = 1), y = 1:3), c(3, 2)) expect_dim( vec_cbind( data.frame(a = 1, b = 2), y = 1:3 ), c(3, 3) ) }) test_that("vec_cbind() output is tibble if any input is tibble", { df <- data.frame(x = 1) dt <- tibble::tibble(y = 2) expect_s3_class(vec_cbind(dt), "tbl_df") expect_s3_class(vec_cbind(df, dt), "tbl_df") expect_s3_class(vec_cbind(dt, df), "tbl_df") }) test_that("can override default .nrow", { expect_dim(vec_cbind(x = 1, .size = 3), c(3, 1)) }) test_that("can repair names in `vec_cbind()` (#227)", { expect_snapshot({ (expect_error(vec_cbind(a = 1, a = 2, .name_repair = "none"), "can't be `\"none\"`")) (expect_error(vec_cbind(a = 1, a = 2, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) }) expect_named(vec_cbind(a = 1, a = 2, .name_repair = "unique"), c("a...1", "a...2")) expect_named(vec_cbind(`_` = 1, .name_repair = "universal"), "._") expect_named(vec_cbind(a = 1, a = 2, .name_repair = "minimal"), c("a", "a")) expect_named(vec_cbind(a = 1, a = 2, .name_repair = toupper), c("A", "A")) }) test_that("can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_cbind(x = 1, x = 2, .name_repair = "unique_quiet") res_universal <- vec_cbind("if" = 1, "in" = 2, .name_repair = "universal_quiet") }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in")) }) test_that("can supply `.names_to` to `vec_rbind()` (#229)", { expect_snapshot({ (expect_error(vec_rbind(.names_to = letters))) (expect_error(vec_rbind(.names_to = 10))) (expect_error(vec_rbind(.names_to = letters, .error_call = call("foo")))) }) x <- data_frame(foo = 1:2, bar = 3:4) y <- data_frame(foo = 5L, bar = 6L) expect_identical( vec_rbind(a = x, b = y, .names_to = "quux"), data_frame(quux = c("a", "a", "b"), foo = c(1L, 2L, 5L), bar = c(3L, 4L, 6L)) ) expect_identical( vec_rbind(a = x, b = y, .names_to = "foo"), data_frame(foo = c("a", "a", "b"), bar = c(3L, 4L, 6L)) ) # No names expect_identical( vec_rbind(x, y, .names_to = "quux"), data_frame(quux = c(1L, 1L, 2L), foo = c(1L, 2L, 5L), bar = c(3L, 4L, 6L)) ) expect_identical( vec_rbind(x, y, .names_to = "foo"), data_frame(foo = c(1L, 1L, 2L), bar = c(3L, 4L, 6L)) ) # Partial names expect_identical(vec_rbind(x, b = y, .names_to = "quux")$quux, c("", "", "b")) }) test_that("can supply existing `.names_to`", { x <- data.frame(a = 1, id = TRUE) expect_identical( vec_rbind(foo = x, bar = c(a = 2), .names_to = "id"), data_frame(a = c(1, 2), id = c("foo", "bar")) ) y <- data.frame(id = TRUE, a = 1) expect_identical( vec_rbind(foo = y, bar = c(a = 2), .names_to = "id"), data_frame(id = c("foo", "bar"), a = c(1, 2)) ) }) test_that("vec_cbind() returns visibly (#452)", { # Shouldn't be needed once `check_unique` is implemented in C expect_visible(vctrs::vec_cbind(x = 1, .name_repair = "check_unique")) }) test_that("vec_cbind() packs named data frames (#446)", { expect_identical(vec_cbind(data_frame(y = 1:3)), data_frame(y = 1:3)) expect_identical(vec_cbind(x = data_frame(y = 1:3)), data_frame(x = data_frame(y = 1:3))) }) test_that("vec_cbind() packs 1d arrays", { a <- array(1:2) expect_identical(vec_cbind(a), data_frame(...1 = 1:2)) expect_identical(vec_cbind(x = a), data_frame(x = a)) }) test_that("vec_cbind() packs named matrices", { m <- matrix(1:4, 2) expect_identical(vec_cbind(m), data_frame(...1 = 1:2, ...2 = 3:4)) expect_identical(vec_cbind(x = m), data_frame(x = m)) }) test_that("vec_cbind() never packs named vectors", { expect_identical(vec_cbind(1:2), data_frame(...1 = 1:2)) expect_identical(vec_cbind(x = 1:2), data_frame(x = 1:2)) }) test_that("names are repaired late if unpacked", { df <- data_frame(b = 2, b = 3, .name_repair = "minimal") out1 <- vec_cbind(a = 1, df) out2 <- vec_cbind(a = 1, as.matrix(df)) out3 <- vec_cbind(a = 1, matrix(1:2, nrow = 1)) expect_named(out1, c("a", "b...2", "b...3")) expect_named(out2, c("a", "b...2", "b...3")) expect_named(out3, c("a", "...2", "...3")) }) test_that("names are not repaired if packed", { df <- data_frame(b = 2, b = 3, .name_repair = "minimal") out1 <- vec_cbind(a = 1, packed = df) out2 <- vec_cbind(a = 1, packed = as.matrix(df)) out3 <- vec_cbind(a = 1, packed = matrix(1:2, nrow = 1)) expect_named(out1, c("a", "packed")) expect_named(out2, c("a", "packed")) expect_named(out3, c("a", "packed")) expect_named(out1$packed, c("b", "b")) expect_identical(colnames(out2$packed), c("b", "b")) expect_identical(colnames(out3$packed), NULL) }) test_that("vec_cbind() fails with arrays of dimensionality > 3", { a <- array(NA, c(1, 1, 1)) expect_snapshot({ (expect_error(vec_cbind(a))) (expect_error(vec_cbind(a, .error_call = call("foo")))) (expect_error(vec_cbind(x = a))) }) }) test_that("monitoring: name repair while cbinding doesn't modify in place", { df <- new_data_frame(list(x = 1, x = 1)) expect <- new_data_frame(list(x = 1, x = 1)) # Name repair occurs expect_named(vec_cbind(df), c("x...1", "x...2")) # No changes to `df` expect_identical(df, expect) }) test_that("vec_rbind() consistently handles unnamed outputs", { # Name repair of columns is a little weird but unclear we can do better expect_identical( vec_rbind(1, 2, .names_to = NULL), data.frame(...1 = c(1, 2)) ) expect_identical( vec_rbind(1, 2, ...10 = 3, .names_to = NULL), data.frame(...1 = c(1, 2, 3), row.names = c("...1", "...2", "...3")) ) expect_identical( vec_rbind(a = 1, b = 2, .names_to = NULL), data.frame(...1 = c(1, 2), row.names = c("a", "b")) ) expect_identical( vec_rbind(c(a = 1), c(b = 2), .names_to = NULL), data.frame(a = c(1, NA), b = c(NA, 2)) ) }) test_that("vec_rbind() ignores named inputs by default (#966)", { expect_identical( vec_rbind(foo = c(a = 1)), data.frame(a = 1) ) expect_identical( vec_rbind(foo = c(a = 1), .names_to = NULL), data.frame(a = 1, row.names = "foo") ) }) test_that("vec_cbind() consistently handles unnamed outputs", { expect_identical( vec_cbind(1, 2), data.frame(...1 = 1, ...2 = 2) ) expect_identical( vec_cbind(1, 2, ...10 = 3), data.frame(...1 = 1, ...2 = 2, ...3 = 3) ) expect_identical( vec_cbind(a = 1, b = 2), data.frame(a = 1, b = 2) ) expect_identical( vec_cbind(c(a = 1), c(b = 2)), new_data_frame(list(...1 = c(a = 1), ...2 = c(b = 2))) ) }) test_that("vec_rbind() name repair messages are useful", { local_name_repair_verbose() expect_snapshot({ vec_rbind(1, 2) vec_rbind(1, 2, .names_to = NULL) vec_rbind(1, 2, ...10 = 3) vec_rbind(1, 2, ...10 = 3, .names_to = NULL) vec_rbind(a = 1, b = 2) vec_rbind(a = 1, b = 2, .names_to = NULL) vec_rbind(c(a = 1), c(b = 2)) vec_rbind(c(a = 1), c(b = 2), .names_to = NULL) }) }) test_that("vec_rbind() is silent when assigning duplicate row names of df-cols", { df <- new_data_frame(list(x = mtcars[1:3, 1, drop = FALSE])) expect_snapshot(vec_rbind(df, df)) expect_snapshot(vec_rbind(mtcars[1:4, ], mtcars[1:3, ])) }) test_that("vec_cbind() name repair messages are useful", { local_name_repair_verbose() expect_snapshot({ vec_cbind(1, 2) vec_cbind(1, 2, ...10 = 3) vec_cbind(a = 1, b = 2) vec_cbind(c(a = 1), c(b = 2)) }) }) test_that("cbind() deals with row names", { expect_identical( vec_cbind(mtcars[1:3], foo = 1), cbind(mtcars[1:3], foo = 1) ) expect_identical( vec_cbind(mtcars[1:3], mtcars[4]), cbind(mtcars[1:3], mtcars[4]) ) out <- vec_cbind( mtcars[1, 1, drop = FALSE], unrownames(mtcars[1:3, 2, drop = FALSE]) ) exp <- mtcars[1:3, c(1, 2)] exp[[1]] <- exp[[1, 1]] row.names(exp) <- paste0(c("Mazda RX4..."), 1:3) expect_identical(out, exp) }) test_that("prefer row names of first named input (#1058)", { df0 <- unrownames(mtcars[1:5, 1:3]) df1 <- mtcars[1:5, 4:6] df2 <- mtcars[5:1, 7:9] expect_identical( row.names(vec_cbind(df0, df1, df2)), row.names(df1) ) expect_identical( row.names(vec_cbind(df0, df2, df1)), row.names(df2) ) }) test_that("can rbind data frames with matrix columns (#625)", { df <- tibble(x = 1:2, y = matrix(1:4, nrow = 2)) expect_identical(vec_rbind(df, df), vec_slice(df, c(1, 2, 1, 2))) }) test_that("rbind repairs names of data frames (#704)", { df <- data_frame(x = 1, x = 2, .name_repair = "minimal") df_repaired <- data_frame(x...1 = 1, x...2 = 2) expect_identical(vec_rbind(df), df_repaired) expect_identical(vec_rbind(df, df), vec_rbind(df_repaired, df_repaired)) expect_snapshot({ (expect_error( vec_rbind(df, df, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique" )) (expect_error( vec_rbind(df, df, .name_repair = "check_unique", .error_call = call("foo")), class = "vctrs_error_names_must_be_unique" )) }) }) test_that("vec_rbind() works with simple homogeneous foreign S3 classes", { expect_identical( vec_rbind(set_names(foobar(1), "x"), set_names(foobar(2), "x")), data_frame(x = foobar(c(1, 2))) ) }) test_that("vec_rbind() works with simple homogeneous foreign S4 classes", { skip_if_cant_set_names_on_s4() joe1 <- .Counts(1L, name = "Joe") joe2 <- .Counts(2L, name = "Joe") expect_identical( vec_rbind(set_names(joe1, "x"), set_names(joe2, "x")), data_frame(x = .Counts(1:2, name = "Joe")) ) }) test_that("vec_rbind() fails with complex foreign S3 classes", { expect_snapshot({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error( vec_rbind(set_names(x, "x"), set_names(y, "x")), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_rbind() fails with complex foreign S4 classes", { skip_if_cant_set_names_on_s4() expect_snapshot({ joe <- .Counts(1L, name = "Joe") jane <- .Counts(2L, name = "Jane") (expect_error(vec_rbind(set_names(joe, "x"), set_names(jane, "y")), class = "vctrs_error_incompatible_type")) }) }) test_that("vec_rbind() falls back to c() if S3 method is available", { x <- foobar(1, foo = 1) y <- foobar(2, bar = 2) x_df <- data_frame(x = x) y_df <- data_frame(x = y) expect_error(vec_rbind(x_df, y_df), class = "vctrs_error_incompatible_type") expect_error(vec_c(x_df, y_df), class = "vctrs_error_incompatible_type") expect_error(list_unchop(list(x_df, y_df), indices = list(1, 2)), class = "vctrs_error_incompatible_type") with_c_method <- function(expr) { with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), expr ) } out <- with_c_method(vec_rbind(x_df, y_df)) exp <- data_frame(x = quux(c(1, 2))) expect_identical(out, exp) expect_identical(with_c_method(vec_c(x_df, y_df)), exp) expect_identical( with_c_method(list_unchop(list(x_df, y_df), indices = list(1, 2))), exp ) # Fallback is used with data frame subclasses, with or without # ptype2 method foo_df <- foobaz(x_df) bar_df <- foobaz(y_df) out <- with_c_method(vec_rbind(foo_df, bar_df)) exp <- foobaz(data_frame(x = quux(c(1, 2)))) expect_identical(out, exp) expect_identical(with_c_method(vec_c(foo_df, bar_df)), exp) expect_identical( with_c_method(list_unchop(list(foo_df, bar_df), indices = list(1, 2))), exp ) with_hybrid_methods <- function(expr, cast = TRUE) { methods <- list( c.vctrs_foobar = function(...) quux(NextMethod()), vec_ptype2.vctrs_foobaz.vctrs_foobaz = function(...) foobaz(df_ptype2(...)), vec_cast.vctrs_foobaz.vctrs_foobaz = if (cast) function(...) foobaz(df_cast(...)) ) with_methods(expr, !!!compact(methods)) } expect_equal( with_hybrid_methods( cast = FALSE, vec_rbind(foo_df, bar_df) ), foobaz(data_frame(x = quux(c(1, 2)))) ) # Falls back to data frame because there is no ptype2/cast methods out <- with_hybrid_methods(vec_rbind(foo_df, bar_df)) exp <- foobaz(data_frame(x = quux(c(1, 2)))) expect_identical(out, exp) expect_identical(with_hybrid_methods(vec_c(foo_df, bar_df)), exp) expect_identical( with_hybrid_methods(list_unchop(list(foo_df, bar_df), indices = list(1, 2))), exp ) wrapper_x_df <- data_frame(x = x_df) wrapper_y_df <- data_frame(x = y_df) out <- with_c_method(vec_rbind(wrapper_x_df, wrapper_y_df)) exp <- data_frame(x = data_frame(x = quux(c(1, 2)))) expect_identical(out, exp) expect_identical(with_c_method(vec_c(wrapper_x_df, wrapper_y_df)), exp) expect_identical( with_c_method(list_unchop(list(wrapper_x_df, wrapper_y_df), indices = list(1, 2))), exp ) }) test_that("c() fallback works with unspecified columns", { local_methods( c.vctrs_foobar = function(...) foobar(NextMethod()), `[.vctrs_foobar` = function(x, i, ...) foobar(NextMethod(), dispatched = TRUE) ) out <- vec_rbind( data_frame(x = foobar(1)), data_frame(y = foobar(2)) ) expect_identical(out, data_frame( x = foobar(c(1, NA), dispatched = TRUE), y = foobar(c(NA, 2), dispatched = TRUE) )) }) test_that("c() fallback works with vctrs-powered data frame subclass", { local_methods( c.vctrs_quux = function(...) quux(vec_paste0(NextMethod(), "-c")), `[.vctrs_quux` = function(x, i, ...) quux(vec_paste0(NextMethod(), "-[")) ) local_foobar_df_methods() ### Joint case df1 <- foobar(data_frame(x = quux(1:3))) df2 <- data_frame(x = quux(4:5)) out <- vctrs::vec_rbind(df1, df2) exp <- foobar(data_frame(x = quux(paste0(1:5, "-c")))) expect_identical(out, exp) out <- vctrs::vec_rbind(df2, df1) exp <- foobar(data_frame(x = quux(paste0(c(4:5, 1:3), "-c")))) expect_identical(out, exp) ### Disjoint case df1 <- foobar(data_frame(x = quux(1:3))) df2 <- data.frame(y = 4:5) out <- vctrs::vec_rbind(df1, df2) exp <- foobar(data_frame( x = quux(c(paste0(1:3, "-c-["), paste0(c(NA, NA), "-["))), y = c(rep(NA, 3), 4:5) )) expect_identical(out, exp) out <- vctrs::vec_rbind(df2, df1) exp <- foobar(data_frame( y = c(4:5, rep(NA, 3)), x = quux(c(paste0(c(NA, NA), "-["), paste0(1:3, "-c-["))) )) expect_identical(out, exp) }) test_that("vec_rbind() falls back to c() if S3 method is available for S4 class", { joe <- data_frame(x = .Counts(c(1L, 2L), name = "Joe")) jane <- data_frame(x = .Counts(3L, name = "Jane")) expect_error(vec_rbind(joe, jane), class = "vctrs_error_incompatible_type") out <- with_methods( c.vctrs_Counts = function(...) .Counts(NextMethod(), name = "dispatched"), vec_rbind(joe, jane) ) expect_identical(out$x, .Counts(1:3, name = "dispatched")) }) test_that("rbind supports names and inner names (#689)", { out <- vec_rbind( data_frame(x = list(a = 1, b = 2)), data_frame(x = list(3)), data_frame(x = list(d = 4)) ) expect_identical(out$x, list(a = 1, b = 2, 3, d = 4)) vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) oo_x <- set_names(as.POSIXlt(c("2020-01-01", "2020-01-02", "2020-01-03")), letters[1:3]) oo_y <- c(FOO = as.POSIXlt(c("2020-01-04"))) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "d") mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("d")) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x, oo = oo_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y, oo = oo_y), row.names = c("quux") ) nested_out <- vec_rbind(nested_x, nested_y) expect_identical(row.names(nested_out), c("foo", "bar", "baz", "quux")) expect_identical(row.names(nested_out$df), c("a", "b", "c", "d")) expect_identical(row.names(nested_out$mat), c("a", "b", "c", "d")) expect_identical(names(nested_out$vec), c("a", "b", "c", "FOO")) expect_identical(names(nested_out$oo), c("a", "b", "c", "FOO")) }) test_that("vec_rbind() doesn't fall back to c() with proxied classes (#1119)", { foobar_rcrd <- function(x, y) new_rcrd(list(x = x, y = y), class = "vctrs_foobar") x <- foobar_rcrd(x = 1:2, y = 3:4) out <- vec_rbind(x, x) exp <- data_frame( ...1 = foobar_rcrd(x = c(1L, 1L), y = c(3L, 3L)), ...2 = foobar_rcrd(x = c(2L, 2L), y = c(4L, 4L)) ) expect_identical(out, exp) out <- vec_rbind(data_frame(x = x), data_frame(x = x)) exp <- data_frame( x = foobar_rcrd(x = c(1L, 2L, 1L, 2L), y = c(3L, 4L, 3L, 4L)) ) expect_identical(out, exp) }) test_that("vec_rbind() fallback works with tibbles", { x <- foobar("foo") df <- data_frame(x = x) tib <- tibble(x = x) local_methods(c.vctrs_foobar = function(...) quux(NextMethod())) exp <- tibble(x = quux(c("foo", "foo"))) expect_identical(vec_rbind(tib, tib), exp) expect_identical(vec_rbind(df, tib), exp) expect_identical(vec_rbind(tib, df), exp) }) test_that("vec_rbind() zaps names when name-spec is zap() and names-to is NULL", { expect_identical( vec_rbind(foo = c(x = 1), .names_to = NULL, .name_spec = zap()), data.frame(x = 1) ) }) test_that("can't zap names when `.names_to` is supplied", { expect_identical( vec_rbind(foo = c(x = 1), .names_to = zap(), .name_spec = zap()), data.frame(x = 1) ) expect_snapshot({ (expect_error( vec_rbind(foo = c(x = 1), .names_to = "id", .name_spec = zap()) )) (expect_error( vec_rbind(foo = c(x = 1), .names_to = "id", .name_spec = zap(), .error_call = call("foo")) )) }) }) test_that("can zap outer names from a name-spec (#1215)", { zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner df <- data.frame(x = 1:2) df_named <- data.frame(x = 3L, row.names = "foo") expect_null( vec_names(vec_rbind(a = df, .names_to = NULL, .name_spec = zap_outer_spec)) ) expect_identical( vec_names(vec_rbind(a = df, df_named, .name_spec = zap_outer_spec)), c("...1", "...2", "foo") ) }) test_that("column names are treated consistently in vec_rbind()", { exp <- data.frame(a = c(1L, 1L), b = c(2L, 2L)) x <- c(a = 1L, b = 2L) expect_identical(vec_rbind(x, x), exp) x <- array(1:2, dimnames = list(c("a", "b"))) expect_identical(vec_rbind(x, x), exp) x <- matrix(1:2, nrow = 1, dimnames = list(NULL, c("a", "b"))) expect_identical(vec_rbind(x, x), exp) x <- array(1:6, c(1, 2, 1), dimnames = list(NULL, c("a", "b"), NULL)) expect_error(vec_rbind(x, x), "Can't bind arrays") }) test_that("can repair names of row-binded vectors (#1567)", { local_name_repair_verbose() expect_silent( expect_named( vec_rbind( x = 1:3, y = 4:6, .name_repair = function(x) c("a", "a", "a") ), c("a", "a", "a") ) ) }) test_that("can repair names of row-binded matrices", { local_name_repair_verbose() expect_silent({ expect_named( vec_rbind( x = matrix(1:3, 1), y = matrix(4:6, 1), .name_repair = function(x) c("a", "a", "a") ), c("a", "a", "a") ) expect_named( vec_rbind( x = matrix(1:3, 1), y = 4:6, .name_repair = function(x) c("a", "a", "a") ), c("a", "a", "a") ) }) }) test_that("vec_rbind() only restores one time", { restored <- list() local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) x, vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, to, ...) { # Ignore proxying and restoration of ptypes if (length(x)) { restored <<- c(restored, list(x)) } foobar(x) } ) df <- data_frame(x = foobar(1:3)) vec_rbind(df, df) expect_equal(restored, list( rep(na_int, 6), # From `vec_init()` foobar(c(1:3, 1:3)) # Final restoration )) }) test_that("vec_rbind() applies `base::c()` fallback to df-cols (#1462, #1640)", { x <- structure(1, class = "myclass") df <- tibble(a = tibble(x = x)) df <- vec_rbind(df, df) expect_equal(df$a$x, structure(c(1, 1), class = "myclass")) }) # Golden tests ------------------------------------------------------- test_that("row-binding performs expected allocations", { vec_rbind_list <- function(x) { vec_rbind(!!!x) } expect_snapshot({ ints <- rep(list(1L), 1e2) named_ints <- rep(list(set_names(1:3, letters[1:3])), 1e2) "Integers as rows" suppressMessages(with_memory_prof(vec_rbind_list(ints))) suppressMessages(with_memory_prof(vec_rbind_list(named_ints))) "Data frame with named columns" df <- data_frame( x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c("A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb"))) ) dfs <- rep(list(df), 1e2) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (non-repaired, non-recursive case)" df <- data_frame(x = 1:2) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (repaired, non-recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (non-repaired, recursive case) (#1217)" df <- data_frame( x = 1:2, y = data_frame(x = 1:2) ) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (repaired, recursive case) (#1217)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) }) }) test_that("`.names_to` is assigned after restoration (#1648)", { df <- data_frame(x = factor("foo")) expect_equal( vec_rbind(name = df, .names_to = "x"), data_frame(x = "name") ) # This used to fail with: #> Error in `vctrs::vec_rbind()`: #> ! adding class "factor" to an invalid object }) vctrs/tests/testthat/test-type-dplyr.R0000644000176200001440000001152514362266120017634 0ustar liggesusers # `grouped_df` ------------------------------------------------------- bare_mtcars <- unrownames(mtcars) test_that("grouped-df is proxied and restored", { gdf <- dplyr::group_by(bare_mtcars, cyl) expect_identical(vec_proxy(gdf), gdf) expect_identical(vec_restore(bare_mtcars, gdf), gdf) expect_identical(vec_ptype(gdf), gdf[0, ]) gdf <- dplyr::group_by(bare_mtcars, cyl, am, vs) expect_identical(gdf[0, ], vec_ptype(gdf)) out <- vec_ptype(dplyr::group_by(bare_mtcars, cyl, .drop = FALSE)) expect_drop(out, FALSE) }) test_that("can take the common type of grouped tibbles and tibbles", { gdf <- dplyr::group_by(bare_mtcars, cyl) expect_identical(vec_ptype2(gdf, data.frame()), vec_ptype(gdf)) expect_identical(vec_ptype2(data.frame(), gdf), vec_ptype(gdf)) expect_identical(vec_ptype2(gdf, tibble()), vec_ptype(gdf)) expect_identical(vec_ptype2(tibble(), gdf), vec_ptype(gdf)) gdf_nodrop <- dplyr::group_by(bare_mtcars, cyl, .drop = FALSE) expect_drop(vec_ptype2(gdf, gdf_nodrop), FALSE) expect_drop(vec_ptype2(gdf_nodrop, gdf), FALSE) expect_drop(vec_ptype2(gdf_nodrop, bare_mtcars), FALSE) expect_drop(vec_ptype2(bare_mtcars, gdf_nodrop), FALSE) }) test_that("the common type of grouped tibbles includes the union of grouping variables", { gdf1 <- dplyr::group_by(bare_mtcars, cyl) gdf2 <- dplyr::group_by(bare_mtcars, am, vs) expect_identical( vec_ptype2(gdf1, gdf2), vec_ptype(dplyr::group_by(bare_mtcars, cyl, am, vs)) ) }) test_that("can cast to and from `grouped_df`", { gdf <- dplyr::group_by(unrownames(bare_mtcars), cyl) input <- bare_mtcars[10] cast_gdf <- dplyr::group_by(vec_cast(bare_mtcars[10], bare_mtcars), cyl) expect_error( vec_cast(input, dplyr::group_by(bare_mtcars["cyl"], cyl)), class = "vctrs_error_cast_lossy" ) expect_identical( vec_cast(input, gdf), cast_gdf ) expect_identical( vec_cast(gdf, bare_mtcars), unrownames(bare_mtcars) ) expect_identical( vec_cast(tibble::as_tibble(input), gdf), unrownames(cast_gdf) ) tib <- tibble::as_tibble(bare_mtcars) expect_identical( unrownames(vec_cast(gdf, tib)), tib ) }) test_that("casting to `grouped_df` doesn't require grouping variables", { expect_identical( vec_cast(bare_mtcars[10], dplyr::group_by(bare_mtcars, cyl)), dplyr::group_by(vec_cast(bare_mtcars[10], bare_mtcars), cyl) ) }) test_that("casting to `grouped_df` handles `drop`", { gdf_nodrop <- dplyr::group_by(bare_mtcars, cyl, .drop = FALSE) expect_identical(vec_cast(bare_mtcars, gdf_nodrop), gdf_nodrop) }) test_that("can cbind grouped data frames", { gdf <- dplyr::group_by(bare_mtcars[-10], cyl) df <- unrownames(bare_mtcars)[10] expect_identical( unrownames(vec_cbind(gdf, df)), tibble::as_tibble(bare_mtcars)[c(1:9, 11, 10)] ) gdf1 <- dplyr::group_by(bare_mtcars[2], cyl) gdf2 <- dplyr::group_by(bare_mtcars[8:9], vs, am) expect_identical( unrownames(vec_cbind(gdf1, gdf2)), tibble::as_tibble(bare_mtcars)[c(2, 8, 9)] ) }) # `rowwise` ---------------------------------------------------------- test_that("rowwise can be proxied and restored", { rww <- dplyr::rowwise(unrownames(bare_mtcars)) expect_identical(vec_proxy(rww), rww) expect_identical(vec_restore(unrownames(bare_mtcars), rww), rww) expect_identical(vec_ptype(rww), rww[0, ]) }) test_that("can take the common type of rowwise tibbles and tibbles", { rww <- dplyr::rowwise(bare_mtcars) expect_identical(vec_ptype2(rww, data.frame()), vec_ptype(rww)) expect_identical(vec_ptype2(data.frame(), rww), vec_ptype(rww)) expect_identical(vec_ptype2(rww, tibble()), vec_ptype(rww)) expect_identical(vec_ptype2(tibble(), rww), vec_ptype(rww)) }) test_that("can cast to and from `rowwise_df`", { rww <- unrownames(dplyr::rowwise(bare_mtcars)) input <- bare_mtcars[10] cast_rww <- dplyr::rowwise(vec_cast(bare_mtcars[10], bare_mtcars)) expect_error( vec_cast(input, dplyr::rowwise(bare_mtcars["cyl"])), class = "vctrs_error_cast_lossy" ) expect_identical( vec_cast(input, rww), cast_rww ) expect_identical( vec_cast(rww, bare_mtcars), unrownames(bare_mtcars) ) expect_identical( vec_cast(tibble::as_tibble(input), rww), unrownames(cast_rww) ) tib <- tibble::as_tibble(bare_mtcars) expect_identical( unrownames(vec_cast(rww, tib)), tib ) }) test_that("can cbind rowwise data frames", { df <- unrownames(bare_mtcars) rww <- dplyr::rowwise(df[-2]) gdf <- dplyr::group_by(df[2], cyl) exp <- dplyr::rowwise(df[c(1, 3:11, 2)]) expect_identical(vec_cbind(rww, df[2]), exp) # Suboptimal expect_identical(vec_cbind(rww, gdf), exp) }) test_that("common type between rowwise and grouped data frames is a bare df", { out <- vec_ptype_common(dplyr::rowwise(bare_mtcars), dplyr::group_by(bare_mtcars, cyl)) expect_identical(out, tibble::as_tibble(bare_mtcars[0, ])) }) vctrs/tests/testthat/helper-conditions.R0000644000176200001440000000367714315060310020174 0ustar liggesusers with_subscript_data <- function(expr, subscript_arg, subscript_elt = NULL, subscript_action = NULL) { local_options(rlang_force_unhandled_error = TRUE) tryCatch( expr, vctrs_error_subscript = function(cnd) { cnd$subscript_arg <- subscript_arg cnd$subscript_elt <- subscript_elt cnd$subscript_action <- subscript_action cnd_signal(cnd) } ) } with_tibble_cols <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "column", subscript_action = "rename" ) } with_tibble_rows <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "row", subscript_action = "remove" ) } with_dm_tables <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "table", subscript_action = "extract" ) } with_tidyselect_select <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "column", subscript_action = "select" ) } with_tidyselect_relocate <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "column", subscript_action = "relocate" ) } my_vec_rep <- function(my_x, my_times) { vec_rep( my_x, my_times, error_call = current_env(), x_arg = "my_x", times_arg = "my_times" ) } my_vec_rep_each <- function(my_x, my_times) { vec_rep_each( my_x, my_times, error_call = current_env(), x_arg = "my_x", times_arg = "my_times" ) } my_vec_as_names <- function(my_names, ..., my_repair = "minimal", my_quiet = FALSE) { vec_as_names( my_names, repair = my_repair, repair_arg = "my_repair", quiet = my_quiet ) } vctrs/tests/testthat/helper-type-dplyr.R0000644000176200001440000000023414276722575020146 0ustar liggesusers expect_drop <- function(x, value) { drop <- dplyr::group_by_drop_default(x) if (value) { expect_true(drop) } else { expect_false(drop) } } vctrs/tests/testthat/test-type-misc.R0000644000176200001440000001700314405105465017434 0ustar liggesusers test_that("`numeric_version` is a vector (#723)", { x <- numeric_version("0.1.0") y <- numeric_version("0.2.0") z <- c(x, y) expect_true(vec_is(x)) expect_true(vec_equal(x, x)) expect_false(vec_equal(x, y)) expect_identical(vec_equal(y, z), c(FALSE, TRUE)) expect_identical(vec_unique(z), z) expect_identical(vec_unique(c(y, z, x)), z[2:1]) }) test_that("`numeric_version` falls back to base methods", { x <- utils::packageVersion("rlang") y <- utils::packageVersion("vctrs") z <- c(x, y) # `z` is a `list-of`-like type but slicing 1 element returns the # atomic type. To implement this in vctrs we'd need to provide a way # of customising the "wrapper" type for size > 1 vectors. expect_identical(vec_slice(z, 1:2), z) expect_identical(vec_slice(z, 1), x) expect_identical(vec_slice(z, 2), y) expect_identical(vec_c(x, y), z) }) test_that("`numeric_version` has an equality, comparison, and order proxy", { numeric_row <- function(...) { out <- list2(...) out <- map(out, as.integer) names(out) <- paste0("...", seq_len(8L)) new_data_frame(out, n = 1L) } x <- numeric_version(c("1.2-3", "1.21.1", "3", "2.21.0.9000", "0.5.01")) expect <- vec_rbind( numeric_row(1, 2, 3, 0, 0, 0, 0, 0), numeric_row(1, 21, 1, 0, 0, 0, 0, 0), numeric_row(3, 0, 0, 0, 0, 0, 0, 0), numeric_row(2, 21, 0, 9000, 0, 0, 0, 0), numeric_row(0, 5, 1, 0, 0, 0, 0, 0) ) expect_identical(vec_proxy_equal(x), expect) expect_identical(vec_proxy_compare(x), expect) expect_identical(vec_proxy_order(x), expect) }) test_that("`numeric_version` proxy works with empty vectors", { x <- numeric_version(character()) expect <- vec_rep(list(integer()), times = 8L) names(expect) <- paste0("...", seq_len(8L)) expect <- new_data_frame(expect, n = 0L) expect_identical(vec_proxy_equal(x), expect) }) test_that("`numeric_version` proxy handles pseudo-`NA`", { numeric_row <- function(...) { out <- list2(...) out <- map(out, as.integer) names(out) <- paste0("...", seq_len(8L)) new_data_frame(out, n = 1L) } x <- numeric_version(c("1_1", "1.2", NA), strict = FALSE) expect <- vec_rbind( numeric_row(NA, NA, NA, NA, NA, NA, NA, NA), numeric_row(1, 2, 0, 0, 0, 0, 0, 0), numeric_row(NA, NA, NA, NA, NA, NA, NA, NA) ) expect_identical(vec_proxy_equal(x), expect) expect_identical(vec_proxy_compare(x), expect) expect_identical(vec_proxy_order(x), expect) }) test_that("`numeric_version` works with functions using the equality proxy", { x <- numeric_version(c("1.2-3", "1.21.1", "1_1", "0.5", "1.3"), strict = FALSE) y <- numeric_version(c("1.21.1", "1.21.1", "1_2", "0.05", "1_3"), strict = FALSE) expect_identical(vec_unique(x), x) expect_identical(vec_unique(y), y[c(1, 3, 4)]) expect_identical(vec_detect_missing(y), c(FALSE, FALSE, TRUE, FALSE, TRUE)) expect_identical(vec_equal(x, y), c(FALSE, TRUE, NA, TRUE, NA)) expect_identical(vec_equal(x, y, na_equal = TRUE), c(FALSE, TRUE, TRUE, TRUE, FALSE)) }) test_that("`numeric_version` works with functions using the comparison proxy", { x <- numeric_version(c("1.2-3", "1.21.1", "1_1", "0.5", "1.3"), strict = FALSE) y <- numeric_version(c("1.21.1", "1.21.1", "1_2", "0.05", "1_3"), strict = FALSE) expect_identical(vec_compare(x, y), c(-1L, 0L, NA, 0L, NA)) expect_identical(vec_compare(x, y, na_equal = TRUE), c(-1L, 0L, 0L, 0L, 1L)) # Specifically related to base R taking a joint proxy in `Ops.numeric_version` x <- numeric_version("3.3") y <- numeric_version("3.21") # `.encode_numeric_version(x) < .encode_numeric_version(y)` == FALSE # `x < y` == TRUE expect_identical(vec_compare(x, y), -1L) }) test_that("`numeric_version` works with functions using the order proxy (tidyverse/dplyr#6680)", { x <- numeric_version(c("1.2-3", "1.21.1", "1_1", "0.5", "1.30"), strict = FALSE) y <- numeric_version(c("1.21.1", "1.21.1", "1_2", "0.05", "1_3"), strict = FALSE) expect_identical(vec_order(y), c(4L, 1L, 2L, 3L, 5L)) expect_identical(vec_order_radix(y), c(4L, 1L, 2L, 3L, 5L)) expect_identical(vec_order(y, na_value = "smallest"), c(3L, 5L, 4L, 1L, 2L)) expect_identical(vec_order_radix(y, na_value = "smallest"), c(3L, 5L, 4L, 1L, 2L)) expect_identical( vec_locate_matches(x, y), data_frame( needles = c(1L, 2L, 2L, 3L, 3L, 4L, 5L), haystack = c(NA, 1L, 2L, 3L, 5L, 4L, NA) ) ) expect_identical( vec_locate_matches(x, y, condition = "<"), data_frame( needles = c(1L, 1L, 2L, 3L, 4L, 4L, 5L), haystack = c(1L, 2L, NA, NA, 1L, 2L, NA) ) ) }) test_that("`numeric_version` proxy can handle at most 8 components", { x <- numeric_version("1.2.3.4.5.6.7.8") expect_silent(vec_proxy_equal(x)) x <- numeric_version("1.2.3.4.5.6.7.8.9") expect_snapshot(error = TRUE, { vec_proxy_equal(x) }) }) test_that("`numeric_version` can compare against components with 8 components", { x <- numeric_version("2.3.4.5.6.7.8.9") y <- c(x, numeric_version(c("1.1", "11.2", "2.1"))) expect_identical(vec_compare(x, y), c(0L, 1L, -1L, 1L)) }) test_that("`package_version` and `R_system_version` use the `numeric_version` proxy", { x <- numeric_version("1.5.6") y <- package_version("1.5.6") z <- R_system_version("1.5.6") expect_identical(vec_proxy_equal(y), vec_proxy_equal(x)) expect_identical(vec_proxy_equal(z), vec_proxy_equal(x)) }) test_that("can slice `ts` vectors", { x <- ts(1:3) expect_identical(vec_ptype(x), x[0]) expect_identical(vec_slice(x, 2), x[2]) }) test_that("can concatenate `ts` vectors", { x <- ts(1:3) expect_identical(vec_c(x, x), c(x, x)) df <- data_frame(x = x) expect_identical(vec_rbind(df, df), data_frame(x = c(x, x))) }) test_that("`omit` class is numeric (#1160)", { x <- c(NA, 1:3, NA) omit <- attr(na.omit(x), "na.action") expect_identical(vec_ptype_common(omit, omit), structure(int(), class = "omit")) expect_identical(vec_ptype_common(1.5, omit), dbl()) expect_identical(vec_ptype_common(omit, 1L), int()) expect_identical(vec_cast_common(omit, omit), list(omit, omit)) expect_identical(vec_cast_common(omit, 1L), list(unstructure(omit), 1L)) expect_identical(vec_cast_common(1.5, omit), list(1.5, unstructure(as.double(omit)))) expect_error(vec_cast(1L, omit), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1.0, omit), class = "vctrs_error_incompatible_type") expect_identical(vec_slice(omit, 1), structure(1L, class = "omit")) expect_identical(vec_c(omit, omit), structure(c(1L, 5L, 1L, 5L), class = "omit")) expect_identical(vec_c(omit, omit, 10L), c(1L, 5L, 1L, 5L, 10L)) expect_identical(vec_slice(x, omit), x[omit]) }) test_that("`exclude` class is numeric (#1160)", { x <- c(NA, 1:3, NA) exc <- attr(na.exclude(x), "na.action") expect_identical(vec_ptype_common(exc, exc), structure(int(), class = "exclude")) expect_identical(vec_ptype_common(1.5, exc), dbl()) expect_identical(vec_ptype_common(exc, 1L), int()) expect_identical(vec_cast_common(exc, exc), list(exc, exc)) expect_identical(vec_cast_common(exc, 1L), list(unstructure(exc), 1L)) expect_identical(vec_cast_common(1.5, exc), list(1.5, unstructure(as.double(exc)))) expect_error(vec_cast(1L, exc), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1.0, exc), class = "vctrs_error_incompatible_type") expect_identical(vec_slice(exc, 1), structure(1L, class = "exclude")) expect_identical(vec_c(exc, exc), structure(c(1L, 5L, 1L, 5L), class = "exclude")) expect_identical(vec_c(exc, exc, 10L), c(1L, 5L, 1L, 5L, 10L)) expect_identical(vec_slice(x, exc), x[exc]) }) vctrs/tests/testthat/helper-types.R0000644000176200001440000000405014276722575017201 0ustar liggesusers# Don't call tibble::tibble() to avoid catch-22, because tibble now uses vctrs bare_tibble <- structure(data.frame(), class = c("tbl_df", "tbl", "data.frame")) base_empty_types <- list( null = NULL, logical = lgl(), integer = int(), double = dbl(), complex = cpl(), character = chr(), raw = bytes(), list = list(), dataframe = data.frame() ) base_s3_empty_types <- list( bare_factor = new_factor(), bare_ordered = new_ordered(), bare_date = new_date(), bare_posixct = new_datetime(tzone = "UTC"), bare_posixlt = as.POSIXlt(new_datetime(tzone = "UTC")), bare_tibble = bare_tibble ) proxied_empty_types <- list( double = new_hidden(), dataframe = bare_tibble, dataframe = structure(data.frame(), class = c("vctrs_foobar", "data.frame")) ) empty_types <- c( base_empty_types, proxied_empty_types, # Non proxied type scalar = foobar(list()), scalar = function() NULL ) atomics <- list(TRUE, 1L, 1.0, 1i, "foo", bytes(1)) vectors <- c(atomics, list(list())) records <- list( df = data.frame(x = 1), rcrd = new_rcrd(list(x = 1)), posixlt = as.POSIXlt("2020-01-01") ) tuple <- function(x = integer(), y = integer()) { fields <- vec_recycle_common( x = vec_cast(x, integer()), y = vec_cast(y, integer()) ) new_rcrd(fields, class = "vctrs_tuple") } tuple_methods <- list( format.vctrs_tuple = function(x, ...) { paste0("(", field(x, "x"), ",", field(x, "y"), ")") }, vec_ptype2.vctrs_tuple.vctrs_tuple = function(x, y, ...) x, vec_cast.vctrs_tuple.vctrs_tuple = function(x, to, ...) x ) local_tuple_methods <- function(frame = caller_env()) { local_methods(.frame = frame, !!!tuple_methods) } set_tuple_methods <- function(env = global_env()) { env_bind(env, !!!tuple_methods) } local_comparable_tuple <- function(frame = caller_env()) { local_tuple_methods(frame = frame) # Compare only on first field local_methods( .frame = frame, vec_proxy_equal.vctrs_tuple = function(x, ...) field(x, "x") ) } c_na <- function(...) { x <- c(...) names(x)[names(x) == ""] <- NA_character_ x } vctrs/tests/testthat/test-arith.R0000644000176200001440000000146614276722575016654 0ustar liggesusers test_that("logical/integer/numeric works", { expect_equal(vec_arith("+", TRUE, TRUE), 2L) expect_equal(vec_arith("+", TRUE, 1L), 2L) expect_equal(vec_arith("+", TRUE, 1), 2) expect_equal(vec_arith("+", 1L, TRUE), 2L) expect_equal(vec_arith("+", 1L, 1L), 2L) expect_equal(vec_arith("+", 1L, 1), 2) expect_equal(vec_arith("+", 1, TRUE), 2L) expect_equal(vec_arith("+", 1, 1L), 2L) expect_equal(vec_arith("+", 1, 1), 2) }) test_that("default is error", { f <- new_vctr(1:10, class = "foo") expect_error(vec_arith("+", f, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", TRUE, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", 1L, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", 1, f), class = "vctrs_error_incompatible_op") }) vctrs/tests/testthat/test-type-asis.R0000644000176200001440000000627614315060310017437 0ustar liggesusers# ------------------------------------------------------------------------------ # Printing test_that("I() wraps contents", { f <- factor() expect_equal(vec_ptype_abbr(I(f)), "I") expect_equal(vec_ptype_full(I(f)), "I>") }) test_that("AsIs class stripped from I()", { df <- data.frame(x = 1, y = 1:2) class(df) <- c("myclass", "data.frame") expect_equal(vec_ptype_full(I(df)), "I>") expect_equal(vec_ptype_full(I(df[1])), "I>") expect_equal(vec_ptype_full(I(df[0])), "I>") }) # ------------------------------------------------------------------------------ # Proxy / restore test_that("can slice AsIs class", { df <- data.frame(x = I(1:3), y = I(list(4, 5, 6))) expect_identical(vec_slice(df, 2:3), unrownames(df[2:3, ])) }) test_that("equality proxy is forwarded correctly for atomic types (#1557)", { # We don't define any equality proxies for base atomic types, but we can fake it local_methods(vec_proxy_equal.integer = function(x, ...) "dispatched") asis <- I(1L) expect_identical(vec_proxy_equal(asis), "dispatched") }) test_that("comparison proxy is forwarded correctly for atomic types (#1557)", { # vec_proxy_compare.raw() exists x <- raw() asis <- I(x) expect_identical(vec_proxy_compare(asis), vec_proxy_compare(x)) expect_identical(vec_proxy_compare(asis), integer()) }) test_that("order proxy is forwarded correctly for atomic types (#1557)", { # vec_proxy_order.list() exists x <- list(2, 1, 2) asis <- I(x) expect_identical(vec_proxy_order(asis), vec_proxy_order(x)) expect_identical(vec_proxy_order(asis), c(1L, 2L, 1L)) }) # ------------------------------------------------------------------------------ # Coercion test_that("can take the common type of identical AsIs objects", { expect_identical(vec_ptype2(I(1), I(1)), I(numeric())) }) test_that("AsIs objects throw ptype2 errors with their underlying types", { expect_snapshot({ (expect_error(vec_ptype2(I(1), I("x")), class = "vctrs_error_incompatible_type")) }) }) test_that("AsIs always wraps the common type", { expect_identical(vec_ptype2(I(1L), 1), I(numeric())) expect_identical(vec_ptype2(1, I(1L)), I(numeric())) }) # ------------------------------------------------------------------------------ # Casting test_that("can cast one AsIs to another AsIs", { expect_identical(vec_cast(I(1), I(1)), I(1)) expect_identical(vec_cast(I(1), I(1L)), I(1L)) }) test_that("AsIs objects throw cast errors with their underlying types", { expect_snapshot({ (expect_error(vec_cast(I(1), I(factor("x"))), class = "vctrs_error_incompatible_type")) }) }) test_that("casting from an AsIs drops the AsIs class", { expect_identical(vec_cast(I(1), 1), 1) }) test_that("casting to an AsIs adds the AsIs class", { expect_identical(vec_cast(1, I(1)), I(1)) }) # ------------------------------------------------------------------------------ # Misc test_that("can `vec_c()` with only AsIs objects", { expect_identical(vec_c(I(1), I(2)), I(c(1, 2))) expect_identical(vec_c(I(1), I(2L)), I(c(1, 2))) }) test_that("can `vec_c()` with AsIs objects mixed with other types", { expect_identical(vec_c(I(1L), 1), I(c(1, 1))) }) vctrs/tests/testthat/test-cast.R0000644000176200001440000002330314377212620016454 0ustar liggesusers test_that("Casting to named argument mentions 'match type '", { expect_snapshot(error = TRUE, vec_cast(1, "", x_arg = "foo", to_arg = "bar")) expect_snapshot(error = TRUE, vec_cast(1, "", x_arg = "foo")) }) # vec_cast() --------------------------------------------------------------- test_that("new classes are uncoercible by default", { x <- structure(1:10, class = "vctrs_nonexistant") expect_error(vec_cast(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(x, 1), class = "vctrs_error_incompatible_type") }) test_that("casting requires vectors", { expect_error(vec_cast(NULL, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(NA, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(list(), quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), NULL), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), NA), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), list()), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), quote(name)), class = "vctrs_error_scalar_type") }) test_that("casting between `NULL` and partial types is allowed", { expect_identical(vec_cast(NULL, partial_factor()), NULL) expect_identical(vec_cast(partial_factor(), NULL), partial_factor()) }) test_that("dimensionality matches output" ,{ x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) x <- matrix(1, nrow = 2, ncol = 2) expect_error(vec_cast(x, logical()), class = "vctrs_error_incompatible_type") }) test_that("empty input to vec_cast_common() returns list()", { expect_equal(vec_cast_common(), list()) expect_equal(vec_cast_common(NULL, NULL), list(NULL, NULL)) }) test_that("identical structures can be cast to each other", { expect_identical(vec_cast(foobar("foo"), foobar("bar")), foobar("foo")) }) test_that("cast common preserves names", { expect_identical(vec_cast_common(foo = 1, bar = 2L), list(foo = 1, bar = 2)) }) test_that("cast errors create helpful messages (#57, #225)", { # Lossy cast expect_snapshot(error = TRUE, vec_cast(1.5, 10L)) # Incompatible cast expect_snapshot(error = TRUE, vec_cast(factor("foo"), 10)) # Nested data frames - Lossy cast expect_snapshot(error = TRUE, { x <- tibble(a = tibble(b = 1.5)) y <- tibble(a = tibble(b = 10L)) vec_cast(x, y) }) # Nested data frames - Incompatible cast expect_snapshot(error = TRUE, { x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast(x, y) }) # Nested data frames - Common cast error expect_snapshot(error = TRUE, { x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast_common(x, y) }) }) test_that("unspecified can be cast to shaped vectors", { x <- matrix(letters[1:4], 2) expect_identical(vec_cast(NA, x), matrix(chr(NA, NA), 1)) x <- foobar(c(1:4)) dim(x) <- c(2, 2) out <- vec_cast(NA, x) exp <- foobar(int(c(NA, NA))) dim(exp) <- c(1, 2) expect_identical(out, exp) }) test_that("vec_cast() falls back to base class even when casting to non-base type", { expect_equal(vec_cast(foobar(mtcars), mtcars), mtcars) expect_equal(vec_cast(mtcars, foobar(mtcars)), mtcars) }) test_that("vec_cast() only attempts to fall back if `to` is a data frame (#1568)", { expect_snapshot({ (expect_error( vec_cast(foobar(mtcars), 1), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_cast() evaluates x_arg and to_arg lazily", { expect_silent(vec_cast(TRUE, logical(), x_arg = print("oof"))) expect_silent(vec_cast(TRUE, logical(), to_arg = print("oof"))) }) # Conditions -------------------------------------------------------------- test_that("can suppress cast errors selectively", { f <- function() vec_cast(factor("a"), to = factor("b")) expect_error(regexp = NA, allow_lossy_cast(f())) expect_error(regexp = NA, allow_lossy_cast(f(), x_ptype = factor("a"))) expect_error(regexp = NA, allow_lossy_cast(f(), to_ptype = factor("b"))) expect_error(regexp = NA, allow_lossy_cast(f(), x_ptype = factor("a"), to_ptype = factor("b"))) expect_error(allow_lossy_cast(f(), x_ptype = factor("c")), class = "vctrs_error_cast_lossy") expect_error(allow_lossy_cast(f(), x_ptype = factor("b"), to_ptype = factor("a")), class = "vctrs_error_cast_lossy") expect_error(allow_lossy_cast(f(), x_ptype = factor("a"), to_ptype = factor("c")), class = "vctrs_error_cast_lossy") }) test_that("can signal deprecation warnings for lossy casts", { local_options(lifecycle_verbosity = "warning") lossy_cast <- function() { maybe_lossy_cast( TRUE, factor("foo"), factor("bar"), lossy = TRUE, .deprecation = TRUE, x_arg = "x", to_arg = "to" ) } expect_snapshot({ (expect_warning(expect_true(lossy_cast()))) }) expect_warning(regexp = NA, expect_true(allow_lossy_cast(lossy_cast()))) expect_warning(regexp = NA, expect_true(allow_lossy_cast(lossy_cast(), factor("foo"), factor("bar")))) expect_warning(expect_true(allow_lossy_cast(lossy_cast(), factor("bar"), double()))) }) # vec_cast_common() ------------------------------------------------------- test_that("vec_ptype_common() optionally falls back to base class", { x <- foobar(NA, foo = 1) y <- foobaz(NA, bar = 2) x_df <- data_frame(x = x) y_df <- data_frame(x = y) expect_error( vec_ptype_common_opts(x, y, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype_common_opts(x_df, y_df, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast_common_opts(x, y, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast_common_opts(x_df, y_df, .opts = full_fallback_opts()), class = "vctrs_error_incompatible_type" ) class(y) <- c("foo", class(x)) y_df <- data_frame(x = y) common_sentinel <- vec_ptype_common_opts(x, y, .opts = full_fallback_opts()) expect_true(is_common_class_fallback(common_sentinel)) expect_identical(fallback_class(common_sentinel), "vctrs_foobar") common_sentinel <- vec_ptype_common_opts(x_df, y_df, .opts = full_fallback_opts()) expect_true(is_common_class_fallback(common_sentinel$x)) expect_identical(fallback_class(common_sentinel$x), "vctrs_foobar") common <- vec_cast_common_opts(x = x, y = y, .opts = full_fallback_opts()) expect_identical(common, list(x = x, y = y)) common <- vec_cast_common_opts(x = x_df, y = y_df, .opts = full_fallback_opts()) expect_identical(common, list(x = x_df, y = y_df)) }) test_that("vec_ptype_common_fallback() collects common type", { x <- foobar(1, foo = 1, class = c("quux", "baz")) y <- foobar(2, bar = 2, class = "baz") x_df <- data_frame(x = x) y_df <- data_frame(x = y) out <- vec_ptype_common_fallback(x, y) expect_identical(typeof(out), "double") expect_true(is_common_class_fallback(out)) expect_identical(fallback_class(out), c("baz", "vctrs_foobar")) out <- vec_ptype_common_fallback(x_df, y_df) expect_identical(typeof(out$x), "double") expect_true(is_common_class_fallback(out$x)) expect_identical(fallback_class(out$x), c("baz", "vctrs_foobar")) # Different base types can't fall back to common class z <- foobar(3L, baz = 3) expect_error( vec_ptype_common_fallback(x, z), class = "vctrs_error_incompatible_type" ) z_df <- data_frame(x = z) expect_error( vec_ptype_common_fallback(x_df, z_df), class = "vctrs_error_incompatible_type" ) }) test_that("fallback sentinel is returned with unspecified inputs", { fallback <- vec_ptype_common_fallback(foobar(1), foobar(1)) expect_identical(vec_ptype_common_fallback(NA, foobar(1)), fallback) expect_identical(vec_ptype_common_fallback(foobar(1), NA), fallback) }) test_that("vec_ptype_common() supports subclasses of list", { x <- structure(list(1), class = c("vctrs_foo", "list")) y <- structure(list(2), class = c("bar", "vctrs_foo", "list")) expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type") out <- with_methods( c.vctrs_foo = function(...) quux(NextMethod()), vec_c(x, y) ) expect_identical(out, quux(list(1, 2))) }) test_that("vec_cast_common_fallback() works with tibbles", { x <- foobar("foo") df <- data_frame(x = x) tib <- tibble(x = x) exp <- list(tib, tib) expect_identical(vec_cast_common_fallback(tib, tib), exp) expect_identical(vec_cast_common_fallback(tib, df), exp) expect_identical(vec_cast_common_fallback(df, tib), exp) }) test_that("can call `vec_cast()` from C (#1666)", { fn <- inject(function(x, i) .Call(!!ffi_exp_vec_cast, x, i)) environment(fn) <- ns_env("utils") x <- array(1, dim = c(1, 1)) y <- array(2, dim = c(2, 2)) expect_equal(fn(x, y), vec_cast(x, y)) }) test_that("df-fallback for cast is not sensitive to attributes order", { x <- structure( list(col = ""), class = c("vctrs_foobar", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -1L), foo = "foo", bar = "bar" ) ptype <- structure( list(col = character(0)), foo = "foo", bar = "bar", row.names = integer(0), class = c("vctrs_foobar", "tbl_df", "tbl", "data.frame") ) expect_identical(vec_cast(x, ptype), x) }) test_that("bare-type fallback for df-cast works", { # NOTE: Not sure why this was necessary. The cubble and yamlet # packages fail without this. local_methods( c.vctrs_foobaz = function(...) quux(NextMethod()) ) df <- data_frame(x = 1, y = foobaz("foo")) gdf <- dplyr::new_grouped_df( df, data_frame(x = 1, .rows = list(1L)), class = "vctrs_foobar" ) expect_error(vec_rbind(gdf, gdf), NA) }) vctrs/tests/testthat/helper-s4.R0000644000176200001440000000340614276722575016367 0ustar liggesusers .rando <- setClass( "vctrs_rando", contains = "numeric", slots = list(.Data = "numeric") ) rando <- function(n = 0) { .rando(as.numeric(seq_len(n))) } as_rando <- function(x) { rando(length(x)) } setMethod("[", "vctrs_rando", function(x, i, j, ..., drop = TRUE) { new_n <- length(vec_as_location(i, length(x@.Data), names(x@.Data))) rando(new_n) }) .Counts <- methods::setClass( "vctrs_Counts", contains = "integer", slots = c(name = "character") ) local_c_counts <- function(frame = caller_env()) { c_counts <- function(x, ...) { xs <- list(x, ...) xs_data <- lapply(xs, function(x) x@.Data) new_data <- do.call(c, xs_data) .Counts(new_data, name = "Dispatched") } local_s4_method( frame = frame, "c", methods::signature(x = "vctrs_Counts"), c_counts ) } local_s4_method <- function(generic, signature, method, frame = caller_env()) { methods::setMethod(generic, signature, method) exit_expr <- call2(methods::removeMethod, generic, signature, where = topenv(frame)) local_exit(exit_expr, frame = frame) } with_s4_method <- function(generic, signature, method, expr) { local_s4_method(generic, signature, method) expr } local_exit <- function(expr, frame = caller_env()) { # We are at top-level when only one frame refers to the global environment if (is_reference(frame, global_env())) { is_global_frame <- sys.parents() == 0 if (sum(is_global_frame) == 1) { abort("Can't add an exit event at top-level") } } # Inline everything so the call will succeed in any environment expr <- call2(on.exit, expr, add = TRUE) eval_bare(expr, frame) invisible(expr) } skip_if_cant_set_names_on_s4 <- function() { skip_if(getRversion() < "3.5.0", message = "Can't set names on S4 objects") } vctrs/tests/testthat/test-set.R0000644000176200001440000003177314511320527016323 0ustar liggesusers# vec_set_intersect ------------------------------------------------------- test_that("retains names of `x` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 2) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_intersect(x, y), c(a = 1, e = 2) ) }) test_that("returns elements in order they first appear in `x`", { expect_identical(vec_set_intersect(c(3, 1, 2, 3), c(2, 3)), c(3, 2)) }) test_that("returns unique elements", { expect_identical(vec_set_intersect(c(1, 2, 1), c(2, 2, 1)), c(1, 2)) }) test_that("works with character vectors of different encodings", { encs <- encodings() # Always returns UTF-8 expect_identical(vec_set_intersect(encs$utf8, encs$latin1), encs$utf8) expect_identical(vec_set_intersect(encs$latin1, encs$utf8), encs$utf8) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_intersect(c(NA_real_, 1), NA_real_), NA_real_) expect_identical(vec_set_intersect(c(1, NA_real_), NA_real_), NA_real_) expect_identical(vec_set_intersect(c(NA_real_, NaN), NaN), NaN) expect_identical(vec_set_intersect(c(NaN, NA_real_), NaN), NaN) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_intersect(na, na), na) expect_identical(vec_set_intersect(na, na[2]), na[2]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_intersect(NA, NA), NA) }) test_that("returns a vector of the common type", { expect_identical(vec_set_intersect(1L, c(2, 1)), 1) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical(vec_set_intersect(x, y), vec_slice(x, c(2, 4))) expect_identical(vec_set_intersect(y, x), vec_slice(y, c(1, 3))) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical(vec_set_intersect(x, y), vec_slice(x, c(2, 4))) expect_identical(vec_set_intersect(y, x), vec_slice(y, c(1, 3))) }) # vec_set_difference ------------------------------------------------------ test_that("retains names of `x` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 5) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_difference(x, y), c(b = 4, e = 5) ) }) test_that("returns elements in order they first appear in `x`", { expect_identical(vec_set_difference(c(3, 1, 2, 3), 1), c(3, 2)) }) test_that("returns unique elements", { expect_identical(vec_set_difference(c(1, 2, 1, 4), c(4, 5)), c(1, 2)) }) test_that("works with character vectors of different encodings", { encs <- encodings() expect_identical(vec_set_difference(encs$utf8, encs$latin1), character()) expect_identical(vec_set_difference(encs$latin1, encs$utf8), character()) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_difference(c(NA_real_, 1), NA_real_), 1) expect_identical(vec_set_difference(c(1, NA_real_), NA_real_), 1) expect_identical(vec_set_difference(c(NA_real_, NaN), NaN), NA_real_) expect_identical(vec_set_difference(c(NaN, NA_real_), NaN), NA_real_) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_difference(na, na), complex()) expect_identical(vec_set_difference(na, na[2]), na[c(1, 3, 4)]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_difference(NA, NA), logical()) }) test_that("returns a vector of the common type", { expect_identical(vec_set_difference(c(3L, 1L), c(2, 1)), 3) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical(vec_set_difference(x, y), vec_slice(x, 1)) expect_identical(vec_set_difference(y, x), vec_slice(y, c(2, 4))) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical(vec_set_difference(x, y), vec_slice(x, 1)) expect_identical(vec_set_difference(y, x), vec_slice(y, c(2, 4))) }) # vec_set_union ----------------------------------------------------------- test_that("retains names of `x` and `y` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 5) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_union(x, y), c(a = 1, b = 4, e = 5, w = 3, x = 2) ) }) test_that("does minimal name repair on duplicate names", { x <- c(a = 1) y <- c(a = 2) expect_named(vec_set_union(x, y), c("a", "a")) }) test_that("returns elements in order they first appear in `x` and `y`", { expect_identical(vec_set_union(c(3, 1, 2, 3), c(4, 2, 5, 4)), c(3, 1, 2, 4, 5)) }) test_that("returns unique elements", { expect_identical(vec_set_union(c(1, 2, 1, 4), c(4, 5, 5)), c(1, 2, 4, 5)) }) test_that("works with character vectors of different encodings", { encs <- encodings() # Always returns UTF-8 expect_identical(vec_set_union(encs$utf8, encs$latin1), encs$utf8) expect_identical(vec_set_union(encs$latin1, encs$utf8), encs$utf8) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_union(c(NA_real_, 1), NA_real_), c(NA_real_, 1)) expect_identical(vec_set_union(c(1, NA_real_), NA_real_), c(1, NA_real_)) expect_identical(vec_set_union(NA_real_, NaN), c(NA_real_, NaN)) expect_identical(vec_set_union(NaN, NA_real_), c(NaN, NA_real_)) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_union(na, na), na) expect_identical(vec_set_union(na[-2], na), na[c(1, 3, 4, 2)]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_union(NA, NA), NA) }) test_that("returns a vector of the common type", { expect_identical(vec_set_union(c(3L, 1L), c(2, 1)), c(3, 1, 2)) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical(vec_set_union(x, y), vec_c(vec_slice(x, c(1, 2, 4)), vec_slice(y, c(2, 4)))) expect_identical(vec_set_union(y, x), vec_c(vec_slice(y, c(1, 2, 3, 4)), vec_slice(x, 1))) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical(vec_set_union(x, y), vec_c(vec_slice(x, c(1, 2, 4)), vec_slice(y, c(2, 4)))) expect_identical(vec_set_union(y, x), vec_c(vec_slice(y, c(1, 2, 3, 4)), vec_slice(x, 1))) }) # vec_set_symmetric_difference -------------------------------------------- test_that("retains names of `x` and `y` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 5) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_symmetric_difference(x, y), c(b = 4, e = 5, w = 3, x = 2) ) }) test_that("returns elements in order they first appear in `x` and `y`", { expect_identical(vec_set_symmetric_difference(c(3, 1, 2, 3), c(4, 2, 5, 4)), c(3, 1, 4, 5)) }) test_that("returns unique elements", { expect_identical(vec_set_symmetric_difference(c(1, 2, 1, 4), c(4, 5, 5)), c(1, 2, 5)) }) test_that("works with character vectors of different encodings", { encs <- encodings() # Always returns UTF-8 expect_identical(vec_set_symmetric_difference(encs$utf8, encs$latin1), character()) expect_identical(vec_set_symmetric_difference(encs$latin1, encs$utf8), character()) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_symmetric_difference(c(NA_real_, 1), NA_real_), 1) expect_identical(vec_set_symmetric_difference(c(1, NA_real_), NA_real_), 1) expect_identical(vec_set_symmetric_difference(c(NaN, 1), NaN), 1) expect_identical(vec_set_symmetric_difference(c(1, NaN), NaN), 1) expect_identical(vec_set_symmetric_difference(NA_real_, NaN), c(NA_real_, NaN)) expect_identical(vec_set_symmetric_difference(NaN, NA_real_), c(NaN, NA_real_)) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_symmetric_difference(na, na), complex()) expect_identical(vec_set_symmetric_difference(na[-2], na[-4]), na[c(4, 2)]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_symmetric_difference(NA, NA), logical()) }) test_that("returns a vector of the common type", { expect_identical(vec_set_symmetric_difference(c(3L, 1L), c(2, 1)), c(3, 2)) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical( vec_set_symmetric_difference(x, y), vec_c(vec_slice(x, 1), vec_slice(y, c(2, 4))) ) expect_identical( vec_set_symmetric_difference(y, x), vec_c(vec_slice(y, c(2, 4)), vec_slice(x, 1)) ) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical( vec_set_symmetric_difference(x, y), vec_c(vec_slice(x, 1), vec_slice(y, c(2, 4))) ) expect_identical( vec_set_symmetric_difference(y, x), vec_c(vec_slice(y, c(2, 4)), vec_slice(x, 1)) ) }) # common ------------------------------------------------------------------ test_that("works with package version columns of data frames (#1837)", { package_frame <- function(x) { data_frame(version = package_version(x)) } x <- package_frame(c("4.0", "2.0")) y <- package_frame(c("1.0", "3.0" ,"4.0")) expect_identical(vec_set_intersect(x, y), package_frame("4.0")) expect_identical(vec_set_difference(x, y), package_frame("2.0")) expect_identical(vec_set_union(x, y), package_frame(c("4.0", "2.0", "1.0", "3.0"))) expect_identical(vec_set_symmetric_difference(x, y), package_frame(c("2.0", "1.0", "3.0"))) }) test_that("errors nicely if common type can't be taken", { expect_snapshot(error = TRUE, { vec_set_intersect(1, "x") }) expect_snapshot(error = TRUE, { vec_set_difference(1, "x") }) expect_snapshot(error = TRUE, { vec_set_union(1, "x") }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, "x") }) }) test_that("dots must be empty", { expect_snapshot(error = TRUE, { vec_set_intersect(1, 2, 3) }) expect_snapshot(error = TRUE, { vec_set_difference(1, 2, 3) }) expect_snapshot(error = TRUE, { vec_set_union(1, 2, 3) }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, 2, 3) }) }) test_that("`ptype` is respected", { expect_identical(vec_set_intersect(1, 1, ptype = integer()), 1L) expect_identical(vec_set_difference(1, 1, ptype = integer()), integer()) expect_identical(vec_set_union(1, 2, ptype = integer()), c(1L, 2L)) expect_identical(vec_set_symmetric_difference(1, 2, ptype = integer()), c(1L, 2L)) expect_snapshot(error = TRUE, { vec_set_intersect(1, 1.5, ptype = integer()) }) expect_snapshot(error = TRUE, { vec_set_difference(1, 1.5, ptype = integer()) }) expect_snapshot(error = TRUE, { vec_set_union(1, 1.5, ptype = integer()) }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, 1.5, ptype = integer()) }) }) test_that("`x_arg` and `y_arg` can be adjusted", { expect_snapshot(error = TRUE, { vec_set_intersect(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_difference(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_union(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_intersect(1, "2", x_arg = "", y_arg = "") }) }) test_that("`error_call` can be adjusted", { my_set_intersect <- function() { vec_set_intersect(1, "x", error_call = current_env()) } my_set_difference <- function() { vec_set_difference(1, "x", error_call = current_env()) } my_set_union <- function() { vec_set_union(1, "x", error_call = current_env()) } my_set_symmetric_difference <- function() { vec_set_symmetric_difference(1, "x", error_call = current_env()) } expect_snapshot(error = TRUE, { my_set_intersect() }) expect_snapshot(error = TRUE, { my_set_difference() }) expect_snapshot(error = TRUE, { my_set_union() }) expect_snapshot(error = TRUE, { my_set_symmetric_difference() }) }) vctrs/tests/testthat/test-split.R0000644000176200001440000000225014276722575016670 0ustar liggesusers test_that("can split empty vector", { out <- vec_split(integer(), character()) expect_s3_class(out, "data.frame") expect_equal(out$key, character()) expect_equal(out$val, list()) }) test_that("split data frame with data frame", { df <- data.frame(x = c(1, 1, 2), y = c(1, 1, 1)) out <- vec_split(df, df) expect_s3_class(out, "data.frame") expect_equal(out$key, data.frame(x = c(1, 2), y = c(1, 1))) expect_equal(out$val, list( data.frame(x = c(1, 1), y = c(1, 1)), data.frame(x = 2, y = 1) )) }) test_that("x and by must be same size", { expect_error( vec_split(1:3, 1:2), "same size" ) }) test_that("split takes the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_identical(nrow(vec_split(1:3, x)), 2L) }) test_that("split works with different encodings", { encs <- encodings() expect_identical(nrow(vec_split(1:3, encs)), 1L) }) test_that("`key` and `value` retain names", { x <- c(a = 1, b = 2, c = 1, a = 1) split <- vec_split(x, x) expect_identical(split$key, c(a = 1, b = 2)) expect_identical(split$val[[1]], c(a = 1, c = 1, a = 1)) expect_identical(split$val[[2]], c(b = 2)) }) vctrs/tests/testthat/test-order.R0000644000176200001440000013640214511320527016636 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(integer()), integer()) }) test_that("can order integers", { x <- c(2L, 3L, 1L, 5L) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:5 expect_identical(vec_order_radix(x), order(x)) }) test_that("orders correctly around the UINT8_MAX boundary", { x <- 251:255 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1L, 3L, 1L, 3L) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c(1L, NA_integer_, 3L) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("integer, small: `NA` order can be first", { x <- c(1L, NA_integer_, 3L) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("double: `direction` can be set to `desc`", { x <- c(1, 5, 3) expect_identical(vec_order_radix(x, direction = "desc"), c(2L, 3L, 1L)) x <- c(1L, .Machine$integer.max, 3L) expect_identical(vec_order_radix(x, direction = "desc"), c(2L, 3L, 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_integer_, NA_integer_) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order when in expected order", { x <- c(1L, 1L, 2L, NA, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5) x <- c(NA, NA, 3L, 3L, 2L) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5) x <- c(NA, NA, 1L, 1L, 2L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5) x <- c(3L, 3L, 2L, NA, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, 2L, 1L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1) x <- c(1L, 2L, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1) x <- c(2L, 1L, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1) x <- c(NA, 1L, 2L) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1) }) # ------------------------------------------------------------------------------ # vec_order_radix() - counting # To trigger counting ordering, get above the insertion order boundary and then # have a range less than the counting order range boundary. test_that("can order integers with counting order", { x <- (ORDER_INSERTION_BOUNDARY + 1L):1L expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:(ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L) expect_identical(vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L, 1:ORDER_INSERTION_BOUNDARY) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # To trigger radix ordering, get above the insertion order boundary and then # have a range greater than the counting order range boundary. test_that("can order integers with radix order", { x <- c(INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L, 1:ORDER_INSERTION_BOUNDARY) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- c(1:ORDER_INSERTION_BOUNDARY, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical(vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L, 1:ORDER_INSERTION_BOUNDARY, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("can order all 1 value", { x <- rep(1L, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical(vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE)) }) test_that("all `NA` values works - ensures that we can compute the 'range' of all NAs", { x <- rep(NA_integer_, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical(vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE)) }) test_that("can order with many NAs first", { x <- c(rep(NA_integer_, ORDER_INSERTION_BOUNDARY + 1L), 2L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical(vec_order_radix(x, na_value = "smallest"), base_order(x, na.last = FALSE)) }) test_that("subtraction in counting order range computation works correctly (#1399)", { x <- c(rep(1L, ORDER_INSERTION_BOUNDARY), -2147483647L) expect_identical(vec_order_radix(x), base_order(x)) }) # ------------------------------------------------------------------------------ # vec_order_radix() # Really this just goes through the integer infrastructure. Just checking that # it is working. test_that("can order size zero input", { expect_identical(vec_order_radix(logical()), integer()) }) test_that("can order logicals", { x <- c(FALSE, TRUE, FALSE) expect_identical(vec_order_radix(x), order(x)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(TRUE, NA, FALSE) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA, NA) expect_identical(vec_order_radix(x), order(x)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(double()), integer()) }) test_that("can order doubles", { x <- c(2, 3, 1, 5) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:5 + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1, 3, 1, 3) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c(1, NA_real_, 3) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("double: `NA` order can be first", { x <- c(1, NA_real_, 3) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3, NA_real_, 1, 2) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_real_, NA_real_) expect_identical(vec_order_radix(x), order(x)) }) test_that("NA_real_ and NaN look identical for ordering", { x <- c(NA_real_, NaN) expect_identical(vec_order_radix(x, na_value = "largest"), c(1L, 2L)) expect_identical(vec_order_radix(x, na_value = "smallest"), c(1L, 2L)) }) test_that("double: -Inf / Inf order correctly", { x <- c(0, -Inf, Inf) expect_identical(vec_order_radix(x, direction = "asc"), c(2L, 1L, 3L)) expect_identical(vec_order_radix(x, direction = "desc"), c(3L, 1L, 2L)) }) test_that("double: -0 and 0 order identically / stably", { x <- c(0, -0) expect_identical(vec_order_radix(x, direction = "desc"), c(1L, 2L)) expect_identical(vec_order_radix(x, direction = "asc"), c(1L, 2L)) }) test_that("can order when in expected order", { x <- c(1, 1, 2, NA, NaN) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5) x <- c(NA, NaN, 3, 3, 2) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5) x <- c(NA, NaN, 1, 1, 2) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5) x <- c(3, 3, 2, NA, NaN) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5) }) test_that("can order when in expected order - using distinct NaN values", { x <- c(1, 1, 2, NaN, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest", nan_distinct = TRUE), 1:5) x <- c(NA, NaN, 3, 3, 2) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest", nan_distinct = TRUE), 1:5) x <- c(NA, NaN, 1, 1, 2) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest", nan_distinct = TRUE), 1:5) x <- c(3, 3, 2, NaN, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest", nan_distinct = TRUE), 1:5) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, 2, 1) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1) x <- c(1, 2, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1) x <- c(2, 1, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1) x <- c(NA, 1, 2) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1) }) test_that("can order when in strictly opposite of expected order (no ties) - using distinct NaN values", { x <- c(NA, NaN, 2, 1) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest", nan_distinct = TRUE), 4:1) x <- c(1, 2, NaN, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest", nan_distinct = TRUE), 4:1) x <- c(2, 1, NaN, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest", nan_distinct = TRUE), 4:1) x <- c(NA, NaN, 1, 2) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest", nan_distinct = TRUE), 4:1) }) test_that("NaN is always placed next to numbers when treated as distinct", { x <- c(1, 2, NA, NaN) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest", nan_distinct = TRUE), c(1L, 2L, 4L, 3L)) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest", nan_distinct = TRUE), c(3L, 4L, 1L, 2L)) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest", nan_distinct = TRUE), c(3L, 4L, 2L, 1L)) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest", nan_distinct = TRUE), c(2L, 1L, 4L, 3L)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # To trigger radix ordering, get above the insertion order boundary. There is # no intermediate counting sort for doubles. test_that("can order doubles with radix order", { x <- (ORDER_INSERTION_BOUNDARY + 1L):1L + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:(ORDER_INSERTION_BOUNDARY + 1L) + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L) + 0 expect_identical(vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3, NA_real_, 1, 2, 1:ORDER_INSERTION_BOUNDARY) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- rep(NA_real_, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("NA_real_ and NaN generally look identical for ordering", { x <- rep(c(NA_real_, NaN), ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x, na_value = "largest"), seq_along(x)) expect_identical(vec_order_radix(x, na_value = "smallest"), seq_along(x)) }) test_that("NA_real_ and NaN can be considered distinct with `nan_distinct`", { x <- rep(c(NA_real_, NaN), ORDER_INSERTION_BOUNDARY + 1L) loc_nan <- seq(2L, length(x), by = 2L) loc_na <- seq(1L, length(x), by = 2L) expect_identical(vec_order_radix(x, na_value = "largest", nan_distinct = TRUE), c(loc_nan, loc_na)) expect_identical(vec_order_radix(x, na_value = "smallest", nan_distinct = TRUE), c(loc_na, loc_nan)) }) test_that("-Inf / Inf order correctly", { x <- c(rep(0, ORDER_INSERTION_BOUNDARY), -Inf, Inf) expect_identical(vec_order_radix(x, direction = "asc"), order(x, decreasing = FALSE)) expect_identical(vec_order_radix(x, direction = "desc"), order(x, decreasing = TRUE)) }) test_that("double, large: -0 and 0 order identically / stably", { x <- c(rep(0, ORDER_INSERTION_BOUNDARY), -0) expect_identical(vec_order_radix(x, direction = "desc"), order(x, decreasing = TRUE)) expect_identical(vec_order_radix(x, direction = "asc"), order(x, decreasing = FALSE)) }) # ------------------------------------------------------------------------------ # vec_order_radix() test_that("can order size zero input", { expect_identical(vec_order_radix(complex()), integer()) }) test_that("can order complex", { x <- complex(real = c(3, 1, 2)) expect_identical(vec_order_radix(x), c(2L, 3L, 1L)) }) test_that("ordering on ties is done stably", { x <- complex(real = c(1, 3, 1, 3)) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("imaginary section is used to break ties", { x <- complex( real = c(1L, 2L, 1L), imaginary = c(3L, 2L, 1L) ) expect_identical(vec_order_radix(x), c(3L, 1L, 2L)) }) test_that("can be used in a data frame", { x <- c(1L, 1L, 1L, 2L, 1L) y <- complex( real = c(1L, 2L, 1L, 3L, 1L), imaginary = c(3L, 2L, 1L, 4L, 1L) ) z <- c(1, 2, 5, 4, 3) # as second column df1 <- data.frame(x = x, y = y) # as first column df2 <- data.frame(y = y, x = x) # as second column with a third after it to break ties df3 <- data.frame(x = x, y = y, z = z) # Base R can't do radix sorting with complex expect_identical(vec_order_radix(df1), c(3L, 5L, 1L, 2L, 4L)) expect_identical(vec_order_radix(df2), c(3L, 5L, 1L, 2L, 4L)) expect_identical(vec_order_radix(df3), c(5L, 3L, 1L, 2L, 4L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- complex(real = c(3, NA, 1.5, 2, NA), imaginary = c(1, 1, 1, 1, 2)) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) # In fixing #1403, we now align with base R expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("full gambit of tests involving missing values are working as expected (#1403)", { x <- complex( real = c(NaN, NA, NA, NA, NaN, NaN, 1, 1, 1, 2), imaginary = c(NA, NA, NaN, 1, NaN, 1, NA, NaN, 1, NA) ) df <- data_frame(a = rep(1L, length(x)), x = x) # {number}, {NaN}, {NaN + NA}, {NA + NaN}, {NA} expect <- c(9L, 5L, 6L, 8L, 1L, 3L, 2L, 4L, 7L, 10L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest", nan_distinct = TRUE), expect) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest", nan_distinct = TRUE), expect) expect_identical(vec_order_radix(df, direction = "asc", na_value = "largest", nan_distinct = TRUE), expect) expect_identical(vec_order_radix(df, direction = "desc", na_value = "smallest", nan_distinct = TRUE), expect) # {NA}, {NA + NaN}, {NaN + NA}, {NaN}, {number} expect <- c(2L, 4L, 7L, 10L, 3L, 1L, 5L, 6L, 8L, 9L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest", nan_distinct = TRUE), expect) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest", nan_distinct = TRUE), expect) expect_identical(vec_order_radix(df, direction = "asc", na_value = "smallest", nan_distinct = TRUE), expect) expect_identical(vec_order_radix(df, direction = "desc", na_value = "largest", nan_distinct = TRUE), expect) # {number}, {NA or NaN} expect <- c(9L, 1:7, 8L, 10L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest", nan_distinct = FALSE), expect) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest", nan_distinct = FALSE), expect) expect_identical(vec_order_radix(df, direction = "asc", na_value = "largest", nan_distinct = FALSE), expect) expect_identical(vec_order_radix(df, direction = "desc", na_value = "smallest", nan_distinct = FALSE), expect) # {NA or NaN}, {number} expect <- c(1:8, 10L, 9L) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest", nan_distinct = FALSE), expect) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest", nan_distinct = FALSE), expect) expect_identical(vec_order_radix(df, direction = "asc", na_value = "smallest", nan_distinct = FALSE), expect) expect_identical(vec_order_radix(df, direction = "desc", na_value = "largest", nan_distinct = FALSE), expect) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(character()), integer()) }) test_that("can order characters", { x <- c("xy", "x", "a", "bc") expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- c("a", "b", "c") expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c("ab", "ba", "ab", "ba") expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c("x", NA_character_, "y") expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("character, small: `NA` order can be first", { x <- c("x", NA_character_, "y") expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("character, small: `direction` can be set to `desc`", { x <- c("x", "abcde", "yz") expect_identical(vec_order_radix(x, direction = "desc"), c(3L, 1L, 2L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c("aaa", NA_character_, "a", "aa") expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_character_, NA_character_) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order empty string vs ASCII value 1 'Start of Header'", { x <- c("\001", "") expect_identical(vec_order_radix(x), c(2L, 1L)) }) test_that("can be used in a data frame", { x <- c(1L, 4L, 1L, 3L, 1L) y <- c("zy", "zz", "abcd", "gfa", "zy") z <- c(1, 2, 5, 4, 3) # as second column df1 <- data.frame(x = x, y = y) # as first column df2 <- data.frame(y = y, x = x) # as second column with a third after it to break ties df3 <- data.frame(x = x, y = y, z = z) expect_identical(vec_order_radix(df1), base_order(df1)) expect_identical(vec_order_radix(df2), base_order(df2)) expect_identical(vec_order_radix(df3), base_order(df3)) }) test_that("can have multiple character columns in a data frame", { df <- data.frame( x = c("def", "aba", "aba", "aba", "q"), y = c("zy", "zz", "zz", "gfa", "zy"), z = c("foo", "qux", "bar", "baz", "boo") ) expect_identical(vec_order_radix(df), base_order(df)) }) test_that("can order with varying encodings by converting to UTF-8", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1, "AC") expect_identical(vec_order_radix(x), c(4L, 1L, 2L, 3L)) expect_identical(vec_order_radix(x, direction = "desc"), c(1L, 2L, 3L, 4L)) }) test_that("can order when in expected order", { x <- c("a", "a", "b", NA, NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5) x <- c(NA, NA, "c", "c", "b") expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5) x <- c(NA, NA, "a", "a", "b") expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5) x <- c("c", "c", "b", NA, NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, "b", "a") expect_identical(vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1) x <- c("a", "b", NA) expect_identical(vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1) x <- c("b", "a", NA) expect_identical(vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1) x <- c(NA, "a", "b") expect_identical(vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # Have to get the number of unique strings above the ORDER_INSERTION_BOUNDARY # to trigger radix ordering. test_that("can order character vectors", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) expect_identical(vec_order_radix(x), base_order(x)) }) test_that("ordering on ties is done stably", { x <- c(paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)), "x1") expect_identical(vec_order_radix(x)[1:2], c(1L, length(x))) }) test_that("`NA` order defaults to last", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "y") expect_identical(vec_order_radix(x)[length(x)], length(x) - 1L) }) test_that("character, large: `NA` order can be first", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "y") expect_identical(vec_order_radix(x, na_value = "smallest")[[1L]], length(x) - 1L) }) test_that("character, large: `direction` can be set to `desc`", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) expect_identical(vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE)) }) test_that("all combinations of `direction` and `na_value` work", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "x", "aa", "x1") expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[base_order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[base_order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[base_order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[base_order(x, na.last = TRUE, decreasing = TRUE)] ) }) # ------------------------------------------------------------------------------ # vec_order_radix() test_that("list elements are ordered by first appearance", { expect_identical(vec_order_radix(list(1:2, "a", 1:2)), c(1L, 3L, 2L)) }) test_that("missing values in lists are respected (#1401)", { x <- list(1, NULL, 2, NULL) expect_identical(vec_order_radix(x, na_value = "largest"), c(1L, 3L, 2L, 4L)) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 4L, 1L, 3L)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("data frame with no columns and no rows returns integer()", { x <- data.frame() expect_identical(vec_order_radix(x), integer()) }) test_that("data frame with no columns and some rows returns sequential rows", { x <- new_data_frame(n = 5L) expect_identical(vec_order_radix(x), 1:5) }) test_that("can order with multiple pre-sorted keys", { df <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_order_radix(df), 1:2) }) test_that("first column has ordering presedence", { df <- data.frame(x = c(3L, 2L, 1L), y = c(1L, 2L, 3L)) expect_identical(vec_order_radix(df), 3:1) }) test_that("secondary columns break ties - integer", { df <- data.frame( x = c(1L, 2L, 1L), y = c(3L, 2L, 1L) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("secondary columns break ties - double", { df <- data.frame( x = c(1, 2, 1), y = c(3, 2, 1) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("secondary columns break ties - logical", { df <- data.frame( x = c(FALSE, TRUE, FALSE), y = c(TRUE, TRUE, FALSE) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("orders correctly when first column is already ordered but second isn't", { df <- data.frame( x = c(1L, 1L, 2L, 2L), y = c(3L, 2L, 4L, 1L) ) expect_identical(vec_order_radix(df), c(2L, 1L, 4L, 3L)) }) test_that("orders correctly when first column is already ordered but second isn't - character", { df <- data.frame( x = c("a", "a", "b", "b"), y = c("c", "b", "d", "a") ) expect_identical(vec_order_radix(df), c(2L, 1L, 4L, 3L)) }) test_that("`direction` is recycled", { df <- data.frame( x = c(1L, 1L, 2L, 2L), y = c(3L, 2L, 4L, 1L) ) expect_identical(vec_order_radix(df, direction = "desc"), c(3L, 4L, 1L, 2L)) }) test_that("`na_value` is recycled", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA), y = c(3L, 2L, 4L, 1L, NA) ) expect_identical(vec_order_radix(df, na_value = "smallest"), c(5L, 2L, 1L, 4L, 3L)) }) test_that("`direction` can be a vector", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, 1L), y = c(3L, 2L, 4L, 1L, 3L, NA) ) expect_identical(vec_order_radix(df, direction = c("desc", "asc")), c(5L, 4L, 3L, 2L, 1L, 6L)) }) test_that("`na_value` can be a vector", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, NA), y = c(3L, 2L, 4L, 1L, NA, 2) ) expect_identical(vec_order_radix(df, na_value = c("smallest", "largest")), c(6L, 5L, 2L, 1L, 4L, 3L)) }) test_that("`na_value` and `direction` can both be vectors", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, NA), y = c(3L, 2L, 4L, 1L, NA, 2) ) expect_identical( vec_order_radix(df, direction = c("desc", "asc"), na_value = c("smallest", "largest")), c(4:1, 6:5) ) }) test_that("`direction` is recycled right with array columns (#1753)", { df <- data_frame( x = matrix(c(1, 1, 1, 3, 2, 2), ncol = 2), y = 3:1 ) expect_identical( vec_order_radix(df, direction = c("asc", "desc")), c(2L, 3L, 1L) ) expect_snapshot(error = TRUE, { vec_order_radix(df, direction = c("asc", "desc", "desc")) }) df <- data_frame( x = array(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 3, 3), dim = c(3, 2, 2)), y = 3:1 ) expect_identical( vec_order_radix(df, direction = c("asc", "desc")), c(2L, 3L, 1L) ) }) test_that("`na_value` is recycled right with array columns (#1753)", { df <- data_frame( x = matrix(c(1, 1, 1, 3, NA, 2), ncol = 2), y = 3:1 ) expect_identical( vec_order_radix(df, na_value = c("largest", "smallest")), c(3L, 1L, 2L) ) expect_identical( vec_order_radix(df, na_value = c("smallest", "largest")), c(2L, 3L, 1L) ) expect_snapshot(error = TRUE, { vec_order_radix(df, direction = c("smallest", "largest", "largest")) }) df <- data_frame( x = array(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, NA, 3), dim = c(3, 2, 2)), y = 3:1 ) expect_identical( vec_order_radix(df, na_value = c("largest", "smallest")), c(3L, 1L, 2L) ) expect_identical( vec_order_radix(df, na_value = c("smallest", "largest")), c(2L, 3L, 1L) ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - counting test_that("can order 2+ integer column chunks with counting sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1L, y = c(rep(2L, quarter_low), rep(1L, quarter_high), rep(3L, half)) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix test_that("can order 2+ integer column chunks with radix sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1L, y = c(rep(2L, quarter_low), rep(1L, quarter_high), rep(3L, half), INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) ) expect_identical(vec_order_radix(df), base_order(df)) }) test_that("can order 2+ double column chunks with radix sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1, y = c(rep(2, quarter_low), rep(1, quarter_high), rep(3, half), INT_ORDER_COUNTING_RANGE_BOUNDARY + 1) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - chr_proxy_collate test_that("`chr_proxy_collate` transforms string input", { x <- c("b", "a", "A") expect_identical(vec_order_radix(x, chr_proxy_collate = tolower), c(2L, 3L, 1L)) expect_identical(vec_order_radix(x, chr_proxy_collate = ~tolower(.x)), c(2L, 3L, 1L)) }) test_that("`chr_proxy_collate` works with data frame columns and is applied to all string columns", { df <- data_frame(x = c(1, 1, 1), y = c("B", "a", "a"), z = c("a", "D", "c")) expect_identical(vec_order_radix(df, chr_proxy_collate = tolower), c(3L, 2L, 1L)) }) test_that("`chr_proxy_collate` is validated", { expect_error(vec_order_radix("x", chr_proxy_collate = 1), "Can't convert `chr_proxy_collate` to a function") expect_error(vec_order_radix("x", chr_proxy_collate = ~c("y", "z")), "1, not 2") expect_error(vec_order_radix("x", chr_proxy_collate = ~1), "character vector") expect_error(vec_order_radix("x", chr_proxy_collate = function() {"y"})) }) test_that("`chr_proxy_collate` can return bytes-encoded strings (like `stringi::stri_sort_key()`)", { x <- c("A", "a", "b", "B") # Mimic stringi::stri_sort_key(x, locale = "en") sort_key <- function(x) { # dput(lapply(stringi::stri_sort_key(x, locale = "en"), charToRaw)) out <- list( as.raw(c(0x2a, 0x01, 0x05, 0x01, 0xdc)), as.raw(c(0x2a, 0x01, 0x05, 0x01, 0x05)), as.raw(c(0x2c, 0x01, 0x05, 0x01, 0x05)), as.raw(c(0x2c, 0x01, 0x05, 0x01, 0xdc)) ) out <- vapply(out, FUN.VALUE = character(1), function(x) { # Uses native encoding x <- rawToChar(x) Encoding(x) <- "bytes" x }) out } expect_identical( vec_order_radix(x, chr_proxy_collate = sort_key), c(2L, 1L, 3L, 4L) ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - error checking test_that("`na_value` is checked", { expect_error(vec_order_radix(1L, na_value = "x"), "\"largest\" or \"smallest\"") expect_error(vec_order_radix(1L, na_value = c(TRUE, TRUE)), "must be a character vector") expect_error(vec_order_radix(1L, na_value = NA_character_), "can't be missing") }) test_that("`direction` is checked", { expect_error(vec_order_radix(1L, direction = "x"), "must contain only") expect_error(vec_order_radix(1L, direction = c("asc", "asc")), "single value") expect_error(vec_order_radix(1L, direction = NA_character_), "can't be missing") expect_error(vec_order_radix(data.frame(x = 1), direction = c("asc", "asc")), "length 1 or") }) test_that("`x` is checked", { expect_error(vec_order_radix(foobar()), class = "vctrs_error_scalar_type") }) # ------------------------------------------------------------------------------ # vec_order_radix() - groups test_that("groups can be reallocated if we exceed the max group data size", { set.seed(123) # The first column has all unique groups so 1 more than the default group # data size is needed and will be reallocated on the fly df <- data.frame( x = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE), y = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE), z = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - comparison proxy test_that("ordering works with rcrd types", { x <- tuple(c(1, 2, 1), c(3, 2, 1)) expect_identical(vec_order_radix(x), c(3L, 1L, 2L)) }) test_that("data frame comparison proxies don't allow vector `direction` or `na_value`", { x <- tuple(c(1, 2, 1), c(3, 2, 1)) expect_error(vec_order_radix(x, direction = c("desc", "asc")), "single value") expect_error(vec_order_radix(x, na_value = c("largest", "smallest")), "single value") }) test_that("ordering works with df-cols", { df_col <- new_data_frame(list(y = c(2, 1, 2), z = c(3, 3, 3))) df <- new_data_frame(list(x = c(1, 1, 1), y = df_col)) expect_identical(vec_order_radix(df), c(2L, 1L, 3L)) # Can only supply a max of 2 `direction` or `na_value` values which get internally # expanded to 3 to match the flattened df proxy expect_identical(vec_order_radix(df, direction = c("asc", "desc")), c(1L, 3L, 2L)) expect_error(vec_order_radix(df, direction = c("desc", "desc", "asc")), "or length equal to") }) test_that("ordering works with df-cols with 0 cols", { df_col <- new_data_frame(list(), n = 3L) df <- new_data_frame(list(x = c(1, 3, 1), y = df_col, z = c(2, 1, 1))) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) # Can supply 3 `direction` values even though the 0-col df-col gets dropped expect_identical(vec_order_radix(df, direction = c("asc", "desc", "desc")), c(1L, 3L, 2L)) expect_error(vec_order_radix(df, direction = c("desc", "asc")), "or length equal to") }) test_that("ordering works with rcrd cols", { y <- tuple(c(1, 2, 1), c(3, 2, 1)) df <- new_data_frame(list(z = c(1, 1, 1), y = y)) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) # Can only supply a max of 2 `direction` values which get internally # expanded to 3 to match the flattened df proxy expect_identical(vec_order_radix(df, direction = c("asc", "desc")), c(2L, 1L, 3L)) expect_error(vec_order_radix(df, direction = c("desc", "desc", "asc")), "or length equal to") }) # ------------------------------------------------------------------------------ # `vec_order_radix()` - Pre-existing tests test_that("can request NAs sorted first", { expect_equal(vec_order_radix(c(1, NA), direction = "asc", na_value = "largest"), 1:2) expect_equal(vec_order_radix(c(1, NA), direction = "desc", na_value = "largest"), 2:1) expect_equal(vec_order_radix(c(1, NA), direction = "asc", na_value = "smallest"), 2:1) expect_equal(vec_order_radix(c(1, NA), direction = "desc", na_value = "smallest"), 1:2) }) test_that("can sort data frames", { df <- data.frame(x = c(1, 2, 1), y = c(1, 2, 2)) out1 <- vec_sort(df) expect_equal(out1, data.frame(x = c(1, 1, 2), y = c(1, 2, 2))) out2 <- vec_sort(df, direction = "desc") expect_equal(out2, data.frame(x = c(2, 1, 1), y = c(2, 2, 1))) }) test_that("can sort empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_sort(df1), df1) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_sort(df2), df2) }) test_that("can order tibbles that contain non-comparable objects", { expect_equal(vec_order_radix(data_frame(x = list(10, 2, 1))), 1:3) }) test_that("can order matrices and arrays (#306)", { x <- matrix(c(1, 1, 1, 1, 2, 1), ncol = 2) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) x <- array(1:8, c(2, 2, 2)) x[2] <- 1 x[3] <- 5 expect_identical(vec_order_radix(x), 2:1) }) test_that("can order empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_order_radix(df1), integer()) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_order_radix(df2), integer()) }) test_that("can order data frames with data frame columns (#527)", { expect_equal( vec_order_radix(iris), vec_order_radix(data_frame(iris = iris)) ) }) test_that("can order data frames (and subclasses) with matrix columns", { df <- new_data_frame(n = 2L) df$x <- new_data_frame(list(y = matrix(1:2, 2))) expect_identical(vec_order_radix(df), 1:2) df$x <- tibble::tibble(y = matrix(1:2, 2)) expect_identical(vec_order_radix(df), 1:2) }) # ------------------------------------------------------------------------------ # vec_locate_sorted_groups() test_that("`vec_locate_sorted_groups()` is working", { x <- c(1, 3, 1, 5, 2, 5, 1) expect <- new_data_frame( list( key = c(1, 2, 3, 5), loc = list(c(1L, 3L, 7L), 5L, 2L, c(4L, 6L)) ) ) expect_identical(vec_locate_sorted_groups(x), expect) }) test_that("`chr_proxy_collate` can result in keys being seen as identical", { x <- c("b", "A", "a") y <- c("b", "a", "A") x_expect <- data_frame(key = c("A", "b"), loc = list(c(2L, 3L), 1L)) y_expect <- data_frame(key = c("a", "b"), loc = list(c(2L, 3L), 1L)) expect_identical(vec_locate_sorted_groups(x, chr_proxy_collate = tolower), x_expect) expect_identical(vec_locate_sorted_groups(y, chr_proxy_collate = tolower), y_expect) }) # ------------------------------------------------------------------------------ # `vec_order_info(chr_ordered = FALSE)` test_that("can order character vectors in appearance order", { x <- c("b", "a", "B", "B", "a") info <- vec_order_info(x, chr_ordered = FALSE) expect_identical(info[[1]], c(1L, 2L, 5L, 3L, 4L)) expect_identical(info[[2]], c(1L, 2L, 2L)) expect_identical(info[[3]], 2L) }) test_that("using appearance order means `direction` has no effect", { x <- c("b", "a", "B", "B", "a") info1 <- vec_order_info(x, direction = "asc", chr_ordered = FALSE) info2 <- vec_order_info(x, direction = "desc", chr_ordered = FALSE) expect_identical(info1[[1]], info2[[1]]) expect_identical(info1[[2]], info2[[2]]) expect_identical(info1[[3]], info2[[3]]) }) test_that("appearance order works with NA - `na_value` has no effect", { x <- c(NA, "foo", NA, "bar") info <- vec_order_info(x, chr_ordered = FALSE) expect_identical(info[[1]], c(1L, 3L, 2L, 4L)) expect_identical(info[[2]], c(2L, 1L, 1L)) expect_identical(info[[3]], 2L) }) test_that("appearance order can be mixed with regular ordering", { x <- c("b", "a", "B", "B", "a", "a") y <- c(1, 2, 3, 4, 1, 2) df <- data_frame(x = x, y = y) # `y` breaks ties info <- vec_order_info(df, chr_ordered = FALSE) expect_identical(info[[1]], c(1L, 5L, 2L, 6L, 3L, 4L)) expect_identical(info[[2]], c(1L, 1L, 2L, 1L, 1L)) expect_identical(info[[3]], 2L) }) # ------------------------------------------------------------------------------ # `vec_order_info(nan_distinct = FALSE)` test_that("Indistinct NA and NaN are reported in the same group", { x <- c(NA, NaN) info <- vec_order_info(x, nan_distinct = FALSE) expect_identical(info[[1]], c(1L, 2L)) expect_identical(info[[2]], 2L) expect_identical(info[[3]], 2L) }) # ------------------------------------------------------------------------------ # `vec_order_info()` test_that("Zero column data frames with >0 rows work (#1863)", { # All rows are treated as being from the same group x <- data_frame(.size = 5) info <- vec_order_info(x) expect_identical(info[[1]], 1:5) # Order expect_identical(info[[2]], 5L) # Group sizes expect_identical(info[[3]], 5L) # Max group size }) test_that("Zero column data frames with exactly 0 rows work (#1863)", { # This is a particularly special case, since we don't actually push a group size x <- data_frame(.size = 0L) info <- vec_order_info(x) expect_identical(info[[1]], integer()) expect_identical(info[[2]], integer()) expect_identical(info[[3]], 0L) }) # ------------------------------------------------------------------------------ # vec_sort test_that("can sort data frames", { df <- data.frame(x = c(1, 2, 1), y = c(1, 2, 2)) out1 <- vec_sort(df) expect_equal(out1, data.frame(x = c(1, 1, 2), y = c(1, 2, 2))) out2 <- vec_sort(df, direction = "desc") expect_equal(out2, data.frame(x = c(2, 1, 1), y = c(2, 2, 1))) }) test_that("can sort empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_sort(df1), df1) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_sort(df2), df2) }) # ------------------------------------------------------------------------------ # vec_order test_that("can request NAs sorted first", { expect_equal(vec_order(c(1, NA), direction = "asc", na_value = "largest"), 1:2) expect_equal(vec_order(c(1, NA), direction = "desc", na_value = "largest"), 2:1) expect_equal(vec_order(c(1, NA), direction = "asc", na_value = "smallest"), 2:1) expect_equal(vec_order(c(1, NA), direction = "desc", na_value = "smallest"), 1:2) }) test_that("can order complex vectors", { x <- complex(real = c(1, 2, 2, 3, 3), imaginary = c(5, 4, 3, 2, NA)) expect_equal(vec_order(x, direction = "asc", na_value = "largest"), c(1, 3, 2, 4, 5)) expect_equal(vec_order(x, direction = "desc", na_value = "largest"), rev(c(1, 3, 2, 4, 5))) expect_equal(vec_order(x, direction = "asc", na_value = "smallest"), c(5, 1, 3, 2, 4)) expect_equal(vec_order(x, direction = "desc", na_value = "smallest"), rev(c(5, 1, 3, 2, 4))) }) test_that("can order tibbles that contain non-comparable objects", { expect_equal(vec_order(data_frame(x = list(10, 2, 1))), 1:3) }) test_that("can order matrices and arrays (#306)", { x <- matrix(c(1, 1, 1, 1, 2, 1), ncol = 2) expect_identical(vec_order(x), c(1L, 3L, 2L)) x <- array(1:8, c(2, 2, 2)) x[2] <- 1 x[3] <- 5 expect_identical(vec_order(x), 2:1) }) test_that("can order zero column data frames (#356, #1499)", { df <- data_frame() expect_identical(vec_order(df), integer()) df <- data_frame(.size = 5L) expect_identical(vec_order(df), 1:5) df <- data_frame(.size = 5L) expect_identical(vec_order(df, direction = "desc"), 1:5) }) test_that("can order zero row data frames (#356, #1499)", { df <- data.frame(x = numeric()) expect_identical(vec_order(df), integer()) df <- data.frame(x = numeric(), y = integer()) expect_identical(vec_order(df), integer()) df <- data.frame(x = numeric(), y = integer()) expect_identical(vec_order(df, direction = "desc"), integer()) }) test_that("can order data frames with data frame columns (#527)", { expect_equal( vec_order(iris), vec_order(data_frame(iris = iris)) ) }) test_that("can order data frames (and subclasses) with matrix columns", { df <- new_data_frame(n = 2L) df$x <- new_data_frame(list(y = matrix(1:2, 2))) expect_identical(vec_order(df), 1:2) df$x <- tibble::tibble(y = matrix(1:2, 2)) expect_identical(vec_order(df), 1:2) }) test_that("classed proxies do not affect performance (tidyverse/dplyr#5423)", { skip_on_cran() x <- glue::glue("{1:10000}") expect_time_lt(vec_order(x), 0.2) }) test_that("can order data frames that don't allow removing the column names (#1298)", { skip_if_not_installed("withr") local_methods( `names<-.vctrs_foobar` = function(x, value) { if (is.null(value)) { abort("Cannot remove names.") } NextMethod() } ) df <- foobar(data.frame(x = 1, y = 2)) expect_silent(expect_identical(vec_order(df), 1L)) }) test_that("missing values in lists are respected (#1401)", { x <- list(1, NULL, 2, NULL) expect_identical(vec_order(x, na_value = "largest"), c(1L, 3L, 2L, 4L)) expect_identical(vec_order(x, na_value = "smallest"), c(2L, 4L, 1L, 3L)) }) test_that("dots must be empty (#1647)", { expect_snapshot(error = TRUE, { vec_order(1, 2) }) expect_snapshot(error = TRUE, { vec_sort(1, 2) }) }) vctrs/tests/testthat/test-hash.R0000644000176200001440000001110014276722575016452 0ustar liggesusers # Vectorised -------------------------------------------------------------- test_that("vec_hash() produces same hash for same values", { x <- vec_hash(1:3) y <- do.call(c, map(1:3, vec_hash)) expect_identical(x, y) }) test_that("F, T, and NA hash to different values", { x <- map(c(TRUE, FALSE, NA), vec_hash) expect_length(unique(x), 3) }) test_that("vec_hash of double produces different values", { x <- vec_hash(c(1, 1, 2)) expect_true(identical(x[1:4], x[5:8])) expect_false(identical(x[5:8], x[9:12])) }) test_that("NA and NaN hash to different values", { x <- vec_hash(c(NA, NaN)) expect_false(identical(x[1:4], x[5:8])) }) test_that("same string hashes to same value", { x <- vec_hash(c("1", "1", "2")) expect_true(identical(x[1:4], x[5:8])) expect_false(identical(x[5:8], x[9:12])) }) test_that("list hashes to values of individual values", { x <- vec_hash(list(1:3, letters)) expect_identical(x[1:4], obj_hash(1:3)) expect_identical(x[5:8], obj_hash(letters)) x <- map(list(list(1:3), list(letters)), vec_hash) expect_identical(x[[1]], obj_hash(1:3)) expect_identical(x[[2]], obj_hash(letters)) }) test_that("hash of data frame works down rows", { df <- data.frame(x = 1:3, y = 1:3) x <- vec_hash(df) expect_length(x, 4 * vec_size(df)) expect_identical(x[1:4], vec_hash(df[1, ])) }) test_that("hashes are consistent from run to run", { # no string, since we're currently hashing the address in string pool df <- list( lgl = c(TRUE, FALSE, NA), int = 1:100, dbl1 = as.double(1:100), dbl2 = seq(0, 1, length = 100) ) hash <- lapply(df, vec_hash) # Big-endian results are byte-swapped, but otherwise equivalent. # Swap results so that there's no need to save results twice. if (.Platform$endian == "big") { hash <- lapply( hash, function(x) { writeBin(readBin(x, "int", 100, endian = "big"), x, endian = "little") } ) } local_options(max.print = 99999) expect_snapshot(hash) }) test_that("can hash list of non-vectors", { x <- list(quote(x), mean) expect_equal( vec_hash(x), c(obj_hash(x[[1]]), obj_hash(x[[2]])) ) }) test_that("can hash matrices", { x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2)) expect_identical( vec_hash(x), vec_hash(x) ) x <- matrix(c(1, 2, 3, 4), c(2, 2)) expect_identical( vec_hash(x), vec_hash(x) ) expect_false(identical( vec_hash(x), vec_hash(c(1, 2)) )) y <- matrix(c(1, 2, 3, 5), c(2, 2)) expect_false(identical( vec_hash(x), vec_hash(y) )) }) test_that("can hash NA", { expect_identical( vec_hash(NA), vec_hash(NA), ) }) test_that("can hash 1D arrays", { # 1D arrays are dispatched to `as.data.frame.vector()` which # currently does not strip dimensions. This caused an infinite # recursion. expect_length(vec_hash(array(1:2)), 8) expect_identical(vec_hash(array(1:2)), vec_hash(1:2)) }) test_that("can hash raw vectors", { expect_identical(vec_hash(0:255), vec_hash(as.raw(0:255))) }) test_that("can hash complex vectors", { expect_identical( vec_hash(c(1, 2) + 0i), c(obj_hash(c(1, 0)), obj_hash(c(2, 0))) ) }) test_that("hash treats positive and negative 0 as equivalent (#637)", { expect_equal(vec_hash(-0), vec_hash(0)) }) test_that("can hash lists of expressions", { expect_equal( vec_hash(list(expression(x), expression(y))), c(obj_hash(expression(x)), obj_hash(expression(y))) ) }) test_that("vec_hash() uses recursive equality proxy", { x <- new_data_frame(list(x = foobar(1:3))) default <- vec_hash(x) local_methods(vec_proxy_equal.vctrs_foobar = function(...) c(0, 0, 0)) overridden <- vec_hash(x) expect_false(identical(default, overridden)) }) # Object ------------------------------------------------------------------ test_that("equal objects hash to same value", { # just test function since they'll recurse through every other object type f1 <- function(x, y = NULL) x + y f2 <- function(x, y = NULL) x + y expect_false(identical(obj_hash(f1), obj_hash(f2))) expect_false(identical(vec_hash(data_frame(x = list(f1))), vec_hash(data_frame(x = list(f2))))) attr(f1, "srcref") <- NULL attr(f2, "srcref") <- NULL expect_equal(obj_hash(f1), obj_hash(f2)) expect_equal(vec_hash(data_frame(x = list(f1))), vec_hash(data_frame(x = list(f2)))) }) test_that("expression vectors hash to the same value as lists of calls/names", { expect_equal( obj_hash(expression(x, y)), obj_hash(list(as.name("x"), as.name("y"))) ) expect_equal( obj_hash(expression(mean(), sd())), obj_hash(list(call("mean"), call("sd"))) ) }) vctrs/tests/testthat/test-rep.R0000644000176200001440000001204114363556517016320 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_rep() test_that("`vec_rep()` can repeat vectors", { expect_identical(vec_rep(1:2, 5), rep(1:2, 5)) expect_identical(vec_rep(list(1, "x"), 5), rep(list(1, "x"), 5)) }) test_that("`vec_rep()` repeats data frames row wise", { x <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_rep(x, 2), vec_slice(x, c(1:2, 1:2))) }) test_that("`vec_rep()` can repeat 0 `times`", { expect_identical(vec_rep(1, 0), numeric()) }) test_that("`vec_rep()` can repeat 1 `time`", { expect_identical(vec_rep(1:3, 1), 1:3) }) test_that("`vec_rep()` can repeat `x` of size 1", { expect_identical(vec_rep(1, 2), c(1, 1)) }) test_that("`vec_rep()` errors on long vector output", { # Exact error message may be platform specific expect_error(vec_rep(1:2, .Machine$integer.max), "output size must be less than") }) test_that("`vec_rep()` validates `times`", { expect_snapshot({ (expect_error(my_vec_rep(1, "x"), class = "vctrs_error_incompatible_type")) (expect_error(my_vec_rep(1, c(1, 2)))) (expect_error(my_vec_rep(1, -1))) (expect_error(my_vec_rep(1, NA_integer_))) }) }) # ------------------------------------------------------------------------------ # vec_rep_each() test_that("`vec_rep_each()` can repeat each element of vectors", { expect_identical(vec_rep_each(1:2, 5), rep(1:2, each = 5)) expect_identical(vec_rep_each(list(1, "x"), 5), rep(list(1, "x"), each = 5)) }) test_that("`vec_rep_each()` `times` is vectorized", { expect_identical(vec_rep_each(1:2, 1:2), rep(1:2, times = 1:2)) }) test_that("`vec_rep_each()` repeats data frames row wise", { x <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_rep_each(x, c(2, 1)), vec_slice(x, c(1, 1, 2))) }) test_that("`vec_rep_each()` can repeat 0 `times`", { expect_identical(vec_rep_each(1:2, 0), integer()) }) test_that("`vec_rep_each()` finalizes type when repeating 0 times (#1673)", { expect_identical(vec_rep_each(NA, 0), logical()) }) test_that("`vec_rep_each()` retains names when repeating 0 times (#1673)", { x <- c(a = 1, b = 2) expect_identical(vec_rep_each(x, 0), named(numeric())) }) test_that("`vec_rep_each()` can repeat 1 `time`", { expect_identical(vec_rep_each(1:2, 1), 1:2) }) test_that("`vec_rep_each()` errors on long vector output", { # Exact error message may be platform specific expect_error(vec_rep_each(1:2, .Machine$integer.max), "output size must be less than") }) test_that("`vec_rep_each()` validates `times`", { expect_snapshot({ (expect_error(my_vec_rep_each(1, "x"), class = "vctrs_error_incompatible_type")) (expect_error(my_vec_rep_each(1, -1))) (expect_error(my_vec_rep_each(c(1, 2), c(1, -1)))) (expect_error(my_vec_rep_each(1, NA_integer_))) (expect_error(my_vec_rep_each(c(1, 2), c(1, NA_integer_)))) }) }) test_that("`vec_rep_each()` uses recyclying errors", { expect_snapshot({ (expect_error(my_vec_rep_each(1:2, 1:3), class = "vctrs_error_recycle_incompatible_size")) }) }) # ------------------------------------------------------------------------------ test_that("`vec_rep()` validates `times`", { expect_snapshot(error = TRUE, my_vec_rep(1, "x")) expect_snapshot(error = TRUE, my_vec_rep(1, c(1, 2))) expect_snapshot(error = TRUE, my_vec_rep(1, -1)) expect_snapshot(error = TRUE, my_vec_rep(1, NA_integer_)) }) test_that("`vec_rep_each()` validates `times`", { expect_snapshot(error = TRUE, my_vec_rep_each(1, "x")) expect_snapshot(error = TRUE, my_vec_rep_each(1, -1)) expect_snapshot(error = TRUE, my_vec_rep_each(c(1, 2), c(1, -1))) expect_snapshot(error = TRUE, my_vec_rep_each(1, NA_integer_)) expect_snapshot(error = TRUE, my_vec_rep_each(c(1, 2), c(1, NA_integer_))) }) test_that("`vec_rep_each()` uses recyclying errors", { expect_snapshot(error = TRUE, my_vec_rep_each(1:2, 1:3)) }) # vec_unrep -------------------------------------------------------------------- test_that("can unrep a vector", { x <- c(1, 3, 3, 1, 5, 5, 6) expect <- data_frame( key = c(1, 3, 1, 5, 6), times = c(1L, 2L, 1L, 2L, 1L) ) expect_identical(vec_unrep(x), expect) }) test_that("can unrep a data frame", { df <- data_frame( x = c(1, 1, 2, 2, 2), y = c(1, 1, 1, 1, 2) ) expect <- data_frame( key = vec_slice(df, c(1, 3, 5)), times = c(2L, 2L, 1L) ) expect_identical(vec_unrep(df), expect) }) test_that("works with size zero input", { expect_identical(vec_unrep(integer()), data_frame(key = integer(), times = integer())) }) test_that("can roundtrip empty input", { x <- integer() compressed <- vec_unrep(x) expect_identical(vec_rep_each(compressed$key, compressed$times), x) x <- data_frame() compressed <- vec_unrep(x) expect_identical(vec_rep_each(compressed$key, compressed$times), x) }) test_that("works with data frames with rows but no columns", { x <- data_frame(.size = 5) expect <- data_frame(key = data_frame(.size = 1L), times = 5L) expect_identical(vec_unrep(x), expect) }) test_that("errors on scalars", { expect_snapshot(error = TRUE, { vec_unrep(environment()) }) }) vctrs/tests/testthat/test-translate.R0000644000176200001440000001034314276722575017534 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_normalize_encoding() test_that("can translate a character vector of various encodings (#553)", { x <- unlist(encodings(), use.names = FALSE) results <- vec_normalize_encoding(x) expect_equal_encoding(results, encodings()$utf8) }) test_that("translates all encodings to UTF-8", { encs <- encodings() for (enc in encs) { expect_equal_encoding(vec_normalize_encoding(enc), encodings()$utf8) } }) test_that("can translate a list containing character vectors with different encodings", { results <- vec_normalize_encoding(encodings()) results <- unlist(results) expect_equal_encoding(results, encodings()$utf8) }) test_that("translation fails purposefully with any bytes", { expect_error( vec_normalize_encoding(encoding_bytes()), "translating strings with \"bytes\" encoding" ) }) test_that("translation fails purposefully when mixing with bytes with other encodings", { for (enc in encodings()) { x <- c(encoding_bytes(), enc) expect_error(vec_normalize_encoding(x), "translating strings with \"bytes\" encoding") } }) test_that("attributes are kept on translation (#599)", { encs <- encodings() x <- c(encs$utf8, encs$latin1) x <- structure(x, names = c("a", "b"), extra = 1) expect_equal(attributes(vec_normalize_encoding(x)), attributes(x)) }) test_that("translation is robust against scalar types contained in lists (#633)", { x <- list(a = z ~ y, b = z ~ z) expect_equal(vec_normalize_encoding(x), x) }) test_that("translation can still occur even if a scalar type is in a list", { encs <- encodings() x <- list(a = z ~ y, b = encs$latin1) result <- vec_normalize_encoding(x) expect_equal_encoding(result$b, encs$utf8) }) test_that("translation occurs inside scalars contained in a list", { encs <- encodings() scalar <- structure(list(x = encs$latin1), class = "scalar_list") lst <- list(scalar) result <- vec_normalize_encoding(lst) expect_equal_encoding(result[[1]]$x, encs$utf8) }) test_that("translation treats data frames elements of lists as lists (#1233)", { encs <- encodings() field <- c(encs$utf8, encs$latin1) a <- new_rcrd(list(field = field)) df <- data.frame(a = a, b = 1:2) x <- list(df) # Recursive proxy won't proxy list elements, # so the rcrd column in the data frame won't get proxied proxy <- vec_proxy_equal(x) result <- vec_normalize_encoding(proxy) expect_identical(result, x) result_field <- field(result[[1]]$a, "field") expect_field <- c(encs$utf8, encs$utf8) expect_equal_encoding(result_field, expect_field) }) test_that("attributes are translated", { utf8 <- encodings()$utf8 latin1 <- encodings()$latin1 a <- structure(1, enc = utf8) b <- structure(1, enc = latin1) c <- structure(1, enc1 = utf8, enc2 = list(latin1), enc3 = latin1) x <- list(a, b, c) result <- vec_normalize_encoding(x) a_enc <- attr(result[[1]], "enc") b_enc <- attr(result[[2]], "enc") c_enc1 <- attr(result[[3]], "enc1") c_enc2 <- attr(result[[3]], "enc2")[[1]] c_enc3 <- attr(result[[3]], "enc3") expect_equal_encoding(a_enc, utf8) expect_equal_encoding(b_enc, utf8) expect_equal_encoding(c_enc1, utf8) expect_equal_encoding(c_enc2, utf8) expect_equal_encoding(c_enc3, utf8) expect <- list( structure(1, enc = utf8), structure(1, enc1 = utf8, enc2 = list(utf8), enc3 = utf8) ) expect_identical(vec_unique(x), expect) }) test_that("attributes are translated recursively", { utf8 <- encodings()$utf8 latin1 <- encodings()$latin1 nested <- structure(1, latin1 = latin1) x <- structure(2, nested = nested, foo = 1, latin1 = latin1) result <- vec_normalize_encoding(x) attrib <- attributes(result) attrib_nested <- attributes(attrib$nested) expect_equal_encoding(attrib$latin1, utf8) expect_equal_encoding(attrib_nested$latin1, utf8) }) test_that("NAs aren't converted to 'NA' (#1291)", { utf8 <- c(NA, encodings()$utf8) latin1 <- c(NA, encodings()$latin1) result1 <- vec_normalize_encoding(utf8) result2 <- vec_normalize_encoding(latin1) expect_equal_encoding(result1, utf8) expect_equal_encoding(result2, utf8) expect_identical(result1[[1]], NA_character_) expect_identical(result2[[1]], NA_character_) }) vctrs/tests/testthat/test-partial-frame.R0000644000176200001440000000533714276722575020272 0ustar liggesusers test_that("has ok print method", { pf <- vec_ptype2(partial_frame(x = 1L), data.frame(y = 2)) expect_snapshot(pf) expect_equal(vec_ptype_abbr(pf), "prtl") }) test_that("order of variables comes from data", { pf <- partial_frame(y = 1, x = 2) df <- data.frame(x = 1, y = 2) expect_named(vec_ptype_common(pf, df), c("x", "y")) expect_named(vec_ptype_common(df, pf), c("x", "y")) }) test_that("partial variables added to end if not in data", { pf <- partial_frame(y = 1) df <- data.frame(x = 1) expect_named(vec_ptype_common(pf, df), c("x", "y")) expect_named(vec_ptype_common(df, pf), c("x", "y")) }) test_that("can assert partial frames based on column presence", { pf <- partial_frame(y = 1) expect_true(vec_is(data.frame(y = 2), pf)) expect_false(vec_is(data.frame(x = 1), pf)) expect_true(vec_is(data.frame(x = 1, y = 2), pf)) expect_true(vec_is(data.frame(x = 1, y = 2, z = 3), pf)) pf <- partial_frame(y = 1, z = 3) expect_false(vec_is(data.frame(y = 2), pf)) expect_false(vec_is(data.frame(x = 1), pf)) expect_false(vec_is(data.frame(x = 1, y = 2), pf)) expect_true(vec_is(data.frame(x = 1, y = 2, z = 3), pf)) }) test_that("can assert partial frames based on column type", { pf <- partial_frame(y = 1) expect_false(vec_is(data.frame(y = "2"), pf)) }) test_that("incompatible data frames are an error", { df <- data.frame(y = 1) expect_error(vec_ptype2(df, partial_frame(y = chr())), class = "vctrs_error_incompatible_type") expect_error(new_partial_frame(df, data.frame(y = chr())), class = "vctrs_error_incompatible_type") }) test_that("dispatch is symmetric with tibbles", { # Now causes infloop with the new double-dispatch mechanism because # of the way we call vec_ptype2() from is_same_type() for partial # types return(expect_true(TRUE)) left <- vec_ptype2(partial_frame(x = 1), tibble::tibble(x = 1)) right <- vec_ptype2(tibble::tibble(x = 1), partial_frame(x = 1)) expect_identical(left, right) }) test_that("can take the common type with partial frames", { exp <- data.frame(x = dbl(), y = chr(), a = chr()) out <- vec_ptype_common( partial_frame(x = double(), a = character()), data.frame(x = 1L, y = "a") ) expect_identical(out, exp) out <- vec_ptype_common( data.frame(x = 1L, y = "a"), partial_frame(x = double(), a = character()) ) expect_identical(out, data.frame(x = dbl(), y = chr(), a = chr())) }) test_that("can rbind with a partial frame prototype", { out <- vec_rbind( data.frame(x = 1L, y = "a"), data.frame(x = FALSE, z = 10), .ptype = partial_frame(x = double(), a = character()) ) exp <- data.frame( x = dbl(1, 0), y = chr("a", NA), z = dbl(NA, 10), a = chr(NA, NA) ) expect_identical(out, exp) }) vctrs/tests/testthat/test-type2.R0000644000176200001440000002667114362266120016576 0ustar liggesusers test_that("base coercions are symmetric and unchanging", { types <- list( logical(), integer(), double(), character(), raw(), list() ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) expect_snapshot(mat) }) test_that("new classes are uncoercible by default", { x <- structure(1:10, class = "vctrs_nonexistant") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") }) test_that("vec_typeof2() returns common type", { nms <- names(base_empty_types) for (i in seq_along(base_empty_types)) { this <- nms[[i]] for (j in seq_along(base_empty_types)) { that <- nms[[j]] if (i <= j) { exp <- paste0("VCTRS_TYPE2_", this, "_", that) } else { exp <- paste0("VCTRS_TYPE2_", that, "_", this) } out <- vec_typeof2(base_empty_types[[this]], base_empty_types[[that]]) expect_identical(out, exp) } } }) test_that("vec_typeof2_s3() returns common type", { all_base_empty_types <- c(base_empty_types, base_s3_empty_types) nms_s3 <- names(base_s3_empty_types) nms <- names(all_base_empty_types) for (i in seq_along(all_base_empty_types)) { this <- nms[[i]] for (j in seq_along(all_base_empty_types)) { that <- nms[[j]] # Skip when we have two non s3 objects if (!(this %in% nms_s3) & !(that %in% nms_s3)) { next } if (i <= j) { exp <- paste0("VCTRS_TYPE2_S3_", this, "_", that) } else { exp <- paste0("VCTRS_TYPE2_S3_", that, "_", this) } out <- vec_typeof2_s3(all_base_empty_types[[this]], all_base_empty_types[[that]]) expect_identical(out, exp) } } }) test_that("vec_ptype2() dispatches when inputs have shape", { expect_identical(dim(vec_ptype2(int(), matrix(nrow = 3, ncol = 4))), c(0L, 4L)) expect_identical(dim(vec_ptype2(matrix("", nrow = 3), c("", "", ""))), c(0L, 1L)) }) test_that("vec_ptype2() requires vectors", { expect_error(vec_ptype2(NULL, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(NA, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(list(), quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), NULL), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), NA), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), list()), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), quote(name)), class = "vctrs_error_scalar_type") }) test_that("vec_ptype2() with unspecified requires vectors", { expect_error(vec_ptype2(unspecified(), quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), unspecified()), class = "vctrs_error_scalar_type") }) test_that("vec_ptype2() forwards argument tag", { expect_error(vec_ptype2(quote(name), list(), x_arg = "foo"), "`foo`", class = "vctrs_error_scalar_type") expect_error(vec_ptype2(list(), quote(name), y_arg = "foo"), "`foo`", class = "vctrs_error_scalar_type") }) test_that("stop_incompatible_type() checks for scalars", { expect_error(stop_incompatible_type(NA, foobar(), x_arg = "x", y_arg = "y"), class = "vctrs_error_scalar_type") expect_error(vec_ptype_common(NA, foobar()), class = "vctrs_error_scalar_type") expect_error(vec_ptype_common(foobar(), list()), class = "vctrs_error_scalar_type") }) test_that("vec_ptype2() methods forward args to stop_incompatible_type()", { expect_args(new_hidden(), lgl(), x_arg = "foo", y_arg = "bar") expect_args(lgl(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(int(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(dbl(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(chr(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(list(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(new_rcrd(list(x = NA)), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(data.frame(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(Sys.Date(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(as.difftime(1, units = "hours"), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(factor(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(ordered(""), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(ordered(""), factor(), x_arg = "foo", y_arg = "bar") expect_args(bit64::as.integer64(1), new_hidden(), x_arg = "foo", y_arg = "bar") }) test_that("vec_ptype2() data frame methods builds argument tags", { # Bare objects expect_snapshot(error = TRUE, { vec_ptype2("foo", 10) }) # Nested dataframes expect_snapshot(error = TRUE, { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) vec_ptype2(df1, df2) }) }) test_that("stop_incompatible_type() can be called without argument tags", { expect_error(stop_incompatible_type(1, 2, x_arg = "", y_arg = ""), " and ", class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2() returns empty prototype when other input is NULL", { expect_identical(vec_ptype2(1:5, NULL), int()) expect_identical(vec_ptype2(NULL, 1:5), int()) }) test_that("Subclasses of data.frame dispatch to `vec_ptype2()` methods", { local_methods( vec_ptype2.quuxframe = function(x, y, ...) UseMethod("vec_ptype2.quuxframe"), vec_ptype2.quuxframe.data.frame = function(x, y, ...) "dispatched!", vec_ptype2.data.frame.quuxframe = function(x, y, ...) "dispatched!" ) quux <- structure(data.frame(), class = c("quuxframe", "data.frame")) expect_identical(vec_ptype2(quux, mtcars), "dispatched!") expect_identical(vec_ptype2(mtcars, quux), "dispatched!") quux <- structure(data.frame(), class = c("quuxframe", "tbl_df", "data.frame")) expect_identical(vec_ptype2(quux, mtcars), "dispatched!") expect_identical(vec_ptype2(mtcars, quux), "dispatched!") }) test_that("Subclasses of `tbl_df` have `tbl_df` common type (#481)", { quux <- foobar(tibble()) expect_identical( vec_ptype_common(quux, tibble()), tibble() ) expect_identical( vec_ptype_common(tibble(), quux), tibble() ) }) test_that("Column name encodings are handled correctly in the common type (#553)", { encs <- encodings() data <- list(chr()) df_utf8 <- tibble::as_tibble(set_names(data, encs$utf8)) df_unknown <- tibble::as_tibble(set_names(data, encs$unknown)) expect_identical(vec_ptype2(df_utf8, df_unknown), df_utf8) }) test_that("vec_is_subtype() determines subtyping relationship", { expect_true(vec_is_subtype(lgl(), int())) expect_false(vec_is_subtype(int(), lgl())) expect_false(vec_is_subtype(lgl(), chr())) expect_false(vec_is_subtype(chr(), lgl())) local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) UseMethod("vec_ptype2.vctrs_foobar"), vec_ptype2.vctrs_foobar.logical = function(x, y, ...) logical(), vec_ptype2.logical.vctrs_foobar = function(x, y, ...) logical() ) expect_true(vec_is_subtype(foobar(TRUE), lgl())) expect_false(vec_is_subtype(lgl(), foobar(TRUE))) }) test_that("can override scalar vector error message for base scalar types", { expect_snapshot({ (expect_error(vec_ptype2(NULL, quote(x), y_arg = "foo"), class = "vctrs_error_scalar_type")) (expect_error(vec_ptype2(quote(x), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type")) }) }) test_that("can override scalar vector error message for S3 types", { expect_snapshot({ (expect_error(vec_ptype2(NULL, foobar(), y_arg = "foo"), class = "vctrs_error_scalar_type")) (expect_error(vec_ptype2(foobar(), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type")) }) }) test_that("ptype2 and cast errors when same class fallback is impossible are informative", { expect_snapshot({ (expect_error( vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type" )) (expect_error( vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type" )) }) }) test_that("Incompatible attributes bullets are not show when methods are implemented", { expect_snapshot({ with_foobar_cast <- function(expr ) { with_methods( vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) vec_default_cast(x, to, ...), expr ) } with_foobar_ptype2 <- function(expr ) { with_methods( vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) vec_default_ptype2(x, y, ...), expr ) } (expect_error( with_foobar_cast(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type" )) (expect_error( with_foobar_ptype2(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type" )) }) }) test_that("attributes no longer play a role in bare data frame fallback", { foobar_bud <- foobar(mtcars, bud = TRUE) foobar_boo <- foobar(mtcars, boo = TRUE) expect_equal( vec_ptype2(foobar_bud, foobar_boo), vec_slice(unrownames(mtcars), 0) ) expect_equal( vec_ptype2(foobar(mtcars), foobaz(mtcars)), vec_slice(unrownames(mtcars), 0) ) }) test_that("vec_ptype2() methods get prototypes", { x <- NULL y <- NULL local_methods(vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x <<- x y <<- y NULL }) vec_ptype2(foobar(1:3), foobar(letters)) expect_identical(x, foobar(int())) expect_identical(y, foobar(chr())) vec_ptype2(foobar(mtcars), foobar(iris)) expect_identical(x, foobar(mtcars[0, , drop = FALSE])) expect_identical(y, foobar(iris[0, , drop = FALSE])) }) test_that("vec_ptype2() allows vec_ptype() to return another type", { out <- with_methods( vec_restore.vctrs_foobar = function(x, to, ...) unstructure(x), vec_ptype2(foobar(1), foobar(2)) ) expect_identical(out, dbl()) }) test_that("vec_ptype2() evaluates x_arg and y_arg lazily", { expect_silent(vec_ptype2(1L, 1L, x_arg = print("oof"))) expect_silent(vec_ptype2(1L, 1L, y_arg = print("oof"))) }) test_that("can restart ptype2 errors", { x <- data_frame(x = ordered(c("a", "b", "c"))) y <- data_frame(x = ordered(c("A", "B", "C"))) exp <- c("a", "b", "c", "A", "B", "C") exp <- factor(exp, exp) expect_error(vec_rbind(x, y), class = "vctrs_error_incompatible_type") expect_equal( with_ordered_restart(vec_rbind(x, y)), data_frame(x = exp) ) z <- data_frame(x = chr()) expect_equal( with_ordered_restart(vec_ptype_common(x, y)), data_frame(x = exp[0]) ) expect_equal( with_ordered_restart(vec_ptype_common(x, y, z)), data_frame(x = chr()) ) expect_equal( with_ordered_restart(vec_cast_common(x, y)), list( data_frame(x = factor(c("a", "b", "c"), levels(exp))), data_frame(x = factor(c("A", "B", "C"), levels(exp))) ) ) expect_equal( with_ordered_restart(vec_cast_common(x, y, z)), list( data_frame(x = c("a", "b", "c")), data_frame(x = c("A", "B", "C")), data_frame(x = chr()) ) ) # Factor case y <- data_frame(x = factor(c("A", "B", "C"))) expect_equal( with_ordered_restart(vec_rbind(x, y)), data_frame(x = exp) ) }) test_that("subclasses of tibble are compatible", { tib <- foobar(tibble(x = 1)) ptype <- foobar(tibble(x = dbl())) expect_equal(vec_ptype_common(tib), ptype) expect_equal(vec_ptype_common(tib, tib), ptype) }) vctrs/tests/testthat/helper-cast.R0000644000176200001440000000034714276722575016774 0ustar liggesusers expect_lossy_cast <- function(expr) { cnd <- NULL out <- with_handlers( warning = calling(function(x) { cnd <<- x cnd_muffle(x) }), expr ) expect_s3_class(cnd, "vctrs_warning_cast_lossy") out } vctrs/tests/testthat/test-interval.R0000644000176200001440000004307014315060310017336 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_interval_groups() test_that("can compute groups", { x <- data_frame( start = c(1L, 9L, 2L, 2L, 10L), end = c(5L, 11L, 6L, 8L, 12L) ) expect_identical( vec_interval_groups(x$start, x$end), data_frame(start = c(1L, 9L), end = c(8L, 12L)) ) }) test_that("can group with size one input", { x <- data_frame(start = 1L, end = 2L) expect_identical( vec_interval_groups(x$start, x$end), x ) }) test_that("can group with size zero input", { x <- data_frame(start = integer(), end = integer()) expect_identical( vec_interval_groups(x$start, x$end), x ) }) test_that("missing intervals are retained", { x <- data_frame(start = NA, end = NA) expect_identical( vec_interval_groups(x$start, x$end), x ) x <- data_frame(start = c(NA, NA), end = c(NA, NA)) expect_identical( vec_interval_groups(x$start, x$end), x[1,] ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) expect_identical( vec_interval_groups(x$start, x$end), data_frame(start = c(2, NA), end = c(5, NA)) ) }) test_that("missing intervals can be dropped", { x <- data_frame(start = NA, end = NA) expect_identical( vec_interval_groups(x$start, x$end, missing = "drop"), data_frame(start = logical(), end = logical()) ) x <- data_frame(start = c(NA, NA), end = c(NA, NA)) expect_identical( vec_interval_groups(x$start, x$end, missing = "drop"), data_frame(start = logical(), end = logical()) ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) expect_identical( vec_interval_groups(x$start, x$end, missing = "drop"), data_frame(start = 2, end = 5) ) }) test_that("max endpoint is retained even if it isn't the last in the group", { # 10 is max end of first group, but 5 is last value in that group x <- data_frame(start = c(1L, 2L, 12L), end = c(10L, 5L, 15L)) expect_identical( vec_interval_groups(x$start, x$end), data_frame(start = c(1L, 12L), end = c(10L, 15L)) ) }) # ------------------------------------------------------------------------------ # vec_interval_locate_groups() test_that("can locate groups", { x <- data_frame( start = c(1L, 9L, 2L, 2L, 10L), end = c(5L, 11L, 6L, 8L, 12L) ) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = c(1L, 9L), end = c(8L, 12L)) ) expect_identical( out$loc, list(c(1L, 3L, 4L), c(2L, 5L)) ) }) test_that("can locate groups with size one input", { expect_identical( vec_interval_locate_groups(1L, 2L), data_frame( key = data_frame(start = 1L, end = 2L), loc = list(1L) ) ) }) test_that("can locate groups with size zero input", { expect_identical( vec_interval_locate_groups(integer(), integer()), data_frame( key = data_frame(start = integer(), end = integer()), loc = list() ) ) }) test_that("locations are ordered by both `start` and `end`", { x <- data_frame(start = c(4L, 4L, 1L), end = c(6L, 5L, 2L)) out <- vec_interval_locate_groups(x$start, x$end) # Ties of `start = 4` are broken by `end` values and reordered expect_identical( out$loc, list(3L, c(2L, 1L)) ) # So this orders `x` expect_identical( vec_slice(x, unlist(out$loc)), vec_sort(x) ) }) test_that("missing intervals are retained", { x <- data_frame(start = NA, end = NA) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = NA, end = NA) ) expect_identical( out$loc, list(1L) ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = c(2, NA), end = c(5, NA)) ) expect_identical( out$loc, list(c(3L, 1L), c(2L, 4L)), ) }) test_that("missing intervals can be dropped", { x <- data_frame(start = NA, end = NA) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical( out$key, data_frame(start = logical(), end = logical()) ) expect_identical( out$loc, list() ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical( out$key, data_frame(start = 2, end = 5) ) expect_identical( out$loc, list(c(3L, 1L)), ) }) test_that("treats NA and NaN as equivalent with doubles", { x <- data_frame(start = c(NA, NaN, NA, NaN), end = c(NA, NA, NaN, NaN)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = NA_real_, end = NaN) ) expect_identical( out$loc, list(1:4), ) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical( out$key, data_frame(start = double(), end = double()) ) expect_identical( out$loc, list(), ) }) test_that("recognizes missing rows in data frames", { start <- data_frame(year = c(2019, NA, NA, 2019, 2019), month = c(12, NA, NA, 12, 12)) end <- data_frame(year = c(2020, NA, NA, 2020, 2020), month = c(2, NA, NA, 11, 12)) x <- data_frame(start = start, end = end) out <- vec_interval_locate_groups(x$start, x$end) expect_start <- data_frame(year = c(2019, NA), month = c(12, NA)) expect_end <- data_frame(year = c(2020, NA), month = c(12, NA)) expect <- data_frame(start = expect_start, end = expect_end) expect_identical(out$key, expect) expect_identical(out$loc, list(c(1L, 4L, 5L), c(2L, 3L))) }) test_that("works on various types", { x <- data_frame(start = c(1.5, 2, 3.1, NA), end = c(1.7, 3.2, 4.5, NA)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical(out$key, data_frame(start = c(1.5, 2, NA), end = c(1.7, 4.5, NA))) expect_identical(out$loc, list(1L, 2:3, 4L)) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical(out$key, data_frame(start = c(1.5, 2), end = c(1.7, 4.5))) expect_identical(out$loc, list(1L, 2:3)) x <- data_frame(start = c("a", "c", "f", NA), end = c("b", "g", "h", NA)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical(out$key, data_frame(start = c("a", "c", NA), end = c("b", "h", NA))) expect_identical(out$loc, list(1L, 2:3, 4L)) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical(out$key, data_frame(start = c("a", "c"), end = c("b", "h"))) expect_identical(out$loc, list(1L, 2:3)) }) test_that("can keep abutting intervals separate", { # after x <- data_frame(start = c(1L, 2L, 0L), end = c(2L, 3L, 2L)) out <- vec_interval_locate_groups(x$start, x$end, abutting = FALSE) expect_identical(out$key, data_frame(start = c(0L, 2L), end = c(2L, 3L))) expect_identical(out$loc, list(c(3L, 1L), 2L)) # before x <- data_frame(start = c(1L, 0L), end = c(2L, 1L)) out <- vec_interval_locate_groups(x$start, x$end, abutting = FALSE) expect_identical(out$key, data_frame(start = c(0L, 1L), end = c(1L, 2L))) expect_identical(out$loc, list(2L, 1L)) # both x <- data_frame(start = c(1L, 0L, 2L), end = c(2L, 1L, 3L)) out <- vec_interval_locate_groups(x$start, x$end, abutting = FALSE) expect_identical(out$key, data_frame(start = c(0L, 1L, 2L), end = c(1L, 2L, 3L))) expect_identical(out$loc, list(2L, 1L, 3L)) }) test_that("`missing` is validated", { expect_snapshot((expect_error(vec_interval_locate_groups(1, 2, missing = "s")))) expect_snapshot((expect_error(vec_interval_locate_groups(1, 2, missing = c("group", "drop"))))) }) test_that("common type is taken", { expect_snapshot((expect_error(vec_interval_locate_groups(1, "x")))) }) # ------------------------------------------------------------------------------ # vec_interval_complement() test_that("computes the complement", { x <- data_frame( start = c(6L, 1L, 2L, 12L), end = c(9L, 3L, 4L, 14L) ) expect_identical( vec_interval_complement(x$start, x$end), data_frame(start = c(4L, 9L), end = c(6L, 12L)) ) }) test_that("treats intervals as half-open like [a, b)", { x <- data_frame( start = c(1L, 5L), end = c(4L, 6L) ) expect_identical( vec_interval_complement(x$start, x$end), data_frame(start = 4L, end = 5L) ) }) test_that("`[a, b)` and `[b, c)` result in no complement values", { x <- data_frame( start = c(1L, 5L), end = c(5L, 6L) ) expect_identical( vec_interval_complement(x$start, x$end), data_frame(start = integer(), end = integer()) ) }) test_that("works with `lower == upper`", { x <- data_frame( start = c(1L, 2L, 12L, NA), end = c(10L, 5L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 10L, upper = 10L), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(x$start, x$end, lower = -1L, upper = -1L), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 20L, upper = 20L), data_frame(start = integer(), end = integer()) ) }) test_that("works with `lower` before any values", { x <- data_frame( start = c(1L, 2L, 12L, NA), end = c(10L, 5L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = -1L), data_frame(start = c(-1L, 10L), end = c(1L, 12L)) ) }) test_that("works if both `lower` and `upper` are before any values", { x <- data_frame( start = c(2L, 1L, 12L, NA), end = c(5L, 10L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = -5L, upper = -2L), data_frame(start = -5L, end = -2L) ) }) test_that("works with `upper` after any values", { x <- data_frame( start = c(2L, 1L, 13L, 12L, NA), end = c(5L, 10L, 17L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, upper = 20L), data_frame(start = c(10L, 17L), end = c(12L, 20L)) ) }) test_that("works if both `lower` and `upper` are after any values", { x <- data_frame( start = c(2L, 1L, 12L, NA), end = c(5L, 10L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 17L, upper = 19L), data_frame(start = 17L, end = 19L) ) }) test_that("works with only NA and `lower`", { x <- data_frame(start = NA_integer_, end = NA_integer_) expect_identical(vec_interval_complement(x$start, x$end, lower = 5L), data_frame(start = integer(), end = integer())) }) test_that("works with only NA and `upper`", { x <- data_frame(start = NA_integer_, end = NA_integer_) expect_identical(vec_interval_complement(x$start, x$end, upper = 5L), data_frame(start = integer(), end = integer())) }) test_that("works with only NA and both `lower` and `upper`", { x <- data_frame(start = NA_integer_, end = NA_integer_) expect_identical(vec_interval_complement(x$start, x$end, lower = 2L, upper = 5L), data_frame(start = 2L, end = 5L)) }) test_that("works with `lower` that is on the max set value", { x <- data_frame( start = c(1L, 12L), end = c(9L, 13L) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 9L), data_frame(start = 9L, end = 12L) ) }) test_that("works with `upper` that is on the max set value", { x <- data_frame( start = c(-5L, 1L, 2L, 12L), end = c(0L, 10L, 5L, 15L) ) expect_identical( vec_interval_complement(x$start, x$end, upper = 10L), data_frame(start = 0L, end = 1L) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 10L, upper = 10L), data_frame(start = integer(), end = integer()) ) }) test_that("size zero case generally returns nothing", { expect_identical( vec_interval_complement(integer(), integer()), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(integer(), integer(), lower = 5L), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(integer(), integer(), upper = 5L), data_frame(start = integer(), end = integer()) ) }) test_that("size zero case with both `lower` and `upper` returns an interval", { expect_identical( vec_interval_complement(integer(), integer(), lower = 5L, upper = 10L), data_frame(start = 5L, end = 10L) ) }) test_that("size zero case with `lower == upper` doesn't return anything", { expect_identical( vec_interval_complement(integer(), integer(), lower = 5L, upper = 5L), data_frame(start = integer(), end = integer()) ) }) test_that("works when `lower` is contained in an interval", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 3), data_frame(start = 5, end = 10) ) }) test_that("works when `lower` is in a gap between intervals", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 7), data_frame(start = 7, end = 10) ) }) test_that("works when `upper` is in a gap between intervals", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), upper = 7), data_frame(start = c(-3, 5), end = c(1, 7)) ) }) test_that("works when `lower` and `upper` are in a gap between intervals", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 6, upper = 7), data_frame(start = 6, end = 7) ) expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 7, upper = 7), data_frame(start = double(), end = double()) ) }) test_that("works when `lower` and `upper` have an interval between them", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 0, upper = 7), data_frame(start = c(0, 5), end = c(1, 7)) ) expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = -6, upper = 7), data_frame(start = c(-6, -3, 5), end = c(-5, 1, 7)) ) }) test_that("allow `lower > upper` which returns an empty interval", { x <- data_frame(start = c(1, 2), end = c(5, 12)) expect_identical( vec_interval_complement(x$start, x$end, lower = 10, upper = 9), data_frame(start = double(), end = double()) ) }) test_that("complement works when `lower` and `upper` are in the same interval", { x <- data_frame(start = 1, end = 5) expect_identical( vec_interval_complement(x$start, x$end, lower = 2, upper = 4), data_frame(start = double(), end = double()) ) }) test_that("`lower` and `upper` can't contain missing values", { expect_snapshot({ (expect_error(vec_interval_complement(1, 2, lower = NA))) (expect_error(vec_interval_complement(1, 2, upper = NA))) start <- data_frame(x = 1, y = 2) end <- data_frame(x = 1, y = 3) (expect_error(vec_interval_complement(start, end, lower = data_frame(x = 1, y = NA)))) (expect_error(vec_interval_complement(start, end, upper = data_frame(x = 1, y = NA)))) }) }) # ------------------------------------------------------------------------------ # vec_interval_locate_containers() test_that("can locate containers", { x <- data_frame( start = c(1L, 9L, 2L, 2L, 10L), end = c(5L, 12L, 6L, 8L, 12L) ) expect_identical( vec_interval_locate_containers(x$start, x$end), c(1L, 4L, 2L) ) }) test_that("can locate containers with size one input", { x <- data_frame(start = 1L, end = 2L) expect_identical( vec_interval_locate_containers(x$start, x$end), 1L ) }) test_that("can locate containers with size zero input", { x <- data_frame(start = integer(), end = integer()) expect_identical( vec_interval_locate_containers(x$start, x$end), integer() ) }) test_that("missing intervals are retained", { x <- data_frame(start = NA, end = NA) expect_identical( vec_interval_locate_containers(x$start, x$end), 1L ) x <- data_frame(start = c(NA, NA), end = c(NA, NA)) # Ties use first missing value seen expect_identical( vec_interval_locate_containers(x$start, x$end), 1L ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 5, NA)) # Missing intervals at the end expect_identical( vec_interval_locate_containers(x$start, x$end), c(3L, 2L) ) }) test_that("locations order the intervals", { x <- data_frame(start = c(4L, 4L, 1L, NA, 4L), end = c(5L, 6L, 2L, NA, 6L)) out <- vec_interval_locate_containers(x$start, x$end) expect_identical( out, c(3L, 2L, 4L) ) # This orders `x` expect_identical( vec_slice(x, out), vec_sort(vec_slice(x, out)) ) }) test_that("treats NA and NaN as equivalent with doubles", { x <- data_frame(start = c(NA, NaN, NA, NaN), end = c(NA, NA, NaN, NaN)) expect_identical(vec_interval_locate_containers(x$start, x$end), 1L) }) test_that("recognizes missing rows in data frames", { start <- data_frame(year = c(2019, NA, NA, 2019, 2019), month = c(12, NA, NA, 12, 12)) end <- data_frame(year = c(2020, NA, NA, 2020, 2020), month = c(2, NA, NA, 11, 12)) x <- data_frame(start = start, end = end) expect_identical( vec_interval_locate_containers(x$start, x$end), c(5L, 2L) ) }) test_that("duplicate containers return the first", { x <- data_frame(start = c(1, 1, 2, 1, 2), end = c(2, 2, 3, 2, 3)) expect_identical(vec_interval_locate_containers(x$start, x$end), c(1L, 3L)) }) test_that("works on various types", { x <- data_frame(start = c(1.5, 3, NA, 1.6, NA), end = c(1.7, 3.1, NA, 3.2, NA)) out <- vec_interval_locate_containers(x$start, x$end) expect_identical(out, c(1L, 4L, 3L)) x <- data_frame(start = c("a", "a", NA, "f", NA), end = c("b", "g", NA, "h", NA)) out <- vec_interval_locate_containers(x$start, x$end) expect_identical(out, c(2L, 4L, 3L)) }) test_that("common type is taken", { expect_snapshot((expect_error(vec_interval_locate_containers(1, "x")))) }) vctrs/tests/testthat/test-type-vctr.R0000644000176200001440000005237514315060310017457 0ustar liggesusers test_that("constructor sets attributes", { x <- new_vctr(1:4, class = "x", x = 1) expect_equal(x, structure(1:4, class = c("x", "vctrs_vctr"), x = 1)) x <- new_vctr(1:4, class = "x", x = 1, inherit_base_type = TRUE) expect_equal(x, structure(1:4, class = c("x", "vctrs_vctr", "integer"), x = 1)) }) test_that(".data must be a vector", { expect_error(new_vctr(mean), "vector type") }) test_that("attributes other than names are ignored", { out <- new_vctr(structure(1, a = 1)) expect_null(attributes(out)$a) }) test_that("default format method is internal", { x <- new_vctr(1, class = "x") expect_equal(format(x), format(x)) }) test_that("vctr class is proxied", { expect_identical(vec_proxy(new_vctr(1:3)), new_vctr(1:3)) expect_identical(vec_proxy(new_vctr(as.list(1:3))), unclass(new_vctr(as.list(1:3)))) expect_true(vec_is(new_vctr(as.list(1:3)))) }) test_that("Can opt out of base type", { x <- new_vctr(1, class = "x", inherit_base_type = FALSE) expect_s3_class(x, c("x", "vctrs_vctr"), exact = TRUE) }) test_that("base type is always set for lists", { expect_s3_class(new_vctr(list()), "list") }) test_that("cannot opt out of the base type with lists", { expect_error(new_vctr(list(), inherit_base_type = FALSE), "must inherit from the base type") }) test_that("data frames are not allowed", { expect_error(new_vctr(mtcars), "can't be a data frame") }) test_that("attributes must be named", { expect_error(vec_set_attributes(1, list(1)), "must be named") expect_error(vec_set_attributes(1, list(y = 1, 2)), "2 does not") }) test_that("can strip all attributes without adding new ones", { expect_equal(vec_set_attributes(structure(1, a = 1), NULL), 1) }) test_that("`c.vctrs_vctr()` checks for default method arguments (#791)", { expect_error(c(new_vctr(1), recursive = TRUE), "`recursive` must be") expect_error(c(new_vctr(1), use.names = FALSE), "`use.names` must be") }) # Cast/restore ------------------------------------------------------------ test_that("cast to NULL returns x", { x <- new_vctr(1, class = "x") expect_equal(vec_cast(NULL, x), NULL) }) test_that("cast succeeds if attributes equal", { x1 <- new_vctr(1, class = "x", a = 1, b = 2) x2 <- new_vctr(2, class = "x", a = 1, b = 2) expect_equal(vec_cast(x1, x2), x1) expect_equal(vec_cast(x2, x1), x2) }) test_that("and fails if attributes are different", { x1 <- new_vctr(1, class = "x", a = 1, b = 2) x2 <- new_vctr(2, class = "x", a = 2, b = 2) expect_error(vec_cast(x1, x2), class = "vctrs_error_incompatible_type") }) test_that("restoring to atomic vector of same type preserves attributes", { x1 <- new_vctr(1, class = "x") x2 <- new_vctr(2, class = "x") expect_equal(vec_restore(2, x1), x2) }) test_that("restoring to atomic vector of different type throws error", { x1 <- new_vctr(1, class = "x") expect_error(vec_restore("x", x1), class = "vctrs_error_incompatible_type") }) test_that("base coercion methods mapped to vec_cast", { x <- new_vctr(1, inherit_base_type = FALSE) expect_error(as.logical(x), class = "vctrs_error_incompatible_type") expect_error(as.integer(x), class = "vctrs_error_incompatible_type") expect_error(as.logical(x), class = "vctrs_error_incompatible_type") expect_error(as.double(x), class = "vctrs_error_incompatible_type") expect_error(as.character(x), class = "vctrs_error_incompatible_type") expect_error(as.Date(x), class = "vctrs_error_incompatible_type") expect_error(as.POSIXct(x), class = "vctrs_error_incompatible_type") expect_error(as.POSIXlt(x), class = "vctrs_error_incompatible_type") expect_equal(as.list(x), list(x)) }) test_that("as.data.frame creates data frame", { x <- new_vctr(1:3) df <- as.data.frame(x) expect_s3_class(df, "data.frame") expect_equal(nrow(df), 3) expect_named(df, "x") }) test_that("as.data.frame on shaped vctrs doesn't bring along extra attributes", { x <- new_vctr(1:3, foo = "bar", dim = c(3L, 1L)) df <- as.data.frame(x) expect_null(attr(df, "foo", exact = TRUE)) }) test_that("as.data.frame2() unclasses input to avoid dispatch on as.data.frame()", { x <- structure(1:2, dim = c(1L, 2L), dimnames = list("r1", c("c1", "c2")), class = "foo") expect <- data.frame(c1 = 1L, c2 = 2L, row.names = "r1") local_methods(as.data.frame.foo = function(x, ...) "dispatched!") expect_identical(as.data.frame2(x), expect) }) test_that("as.list() chops vectors", { expect_identical( as.list(new_vctr(1:3)), list(new_vctr(1L), new_vctr(2L), new_vctr(3L)) ) x <- new_vctr(as.list(1:3)) expect_identical(as.list(x), as.list(1:3)) }) # equality + comparison + arith + math --------------------------------------- test_that("equality functions remapped", { x <- new_vctr(c(1, 1, NA), inherit_base_type = FALSE) expect_error(x == 1, class = "vctrs_error_incompatible_type") expect_error(x != 1, class = "vctrs_error_incompatible_type") expect_equal(is.na(x), c(FALSE, FALSE, TRUE)) expect_true(anyNA(x)) expect_equal(unique(x), new_vctr(c(1, NA), inherit_base_type = FALSE)) expect_equal(duplicated(x), c(FALSE, TRUE, FALSE)) expect_true(anyDuplicated(x)) }) test_that("is.na<-() supported", { x <- new_vctr(1:4) is.na(x) <- c(FALSE, FALSE, TRUE, NA) expect_identical(x, new_vctr(c(1:2, NA, 4L))) x <- new_vctr(1:4) is.na(x) <- TRUE expect_identical(x, new_vctr(rep(NA_integer_, 4))) x <- new_vctr(1:4) is.na(x) <- c(2, 3) expect_identical(x, new_vctr(c(1L, NA, NA, 4L))) names <- c("a", "b", "c", "d") x <- set_names(new_vctr(1:4), names) is.na(x) <- c("d", "b", "b") expect_identical(x, set_names(new_vctr(c(1L, NA, 3L, NA)), names)) x <- new_vctr(1:4) expect_error(is.na(x) <- "x", "character names to index an unnamed vector") expect_error(is.na(x) <- 5, class = "vctrs_error_subscript_oob") }) test_that("comparison functions remapped", { local_methods( vec_proxy_compare.bizzaro = function(x, ...) -vec_data(x) ) x1 <- new_vctr(c(1, 2), class = "bizzaro") x2 <- new_vctr(2, class = "bizzaro") expect_equal(order(x1), c(2L, 1L)) expect_equal(x1 < x2, c(FALSE, FALSE)) expect_equal(x1 <= x2, c(FALSE, TRUE)) expect_equal(x1 > x2, c(TRUE, FALSE)) expect_equal(x1 >= x2, c(TRUE, TRUE)) }) test_that("operators remapped", { local_methods( vec_arith.bizzaro = function(op, x, y) 1L ) x <- new_vctr(c(1, 2), class = "bizzaro") expect_equal(x + 1, 1L) expect_equal(x - 1, 1L) expect_equal(x * 1, 1L) expect_equal(x / 1, 1L) expect_equal(x ^ 1, 1L) expect_equal(x %% 1, 1L) expect_equal(x %/% 1, 1L) expect_equal(x & 1, 1L) expect_equal(x | 1, 1L) expect_equal(!x, 1L) expect_equal(+x, 1L) expect_equal(-x, 1L) }) test_that("math functions overridden", { local_methods( vec_math.bizzaro = function(fn, x, ...) vec_math_base(fn, 2L) ) x <- new_vctr(c(1, NA), class = "bizzaro") expect_equal(mean(x), 2L) expect_equal(sum(x), 2L) expect_equal(is.finite(x), TRUE) expect_equal(is.infinite(x), FALSE) expect_equal(is.nan(x), FALSE) }) test_that("diff matches base R", { local_methods( vec_arith.vctrs_minus = function(op, x, y) vec_arith_base(op, x, y) ) x1 <- cumsum(cumsum(1:10)) x2 <- new_vctr(x1, class = "vctrs_minus") expect_equal(diff(x2), diff(x1)) expect_equal(diff(x2, lag = 2L), diff(x1, lag = 2L)) expect_equal(diff(x2, differences = 2L), diff(x1, differences = 2L)) expect_equal(diff(x2, lag = 11), x2[0L]) expect_equal(diff(x2, differences = 11), x2[0L]) }) test_that("na.omit() works and retains metadata", { x <- new_vctr(c(a = 1, b = NA, c = 2)) result <- na.omit(x) expect <- vec_slice(x, c(1, 3)) attr(expect, "na.action") <- structure(c(b = 2L), class = "omit") expect_identical(result, expect) }) test_that("na.omit() returns its input unchanged if there are no missing values", { x <- new_vctr(c(a = 1, b = 2)) expect_identical(na.omit(x), x) }) test_that("na.exclude() works and retains metadata", { x <- new_vctr(c(a = 1, b = NA, c = 2)) result <- na.exclude(x) expect <- vec_slice(x, c(1, 3)) attr(expect, "na.action") <- structure(c(b = 2L), class = "exclude") expect_identical(result, expect) }) test_that("na.fail() works", { x <- new_vctr(c(a = 1, b = 2)) expect_identical(na.fail(x), x) x <- new_vctr(c(a = 1, b = NA, c = 2)) expect_snapshot(error = TRUE, na.fail(x)) }) # names ------------------------------------------------------------------- test_that("`NA_character_` names are repaired to the empty string (#784)", { expect_named(new_vctr(set_names(1, NA_character_)), "") expect_named(new_vctr(set_names(1:2, c("a", NA))), c("a", "")) }) test_that("the empty string is an allowed name (#784)", { expect_named(new_vctr(set_names(1, "")), "") expect_named(new_vctr(set_names(1:2, c("", "x"))), c("", "x")) }) test_that("can not provide invalid names", { x <- new_vctr(c(a = 1, b = 2)) expect_error(names(x) <- "x", "length") expect_error(names(x) <- c("x", "y", "z"), "length") expect_error(names(x) <- NULL, NA) }) test_that("can set names to the empty string (#784)", { x <- new_vctr(c(a = 1, b = 2)) names(x) <- c("", "") expect_named(x, c("", "")) names(x) <- c("", "x") expect_named(x, c("", "x")) }) test_that("setting names to `NA_character_` repairs to the empty string (#784)", { x <- new_vctr(1:2) names(x) <- c(NA_character_, NA_character_) expect_named(x, c("", "")) names(x) <- c("x", NA_character_) expect_named(x, c("x", "")) }) test_that("can use [ and [[ with names", { local_methods( vec_ptype2.vctrs_vctr.double = function(...) dbl(), vec_ptype2.double.vctrs_vctr = function(...) dbl() ) x <- new_vctr(c(a = 1, b = 2)) expect_equal(x["b"], new_vctr(c(b = 2))) expect_equal(x[["b"]], new_vctr(2)) # [[ drops names x[["c"]] <- 3 expect_equal(x[["c"]], new_vctr(3)) x["d"] <- 4 expect_equal(x[["d"]], new_vctr(4)) }) test_that("can use [ and [[ with names - list vctr", { local_methods( vec_ptype2.vctrs_vctr = function(...) list(), vec_ptype2.list.vctrs_vctr = function(...) list() ) y <- new_vctr(list(a = 1, b = 2)) y[["c"]] <- 3 expect_equal(y[["c"]], 3) y["d"] <- list(4) expect_equal(y[["d"]], 4) }) test_that("can use [[<- to replace n-dimensional elements", { local_methods( vec_restore.vctrs_mtrx = function(x, to, ...) x, vec_ptype2.double.vctrs_mtrx = function(...) dbl(), vec_ptype2.vctrs_mtrx = function(...) dbl() ) x <- new_vctr(rep(1, times = 4), dim = c(2, 2), class = "vctrs_mtrx") x[[2, 2]] <- 4 expect_equal(x[[2, 2]], 4) }) test_that("subsetting preserves attributes", { x <- new_vctr(c(a = 1, b = 2)) attr(x, "extra") <- TRUE y <- x[1] expect_equal(attr(x, "extra"), TRUE) }) test_that("$ inherits from underlying vector", { x1 <- new_vctr(c(a = 1, b = 2)) expect_error(x1$a, "atomic vectors") expect_error(x1$a <- 2, "atomic vectors") x2 <- new_vctr(list(a = 1, b = 2)) expect_equal(x2$a, 1) x2$a <- 10 expect_equal(x2$a, 10) }) # unsupported/unimplemented operations -------------------------------------- test_that("can't touch protected attributes", { x <- new_vctr(1:4) expect_error(dim(x) <- c(2, 2), class = "vctrs_error_unsupported") expect_error(dimnames(x) <- list("x"), class = "vctrs_error_unsupported") expect_error(levels(x) <- "x", class = "vctrs_error_unsupported") # It is expected that unimplemented `levels()` returns `NULL` expect_null(levels(x)) # But it's ok to set names to NULL; this happens at least in vec_c # and maybe elsewhere. We may need to back off on this level of # strictness in the future expect_error(names(x) <- NULL, NA) }) test_that("summary is unimplemented", { x <- new_vctr(1:4) expect_error(summary(x), class = "vctrs_error_unimplemented") }) # hidden class ------------------------------------------------------------ # We can't construct classes in test because the methods are not found # when vctr generics call other generics. Instead we rely on a very simple # class implemented in vctr.R test_that("class preserved when subsetting", { h <- new_hidden(1:4) expect_s3_class(h, "hidden") expect_s3_class(h[1], "hidden") expect_s3_class(h[[1]], "hidden") expect_s3_class(rep(h[1], 2), "hidden") expect_s3_class(as.list(h)[[1]], "hidden") length(h) <- 3 expect_s3_class(h, "hidden") }) test_that("RHS cast when using subset assign", { local_hidden() h <- new_hidden(1) expect_error(h[[1]] <- "x", class = "vctrs_error_incompatible_type") expect_error(h[1] <- "x", class = "vctrs_error_incompatible_type") h[2] <- 1 expect_equal(h, new_hidden(c(1, 1))) h[[2]] <- 2 expect_equal(h, new_hidden(c(1, 2))) }) test_that("c passes on to vec_c", { local_hidden() h <- new_hidden(1) expect_equal(c(h), h) expect_equal(c(h, NULL), h) expect_equal(c(h, 1), rep(h, 2)) expect_equal(c(h, h), rep(h, 2)) }) test_that("rbind does not fail with an unclear message (#1186)", { # In general, vec_rbind() should be preferred. In many cases rbind() does # the right thing, this test exists to alert us if this changes in the future. skip_on_cran() local_hidden() h <- new_hidden(1) # A failure in levels() for vctrs_vctr classes was the underlying issue. expect_null(levels(h)) df <- data_frame(h = h) expect_equal(rbind(df), df) expect_equal(rbind(df, NULL), df) expect_equal(rbind(df, data_frame(h = 1)), unrownames(df[c(1, 1), , drop = FALSE])) expect_equal(rbind(df, df), unrownames(df[c(1, 1), , drop = FALSE])) # An example where the result differs, to alert us if the rbind() contract # changes expect_equal(rbind(data_frame(h = 1), df), data_frame(h = c(1, 1))) }) test_that("summaries preserve class", { h <- new_hidden(c(1, 2)) expect_equal(sum(h), new_hidden(3)) expect_equal(mean(h), new_hidden(1.5)) }) test_that("methods using vec_proxy_compare agree with base", { h <- new_hidden(c(1:10)) h_na <- new_hidden(c(NA, 1:10)) expect_agree <- function(f, x, na.rm = FALSE) { f <- enexpr(f) expect_equal(vec_data((!!f)(x, na.rm = na.rm)), (!!f)(vec_data(x), na.rm = na.rm)) } expect_agree(min, h) expect_agree(max, h) expect_agree(range, h) expect_agree(min, h_na) expect_agree(max, h_na) expect_agree(range, h_na) expect_agree(min, h_na, na.rm = TRUE) expect_agree(max, h_na, na.rm = TRUE) expect_agree(range, h_na, na.rm = TRUE) }) test_that("can put in data frame", { h <- new_hidden(1:4) expect_named(as.data.frame(h), "h") expect_named(data.frame(x = h), "x") }) test_that("base coercions default to vec_cast", { local_hidden() h <- new_hidden(1) expect_error(as.character(h), class = "vctrs_error_incompatible_type") expect_error(as.integer(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.factor(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.ordered(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.difftime(h), class = "vctrs_error_incompatible_type") expect_equal(as.logical(h), TRUE) expect_equal(as.double(h), 1) }) test_that("default print and str methods are useful", { h <- new_hidden(1:4) expect_snapshot(h) expect_snapshot(h[0]) expect_snapshot(str(h)) }) test_that("default print method shows names", { h <- new_hidden(c(A = 1, B = 2, C = 3)) expect_snapshot(h) }) test_that("can't transpose", { h <- new_hidden(1:4) expect_error(t(h), class = "vctrs_error_unsupported") }) test_that("shaped vctrs can be cast to data frames", { x <- new_vctr(1:4, dim = 4) expect_identical(as.data.frame(x), data.frame(V1 = 1:4)) x <- new_vctr(1:4, dim = c(2, 2)) expect_identical(as.data.frame(x), data.frame(V1 = 1:2, V2 = 3:4)) }) # slicing ----------------------------------------------------------------- test_that("additional subscripts are handled (#269)", { new_2d <- function(.data, dim) { vctrs::new_vctr(.data, dim = dim, class = "vctrs_2d") } x <- new_2d(c(1, 2), dim = c(2L, 1L)) expect_identical(x[1], new_2d(1, dim = c(1, 1))) expect_identical(x[1, 1], new_2d(1, dim = c(1, 1))) expect_identical(x[, 1], new_2d(c(1, 2), dim = c(2, 1))) }) # summary generics -------------------------------------------------------- test_that("na.rm is forwarded to summary generics", { x <- new_vctr(dbl(1, 2, NA)) expect_identical(mean(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(mean(x, na.rm = TRUE), new_vctr(1.5)) expect_identical(min(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(min(x, na.rm = TRUE), new_vctr(1)) expect_identical(max(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(max(x, na.rm = TRUE), new_vctr(2)) x <- new_vctr(lgl(TRUE, NA)) expect_identical(all(x, na.rm = FALSE), lgl(NA)) expect_identical(all(x, na.rm = TRUE), TRUE) }) test_that("Summary generics behave identically to base for empty vctrs (#88)", { expect_warning( expect_identical( new_vctr(max(numeric())), max(new_vctr(numeric())) ) ) expect_warning( expect_identical( new_vctr(min(numeric())), min(new_vctr(numeric())) ) ) expect_identical( new_vctr(range(1)), range(new_vctr(1)) ) expect_identical( new_vctr(prod(numeric())), prod(new_vctr(numeric())) ) expect_identical( new_vctr(sum(numeric())), sum(new_vctr(numeric())) ) expect_identical( new_vctr(cummax(numeric())), cummax(new_vctr(numeric())) ) expect_identical( new_vctr(cummin(numeric())), cummin(new_vctr(numeric())) ) expect_identical( new_vctr(cumprod(numeric())), cumprod(new_vctr(numeric())) ) expect_identical( new_vctr(cumsum(numeric())), cumsum(new_vctr(numeric())) ) expect_identical( new_vctr(mean(numeric())), mean(new_vctr(numeric())) ) }) test_that("generic predicates return logical vectors (#251)", { x <- new_vctr(c(1, 2)) expect_identical(is.finite(x), c(TRUE, TRUE)) expect_identical(is.infinite(x), c(FALSE, FALSE)) expect_identical(is.nan(x), c(FALSE, FALSE)) x <- new_vctr(TRUE) expect_identical(any(x), TRUE) expect_identical(all(x), TRUE) }) test_that("xtfrm() converts logical types to integer", { expect_identical(xtfrm(new_vctr(c(TRUE, FALSE, NA), foo = "bar")), c(1L, 0L, NA)) }) test_that("xtfrm() unwraps integer and double atomic types", { expect_identical(xtfrm(new_vctr(1:3, foo = "bar")), 1:3) expect_identical(xtfrm(new_vctr(1:3 + 0, foo = "bar")), 1:3 + 0) }) test_that("xtfrm() works with character subclass", { expect_identical(xtfrm(new_vctr(chr())), int()) }) test_that("xtfrm() maintains ties when falling through to vec_rank() (#1354)", { x <- new_vctr(c("F", "F", "M", "A", "M", "A")) expect_identical(xtfrm(x), c(2L, 2L, 3L, 1L, 3L, 1L)) }) test_that("xtfrm() propagates NAs when falling through to vec_rank()", { x <- new_vctr(c("F", NA)) expect_identical(xtfrm(x), c(1L, NA)) }) test_that("xtfrm() uses C locale ordering with character proxies", { x <- new_vctr(c("A", "a", "B")) expect_identical(xtfrm(x), c(1L, 3L, 2L)) }) test_that("xtfrm() works on rcrd types", { x <- new_rcrd(list(x = c(1, 2, 1, NA), y = c(2, 1, 1, NA))) expect_identical(xtfrm(x), c(2L, 3L, 1L, NA)) }) test_that("Summary generics behave as expected if na.rm = TRUE and all values are NA (#1357)", { expect_identical(min(new_vctr(NA_real_), na.rm = TRUE), new_vctr(Inf)) expect_identical(max(new_vctr(NA_real_), na.rm = TRUE), new_vctr(-Inf)) expect_identical(range(new_vctr(NA_real_), na.rm = TRUE), new_vctr(c(Inf, -Inf))) expect_identical(min(new_vctr(NA_integer_), na.rm = TRUE), new_vctr(NA_integer_)) expect_identical(max(new_vctr(NA_integer_), na.rm = TRUE), new_vctr(NA_integer_)) expect_identical(range(new_vctr(NA_integer_), na.rm = TRUE), new_vctr(c(NA_integer_, NA_integer_))) expect_identical(min(new_vctr(NA_character_), na.rm = TRUE), new_vctr(NA_character_)) expect_identical(max(new_vctr(NA_character_), na.rm = TRUE), new_vctr(NA_character_)) expect_identical(range(new_vctr(NA_character_), na.rm = TRUE), new_vctr(c(NA_character_, NA_character_))) expect_identical(min(new_vctr(NA), na.rm = TRUE), new_vctr(NA)) expect_identical(max(new_vctr(NA), na.rm = TRUE), new_vctr(NA)) expect_identical(range(new_vctr(NA), na.rm = TRUE), new_vctr(c(NA, NA))) }) test_that("Summary generics behave as expected for empty vctrs (#1357)", { expect_identical(min(new_vctr(numeric()), na.rm = TRUE), new_vctr(Inf)) expect_identical(max(new_vctr(numeric()), na.rm = TRUE), new_vctr(-Inf)) expect_identical(range(new_vctr(numeric()), na.rm = TRUE), new_vctr(c(Inf, -Inf))) expect_identical(min(new_vctr(integer()), na.rm = TRUE), new_vctr(NA_integer_)) expect_identical(max(new_vctr(integer()), na.rm = TRUE), new_vctr(NA_integer_)) expect_identical(range(new_vctr(integer()), na.rm = TRUE), new_vctr(c(NA_integer_, NA_integer_))) expect_identical(min(new_vctr(character()), na.rm = TRUE), new_vctr(NA_character_)) expect_identical(max(new_vctr(character()), na.rm = TRUE), new_vctr(NA_character_)) expect_identical(range(new_vctr(character()), na.rm = TRUE), new_vctr(c(NA_character_, NA_character_))) expect_identical(min(new_vctr(logical()), na.rm = TRUE), new_vctr(NA)) expect_identical(max(new_vctr(logical()), na.rm = TRUE), new_vctr(NA)) expect_identical(range(new_vctr(logical()), na.rm = TRUE), new_vctr(c(NA, NA))) }) test_that("anyNA(recursive = TRUE) works with lists (#1278)", { x <- list_of(1:4, c(2, NA, 5)) expect_false(anyNA(x)) expect_true(anyNA(x, recursive = TRUE)) x <- new_vctr(list(1:4, list(c(2, NA, 5)))) expect_false(anyNA(x)) expect_true(anyNA(x, recursive = TRUE)) }) vctrs/tests/testthat/test-compare.R0000644000176200001440000002120614376223321017147 0ustar liggesusers test_that("inputs must be vectors", { expect_error(vec_compare(NULL, 1), class = "vctrs_error_scalar_type") expect_error(vec_compare(1, NULL), class = "vctrs_error_scalar_type") }) test_that("matches R ordering", { expect_same <- function(x, y) { expect_equal(vec_compare(!!x, !!y), cmp(!!x, !!y)) } expect_same(c(NA, FALSE, TRUE), FALSE) expect_same(c(NA, -100L, 0L, 100L), 0L) expect_same(c(NA, -Inf, -100, 100, Inf), 0L) expect_same(c(NA, NaN, 0), NA) expect_same(c(NA, "a", "b", "c"), "b") expect_same(as.raw(2:5), as.raw(4)) }) test_that("NAs equal when requested", { expect_value <- function(x, y, val, .ptype = NULL) { expect_equal(vec_compare(!!x, !!y, .ptype = .ptype, na_equal = TRUE), !!val) } expect_value(NA, NA, 0L) expect_value(NA, FALSE, -1L) expect_value(FALSE, NA, 1L) expect_value(NA_integer_, NA_integer_, 0L) expect_value(NA_integer_, 0L, -1L) expect_value(0L, NA_integer_, 1L) expect_value(NA_character_, NA_character_, 0L) expect_value(NA_character_, "", -1L) expect_value("", NA_character_, 1L) expect_value(0, NA_real_, 1L) expect_value(0, NaN, 1L) expect_value(0, 0, 0L) expect_value(NA_real_, NA_real_, 0L) expect_value(NA_real_, NaN, 1L) expect_value(NA_real_, 0, -1L) expect_value(NaN, NA_real_, -1L) expect_value(NaN, NaN, 0L) expect_value(NaN, 0, -1L) }) test_that("data frames are compared column by column", { df1 <- data.frame(x = c(1, 1, 1), y = c(-1, 0, 1)) expect_equal(vec_compare(df1, df1[2, ]), c(-1, 0, 1)) expect_equal(vec_compare(df1[1], df1[2, 1, drop = FALSE]), c(0, 0, 0)) expect_equal(vec_compare(df1[2], df1[2, 2, drop = FALSE]), c(-1, 0, 1)) expect_equal(vec_compare(df1[2:1], df1[2, 2:1]), c(-1, 0, 1)) }) test_that("can compare data frames with various types of columns", { x1 <- data_frame(x = 1, y = 2) y1 <- data_frame(x = 2, y = 1) x2 <- data_frame(x = "a") y2 <- data_frame(x = "b") x3 <- data_frame(x = FALSE) y3 <- data_frame(x = TRUE) x4 <- data_frame(x = 1L) y4 <- data_frame(x = 2L) expect_equal(vec_compare(x1, y1), -1) expect_equal(vec_compare(x2, y2), -1) expect_equal(vec_compare(x3, y3), -1) expect_equal(vec_compare(x4, y4), -1) }) test_that("can compare data frames with data frame columns", { df1 <- data_frame(x = data_frame(a = 1)) df2 <- data_frame(x = data_frame(a = 2)) expect_equal(vec_compare(df1, df1), 0) expect_equal(vec_compare(df1, df2), -1) }) test_that("can compare data frames with 0 columns", { x <- new_data_frame(n = 2L) expect_identical(vec_compare(x, x), c(0L, 0L)) }) test_that("C code doesn't crash with bad inputs", { df <- data.frame(x = c(1, 1, 1), y = c(-1, 0, 1)) expect_error(.Call(ffi_vec_compare, df, df[1], TRUE), "not comparable") # Names are not checked, as `vec_cast_common()` should take care of the type. # So if `vec_cast_common()` is not called, or is improperly specified, then # this could result in false equality. expect_equal(.Call(ffi_vec_compare, df, setNames(df, c("x", "z")), TRUE), c(0, 0, 0)) df1 <- new_data_frame(list(x = 1:3, y = c(1, 1, 1))) df2 <- new_data_frame(list(y = 1:2, x = 1:2)) expect_error(.Call(ffi_vec_compare, df1, df2, TRUE), "must have the same types and lengths") }) test_that("xtfrm.vctrs_vctr works for variety of base classes", { df <- data.frame(x = c(NA, 1, 1), y = c(1, 2, 1)) # Internally uses `vec_rank()`, which propagates rows if not "complete" expect_equal(xtfrm.vctrs_vctr(df), c(NA, 2, 1)) x <- c(2, 3, 1) expect_equal(xtfrm.vctrs_vctr(x), x) expect_equal(xtfrm.vctrs_vctr(letters[x]), x) }) test_that("vec_proxy_order() orders list using order of appearance", { x <- 1:2 y <- 2:4 z <- "a" lst <- list(x, y, x, y, z) expect_identical(vec_proxy_order(lst), c(1L, 2L, 1L, 2L, 5L)) }) test_that("vec_compare() calls vec_proxy_compare()", { local_methods( vec_proxy_compare.vctrs_foobar = function(x, ...) rev(x), vec_ptype2.integer.vctrs_foobar = function(...) foobar(int()), vec_ptype2.vctrs_foobar = function(...) foobar(int()), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.integer = function(x, ...) x, ) expect_identical(vec_compare(1:3, 1:3), int(0, 0, 0)) expect_identical(vec_compare(1:3, foobar(1:3)), int(-1, 0, 1)) }) test_that("vec_proxy_compare() preserves data frames and vectors", { df <- data_frame(x = 1:2, y = c("a", "b")) expect_identical(vec_proxy_compare(df), df) x <- c(NA, "a", "b", "c") expect_identical(vec_proxy_compare(x), x) }) test_that("vec_proxy_compare() handles data frame with a POSIXlt column", { df <- data.frame(times = 1:5, x = 1:5) df$times <- as.POSIXlt(seq.Date(as.Date("2019-12-30"), as.Date("2020-01-03"), by = "day")) df2 <- df df2$times <- vec_proxy_compare(df$times) expect_identical( vec_proxy_compare(df), vec_proxy_compare(df2) ) }) test_that("vec_proxy_compare.POSIXlt() correctly orders (#720)", { dates <- as.POSIXlt(seq.Date(as.Date("2019-12-30"), as.Date("2020-01-03"), by = "day")) expect_equal(vec_order(dates), 1:5) }) test_that("vec_proxy_compare.POSIXlt() correctly orders around DST", { # 1am in EDT x <- as.POSIXlt("2020-11-01 01:00:00", tz = "America/New_York") # "falls back" to 1am again, but in EST y <- as.POSIXlt(x + 3600) expect_equal(vec_order(c(y, x)), c(2, 1)) }) test_that("vec_proxy_compare() flattens df-cols", { df_col <- data_frame(z = 3:4, w = 4:5) df <- data_frame(x = 1:2, y = df_col) expect <- data_frame(x = 1:2, z = 3:4, w = 4:5) expect_identical(vec_proxy_compare(df), expect) }) test_that("vec_proxy_compare() unwraps 1 col dfs", { df <- data_frame(x = 1:2) expect_identical(vec_proxy_compare(df), 1:2) df_col <- data_frame(y = 1:2) df <- data_frame(x = df_col) expect_identical(vec_proxy_compare(df), 1:2) }) test_that("vec_proxy_order() works on deeply nested lists", { df_col <- data_frame(z = list("b", "a", "b")) # Relaxed and unwrapped df1 <- data_frame(x = df_col) expect_identical(vec_proxy_order(df1), c(1L, 2L, 1L)) df2 <- data_frame(x = df_col, y = 1:3) expect_identical(vec_proxy_order(df2), data_frame(x = c(1L, 2L, 1L), y = 1:3)) }) test_that("error is thrown when comparing complexes (#1655)", { expect_snapshot({ (expect_error(vec_compare(complex(), complex()))) }) }) test_that("error is thrown when comparing lists", { expect_error(vec_compare(list(), list()), class = "vctrs_error_unsupported") expect_error(.Call(ffi_vec_compare, list(), list(), FALSE), "Can't compare lists") }) test_that("error is thrown when comparing data frames with list columns", { df <- data_frame(x = list()) expect_error(vec_compare(df, df), class = "vctrs_error_unsupported") expect_error(.Call(ffi_vec_compare, df, df, FALSE), "Can't compare lists") }) test_that("error is thrown when comparing scalars", { x <- new_sclr(x = 1) expect_error(vec_compare(x, x), class = "vctrs_error_scalar_type") expect_error(.Call(ffi_vec_compare, x, x, FALSE), class = "vctrs_error_scalar_type") }) test_that("`na_equal` is validated", { expect_snapshot({ (expect_error(vec_compare(1, 1, na_equal = 1))) (expect_error(vec_compare(1, 1, na_equal = c(TRUE, FALSE)))) }) }) test_that("can compare equal strings with different encodings", { for (x_encoding in encodings()) { for (y_encoding in encodings()) { expect_equal(vec_compare(x_encoding, y_encoding), 0L) } } }) test_that("can compare non-equal strings with different encodings", { x <- "x" y <- encodings()$latin1 expect_equal(vec_compare(x, y), -1L) }) test_that("comparison can be determined when strings have identical encodings", { encs <- encodings() for (enc in encs) { expect_equal(vec_compare(enc, enc), 0L) } }) test_that("comparison is known to always fail with bytes", { enc <- encoding_bytes() error <- "translating strings with \"bytes\" encoding" expect_error(vec_compare(enc, enc), error) }) test_that("comparison is known to fail when comparing bytes to other encodings", { error <- "translating strings with \"bytes\" encoding" for (enc in encodings()) { expect_error(vec_compare(encoding_bytes(), enc), error) expect_error(vec_compare(enc, encoding_bytes()), error) } }) test_that("can compare unspecified", { expect_equal(vec_compare(NA, NA), NA_integer_) expect_equal(vec_compare(NA, NA, na_equal = TRUE), 0) expect_equal(vec_compare(c(NA, NA), unspecified(2)), c(NA_integer_, NA_integer_)) }) test_that("can't supply NA as `na_equal`", { expect_snapshot(error = TRUE, { vec_compare(NA, NA, na_equal = NA) }) }) test_that("vec_compare() silently falls back to base data frame", { expect_silent(expect_identical( vec_compare(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(0L, 32) )) }) vctrs/tests/testthat/helper-c.R0000644000176200001440000000007114315060310016226 0ustar liggesusersclass_type <- function(x) { .Call(ffi_class_type, x) } vctrs/tests/testthat/test-type-tibble.R0000644000176200001440000000644614362266120017751 0ustar liggesuserstest_that("tibble beats data frame", { df <- new_data_frame() dt <- tibble::tibble() expect_s3_class(vec_ptype_common(dt, df), "tbl_df") expect_s3_class(vec_ptype_common(df, dt), "tbl_df") }) test_that("can cast tibble to df and vice versa", { df <- new_data_frame() tib <- tibble::tibble() expect_identical(vec_cast(df, tib), tib) expect_identical(vec_cast(tib, df), df) expect_identical(vec_cast(tib, tib), tib) }) test_that("can't cast vector to tibble", { dt <- tibble::tibble() v <- logical() expect_snapshot({ local_error_call(call("my_function")) (expect_error(vec_ptype2(v, dt), class = "vctrs_error_incompatible_type")) (expect_error(vec_ptype2(dt, v), class = "vctrs_error_incompatible_type")) (expect_error(vec_cast(v, dt), class = "vctrs_error_incompatible_type")) }) }) test_that("casting to and from tibble preserves row names", { out <- vec_cast(mtcars, tibble::as_tibble(mtcars)) expect_identical(row.names(out), row.names(mtcars)) out <- vec_cast(out, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) }) test_that("no common type between list and tibble", { dt <- tibble::tibble() l <- list() expect_error(vec_ptype2(l, dt), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(dt, l), class = "vctrs_error_incompatible_type") }) test_that("vec_restore restores tibbles", { df1 <- tibble::tibble(x = 1:4) df2 <- vec_restore(vec_data(df1), df1) expect_s3_class(df2, "tbl_df") }) test_that("the type of a tibble with an unspecified column retains unspecifiedness", { df1 <- tibble::tibble(x = 1, y = NA) df2 <- tibble::tibble(x = 1, y = unspecified(1)) expect <- tibble::tibble(x = numeric(), y = unspecified()) expect_identical(vec_ptype(df1), expect) expect_identical(vec_ptype(df2), expect) }) test_that("vec_ptype_finalise() works recursively over tibbles", { df <- tibble(x = numeric(), y = unspecified()) expect <- tibble(x = numeric(), y = logical()) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() can handle tibble df columns", { df <- tibble(x = numeric(), y = tibble(z = unspecified())) expect <- tibble(x = numeric(), y = tibble(z = logical())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("can use ptype2 and cast with tibble that has incorrect class vector", { tib1 <- structure(data.frame(x = 1), class = c("tbl_df", "data.frame")) tib2 <- structure(data.frame(y = 2), class = c("tbl_df", "data.frame")) exp <- structure(data.frame(x = dbl(), y = dbl()), class = c("tbl_df", "data.frame")) requireNamespace("tibble") expect_identical( vec_ptype_common(tib1, tib2), exp ) expect_identical( vec_ptype_common(tib1, data.frame(y = 2)), tibble::new_tibble(exp, nrow = nrow(exp)) ) expect_identical( vec_ptype_common(data.frame(x = 1), tib2), tibble::new_tibble(exp, nrow = nrow(exp)) ) expect_identical( vec_cast(tib1, tib1), tib1 ) expect_snapshot({ local_error_call(call("my_function")) (expect_error( vec_cast(tib1, tib2), class = "vctrs_error_cast" )) (expect_error( vec_cast(tib1, data.frame(y = 2)), class = "vctrs_error_cast" )) (expect_error( vec_cast(data.frame(x = 1), tib2), class = "vctrs_error_cast" )) }) }) vctrs/tests/testthat/test-type-sf.R0000644000176200001440000001704414405105465017116 0ustar liggesusers# Never run on CRAN, even if they have sf, because we don't regularly # check these on CI and we don't want a change in sf to force a CRAN # failure for vctrs. skip_on_cran() # Avoids adding `sf` to Suggests. # These tests are only run on the devs' machines. testthat_import_from("sf", c( "st_sf", "st_sfc", "st_point", "st_bbox", "st_precision", "st_crs", "st_linestring", "st_as_sf", "st_multipoint" )) # Need recent version to work around restore bug for sfc lists and changes # to `c.sfc()` skip_if_not_installed("sf", "1.0-11") test_that("sf has a ptype2 method", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) out = vctrs::vec_ptype2(sf1, sf2) exp = st_sf( x = double(), y = character(), geo1 = sfc1[0], geo2 = sfc2[0], stringsAsFactors = FALSE ) expect_identical(out, exp) out = vctrs::vec_ptype2(sf1, new_data_frame(sf2)) expect_identical(out, exp) out = vctrs::vec_ptype2(new_data_frame(sf1), sf2) exp_rhs = st_sf( x = double(), y = character(), geo1 = sfc1[0], geo2 = sfc2[0], stringsAsFactors = FALSE, sf_column_name = "geo2" ) expect_identical(out, exp_rhs) }) test_that("sf has a cast method", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) expect_error( vctrs::vec_cast(sf1, sf2), class = "vctrs_error_cast_lossy" ) expect_error( vctrs::vec_cast(sf2, sf1), class = "vctrs_error_cast_lossy" ) common = vec_ptype2(sf1, sf2) out = vctrs::vec_cast(sf1, common) exp = st_sf( x = c(1, 0), y = character(2)[NA], geo1 = sfc1, geo2 = sfc2[c(NA, NA) + 0L], stringsAsFactors = FALSE ) expect_identical(out, exp) out = vctrs::vec_cast(new_data_frame(sf1), common) expect_identical(out, exp) out = vctrs::vec_cast(sf1, new_data_frame(common)) expect_identical(out, new_data_frame(exp)) out = vctrs::vec_cast(sf2, common) exp = st_sf( x = 0, y = "", geo1 = sfc1[NA + 0L], geo2 = sfc2, stringsAsFactors = FALSE ) expect_identical(out, exp) }) # https://github.com/r-lib/vctrs/issues/1136 test_that("can combine sf data frames", { testthat_import_from("dplyr", "bind_rows") sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) exp = data_frame( x = c(1, 0, 0), geo1 = sfc1[c(1:2, NA)], y = c(NA, NA, ""), geo2 = sfc2[c(NA, NA, 1)] ) expect_identical(vctrs::vec_rbind(sf1, sf2), exp) expect_identical(bind_rows(sf1, sf2), st_as_sf(exp)) exp = data_frame( y = c("", NA, NA, ""), x = c(0, 1, 0, 0), geo2 = sfc2[c(1, NA, NA, 1)], geo1 = sfc1[c(NA, 1:2, NA)] ) expect_identical(vctrs::vec_rbind(sf2, sf1, sf2), exp) expect_identical(bind_rows(sf2, sf1, sf2), st_as_sf(exp)) }) test_that("can combine sf and tibble", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) out = vctrs::vec_rbind(sf2, data.frame(x = 1)) exp = data_frame( y = c("", NA), x = c(0, 1), geo2 = sfc2[c(1L, NA)] ) expect_identical(out, exp) out = vctrs::vec_rbind(sf2, tibble::tibble(x = 1)) expect_identical(out, exp) out = vctrs::vec_rbind(tibble::tibble(x = 1), sf2) exp = data_frame( x = c(1, 0), y = c(NA, ""), geo2 = sfc2[c(NA, 1L)] ) expect_identical(out, exp) }) # https://github.com/r-spatial/sf/issues/1390 test_that("can combine sfc lists", { ls <- st_linestring(matrix(1:3, ncol = 3)) sfc <- st_sfc(ls) expect_identical(vec_c(sfc, sfc), c(sfc, sfc)) sf <- st_as_sf(data.frame(id = 1, geometry = sfc)) # Currently returns a bare data frame because of the workaround for # the `c()` fallback sentinels expect_identical(vec_rbind(sf, sf), new_data_frame(rbind(sf, sf))) expect_identical(vec_rbind(sf, sf, sf), new_data_frame(rbind(sf, sf, sf))) }) test_that("can combine sfc lists with unspecified chunks", { point <- st_point(1:2) out <- vec_c(c(NA, NA), st_sfc(point), NA) expect_identical(out, st_sfc(NA, NA, point, NA)) multipoint <- st_multipoint(matrix(1:4, 2)) x <- st_sfc(point) y <- st_sfc(multipoint, multipoint) out <- vec_rbind( data_frame(x = x), data_frame(y = y) ) expect_identical(out, data_frame( x = st_sfc(point, NA, NA), y = st_sfc(NA, multipoint, multipoint) )) }) test_that("`n_empty` attribute of `sfc` vectors is restored", { pt1 = st_sfc(st_point(c(NA_real_, NA_real_))) pt2 = st_sfc(st_point(0:1)) x = c(pt1, pt2) expect_identical(attr(vctrs::vec_slice(x, 1), "n_empty"), 1L) expect_identical(attr(vctrs::vec_slice(x, 2), "n_empty"), 0L) combined = vctrs::vec_c(pt1, pt2, pt1) expect_length(combined, 3) expect_identical(attr(combined, "n_empty"), 2L) }) test_that("bbox attributes of `sfc` vectors are restored", { pt1 = st_sfc(st_point(c(1L, 2L))) pt2 = st_sfc(st_point(c(10L, 20L))) x = c(pt1, pt2) expect_identical(st_bbox(vctrs::vec_slice(x, 1)), st_bbox(pt1)) expect_identical(st_bbox(vctrs::vec_slice(x, 2)), st_bbox(pt2)) combined = vctrs::vec_c(pt1, pt2) expect_identical(st_bbox(x), st_bbox(combined)) }) test_that("`precision` and `crs` attributes of `sfc` vectors are restored", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) out = vctrs::vec_slice(x, 1) expect_identical(st_precision(x), st_precision(out)) expect_identical(st_crs(x), st_crs(out)) }) test_that("`precision` attributes of `sfc` vectors are combined", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 3857) out = vctrs::vec_c(x, y) expect_identical(st_precision(x), st_precision(out)) # These used to be errors before we fell back to c() y = st_sfc(st_point(c(0, 0)), precision = 1e-2, crs = 3857) expect_identical(vctrs::vec_c(x, y), c(x, y)) # expect_error(vctrs::vec_c(x, y), "precisions not equal") }) test_that("`crs` attributes of `sfc` vectors must be the same", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 3857) out = vctrs::vec_c(x, y) expect_identical(st_crs(x), st_crs(out)) # Error on different `crs` comes from sf as of 1.0-10 y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 4326) expect_snapshot(error = TRUE, { vctrs::vec_c(x, y) }) }) test_that("`vec_locate_matches()` works with `sfc` vectors", { x <- c( st_sfc(st_point(c(0, 0))), st_sfc(st_point(c(0, 1))), st_sfc(st_point(c(2, 1))), st_sfc(c(st_point(c(0, 1)), st_point(c(0, 1)))) ) y <- c( st_sfc(c(st_point(c(0, 1)), st_point(c(0, 1)))), st_sfc(st_point(c(0, 0))), st_sfc(st_point(c(0, 3))), st_sfc(st_point(c(0, 0))), st_sfc(st_point(c(0, 1))) ) out <- vec_locate_matches(x, y) expect_identical(out$needles, c(1L, 1L, 2L, 3L, 4L)) expect_identical(out$haystack, c(2L, 4L, 5L, NA, 1L)) }) test_that("`vec_rbind()` doesn't leak common type fallbacks (#1331)", { sf = st_sf(id = 1:2, geo = st_sfc(st_point(c(1, 1)), st_point(c(2, 2)))) expect_equal( vec_rbind(sf, sf), data_frame(id = rep(1:2, 2), geo = rep(sf$geo, 2)) ) expect_equal( vec_rbind(sf, sf, .names_to = "id"), data_frame(id = rep(1:2, each = 2), geo = rep(sf$geo, 2)) ) }) # Local Variables: # indent-tabs-mode: t # ess-indent-offset: 4 # tab-width: 4 # End: vctrs/tests/testthat/test-slice.R0000644000176200001440000006077014511320527016626 0ustar liggesusers test_that("vec_slice throws error with non-vector inputs", { expect_error(vec_slice(environment(), 1L), class = "vctrs_error_scalar_type") }) test_that("vec_slice throws error with non-vector subscripts", { expect_snapshot({ (expect_error(vec_slice(1:3, Sys.Date()), class = "vctrs_error_subscript_type")) (expect_error(vec_slice(1:3, matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type")) }) }) test_that("can subset base vectors", { i <- 2:3 expect_identical(vec_slice(lgl(1, 0, 1), i), lgl(0, 1)) expect_identical(vec_slice(int(1, 2, 3), i), int(2, 3)) expect_identical(vec_slice(dbl(1, 2, 3), i), dbl(2, 3)) expect_identical(vec_slice(cpl(1, 2, 3), i), cpl(2, 3)) expect_identical(vec_slice(chr("1", "2", "3"), i), chr("2", "3")) expect_identical(vec_slice(raw2(1, 2, 3), i), raw2(2, 3)) expect_identical(vec_slice(list(1, 2, 3), i), list(2, 3)) }) test_that("can subset shaped base vectors", { i <- 2:3 mat <- as.matrix expect_identical(vec_slice(mat(lgl(1, 0, 1)), i), mat(lgl(0, 1))) expect_identical(vec_slice(mat(int(1, 2, 3)), i), mat(int(2, 3))) expect_identical(vec_slice(mat(dbl(1, 2, 3)), i), mat(dbl(2, 3))) expect_identical(vec_slice(mat(cpl(1, 2, 3)), i), mat(cpl(2, 3))) expect_identical(vec_slice(mat(chr("1", "2", "3")), i), mat(chr("2", "3"))) expect_identical(vec_slice(mat(raw2(1, 2, 3)), i), mat(raw2(2, 3))) expect_identical(vec_slice(mat(list(1, 2, 3)), i), mat(list(2, 3))) }) test_that("can subset with missing indices", { for (i in list(int(2L, NA), lgl(FALSE, TRUE, NA))) { expect_identical(vec_slice(lgl(1, 0, 1), i), lgl(0, NA)) expect_identical(vec_slice(int(1, 2, 3), i), int(2, NA)) expect_identical(vec_slice(dbl(1, 2, 3), i), dbl(2, NA)) expect_identical(vec_slice(cpl2(1, 2, 3), i), cpl2(2, NA)) expect_identical(vec_slice(chr("1", "2", "3"), i), c("2", NA)) expect_identical(vec_slice(raw2(1, 2, 3), i), raw2(2, 0)) expect_identical(vec_slice(list(1, 2, 3), i), list(2, NULL)) } }) test_that("can subset with a recycled NA", { local_name_repair_quiet() expect_identical(vec_slice(1:3, NA), int(NA, NA, NA)) expect_identical(vec_slice(new_vctr(1:3), NA), new_vctr(int(NA, NA, NA))) rownames <- rep_len("", nrow(mtcars)) rownames <- vec_as_names(rownames, repair = "unique") expect_identical(vec_slice(mtcars, NA), structure(mtcars[NA, ], row.names = rownames)) }) test_that("can subset with a recycled TRUE", { expect_identical(vec_slice(1:3, TRUE), 1:3) expect_identical(vec_slice(mtcars, TRUE), mtcars) expect_identical(vec_slice(new_vctr(1:3), TRUE), new_vctr(1:3)) expect_identical(vec_as_location(TRUE, 2), 1:2) }) test_that("can subset with a recycled FALSE", { expect_identical(vec_slice(1:3, FALSE), int()) expect_identical(vec_slice(mtcars, FALSE), mtcars[NULL, ]) expect_identical(vec_slice(new_vctr(1:3), FALSE), new_vctr(integer())) }) test_that("can't index beyond the end of a vector", { expect_snapshot({ (expect_error(vec_slice(1:2, 3L), class = "vctrs_error_subscript_oob")) (expect_error(vec_slice(1:2, -3L), class = "vctrs_error_subscript_oob")) }) }) test_that("slicing non existing elements fails", { expect_error(vec_as_location("foo", 1L, "f"), class = "vctrs_error_subscript_oob") expect_error(vec_slice(c(f = 1), "foo"), class = "vctrs_error_subscript_oob") }) test_that("can subset object of any dimensionality", { x0 <- c(1, 1) x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_equal(vec_slice(x0, 1L), 1) expect_identical(vec_slice(x1, 1L), ones(1)) expect_identical(vec_slice(x2, 1L), ones(1, 3)) expect_identical(vec_slice(x3, 1L), ones(1, 3, 4)) expect_identical(vec_slice(x4, 1L), ones(1, 3, 4, 5)) }) test_that("can subset using logical subscript", { x0 <- c(1, 1) expect_identical(vec_slice(x0, TRUE), x0) expect_identical(vec_slice(x0, c(TRUE, FALSE)), 1) expect_error( vec_slice(x0, c(TRUE, FALSE, TRUE)), class = "vctrs_error_subscript_size" ) expect_error( vec_slice(x0, lgl()), class = "vctrs_error_subscript_size" ) expect_error( vec_slice(mtcars, c(TRUE, FALSE)), class = "vctrs_error_subscript_size" ) }) test_that("can subset data frame columns", { df <- data.frame(x = 1:2) df$y <- data.frame(a = 2:1) expect_equal(vec_slice(df, 1L)$y, vec_slice(df$y, 1L)) }) test_that("can subset empty data frames", { df <- new_data_frame(n = 3L) expect_equal(vec_size(vec_slice(df, integer())), 0) expect_equal(vec_size(vec_slice(df, 1L)), 1) expect_equal(vec_size(vec_slice(df, 1:3)), 3) df$df <- df expect_equal(vec_size(vec_slice(df, integer())), 0) expect_equal(vec_size(vec_slice(df, 1L)), 1) expect_equal(vec_size(vec_slice(df, 1:3)), 3) }) test_that("ignores NA in logical subsetting", { x <- c(NA, 1, 2) expect_equal(vec_slice(x, x > 0), c(NA, 1, 2)) }) test_that("ignores NA in integer subsetting", { expect_equal(vec_slice(0:2, c(NA, 2:3)), c(NA, 1, 2)) }) test_that("can't slice with missing argument", { expect_error(vec_slice(1:3)) expect_error(vec_slice(mtcars)) expect_error(vec_slice(new_vctr(1:3))) }) test_that("can slice with NULL argument", { expect_identical(vec_slice(1:3, NULL), integer()) expect_identical(vec_slice(iris, NULL), iris[0, ]) expect_identical(vec_slice(new_vctr(1:3), NULL), new_vctr(integer())) }) test_that("slicing unclassed structures preserves attributes", { x <- structure(1:3, foo = "bar") expect_identical(vec_slice(x, 1L), structure(1L, foo = "bar")) }) test_that("can slice with negative indices", { expect_identical(vec_slice(1:3, -c(1L, 3L)), 2L) expect_identical(vec_slice(mtcars, -(1:30)), vec_slice(mtcars, 31:32)) expect_error(vec_slice(1:3, -c(1L, NA)), class = "vctrs_error_subscript_type") expect_error(vec_slice(1:3, c(-1L, 1L)), class = "vctrs_error_subscript_type") }) test_that("0 is ignored in negative indices", { expect_identical(vec_slice(1:3, c(-2L, 0L)), int(1L, 3L)) expect_identical(vec_slice(1:3, c(0L, -2L)), int(1L, 3L)) }) test_that("0 is ignored in positive indices", { expect_identical(vec_slice(1:3, 0L), int()) expect_identical(vec_slice(1:3, c(0L, 0L)), int()) expect_identical(vec_slice(1:3, c(0L, 2L, 0L)), 2L) }) test_that("can slice with double indices", { expect_identical(vec_slice(1:3, dbl(2, 3)), 2:3) expect_snapshot((expect_error(vec_as_location(2^31, 3L), class = "vctrs_error_subscript_type"))) }) test_that("can slice with symbols", { expect_identical(vec_as_location(quote(b), 26, letters), 2L) }) test_that("can `vec_slice()` S3 objects without dispatch infloop", { expect_identical(new_vctr(1:3)[1], new_vctr(1L)) expect_identical(new_vctr(as.list(1:3))[1], new_vctr(list(1L))) }) test_that("can `vec_slice()` records", { out <- vec_slice(new_rcrd(list(a = 1L, b = 2L)), rep(1, 3)) expect_size(out, 3) out <- vec_init(new_rcrd(list(a = 1L, b = 2L)), 2) expect_size(out, 2) }) test_that("vec_restore() is called after proxied slicing", { local_methods( vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, to, ...) "dispatch" ) expect_identical(vec_slice(foobar(1:3), 2), "dispatch") }) test_that("vec_slice() is proxied", { local_proxy() x <- vec_slice(new_proxy(1:3), 2:3) expect_identical(proxy_deref(x), 2:3) }) test_that("dimensions are preserved by vec_slice()", { # Fallback case x <- foobar(1:4) dim(x) <- c(2, 2) dimnames(x) <- list(a = c("foo", "bar"), b = c("quux", "hunoz")) out <- vec_slice(x, 1) exp <- foobar( c(1L, 3L), dim = c(1, 2), dimnames = list(a = "foo", b = c("quux", "hunoz") )) expect_identical(out, exp) # Native case attrib <- NULL local_methods( vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, to, ...) attrib <<- attributes(x) ) vec_slice(x, 1) exp <- list(dim = 1:2, dimnames = list(a = "foo", b = c("quux", "hunoz"))) expect_identical(attrib, exp) }) test_that("can slice shaped objects by name", { x <- matrix(1:2) expect_error(vec_slice(x, "foo"), "unnamed") dimnames(x) <- list(c("foo", "bar")) expect_equal(vec_slice(x, "foo"), vec_slice(x, 1L)) expect_error(vec_slice(x, "baz"), class = "vctrs_error_subscript_oob") }) test_that("vec_slice() unclasses input before calling `vec_restore()`", { oo <- NULL local_methods( vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, ...) oo <<- is.object(x) ) x <- foobar(1:4) dim(x) <- c(2, 2) vec_slice(x, 1) expect_false(oo) }) test_that("can call `vec_slice()` from `[` methods with shaped objects without infloop", { local_methods( `[.vctrs_foobar` = function(x, i, ...) vec_slice(x, i) ) x <- foobar(1:4) dim(x) <- c(2, 2) exp <- foobar(c(1L, 3L)) dim(exp) <- c(1, 2) expect_identical(x[1], exp) }) test_that("vec_slice() restores attributes on shaped S3 objects correctly", { x <- factor(c("a", "b", "c", "d", "e", "f")) dim(x) <- c(3, 2) expect <- factor(c("a", "c", "d", "f"), levels = levels(x)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, c(1, 3)), expect) }) test_that("vec_slice() falls back to `[` with S3 objects", { local_methods( `[.vctrs_foobar` = function(x, i, ...) "dispatched" ) expect_identical(vec_slice(foobar(NA), 1), "dispatched") expect_error(vec_slice(foobar(list(NA)), 1), class = "vctrs_error_scalar_type") local_methods( vec_proxy.vctrs_foobar = function(x, ...) x ) expect_identical(vec_slice(foobar(list(NA)), 1), foobar(list(NA))) }) test_that("vec_slice() doesn't restore when attributes have already been restored", { local_methods( `[.vctrs_foobar` = function(x, i, ...) structure("dispatched", foo = "bar"), vec_restore.vctrs_foobar = function(...) stop("not called") ) expect_error(vec_slice(foobar(NA), 1), NA) }) test_that("vec_slice() doesn't restore when `[` method intentionally dropped attributes", { local_methods( `[.vctrs_foobar` = function(x, i, ...) unstructure(NextMethod()), vec_restore.vctrs_foobar = function(...) stop("not called") ) expect_identical(vec_slice(foobar(NA), 1), NA) }) test_that("can vec_slice() without inflooping when restore calls math generics", { local_methods( new_foobar = function(x) { new_vctr(as.double(x), class = "vctrs_foobar") }, vec_restore.vctrs_foobar = function(x, ...) { abs(x) sum(x) mean(x) is.finite(x) is.infinite(x) is.nan(x) new_foobar(x) } ) expect_identical(new_foobar(1:10)[1:2], new_foobar(1:2)) }) test_that("vec_restore() is called after slicing data frames", { local_methods( vec_restore.vctrs_tabble = function(...) "dispatched" ) df <- structure(mtcars, class = c("vctrs_tabble", "data.frame")) expect_identical(vec_slice(df, 1), "dispatched") }) test_that("additional subscripts are forwarded to `[`", { local_methods( `[.vctrs_foobar` = function(x, i, ...) vec_index(x, i, ...) ) x <- foobar(c("foo", "bar", "quux", "hunoz")) dim(x) <- c(2, 2) exp <- foobar("quux") dim(exp) <- c(1, 1) expect_identical(x[1, 2], exp) }) test_that("can use names to vec_slice() a named object", { x0 <- c(a = 1, b = 2) x1 <- c(a = 1, a = 2) expect_identical(vec_slice(x0, letters[1]), c(a = 1)) expect_identical(vec_slice(x0, letters[2:1]), c(b = 2, a = 1)) expect_identical(vec_slice(x1, letters[1]), c(a = 1)) expect_error(vec_slice(x0, letters[3:1]), class = "vctrs_error_subscript_oob") expect_error(vec_slice(x1, letters[2]), class = "vctrs_error_subscript_oob") }) test_that("can't use names to vec_slice() an unnamed object", { expect_error( vec_slice(1:3, letters[1]), "Can't use character names to index an unnamed vector.", fixed = TRUE ) expect_error( vec_slice(1:3, letters[25:27]), "Can't use character names to index an unnamed vector.", fixed = TRUE ) }) test_that("can slice with missing character indices (#244)", { expect_identical(vec_as_location(na_chr, 2L, c("x", "")), na_int) expect_identical(vec_slice(c(x = 1), na_chr), set_names(na_dbl, "")) expect_identical(vec_slice(c(x = "foo"), na_chr), set_names(na_chr, "")) }) test_that("can slice with numerics (#577)", { expect_identical(vec_as_location(1:2, 3), 1:2) expect_error(vec_as_location(1:2, 3.5), class = "vctrs_error_cast_lossy") }) test_that("missing indices don't create NA names", { x <- set_names(letters) expect_identical(vec_slice(x, na_int), set_names(na_chr, "")) expect_identical(vec_slice(x, int(1, NA, 3, NA)), chr(a = "a", NA, c = "c", NA)) # Preserves existing NA names x <- set_names(1:2, c(NA, "foo")) expect_identical(vec_slice(x, 1:2), x) }) test_that("vec_slice() asserts vectorness (#301)", { expect_error(vec_slice(NULL, 1), class = "vctrs_error_scalar_type") }) test_that("slicing an unspecified logical vector returns a logical vector", { expect_identical(vec_slice(NA, integer()), logical()) expect_identical(vec_slice(NA, c(1, 1)), c(NA, NA)) }) test_that("slicing an unspecified() object returns an unspecified()", { expect_identical(vec_slice(unspecified(1), integer()), unspecified()) expect_identical(vec_slice(unspecified(1), c(1, 1)), unspecified(2)) }) test_that("vec_slice() works with Altrep classes with custom extract methods", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) idx <- c(9, 10, 11) expect_equal(vec_slice(x, idx), c("foo", "foo", "bar")) }) test_that("Unnamed vector with character subscript is caught", { expect_snapshot(error = TRUE, vec_slice(1:3, letters[1])) }) test_that("Negative subscripts are checked", { expect_snapshot(error = TRUE, vec_slice(1:3, -c(1L, NA))) expect_snapshot(error = TRUE, vec_slice(1:3, c(-1L, 1L))) }) test_that("oob error messages are properly constructed", { expect_snapshot(error = TRUE, vec_slice(c(bar = 1), "foo")) # Multiple OOB indices expect_snapshot(error = TRUE, vec_slice(letters, c(100, 1000))) expect_snapshot(error = TRUE, vec_slice(letters, c(1, 100:103, 2, 104:110))) expect_snapshot(error = TRUE, vec_slice(set_names(letters), c("foo", "bar"))) expect_snapshot(error = TRUE, vec_slice(set_names(letters), toupper(letters))) }) # vec_init ---------------------------------------------------------------- test_that("na of atomic vectors is as expected", { expect_equal(vec_init(TRUE), NA) expect_equal(vec_init(1L), NA_integer_) expect_equal(vec_init(1), NA_real_) expect_equal(vec_init("x"), NA_character_) expect_equal(vec_init(1i), NA_complex_) }) test_that("na of factor preserves levels", { f1 <- factor("a", levels = c("a", "b")) f2 <- vec_init(f1) expect_equal(levels(f1), levels(f2)) }) test_that("na of POSIXct preserves tz", { dt1 <- as.POSIXct("2010-01-01", tz = "America/New_York") dt2 <- vec_init(dt1) expect_equal(attr(dt2, "tzone"), "America/New_York") }) test_that("na of list is list(NULL)", { expect_equal(vec_init(list()), list(NULL)) }) test_that("na of array is 1d slice", { x1 <- array(1:12, c(2, 3, 4)) x2 <- vec_init(x1) expect_equal(x2, array(NA_integer_, c(1, 3, 4))) }) test_that("na of list-array is 1d slice", { x1 <- array(as.list(1:12), c(2, 3, 4)) x2 <- vec_init(x1) expect_equal(x2, array(list(), c(1, 3, 4))) }) test_that("vec_init() asserts vectorness (#301)", { expect_error(vec_init(NULL, 1L), class = "vctrs_error_scalar_type") }) test_that("vec_init() works with Altrep classes", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 1L, bar = 2L)) expect_equal(vec_init(x, 2), rep(NA_character_, 2)) }) test_that("vec_init() validates `n`", { expect_snapshot({ (expect_error(vec_init(1L, 1.5))) (expect_error(vec_init(1L, c(1, 2)))) (expect_error(vec_init(1L, -1L))) (expect_error(vec_init(1L, NA))) (expect_error(vec_init(1L, NA_integer_))) }) }) # vec_slice + compact_rep ------------------------------------------------- # `i` is 1-based test_that("names are repaired correctly with compact reps and `NA_integer_`", { x <- list(a = 1L, b = 2L) expect <- set_names(list(NULL, NULL), c("", "")) expect_equal(vec_slice_rep(x, NA_integer_, 2L), expect) }) test_that("names are recycled correctly with compact reps", { expect_named(vec_slice_rep(c(x = 1L), 1L, 3L), c("x", "x", "x")) }) test_that("vec_slice() with compact_reps work with Altrep classes", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) expect_equal(vec_slice_rep(x, 10L, 3L), rep("foo", 3)) }) # vec_slice + compact_seq ------------------------------------------------- # `start` is 0-based test_that("can subset base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE expect_identical(vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl(0, 1)) expect_identical(vec_slice_seq(int(1, 2, 3), start, size, increasing), int(2, 3)) expect_identical(vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl(2, 3)) expect_identical(vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl(2, 3)) expect_identical(vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr("2", "3")) expect_identical(vec_slice_seq(raw2(1, 2, 3), start, size, increasing), raw2(2, 3)) expect_identical(vec_slice_seq(list(1, 2, 3), start, size, increasing), list(2, 3)) }) test_that("can subset base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE expect_identical(vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl(1, 0)) expect_identical(vec_slice_seq(int(1, 2, 3), start, size, increasing), int(3, 2)) expect_identical(vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl(3, 2)) expect_identical(vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl(3, 2)) expect_identical(vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr("3", "2")) expect_identical(vec_slice_seq(raw2(1, 2, 3), start, size, increasing), raw2(3, 2)) expect_identical(vec_slice_seq(list(1, 2, 3), start, size, increasing), list(3, 2)) }) test_that("can subset base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE expect_identical(vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl()) expect_identical(vec_slice_seq(int(1, 2, 3), start, size, increasing), int()) expect_identical(vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl()) expect_identical(vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl()) expect_identical(vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr()) expect_identical(vec_slice_seq(raw2(1, 2, 3), start, size, increasing), raw2()) expect_identical(vec_slice_seq(list(1, 2, 3), start, size, increasing), list()) }) test_that("can subset shaped base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical(vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl(0, 1))) expect_identical(vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int(2, 3))) expect_identical(vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl(2, 3))) expect_identical(vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl(2, 3))) expect_identical(vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr("2", "3"))) expect_identical(vec_slice_seq(mat(raw2(1, 2, 3)), start, size, increasing), mat(raw2(2, 3))) expect_identical(vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list(2, 3))) }) test_that("can subset shaped base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE mat <- as.matrix expect_identical(vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl(1, 0))) expect_identical(vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int(3, 2))) expect_identical(vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl(3, 2))) expect_identical(vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl(3, 2))) expect_identical(vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr("3", "2"))) expect_identical(vec_slice_seq(mat(raw2(1, 2, 3)), start, size, increasing), mat(raw2(3, 2))) expect_identical(vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list(3, 2))) }) test_that("can subset shaped base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE mat <- as.matrix expect_identical(vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl())) expect_identical(vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int())) expect_identical(vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl())) expect_identical(vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl())) expect_identical(vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr())) expect_identical(vec_slice_seq(mat(raw2(1, 2, 3)), start, size, increasing), mat(raw2())) expect_identical(vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list())) }) test_that("can subset object of any dimensionality with compact seqs", { x0 <- c(1, 1) x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_equal(vec_slice_seq(x0, 0L, 1L), 1) expect_identical(vec_slice_seq(x1, 0L, 1L), ones(1)) expect_identical(vec_slice_seq(x2, 0L, 1L), ones(1, 3)) expect_identical(vec_slice_seq(x3, 0L, 1L), ones(1, 3, 4)) expect_identical(vec_slice_seq(x4, 0L, 1L), ones(1, 3, 4, 5)) }) test_that("can subset data frames with compact seqs", { df <- data_frame(x = 1:5, y = letters[1:5]) expect_equal(vec_slice_seq(df, 0L, 0L), vec_slice(df, integer())) expect_equal(vec_slice_seq(df, 0L, 1L), vec_slice(df, 1L)) expect_equal(vec_slice_seq(df, 0L, 3L), vec_slice(df, 1:3)) expect_equal(vec_slice_seq(df, 2L, 3L, FALSE), vec_slice(df, 3:1)) df$df <- df expect_equal(vec_slice_seq(df, 0L, 0L), vec_slice(df, integer())) expect_equal(vec_slice_seq(df, 0L, 1L), vec_slice(df, 1L)) expect_equal(vec_slice_seq(df, 0L, 3L), vec_slice(df, 1:3)) expect_equal(vec_slice_seq(df, 2L, 3L, FALSE), vec_slice(df, 3:1)) }) test_that("can subset S3 objects using the fallback method with compact seqs", { x <- factor(c("a", "b", "c", "d")) expect_equal(vec_slice_seq(x, 0L, 0L), vec_slice(x, integer())) expect_equal(vec_slice_seq(x, 0L, 1L), vec_slice(x, 1L)) expect_equal(vec_slice_seq(x, 2L, 2L), vec_slice(x, 3:4)) expect_equal(vec_slice_seq(x, 3L, 2L, FALSE), vec_slice(x, 4:3)) }) test_that("vec_slice() with compact_seqs work with Altrep classes", { skip_if(getRversion() < "3.5") x <- .Call(vctrs_altrep_rle_Make, c(foo = 2L, bar = 3L)) expect_equal(vec_slice_seq(x, 1L, 3L), c("foo", "bar", "bar")) }) test_that("vec_slice() handles symbols and OO objects", { expect_identical(vec_slice(c(a = 1, b = 2), quote(b)), c(b = 2)) expect_identical(vec_slice(c(a = 1, b = 2), factor("b")), c(b = 2)) expect_error(vec_slice(c(a = 1, b = 2), foobar("b")), class = "vctrs_error_subscript_type") }) test_that("vec_init() handles names in columns", { expect_identical( vec_init(data_frame(x = c(a = 1, b = 2)))$x, named(na_dbl) ) expect_identical( vec_init(data_frame(x = c(1, 2)))$x, na_dbl ) }) test_that("vec_slice() restores unrestored but named foreign classes", { x <- foobar(c(x = 1)) expect_identical(vec_slice(x, 1), x) expect_identical(vec_chop(x), list(x)) expect_identical(vec_chop(x, indices = list(1)), list(x)) expect_identical(vec_ptype(x), foobar(named(dbl()))) expect_identical(vec_ptype(x), foobar(named(dbl()))) expect_identical(vec_ptype_common(x, x), foobar(named(dbl()))) out <- vec_ptype_common_fallback(x, x) expect_true(is_common_class_fallback(out)) expect_identical(fallback_class(out), "vctrs_foobar") }) test_that("scalar type error is thrown when `vec_slice_unsafe()` is called directly (#1139)", { x <- foobar(as.list(1:3)) expect_error(vec_slice_seq(x, 1L, 1L), class = "vctrs_error_scalar_type") }) test_that("column sizes are checked before slicing (#552)", { x <- structure(list(a = 1, b = 2:3), row.names = 1:2, class = "data.frame") expect_error( vctrs::vec_slice(x, 2), "Column `a` (size 1) must match the data frame (size 2)", fixed = TRUE ) }) test_that("base_vec_rep() slices data frames with the base::rep() UI", { df <- data_frame(x = data_frame(y = 1:2)) expect_identical( base_vec_rep(df, length.out = 4), vec_slice(df, c(1:2, 1:2)) ) }) test_that("vec_size_assign() slices data frames with the base::rep() UI", { df <- data_frame(x = data_frame(y = 1:3)) expect_identical( vec_size_assign(df, 2), vec_slice(df, 1:2) ) expect_identical( vec_size_assign(df, 4), vec_slice(df, c(1:3, NA)) ) }) vctrs/tests/testthat/helper-encoding.R0000644000176200001440000000137414202760666017621 0ustar liggesusersencodings <- function(bytes = FALSE) { string <- "\u00B0C" utf8 <- iconv(string, from = Encoding(string), to = "UTF-8") unknown <- iconv(string, from = Encoding(string), to = "", mark = FALSE) latin1 <- iconv(string, from = Encoding(string), to = "latin1") out <- list(utf8 = utf8, unknown = unknown, latin1 = latin1) if (bytes) { out <- list2(!!! out, bytes = encoding_bytes()) } out } encoding_bytes <- function() { string <- "\u00B0C" unknown <- iconv(string, from = Encoding(string), to = "", mark = FALSE) bytes <- unknown Encoding(bytes) <- "bytes" bytes } expect_equal_encoding <- function(object, expected) { args <- vec_recycle_common(object, expected) expect_equal(Encoding(args[[1L]]), Encoding(args[[2L]])) } vctrs/tests/testthat/test-slice-chop.R0000644000176200001440000010062214402367170017550 0ustar liggesusers test_that("vec_chop() throws error with non-vector inputs", { expect_error(vec_chop(NULL), class = "vctrs_error_scalar_type") expect_error(vec_chop(environment()), class = "vctrs_error_scalar_type") }) test_that("atomics are split into a list", { x <- 1:5 expect_equal(vec_chop(x), as.list(x)) x <- letters[1:5] expect_equal(vec_chop(x), as.list(x)) }) test_that("atomic names are kept", { x <- set_names(1:5) result <- lapply(vec_chop(x), names) expect_equal(result, as.list(names(x))) }) test_that("base R classed objects are split into a list", { fctr <- factor(c("a", "b")) expect <- lapply(vec_seq_along(fctr), vec_slice, x = fctr) expect_equal(vec_chop(fctr), expect) date <- new_date(c(0, 1)) expect <- lapply(vec_seq_along(date), vec_slice, x = date) expect_equal(vec_chop(date), expect) }) test_that("base R classed object names are kept", { fctr <- set_names(factor(c("a", "b"))) result <- lapply(vec_chop(fctr), names) expect_equal(result, as.list(names(fctr))) }) test_that("list elements are split", { x <- list(1, 2) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("data frames are split rowwise", { x <- data_frame(x = 1:2, y = c("a", "b")) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("vec_chop() keeps data frame row names", { x <- data_frame(x = 1:2, y = c("a", "b")) rownames(x) <- c("r1", "r2") result <- lapply(vec_chop(x), rownames) expect_equal(result, list("r1", "r2")) }) test_that("vec_chop() keeps data frame row names for data frames with 0 columns (#1722)", { x <- data_frame(.size = 3) rownames(x) <- c("r1", "r2", "r3") out <- lapply(vec_chop(x), rownames) expect_identical(out, list("r1", "r2", "r3")) out <- vec_chop(x, indices = list(c(2, NA), 3)) out <- lapply(out, rownames) expect_identical(out, list(c("r2", "...2"), "r3")) out <- vec_chop(x, sizes = c(1, 2, 0)) out <- lapply(out, rownames) expect_identical(out, list("r1", c("r2", "r3"), character())) }) test_that("data frames with 0 columns retain the right number of rows (#1722)", { x <- data_frame(.size = 4) one <- data_frame(.size = 1L) expect_identical( vec_chop(x), list(one, one, one, one) ) expect_identical( vec_chop(x, indices = list(c(1, 3, 2), c(3, NA))), list( data_frame(.size = 3), data_frame(.size = 2) ) ) expect_identical( vec_chop(x, sizes = c(3, 1, 0)), list( data_frame(.size = 3), data_frame(.size = 1), data_frame(.size = 0) ) ) }) test_that("matrices / arrays are split rowwise", { x <- array(1:12, c(2, 2, 2)) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("matrix / array row names are kept", { x <- array(1:12, c(2, 2, 2), dimnames = list(c("r1", "r2"), c("c1", "c2"))) result <- lapply(vec_chop(x), rownames) expect_equal(result, list("r1", "r2")) }) test_that("matrices / arrays without row names have other dimension names kept", { x <- array(1:12, c(2, 2, 2), dimnames = list(NULL, c("c1", "c2"))) result <- lapply(vec_chop(x), colnames) expect_equal(result, list(c("c1", "c2"), c("c1", "c2"))) }) test_that("vec_chop() doesn't restore when attributes have already been restored", { local_methods( `[.vctrs_foobar` = function(x, i, ...) structure("dispatched", foo = "bar"), vec_restore.vctrs_foobar = function(...) structure("dispatched-and-restored", foo = "bar") ) result <- vec_chop(foobar(NA))[[1]] expect_equal(result, structure("dispatched", foo = "bar")) result <- vec_chop(foobar(NA), indices = list(1))[[1]] expect_equal(result, structure("dispatched", foo = "bar")) result <- vec_chop(foobar(NA), sizes = 1)[[1]] expect_equal(result, structure("dispatched", foo = "bar")) }) test_that("vec_chop() does not restore when attributes have not been restored by `[`", { local_methods( `[.vctrs_foobar` = function(x, i, ...) "dispatched", vec_restore.vctrs_foobar = function(...) "dispatched-and-restored" ) result <- vec_chop(foobar(NA))[[1]] expect_equal(result, "dispatched") result <- vec_chop(foobar(NA), indices = list(1))[[1]] expect_equal(result, "dispatched") result <- vec_chop(foobar(NA), sizes = 1)[[1]] expect_equal(result, "dispatched") }) test_that("vec_chop() falls back to `[` for shaped objects with no proxy", { x <- foobar(1) dim(x) <- c(1, 1) result <- vec_chop(x)[[1]] expect_equal(result, x) result <- vec_chop(x, indices = list(1))[[1]] expect_equal(result, x) result <- vec_chop(x, sizes = 1)[[1]] expect_equal(result, x) }) test_that("`indices` are validated", { expect_snapshot(error = TRUE, { vec_chop(1, indices = 1) }) expect_snapshot({ (expect_error(vec_chop(1, indices = list(1.5)), class = "vctrs_error_subscript_type")) }) expect_snapshot({ (expect_error(vec_chop(1, indices = list(2)), class = "vctrs_error_subscript_oob")) }) }) test_that("`sizes` are validated", { expect_snapshot(error = TRUE, { vec_chop("a", sizes = "a") }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = 2) }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = -1) }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = NA_integer_) }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = c(1, 1)) }) }) test_that("can't use both `indices` and `sizes`", { expect_snapshot(error = TRUE, { vec_chop(1, indices = list(1), sizes = 1) }) }) test_that("`sizes` allows `0`", { expect_identical( vec_chop(c("a", "b"), sizes = c(1, 0, 0, 1, 0)), list("a", character(), character(), "b", character()) ) }) test_that("size 0 `indices` list is allowed", { expect_equal(vec_chop(1, indices = list()), list()) }) test_that("individual index values of size 0 are allowed", { expect_equal(vec_chop(1, indices = list(integer())), list(numeric())) df <- data.frame(a = 1, b = "1") expect_equal(vec_chop(df, indices = list(integer())), list(vec_ptype(df))) }) test_that("individual index values of `NULL` are allowed", { expect_equal(vec_chop(1, indices = list(NULL)), list(numeric())) df <- data.frame(a = 1, b = "1") expect_equal(vec_chop(df, indices = list(NULL)), list(vec_ptype(df))) }) test_that("data frame row names are kept when `indices` or `sizes` are used", { x <- data_frame(x = 1:2, y = c("a", "b")) rownames(x) <- c("r1", "r2") result <- lapply(vec_chop(x, indices = list(1, 1:2)), rownames) expect_equal(result, list("r1", c("r1", "r2"))) result <- lapply(vec_chop(x, sizes = c(1, 0, 1)), rownames) expect_equal(result, list("r1", character(), "r2")) }) test_that("vec_chop(, indices/sizes =) can be equivalent to the default", { x <- 1:5 indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices = indices), vec_chop(x)) sizes <- vec_rep(1L, times = vec_size(x)) expect_equal(vec_chop(x, sizes = sizes), vec_chop(x)) }) test_that("vec_chop(, indices/sizes =) can be equivalent to the default", { x <- data.frame(x = 1:5) indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices = indices), vec_chop(x)) sizes <- vec_rep(1L, times = vec_size(x)) expect_equal(vec_chop(x, sizes = sizes), vec_chop(x)) }) test_that("vec_chop(, indices/sizes =) can be equivalent to the default", { x <- array(1:8, c(2, 2, 2)) indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices = indices), vec_chop(x)) sizes <- vec_rep(1L, times = vec_size(x)) expect_equal(vec_chop(x, sizes = sizes), vec_chop(x)) }) test_that("`indices` cannot use names", { x <- set_names(1:3, c("a", "b", "c")) expect_error(vec_chop(x, indices = list("a", c("b", "c"))), class = "vctrs_error_subscript_type") x <- array(1:4, c(2, 2), dimnames = list(c("r1", "r2"))) expect_error(vec_chop(x, indices = list("r1")), class = "vctrs_error_subscript_type") x <- data.frame(x = 1, row.names = "r1") expect_error(vec_chop(x, indices = list("r1")), class = "vctrs_error_subscript_type") }) test_that("fallback method with `indices` and `sizes` works", { fctr <- factor(c("a", "b")) indices <- list(1, c(1, 2)) sizes <- c(1, 0, 1) expect_equal( vec_chop(fctr, indices = indices), map(indices, vec_slice, x = fctr) ) expect_equal( vec_chop(fctr, sizes = sizes), list(vec_slice(fctr, 1), vec_slice(fctr, 0), vec_slice(fctr, 2)) ) }) test_that("vec_chop() falls back to `[` for shaped objects with no proxy when `indices` or `sizes` are provided", { x <- foobar(1) dim(x) <- c(1, 1) result <- vec_chop(x, indices = list(1))[[1]] expect_equal(result, x) result <- vec_chop(x, sizes = 1)[[1]] expect_equal(result, x) }) test_that("vec_chop() with data frame proxies always uses the proxy's length info", { local_methods( vec_proxy.vctrs_proxy = function(x, ...) { x <- proxy_deref(x) new_data_frame(list(x = x$x, y = x$y)) }, vec_restore.vctrs_proxy = function(x, to, ...) { new_proxy(list(x = x$x, y = x$y)) } ) x <- new_proxy(list(x = 1:2, y = 3:4)) result <- vec_chop(x) result1 <- result[[1]] result2 <- result[[2]] expect1 <- new_proxy(list(x = 1L, y = 3L)) expect2 <- new_proxy(list(x = 2L, y = 4L)) expect_identical(proxy_deref(result1), proxy_deref(expect1)) expect_identical(proxy_deref(result2), proxy_deref(expect2)) }) test_that("ALTREP objects always generate materialized chops (#1450)", { skip_if(getRversion() <= "3.5.0") x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) # `x` starts in compact form expect_false(.Call(vctrs_altrep_rle_is_materialized, x)) result <- vec_chop(x) # Chopping materializes `x` expect_true(.Call(vctrs_altrep_rle_is_materialized, x)) # And chopped elements are not ALTREP vectors expect_false(any(map_lgl(result, is_altrep))) expect <- vec_chop(c(rep("foo", 10), rep("bar", 5))) expect_identical(result, expect) }) test_that("`vec_chop(x, indices)` backwards compatible behavior works", { # No issues here expect_identical( vec_chop(1:2, list(1, 2)), vec_chop(1:2, indices = list(1, 2)) ) # Errors still talk about `indices` expect_snapshot(error = TRUE, { vec_chop(1:2, 1) }) expect_snapshot(error = TRUE, { vec_chop(1, list(1), sizes = 1) }) # These cases aren't allowed because they weren't possible previously either expect_snapshot(error = TRUE, { vec_chop(1, list(1), 2) }) expect_snapshot(error = TRUE, { vec_chop(1, list(1), indices = list(1)) }) }) # vec_chop + compact_seq -------------------------------------------------- # `start` is 0-based test_that("can chop base vectors with compact seqs", { start <- 1L size <- 2L expect_identical(vec_chop_seq(lgl(1, 0, 1), start, size), list(lgl(0, 1))) expect_identical(vec_chop_seq(int(1, 2, 3), start, size), list(int(2, 3))) expect_identical(vec_chop_seq(dbl(1, 2, 3), start, size), list(dbl(2, 3))) expect_identical(vec_chop_seq(cpl(1, 2, 3), start, size), list(cpl(2, 3))) expect_identical(vec_chop_seq(chr("1", "2", "3"), start, size), list(chr("2", "3"))) expect_identical(vec_chop_seq(raw2(1, 2, 3), start, size), list(raw2(2, 3))) expect_identical(vec_chop_seq(list(1, 2, 3), start, size), list(list(2, 3))) }) test_that("can chop with a decreasing compact seq", { expect_equal(vec_chop_seq(int(1, 2, 3), 1L, 2L, FALSE), list(int(2, 1))) }) test_that("can chop with multiple compact seqs", { start <- c(1L, 0L) size <- c(1L, 3L) expect_equal( vec_chop_seq(int(1, 2, 3), start, size), list(int(2), int(1, 2, 3)) ) }) test_that("can chop S3 objects using the fallback method with compact seqs", { x <- factor(c("a", "b", "c", "d")) expect_equal(vec_chop_seq(x, 0L, 0L), list(vec_slice(x, integer()))) expect_equal(vec_chop_seq(x, 0L, 1L), list(vec_slice(x, 1L))) expect_equal(vec_chop_seq(x, 2L, 2L), list(vec_slice(x, 3:4))) }) test_that("data frames with 0 columns retain the right number of rows with compact seqs (#1722)", { x <- data_frame(.size = 4) out <- vec_chop_seq(x, starts = c(0L, 0L, 2L), sizes = c(0L, 2L, 1L)) out <- map_int(out, vec_size) expect_identical(out, c(0L, 2L, 1L)) }) # list_unchop -------------------------------------------------------------- test_that("`x` must be a list", { expect_snapshot(error = TRUE, { list_unchop(1, indices = list(1)) }) expect_snapshot(error = TRUE, { list_unchop(1, indices = list(1), error_call = call("foo"), error_arg = "arg") }) expect_snapshot(error = TRUE, { list_unchop(data.frame(x=1), indices = list(1)) }) }) test_that("`indices` must be a list", { expect_snapshot(error = TRUE, { list_unchop(list(1), indices = 1) }) expect_snapshot(error = TRUE, { list_unchop(list(1), indices = 1, error_call = call("foo")) }) expect_snapshot(error = TRUE, { list_unchop(list(1), indices = data.frame(x=1)) }) }) test_that("`indices` must be a list of integers", { expect_error(list_unchop(list(1), indices = list("x")), class = "vctrs_error_subscript_type") expect_error(list_unchop(list(1), indices = list(TRUE)), class = "vctrs_error_subscript_type") expect_error(list_unchop(list(1), indices = list(quote(name))), class = "vctrs_error_subscript_type") }) test_that("`x` and `indices` must be lists of the same size", { expect_error(list_unchop(list(1, 2), indices = list(1)), "`x` and `indices` must be lists of the same size") }) test_that("can unchop with an AsIs list (#1463)", { x <- I(list(1, 2)) expect_identical(list_unchop(x), c(1, 2)) }) test_that("can unchop empty vectors", { expect_null(list_unchop(list())) expect_null(list_unchop(list(), indices = list())) expect_identical(list_unchop(list(), indices = list(), ptype = numeric()), numeric()) }) test_that("can unchop a list of NULL", { expect_null(list_unchop(list(NULL), indices = list(integer()))) expect_identical(list_unchop(list(NULL), indices = list(integer()), ptype = numeric()), numeric()) expect_identical(list_unchop(list(NULL, NULL), indices = list(integer(), integer()), ptype = numeric()), numeric()) }) test_that("NULLs are ignored when unchopped with other vectors", { expect_identical(list_unchop(list("a", NULL, "b")), c("a", "b")) expect_identical(list_unchop(list("a", NULL, "b"), indices = list(2, integer(), 1)), c("b", "a")) }) test_that("can use a `NULL` element with a corresponding index", { # FIXME: Probably not quite right, but not entirely clear what it should be: # - Maybe `unspecified(2)`? # - Or should `NULL`s even be allowed in `list_unchop()`? expect_null(list_unchop(list(NULL), indices = list(1:2))) expect_identical( list_unchop(list(NULL), indices = list(1:2), ptype = integer()), c(NA_integer_, NA_integer_) ) x <- list("a", NULL, c("b", "c")) indices <- list(3L, c(1L, 4L), c(2L, 5L)) expect_identical(list_unchop(x, indices = indices), c(NA, "b", "a", NA, "c")) }) test_that("can unchop atomic vectors", { expect_identical(list_unchop(list(1, 2), indices = list(2, 1)), c(2, 1)) expect_identical(list_unchop(list("a", "b"), indices = list(2, 1)), c("b", "a")) }) test_that("can unchop lists", { x <- list(list("a", "b"), list("c")) indices <- list(c(2, 3), 1) expect_identical(list_unchop(x, indices = indices), list("c", "a", "b")) }) test_that("can unchop data frames", { df1 <- data_frame(x = 1:2) df2 <- data_frame(x = 3:4) x <- list(df1, df2) indices <- list(c(3, 1), c(2, 4)) expect <- vec_slice(vec_c(df1, df2), vec_order(vec_c(!!! indices))) expect_identical(list_unchop(x, indices = indices), expect) }) test_that("can unchop factors", { fctr1 <- factor("z") fctr2 <- factor(c("x", "y")) x <- list(fctr1, fctr2) indices <- list(2, c(3, 1)) # levels are in the order they are seen! expect <- factor(c("y", "z", "x"), levels = c("z", "x", "y")) expect_identical(list_unchop(x, indices = indices), expect) }) test_that("can fallback when unchopping matrices", { mat1 <- matrix(1:4, nrow = 2, ncol = 2) mat2 <- matrix(5:10, nrow = 3, ncol = 2) x <- list(mat1, mat2) indices <- list(c(4, 1), c(2, 3, 5)) expect <- vec_slice(vec_c(mat1, mat2), vec_order(vec_c(!!! indices))) expect_identical(list_unchop(x, indices = indices), expect) expect_identical(list_unchop(x), vec_c(mat1, mat2)) }) test_that("can fallback when unchopping arrays of >2D", { arr1 <- array(1:8, c(2, 2, 2)) arr2 <- matrix(9:10, c(1, 2)) x <- list(arr1, arr2) indices <- list(c(3, 1), 2) expect <- vec_slice(vec_c(arr1, arr2), vec_order(vec_c(!!! indices))) expect_identical(list_unchop(x, indices = indices), expect) expect_identical(list_unchop(x), vec_c(arr1, arr2)) }) test_that("can unchop with all size 0 elements and get the right ptype", { x <- list(integer(), integer()) indices <- list(integer(), integer()) expect_identical(list_unchop(x, indices = indices), integer()) }) test_that("can unchop with some size 0 elements", { x <- list(integer(), 1:2, integer()) indices <- list(integer(), 2:1, integer()) expect_identical(list_unchop(x, indices = indices), 2:1) }) test_that("NULL is a valid index", { expect_equal(list_unchop(list(1, 2), indices = list(NULL, 1)), 2) expect_error(list_unchop(list(1, 2), indices = list(NULL, 2)), class = "vctrs_error_subscript_oob") }) test_that("unchopping recycles elements of x to the size of the index", { x <- list(1, 2) indices <- list(c(3, 4, 5), c(2, 1)) expect_identical(list_unchop(x, indices = indices), c(2, 2, 1, 1, 1)) x <- list(1:2) indices <- list(1:3) expect_snapshot({ (expect_error(list_unchop(x, indices = indices))) (expect_error(list_unchop(x, indices = indices, error_call = call("foo"), error_arg = "arg"))) }) }) test_that("unchopping takes the common type", { x <- list(1, "a") indices <- list(1, 2) expect_snapshot({ (expect_error(list_unchop(x, indices = indices), class = "vctrs_error_incompatible_type")) (expect_error(list_unchop(x, indices = indices, error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) }) x <- list(1, 2L) expect_type(list_unchop(x, indices = indices), "double") }) test_that("common type failure uses positional errors", { expect_snapshot({ x <- list(1, a = "x", 2) # Looking for `x[[1]]` and `x$a` (expect_error(list_unchop(x))) (expect_error(list_unchop(x, indices = list(2, 1, 3)))) # Directed cast should also produce directional errors (#1690) (expect_error(list_unchop(x, ptype = double()))) (expect_error(list_unchop(x, indices = list(2, 1, 3), ptype = double()))) # Lossy cast y <- list(1, a = 2.5) (expect_error(list_unchop(y, ptype = integer()))) (expect_error(list_unchop(y, indices = list(2, 1), ptype = integer()))) }) }) test_that("can specify a ptype to override common type", { indices <- list(1, 2) x <- list(1, 2L) expect_identical(list_unchop(x, indices = indices, ptype = integer()), c(1L, 2L)) x <- list(1.5, 2) expect_snapshot({ (expect_error(list_unchop(x, indices = indices, ptype = integer()))) (expect_error(list_unchop(x, indices = indices, ptype = integer(), error_call = call("foo"), error_arg = "arg"))) }) }) test_that("leaving `indices = NULL` unchops sequentially", { x <- list(1:2, 3:5, 6L) expect_identical(list_unchop(x), 1:6) }) test_that("outer names are kept", { x <- list(x = 1, y = 2) expect_named(list_unchop(x), c("x", "y")) expect_named(list_unchop(x, indices = list(2, 1)), c("y", "x")) }) test_that("outer names are recycled in the right order", { x <- list(x = 1, y = 2) expect_error(list_unchop(x, indices = list(c(1, 2), 3)), "Can't merge") expect_named(list_unchop(x, indices = list(c(1, 3), 2), name_spec = "{outer}_{inner}"), c("x_1", "y", "x_2")) expect_named(list_unchop(x, indices = list(c(3, 1), 2), name_spec = "{outer}_{inner}"), c("x_2", "y", "x_1")) }) test_that("outer names can be merged with inner names", { x <- list(x = c(a = 1), y = c(b = 2)) expect_error(list_unchop(x), "Can't merge") expect_named(list_unchop(x, name_spec = "{outer}_{inner}"), c("x_a", "y_b")) expect_named(list_unchop(x, indices = list(2, 1), name_spec = "{outer}_{inner}"), c("y_b", "x_a")) }) test_that("preserves names when inputs are cast to a common type (#1689)", { expect_named(list_unchop(list(c(a = 1)), ptype = integer()), "a") expect_named(list_unchop(list(c(a = 1)), ptype = integer(), indices = list(1)), "a") # With name spec name_spec <- "{outer}_{inner}" expect_named(list_unchop(list(foo = c(a = 1)), ptype = integer(), name_spec = name_spec), "foo_a") expect_named(list_unchop(list(foo = c(a = 1)), ptype = integer(), name_spec = name_spec, indices = list(1)), "foo_a") # When `x` elements are recycled, names are also recycled x <- list(c(a = 1), c(b = 2)) indices <- list(1:2, 3:4) expect_named(list_unchop(x, indices = indices, ptype = integer()), c("a", "a", "b", "b")) }) test_that("not all inputs have to be named", { x <- list(c(a = 1), 2, c(c = 3)) indices <- list(2, 1, 3) expect_named(list_unchop(x, indices = indices), c("", "a", "c")) }) test_that("list_unchop() keeps data frame row names", { df1 <- data.frame(x = 1:2, row.names = c("r1", "r2")) df2 <- data.frame(x = 3:4, row.names = c("r3", "r4")) x <- list(df1, df2) indices <- list(c(3, 1), c(2, 4)) result <- list_unchop(x, indices = indices) expect <- c("r2", "r3", "r1", "r4") expect_identical(vec_names(result), expect) }) test_that("individual data frame columns retain vector names", { df1 <- data_frame(x = c(a = 1, b = 2)) df2 <- data_frame(x = c(c = 3)) x <- list(df1, df2) indices <- list(c(1, 2), 3) result <- list_unchop(x, indices = indices) expect_named(result$x, c("a", "b", "c")) # Names should be identical to equivalent `vec_c()` call expect_identical(list_unchop(x, indices = indices), vec_c(!!!x)) }) test_that("df-col row names are repaired silently", { df1 <- data_frame(x = new_data_frame(list(a = 1), row.names = "inner")) df2 <- data_frame(x = new_data_frame(list(a = 2), row.names = "inner")) x <- list(df1, df2) indices <- list(1, 2) expect_silent({ result <- list_unchop(x, indices = indices) }) expect_identical(vec_names(result$x), c("inner...1", "inner...2")) }) test_that("monitoring - can technically assign to the same location twice", { x <- list(1:2, 3L) indices <- list(1:2, 1L) expect_identical(list_unchop(x, indices = indices), c(3L, 2L, NA)) }) test_that("index values are validated", { x <- list(1, 2) indices1 <- list(4, 1) indices2 <- list(c(1, 4), 2) indices3 <- list(c(1, 3, 4), 2) expect_error(list_unchop(x, indices = indices1), class = "vctrs_error_subscript_oob") expect_error(list_unchop(x, indices = indices2), class = "vctrs_error_subscript_oob") expect_identical(list_unchop(x, indices = indices3), c(1, 2, 1, 1)) }) test_that("name repair is respected and happens after ordering according to `indices`", { local_name_repair_quiet() x <- list(c(a = 1), c(a = 2)) indices <- list(2, 1) expect_named(list_unchop(x, indices = indices), c("a", "a")) expect_named(list_unchop(x, indices = indices, name_repair = "unique"), c("a...1", "a...2")) }) test_that("list_unchop() can repair names quietly", { local_name_repair_verbose() x <- c(x = "a", x = "b", x = "c") indices <- list(2, c(3, 1)) expect_snapshot({ res <- list_unchop(vec_chop(x, indices = indices), indices = indices, name_repair = "unique_quiet") }) expect_named(res, c("x...1", "x...2", "x...3")) x <- c("if" = "a", "in" = "b", "for" = "c") indices <- list(2, c(3, 1)) expect_snapshot({ res <- list_unchop(vec_chop(x, indices = indices), indices = indices, name_repair = "universal_quiet") }) expect_named(res, c(".if", ".in", ".for")) }) test_that("list_unchop() errors on unsupported location values", { expect_snapshot({ (expect_error( list_unchop(list(1, 2), indices = list(c(1, 2), 0)), class = "vctrs_error_subscript_type" )) (expect_error( list_unchop(list(1), indices = list(-1)), class = "vctrs_error_subscript_type" )) }) }) test_that("missing values propagate", { expect_identical( list_unchop(list(1, 2), indices = list(c(NA_integer_, NA_integer_), c(NA_integer_, 3))), c(NA, NA, 2, NA) ) }) test_that("list_unchop() works with simple homogeneous foreign S3 classes", { expect_identical(list_unchop(list(foobar(1), foobar(2))), vec_c(foobar(c(1, 2)))) }) test_that("list_unchop() fails with complex foreign S3 classes", { expect_snapshot({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(list_unchop(list(x, y)), class = "vctrs_error_incompatible_type")) (expect_error(list_unchop(list(x, y), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) }) }) test_that("list_unchop() fails with complex foreign S4 classes", { expect_snapshot({ joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(list_unchop(list(joe, jane)), class = "vctrs_error_incompatible_type")) (expect_error(list_unchop(list(joe, jane), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) }) }) test_that("list_unchop() falls back to c() if S3 method is available", { # Check off-by-one error expect_error( list_unchop(list(foobar(1), "", foobar(2)), indices = list(1, 2, 3)), class = "vctrs_error_incompatible_type" ) # Fallback when the class implements `c()` method_foobar <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) foobar(res) } local_methods( c.vctrs_foobar = method_foobar ) expect_identical( list_unchop(list(foobar(1), foobar(2))), foobar(c(1, 2)) ) expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(1, 2)), foobar(c(1, 2)) ) expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(2, 1)), foobar(c(2, 1)) ) expect_identical( list_unchop(list(NULL, foobar(1), NULL, foobar(2))), foobar(c(1, 2)) ) # OOB error is respected expect_error( list_unchop(list(foobar(1), foobar(2)), indices = list(1, 3)), class = "vctrs_error_subscript_oob" ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( list_unchop(list(foobar(c(1, 2)), foobar(3)), indices = list(c(1, 3), 1)), foobar(c(3, NA, 2)) ) expect_identical( list_unchop(list(foobar(c(1, 2)), foobar(3)), indices = list(c(2, NA), NA)), foobar(c(NA, 1, NA)) ) # Names are kept expect_identical( list_unchop(list(foobar(c(x = 1, y = 2)), foobar(c(x = 1))), indices = list(c(2, 1), 3)), foobar(c(y = 2, x = 1, x = 1)) ) # Recycles to the size of index expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 3), 2)), foobar(c(1, 2, 1)) ) expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 2), integer())), foobar(c(1, 1)) ) expect_snapshot({ (expect_error( list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 3), integer())), class = "vctrs_error_subscript_oob" )) }) expect_snapshot({ x <- list(foobar(1:2)) indices <- list(1:3) (expect_error(list_unchop(x, indices = indices))) (expect_error(list_unchop(x, indices = indices, error_arg = "arg", error_call = call("foo")))) }) method_vctrs_c_fallback <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) structure(res, class = "vctrs_c_fallback") } # Registered fallback s3_register("base::c", "vctrs_c_fallback", method_vctrs_c_fallback) expect_identical( list_unchop( list( structure(1, class = "vctrs_c_fallback"), structure(2, class = "vctrs_c_fallback") ), indices = list(2, 1) ), structure(c(2, 1), class = "vctrs_c_fallback") ) # Don't fallback for S3 lists which are treated as scalars by default expect_error( list_unchop(list(foobar(list(1)), foobar(list(2)))), class = "vctrs_error_scalar_type" ) }) test_that("list_unchop() falls back for S4 classes with a registered c() method", { joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") expect_snapshot({ (expect_error( list_unchop(list(joe, 1, jane), indices = list(c(1, 2), 3, 4)), class = "vctrs_error_incompatible_type" )) }) local_c_counts() expect_identical( list_unchop(list(joe, jane), indices = list(c(1, 3), 2)), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) expect_identical( list_unchop(list(NULL, joe, jane), indices = list(integer(), c(1, 3), 2)), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( list_unchop(list(joe, jane), indices = list(c(1, 3), 1)), .Counts(c(3L, NA, 2L), name = "Dispatched") ) expect_identical( list_unchop(list(joe, jane), indices = list(c(2, NA), NA)), .Counts(c(NA, 1L, NA), name = "Dispatched") ) }) test_that("list_unchop() fallback doesn't support `name_spec` or `ptype`", { expect_snapshot({ foo <- structure(foobar(1), foo = "foo") bar <- structure(foobar(2), bar = "bar") (expect_error( with_c_foobar(list_unchop(list(foo, bar), name_spec = "{outer}_{inner}")), "name specification" )) # With error call (expect_error( with_c_foobar(list_unchop(list(foo, bar), name_spec = "{outer}_{inner}", error_call = call("foo"))), "name specification" )) # Used to be an error about `ptype` x <- list(foobar(1)) (expect_error( with_c_foobar(list_unchop(x, ptype = "")), class = "vctrs_error_incompatible_type" )) }) }) test_that("list_unchop() supports numeric S3 indices", { local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) UseMethod("vec_ptype2.vctrs_foobar"), vec_ptype2.vctrs_foobar.integer = function(x, y, ...) foobar(integer()), vec_cast.integer.vctrs_foobar = function(x, to, ...) vec_data(x) ) expect_identical(list_unchop(list(1), indices = list(foobar(1L))), 1) }) test_that("list_unchop() does not support non-numeric S3 indices", { expect_snapshot({ (expect_error( list_unchop(list(1), indices = list(factor("x"))), class = "vctrs_error_subscript_type" )) (expect_error( list_unchop(list(1), indices = list(foobar(1L))), class = "vctrs_error_subscript_type" )) }) }) test_that("can ignore names in `list_unchop()` by providing a `zap()` name-spec (#232)", { expect_snapshot({ (expect_error(list_unchop(list(a = c(b = 1:2))))) (expect_error(list_unchop(list(a = c(b = 1:2)), error_call = call("foo")))) }) expect_identical( list_unchop(list(a = c(b = 1:2), b = 3L), name_spec = zap()), 1:3 ) expect_identical( list_unchop( list(a = c(foo = 1:2), b = c(bar = 3L)), indices = list(2:1, 3), name_spec = zap() ), c(2L, 1L, 3L) ) expect_snapshot({ x <- list(a = c(b = letters), b = 3L) (expect_error( list_unchop(x, name_spec = zap()), class = "vctrs_error_incompatible_type" )) x <- list(a = c(foo = 1:2), b = c(bar = "")) (expect_error( list_unchop( x, indices = list(2:1, 3), name_spec = zap() ), class = "vctrs_error_incompatible_type" )) }) }) test_that("list_unchop() falls back to c() methods (#1120)", { expect_error( list_unchop(list(foobar(1), foobar(2, class = "foo"))), class = "vctrs_error_incompatible_type" ) local_methods( c.vctrs_foobar = function(...) { out <- NextMethod() paste0(rep_along(out, "dispatched"), seq_along(out)) } ) # Homogeneous subclasses xs <- list(foobar(1), foobar(2, class = "foo")) expect_identical( list_unchop(xs), c("dispatched1", "dispatched2") ) expect_identical( list_unchop(xs, indices = list(2, 1)), c("dispatched2", "dispatched1") ) # Different subclasses xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_identical( list_unchop(xs), c("dispatched1", "dispatched2", "dispatched3") ) expect_identical( list_unchop(xs, indices = list(c(2, 1), 3)), c("dispatched2", "dispatched1", "dispatched3") ) }) test_that("list_unchop() fails if foreign classes are not homogeneous and there is no c() method", { xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_error( list_unchop(xs), class = "vctrs_error_incompatible_type" ) expect_error( list_unchop(xs, indices = list(c(2, 1), 3)), class = "vctrs_error_incompatible_type" ) }) vctrs/tests/testthat/test-expand.R0000644000176200001440000000770614362266120017010 0ustar liggesuserstest_that("expands the first column slowest by default", { x <- 1:4 y <- 1:3 z <- 1:2 expect_identical( vec_expand_grid(x = x, y = y, z = z), data_frame( x = vec_rep(vec_rep_each(x, times = 6), times = 1), y = vec_rep(vec_rep_each(y, times = 2), times = 4), z = vec_rep(vec_rep_each(z, times = 1), times = 12) ) ) }) test_that("can expand the first column fastest with `.vary`", { x <- 1:4 y <- 1:3 z <- 1:2 expect_identical( vec_expand_grid(x = x, y = y, z = z, .vary = "fastest"), data_frame( x = vec_rep(vec_rep_each(x, times = 1), times = 6), y = vec_rep(vec_rep_each(y, times = 4), times = 2), z = vec_rep(vec_rep_each(z, times = 12), times = 1) ) ) }) test_that("size 0 elements force a size 0 result", { expect_identical( vec_expand_grid(x = 1:3, y = integer(), z = 1:2), data_frame(x = integer(), y = integer(), z = integer()) ) expect_identical( vec_expand_grid(x = integer()), data_frame(x = integer()) ) }) test_that("returns 1 row and 0 cols with no input", { # Because `prod(integer()) == 1L` expect_identical(vec_expand_grid(), data_frame(.size = 1L)) }) test_that("drops `NULL` values", { expect_identical( vec_expand_grid(NULL, NULL), vec_expand_grid() ) # And that happens before all names checks expect_identical( vec_expand_grid(x = 1:2, x = NULL, y = 1:3, NULL), vec_expand_grid(x = 1:2, y = 1:3) ) }) test_that("works with data frame inputs", { x <- data_frame(a = 1:2, b = 2:3) y <- 1:3 expect_identical( vec_expand_grid(x = x, y = y), data_frame( x = vec_rep(vec_rep_each(x, times = 3), times = 1), y = vec_rep(vec_rep_each(y, times = 1), times = 2), ) ) }) test_that("`.name_repair` isn't affected by `.vary`", { expect <- vec_as_names(c("a", "b", "a", "z"), repair = "unique_quiet") expect_named( vec_expand_grid(a = 1, b = 2, a = 3, z = 4, .vary = "slowest", .name_repair = "unique_quiet"), expect ) expect_named( vec_expand_grid(a = 1, b = 2, a = 3, z = 4, .vary = "fastest", .name_repair = "unique_quiet"), expect ) }) test_that("can use `.name_repair`", { expect_identical( vec_expand_grid(a = 1:2, a = 2:3, .name_repair = "minimal"), data_frame(a = c(1L, 1L, 2L, 2L), a = c(2L, 3L, 2L, 3L), .name_repair = "minimal") ) }) test_that("inputs must be named", { expect_snapshot(error = TRUE, { vec_expand_grid(1) }) expect_snapshot(error = TRUE, { vec_expand_grid(x = 1, 2, y = 3) }) }) test_that("catches duplicate names by default", { expect_snapshot(error = TRUE, { vec_expand_grid(a = 1, a = 2) }) }) test_that("errors on non vectors and mentions the element name", { expect_snapshot(error = TRUE, { vec_expand_grid(y = environment()) }) }) test_that("can adjust the `.error_call`", { my_expand_grid <- function() { vec_expand_grid(x = environment(), .error_call = current_env()) } expect_snapshot(error = TRUE, { my_expand_grid() }) }) test_that("errors nicely when expansion results in a size larger than `R_len_t`", { # Windows 32-bit doesn't support long vectors of this size, and the # intermediate `r_ssize` will be too large skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") x <- seq_len((2^31 - 1) / 2) y <- 1:3 expect_snapshot(error = TRUE, { vec_expand_grid(x = x, y = y) }) }) test_that("errors nicely when expansion results in a size larger than `R_xlen_t`", { # Windows 32-bit doesn't support long vectors of this size, and the # intermediate `r_ssize` will be too large skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") x <- seq_len(2^31 - 1) expect_snapshot(error = TRUE, transform = scrub_internal_error_line_number, { vec_expand_grid(x = x, y = x) }) }) test_that("validates `.vary`", { expect_snapshot(error = TRUE, { vec_expand_grid(.vary = 1) }) expect_snapshot(error = TRUE, { vec_expand_grid(.vary = "x") }) }) vctrs/tests/testthat/test-partial-factor.R0000644000176200001440000000267214276722575020455 0ustar liggesusers test_that("has ok print method", { partial <- partial_factor("x") expect_snapshot(partial) both <- vec_ptype2(partial, factor("y")) expect_snapshot(both) empty <- partial_factor() expect_snapshot(empty) learned <- vec_ptype2(empty, factor("y")) expect_snapshot(learned) expect_equal(vec_ptype_abbr(partial), "prtl_fctr") }) test_that("order of levels comes from data", { pfctr <- partial_factor(c("y", "x")) fctr <- factor(levels = c("x", "y")) expect_equal(levels(vec_ptype_common(pfctr, fctr)), c("x", "y")) expect_equal(levels(vec_ptype_common(fctr, pfctr)), c("x", "y")) }) test_that("partial levels added to end if not in data", { pfctr <- partial_factor("y") fctr <- factor(levels = "x") expect_equal(levels(vec_ptype_common(pfctr, fctr)), c("x", "y")) expect_equal(levels(vec_ptype_common(fctr, pfctr)), c("x", "y")) }) test_that("can assert partial factors based on level presence", { pfctr <- partial_factor("y") expect_true(vec_is(factor("y"), pfctr)) expect_false(vec_is(factor("x"), pfctr)) expect_true(vec_is(factor(c("x", "y")), pfctr)) pfctr <- partial_factor(c("y", "z")) expect_false(vec_is(factor("y"), pfctr)) expect_true(vec_is(factor(c("y", "z")), pfctr)) expect_true(vec_is(factor(c("x", "y", "z")), pfctr)) }) # TODO - why is this not working? # test_that("can assert partial factor based on factor type", { # pfctr <- partial_factor() # expect_false(vec_is(1, pfctr)) # }) vctrs/tests/testthat/test-type-factor.R0000644000176200001440000001571114317573545017775 0ustar liggesusers test_that("ptype methods are descriptive", { f <- factor() o <- ordered(character()) expect_equal(vec_ptype_abbr(f), "fct") expect_equal(vec_ptype_abbr(o), "ord") expect_equal(vec_ptype_full(f), "factor<>") expect_equal(vec_ptype_full(o), "ordered<>") }) test_that("slicing factors uses a proxy to not go through `[.factor`", { x <- factor("x") y <- ordered("y") local_methods( `[.factor` = function(x, ...) abort("should not be called") ) expect_identical(vec_slice(x, 1), x) expect_identical(vec_slice(y, 1), y) }) test_that("`vec_c()` throws the right error with subclassed factors (#1015)", { skip("Factors now have a `c()` method") a <- subclass(factor("a")) b <- subclass(factor("b")) # We used to return a subclass expect_identical(vec_c(a, a), subclass(factor(c("a", "a")))) # We used to fail if attributes were incoPatible expect_error(vec_c(a, b), class = "vctrs_error_incompatible_type") }) # Coercion ---------------------------------------------------------------- test_that("factor/character coercions are symmetric and unchanging", { types <- list( ordered(character()), factor(), character() ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) local_options(width = 200) expect_snapshot(print(mat)) }) test_that("factors level are unioned", { # This is technically incorrect, but because of R's existing behaviour # anything else will cause substantial friction. fa <- factor(levels = "a") fb <- factor(levels = "b") expect_equal(vec_ptype_common(fa, fb), factor(levels = c("a", "b"))) expect_equal(vec_ptype_common(fb, fa), factor(levels = c("b", "a"))) }) test_that("coercion errors with factors", { f <- factor(levels = "a") expect_error(vec_ptype_common(f, logical()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype_common(logical(), f), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2(, NA) is symmetric (#687)", { fct <- new_factor() expect_identical( vec_ptype2(fct, NA), vec_ptype2(NA, fct) ) fct <- new_ordered() expect_identical( vec_ptype2(fct, NA), vec_ptype2(NA, fct) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { i64 <- bit64::integer64() expect_identical( vec_ptype2(i64, NA), vec_ptype2(NA, i64) ) }) test_that("vec_ptype2() errors with malformed factors", { x <- structure(1, class = "factor") y <- factor("x") expect_error(vec_ptype2(x, y, x_arg = "z"), "`z` is a corrupt factor") expect_error(vec_ptype2(y, x, y_arg = "z"), "`z` is a corrupt factor") }) test_that("vec_ptype2() errors with malformed ordered factors", { x <- structure(1, class = c("ordered", "factor")) y <- as.ordered(factor("x")) expect_error(vec_ptype2(x, y, x_arg = "z"), "`z` is a corrupt ordered factor") expect_error(vec_ptype2(y, x, y_arg = "z"), "`z` is a corrupt ordered factor") }) test_that("ordered factors with different levels are not compatible", { expect_error( vec_ptype2(ordered("a"), ordered("b")), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(ordered("a"), ordered(c("a", "b"))), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), ordered("b")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), ordered(c("a", "b"))), class = "vctrs_error_incompatible_type" ) }) test_that("factors and ordered factors are not compatible", { expect_error( vec_ptype2(factor("a"), ordered("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(ordered("a"), factor("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(factor("a"), ordered("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), factor("a")), class = "vctrs_error_incompatible_type" ) }) # Casting ----------------------------------------------------------------- test_that("safe casts work as expected", { fa <- factor("a") fab <- factor(c("a", "b")) expect_equal(vec_cast(NULL, fa), NULL) expect_equal(vec_cast(fa, fa), fa) expect_equal(vec_cast(fa, fab), fab[1]) expect_equal(vec_cast("a", fab), fab[1]) expect_equal(vec_cast("a", factor()), fa) expect_equal(vec_cast(fa, factor()), fa) # This used to be allowed expect_error(vec_cast(list("a", "b"), fab), class = "vctrs_error_incompatible_type") }) test_that("can cast to character", { expect_equal(vec_cast(factor("X"), character()), "X") expect_equal(vec_cast(ordered("X"), character()), "X") }) test_that("can cast NA and unspecified to factor", { expect_identical(vec_cast(NA, new_factor()), factor(NA)) expect_identical(vec_cast(NA, new_ordered()), ordered(NA)) expect_identical(vec_cast(unspecified(2), new_factor()), factor(c(NA, NA))) expect_identical(vec_cast(unspecified(2), new_ordered()), ordered(c(NA, NA))) }) test_that("lossy factor casts fail", { fa <- factor("a") fb <- factor("b") expect_lossy(vec_cast(fa, fb), factor(NA, levels = "b"), x = fa, to = fb) expect_lossy(vec_cast("a", fb), factor(NA, levels = "b"), x = chr(), to = fb) }) test_that("invalid casts generate error", { expect_error(vec_cast(double(), factor("a")), class = "vctrs_error_incompatible_type") expect_error(vec_cast(factor("a"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(ordered("a"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(logical(), factor("a")), class = "vctrs_error_incompatible_type") expect_error(vec_cast(logical(), ordered("a")), class = "vctrs_error_incompatible_type") }) test_that("orderedness of factor is preserved", { ord <- ordered(c("a", "b"), levels = c("b", "a")) expect_equal(vec_cast("a", ord), ordered("a", levels = c("b", "a"))) }) test_that("NA are not considered lossy in factor cast (#109)", { f <- factor(c("itsy", "bitsy", NA, "spider", "spider")) expect_warning(vec_cast(f, f[1]), NA) }) test_that("Casting to a factor with explicit NA levels retains them", { f <- factor(c("x", NA), exclude = NULL) expect_identical(vec_cast(f, f), f) expect_identical(vec_cast(f, factor()), f) }) test_that("characters can be cast to ordered", { expect_identical(vec_cast("a", ordered("a")), ordered("a")) expect_error(vec_cast(c("a", "b"), ordered("a")), class = "vctrs_error_cast_lossy") }) # Proxy / restore --------------------------------------------------------- test_that("subclassed factors / ordered factors can be restored (#1015)", { x <- subclass(factor("a")) proxy <- vec_proxy(x) expect_identical(vec_restore(proxy, x), x) y <- subclass(ordered("a")) proxy <- vec_proxy(y) expect_identical(vec_restore(proxy, y), y) }) # Arithmetic and factor --------------------------------------------------- test_that("factors don't support math or arthimetic", { f <- factor("x") expect_error(vec_math("sum", f), class = "vctrs_error_unsupported") expect_error(vec_arith("+", f, f), class = "vctrs_error_unsupported") }) vctrs/tests/testthat/test-subscript-loc.R0000644000176200001440000006667514315060310020323 0ustar liggesusers test_that("vec_as_location2() returns a position", { expect_identical(vec_as_location2(2, 2L), 2L) expect_identical(vec_as_location2("foo", 2L, c("bar", "foo")), 2L) expect_identical(vec_as_location2("0", 4L, as.character(-1:2)), 2L) }) test_that("vec_as_location2() requires integer or character inputs", { expect_snapshot({ (expect_error(vec_as_location2(TRUE, 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(mtcars, 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(env(), 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(foobar(), 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(2.5, 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(Inf, 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(-Inf, 10L), class = "vctrs_error_subscript_type")) "Idem with custom `arg`" (expect_error(vec_as_location2(foobar(), 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) (expect_error(with_tibble_rows(vec_as_location2(TRUE)), class = "vctrs_error_subscript_type")) }) }) test_that("vec_as_location() requires integer, character, or logical inputs", { expect_snapshot({ (expect_error(vec_as_location(mtcars, 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(env(), 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(foobar(), 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(2.5, 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(list(), 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(function() NULL, 10L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(Sys.Date(), 3L), class = "vctrs_error_subscript_type")) "Idem with custom `arg`" (expect_error(vec_as_location(env(), 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(foobar(), 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) }) }) test_that("vec_as_location2() and vec_as_location() require integer- or character-like OO inputs", { expect_identical(vec_as_location2(factor("foo"), 2L, c("bar", "foo")), 2L) expect_identical(vec_as_location(factor("foo"), 2L, c("bar", "foo")), 2L) expect_error(vec_as_location2(foobar(1L), 10L), class = "vctrs_error_subscript_type") expect_error(vec_as_location(foobar(1L), 10L), class = "vctrs_error_subscript_type") # Define subtype of logical and integer local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) UseMethod("vec_ptype2.vctrs_foobar"), vec_ptype2.vctrs_foobar.logical = function(x, y, ...) logical(), vec_ptype2.vctrs_foobar.integer = function(x, y, ...) integer(), vec_ptype2.logical.vctrs_foobar = function(x, y, ...) logical(), vec_ptype2.integer.vctrs_foobar = function(x, y, ...) integer(), vec_cast.vctrs_foobar = function(x, to, ...) UseMethod("vec_cast.vctrs_foobar"), vec_cast.vctrs_foobar.integer = function(x, to, ...) foobar(x), vec_cast.integer.vctrs_foobar = function(x, to, ...) vec_cast(unclass(x), int()), vec_cast.logical.vctrs_foobar = function(x, to, ...) vec_cast(unclass(x), lgl()) ) expect_error(vec_as_location2(foobar(TRUE), 10L), class = "vctrs_error_subscript_type") expect_identical(vec_as_location(foobar(TRUE), 10L), 1:10) expect_identical(vec_as_location(foobar(FALSE), 10L), int()) }) test_that("vec_as_location() and variants check for OOB elements (#1605)", { expect_snapshot({ "Numeric indexing" (expect_error(vec_as_location(10L, 2L), class = "vctrs_error_subscript_oob")) (expect_error(vec_as_location(-10L, 2L), class = "vctrs_error_subscript_oob")) (expect_error(vec_as_location2(10L, 2L), class = "vctrs_error_subscript_oob")) "Character indexing" (expect_error(vec_as_location("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob")) (expect_error(vec_as_location2("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob")) (expect_error(vec_as_location2("foo", 1L, names = "bar", call = call("baz")), class = "vctrs_error_subscript_oob")) }) expect_error(num_as_location(10L, 2L), class = "vctrs_error_subscript_oob") expect_error(num_as_location2(10L, 2L), class = "vctrs_error_subscript_oob") }) test_that("vec_as_location() doesn't require `n` for character indexing", { expect_identical(vec_as_location("b", NULL, names = letters), 2L) }) test_that("vec_as_location2() requires length 1 inputs", { expect_snapshot({ (expect_error(vec_as_location2(1:2, 2L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(c("foo", "bar"), 2L, c("foo", "bar")), class = "vctrs_error_subscript_type")) "Idem with custom `arg`" (expect_error(vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(mtcars, 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) }) }) test_that("vec_as_location2() requires positive integers", { expect_snapshot({ (expect_error(vec_as_location2(0, 2L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(-1, 2L), class = "vctrs_error_subscript_type")) "Idem with custom `arg`" (expect_error(vec_as_location2(0, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) }) }) test_that("vec_as_location2() fails with NA", { expect_snapshot({ (expect_error(vec_as_location2(na_int, 2L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location2(na_chr, 1L, names = "foo"), class = "vctrs_error_subscript_type")) "Idem with custom `arg`" (expect_error(vec_as_location2(na_int, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) }) }) test_that("vec_as_location2() doesn't allow lossy casts", { expect_error(vec_as_location2(2^31, 3L), class = "vctrs_error_subscript_type") # Lossy casts generate missing values, which are disallowed expect_error(allow_lossy_cast(vec_as_location2(2^31, 3L)), class = "vctrs_error_subscript_type") }) test_that("all subscript errors inherit from `vctrs_error_subscript`", { expect_error(vec_as_location(100, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location("foo", 2L, names = c("bar", "baz")), class = "vctrs_error_subscript") expect_error(vec_as_location(foobar(1L), 2L), class = "vctrs_error_subscript") expect_error(vec_as_location(1.5, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location2(TRUE, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location2(1.5, 2L), class = "vctrs_error_subscript") }) test_that("all OOB errors inherit from `vctrs_error_subscript_oob`", { expect_error(vec_as_location(100, 2L), class = "vctrs_error_subscript_oob") expect_error(vec_as_location("foo", 2L, names = c("bar", "baz")), class = "vctrs_error_subscript_oob") }) test_that("vec_as_location() preserves names if possible", { expect_identical(vec_as_location(c(a = 1L, b = 3L), 3L), c(a = 1L, b = 3L)) expect_identical(vec_as_location(c(a = 1, b = 3), 3L), c(a = 1L, b = 3L)) expect_identical(vec_as_location(c(a = "z", b = "y"), 26L, letters), c(a = 26L, b = 25L)) expect_identical(vec_as_location(c(foo = TRUE, bar = FALSE, baz = TRUE), 3L), c(foo = 1L, baz = 3L)) expect_identical(vec_as_location(c(foo = TRUE), 3L), c(foo = 1L, foo = 2L, foo = 3L)) expect_identical(vec_as_location(c(foo = NA), 3L), c(foo = na_int, foo = na_int, foo = na_int)) # Names of negative selections are dropped expect_identical(vec_as_location(c(a = -1L, b = -3L), 3L), 2L) }) test_that("vec_as_location2() optionally allows missing values", { expect_identical(vec_as_location2(NA, 2L, missing = "propagate"), na_int) expect_error(vec_as_location2(NA, 2L, missing = "error"), class = "vctrs_error_subscript_type") }) test_that("num_as_location2() optionally allows missing and negative locations", { expect_identical(num_as_location2(na_dbl, 2L, missing = "propagate"), na_int) expect_identical(num_as_location2(-1, 2L, negative = "ignore"), -1L) expect_error(num_as_location2(-3, 2L, negative = "ignore"), class = "vctrs_error_subscript_oob") expect_error(num_as_location2(0, 2L, negative = "ignore"), class = "vctrs_error_subscript_type") }) test_that("num_as_location() optionally allows negative indices", { expect_identical(num_as_location(dbl(1, -1), 2L, negative = "ignore"), int(1L, -1L)) expect_error(num_as_location(c(1, -10), 2L, negative = "ignore"), class = "vctrs_error_subscript_oob") }) test_that("num_as_location() optionally forbids negative indices", { expect_snapshot({ (expect_error(num_as_location(dbl(1, -1), 2L, negative = "error"), class = "vctrs_error_subscript_type")) }) expect_error(num_as_location(c(1, -10), 2L, negative = "error"), class = "vctrs_error_subscript_type") }) test_that("num_as_location() optionally ignores zero indices", { expect_identical(num_as_location(c(1, 0), 2L, zero = "ignore"), c(1L, 0L)) }) test_that("num_as_location() optionally forbids zero indices", { expect_snapshot({ (expect_error( num_as_location(0L, 1L, zero = "error"), class = "vctrs_error_subscript_type" )) (expect_error( num_as_location(c(0, 0, 0, 0, 0, 0), 1, zero = "error"), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location() handles NULL", { expect_identical( vec_as_location(NULL, 10), vec_as_location(int(), 10), ) }) test_that("vec_as_location() checks for mix of negative and missing locations", { expect_snapshot({ (expect_error( vec_as_location(-c(1L, NA), 30), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(-c(1L, rep(NA, 10)), 30), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location() checks for mix of negative and positive locations", { expect_snapshot({ (expect_error( vec_as_location(c(-1L, 1L), 30), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(c(-1L, rep(1L, 10)), 30), class = "vctrs_error_subscript_type" )) }) }) test_that("logical subscripts must match size of indexed vector", { expect_snapshot({ (expect_error( vec_as_location(c(TRUE, FALSE), 3), class = "vctrs_error_subscript_size" )) }) }) test_that("character subscripts require named vectors", { expect_snapshot({ (expect_error(vec_as_location(letters[1], 3), "unnamed vector")) }) }) test_that("arg is evaluated lazily (#1150)", { expect_silent(vec_as_location(1, 1, arg = { writeLines("oof"); "boo" })) }) test_that("arg works for complex expressions (#1150)", { expect_error(vec_as_location(mean, 1, arg = paste0("foo", "bar")), "foobar") }) test_that("can optionally extend beyond the end", { expect_error(num_as_location(1:5, 3), class = "vctrs_error_subscript_oob") expect_identical(num_as_location(1:5, 3, oob = "extend"), 1:5) expect_identical(num_as_location(4:5, 3, oob = "extend"), 4:5) expect_snapshot({ (expect_error( num_as_location(3, 1, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1:5, 7), 3, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1:5, 7, 1), 3, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( class = "vctrs_error_subscript_oob", num_as_location(c(1:5, 7, 1, 10), 3, oob = "extend") )) }) }) test_that("can extend beyond the end consecutively but non-monotonically (#1166)", { expect_identical(num_as_location(6:4, 3, oob = "extend"), 6:4) expect_identical(num_as_location(c(1:5, 7, 6), 3, oob = "extend"), c(1:5, 7L, 6L)) expect_identical(num_as_location(c(1, NA, 4, 3), 2, oob = "extend"), c(1L, NA, 4L, 3L)) }) test_that("num_as_location() can optionally remove oob values (#1595)", { expect_identical(num_as_location(c(5, 3, 2, 4), 3, oob = "remove"), c(3L, 2L)) expect_identical(num_as_location(c(-4, 5, 2, -1), 3, oob = "remove", negative = "ignore"), c(2L, -1L)) }) test_that("num_as_location() errors when inverting oob negatives unless `oob = 'remove'` (#1630)", { expect_snapshot(error = TRUE, { num_as_location(-4, 3, oob = "error", negative = "invert") }) expect_snapshot(error = TRUE, { num_as_location(c(-4, 4, 5), 3, oob = "extend", negative = "invert") }) expect_identical(num_as_location(-4, 3, oob = "remove", negative = "invert"), c(1L, 2L, 3L)) expect_identical(num_as_location(c(-4, -2), 3, oob = "remove", negative = "invert"), c(1L, 3L)) }) test_that("num_as_location() generally drops zeros when inverting negatives (#1612)", { expect_identical( num_as_location(c(-3, 0, -1), n = 5L, negative = "invert", zero = "remove"), c(2L, 4L, 5L) ) # Trying to "ignore" and retain the zeroes in the output doesn't make sense, # where would they be placed? Instead, think of the ignored zeros as being # inverted as well, they just don't correspond to any location after the # inversion so they aren't in the output. expect_identical( num_as_location(c(-3, 0, -1, 0), n = 5L, negative = "invert", zero = "ignore"), c(2L, 4L, 5L) ) }) test_that("num_as_location() errors on disallowed zeros when inverting negatives (#1612)", { expect_snapshot(error = TRUE, { num_as_location(c(0, -1), n = 2L, negative = "invert", zero = "error") }) expect_snapshot(error = TRUE, { num_as_location(c(-1, 0), n = 2L, negative = "invert", zero = "error") }) }) test_that("num_as_location() with `oob = 'remove'` doesn't remove missings if they are being propagated", { expect_identical(num_as_location(NA_integer_, 1, oob = "remove"), NA_integer_) }) test_that("num_as_location() with `oob = 'remove'` doesn't remove zeros if they are being ignored", { expect_identical(num_as_location(0, 1, oob = "remove", zero = "ignore"), 0L) expect_identical(num_as_location(0, 0, oob = "remove", zero = "ignore"), 0L) }) test_that("num_as_location() with `oob = 'extend'` doesn't allow ignored oob negative values (#1614)", { # This is fine (ignored negative that is in bounds) expect_identical(num_as_location(c(-5L, 6L), 5L, oob = "extend", negative = "ignore"), c(-5L, 6L)) expect_snapshot(error = TRUE, { # Ignored negatives aren't allowed to extend the vector num_as_location(-6L, 5L, oob = "extend", negative = "ignore") }) expect_snapshot(error = TRUE, { # Ensure error only reports negative indices num_as_location(c(-7L, 6L), 5L, oob = "extend", negative = "ignore") }) expect_snapshot(error = TRUE, { num_as_location(c(-7L, NA), 5L, oob = "extend", negative = "ignore") }) }) test_that("num_as_location() with `oob = 'error'` reports negative and positive oob values", { expect_snapshot(error = TRUE, { num_as_location(c(-6L, 7L), n = 5L, oob = "error", negative = "ignore") }) }) test_that("num_as_location() with `missing = 'remove'` retains names (#1633)", { x <- c(a = 1, b = NA, c = 2, d = NA) expect_named(num_as_location(x, n = 2, missing = "remove"), c("a", "c")) }) test_that("num_as_location() with `zero = 'remove'` retains names (#1633)", { x <- c(a = 1, b = 0, c = 2, d = 0) expect_named(num_as_location(x, n = 2, zero = "remove"), c("a", "c")) }) test_that("num_as_location() with `oob = 'remove'` retains names (#1633)", { x <- c(a = 1, b = 3, c = 2, d = 4) expect_named(num_as_location(x, n = 2, oob = "remove"), c("a", "c")) }) test_that("num_as_location() with `negative = 'invert'` drops names (#1633)", { # The inputs don't map 1:1 to outputs x <- c(a = -1, b = -3) expect_named(num_as_location(x, n = 5), NULL) }) test_that("missing values are supported in error formatters", { expect_snapshot({ (expect_error( num_as_location(c(1, NA, 2, 3), 1), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1, NA, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob" )) }) }) test_that("can disallow missing values", { expect_snapshot({ (expect_error( vec_as_location(c(1, NA), 2, missing = "error"), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(c(1, NA, 2, NA), 2, missing = "error", arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(c(1, NA, 2, NA), 2, missing = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(NA, 1, missing = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(NA, 3, missing = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(c(TRUE, NA, FALSE), 3, missing = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(NA_character_, 2, missing = "error", names = c("x", "y"))), class = "vctrs_error_subscript_type" )) }) }) test_that("can alter logical missing value handling (#1595)", { x <- c(a = TRUE, b = NA, c = FALSE, d = NA) expect_identical( vec_as_location(x, n = 4L, missing = "propagate"), c(a = 1L, b = NA, d = NA) ) expect_identical( vec_as_location(x, n = 4L, missing = "remove"), c(a = 1L) ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 4L, missing = "error") }) # Specifically test size 1 case, which has its own special path x <- c(a = NA) expect_identical( vec_as_location(x, n = 2L, missing = "propagate"), c(a = NA_integer_, a = NA_integer_) ) expect_identical( vec_as_location(x, n = 2L, missing = "remove"), named(integer()) ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 2L, missing = "error") }) }) test_that("can alter character missing value handling (#1595)", { x <- c(NA, "z", NA) names(x) <- c("a", "b", "c") names <- c("x", "z") expect_identical( vec_as_location(x, n = 2L, names = names, missing = "propagate"), set_names(c(NA, 2L, NA), names(x)) ) expect_identical( vec_as_location(x, n = 2L, names = names, missing = "remove"), set_names(2L, "b") ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 2L, names = names, missing = "error") }) }) test_that("can alter integer missing value handling (#1595)", { x <- c(NA, 1L, NA, 3L) names(x) <- c("a", "b", "c", "d") expect_identical( vec_as_location(x, n = 4L, missing = "propagate"), x ) expect_identical( vec_as_location(x, n = 4L, missing = "remove"), c(b = 1L, d = 3L) ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 4L, missing = "error") }) }) test_that("can alter negative integer missing value handling (#1595)", { x <- c(-1L, NA, NA, -3L) expect_snapshot(error = TRUE, { num_as_location(x, n = 4L, missing = "propagate", negative = "invert") }) expect_identical( num_as_location(x, n = 4L, missing = "remove", negative = "invert"), c(2L, 4L) ) expect_snapshot(error = TRUE, { num_as_location(x, n = 4L, missing = "error", negative = "invert") }) }) test_that("missing value character indices never match missing value names (#1489)", { x <- NA_character_ names <- NA_character_ expect_identical(vec_as_location(x, n = 1L, names = names, missing = "propagate"), NA_integer_) expect_identical(vec_as_location(x, n = 1L, names = names, missing = "remove"), integer()) }) test_that("empty string character indices never match empty string names (#1489)", { names <- c("", "y") expect_snapshot(error = TRUE, { vec_as_location("", n = 2L, names = names) }) expect_snapshot(error = TRUE, { vec_as_location(c("", "y", ""), n = 2L, names = names) }) }) test_that("scalar logical `FALSE` and `NA` cases don't modify a shared object (#1633)", { x <- vec_as_location(FALSE, n = 2) expect_identical(x, integer()) y <- vec_as_location(c(a = FALSE), n = 2) expect_identical(y, named(integer())) # Still unnamed expect_identical(x, integer()) x <- vec_as_location(NA, n = 2, missing = "remove") expect_identical(x, integer()) y <- vec_as_location(c(a = FALSE), n = 2, missing = "remove") expect_identical(y, named(integer())) # Still unnamed expect_identical(x, integer()) }) test_that("can customise subscript type errors", { expect_snapshot({ "With custom `arg`" (expect_error( num_as_location(-1, 2, negative = "error", arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( num_as_location2(-1, 2, negative = "error", arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(0, 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(na_dbl, 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(c(1, 2), 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(c(TRUE, FALSE), 3, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_size" )) (expect_error( vec_as_location(c(-1, NA), 3, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(c(-1, 1), 3, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( num_as_location(c(1, 4), 2, oob = "extend", arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(0, 1, zero = "error", arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) "With tibble columns" (expect_error( with_tibble_cols(num_as_location(-1, 2, negative = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(num_as_location2(-1, 2, negative = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location2(0, 2)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location2(na_dbl, 2)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location2(c(1, 2), 2)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(c(TRUE, FALSE), 3)), class = "vctrs_error_subscript_size" )) (expect_error( with_tibble_cols(vec_as_location(c(-1, NA), 3)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(c(-1, 1), 3)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(num_as_location(c(1, 4), 2, oob = "extend")), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_cols(num_as_location(0, 1, zero = "error")), class = "vctrs_error_subscript_type" )) }) }) test_that("can customise OOB errors", { expect_snapshot({ (expect_error( vec_slice(set_names(letters), "foo"), class = "vctrs_error_subscript_oob" )) "With custom `arg`" (expect_error( vec_as_location(30, length(letters), arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_oob" )) (expect_error( vec_as_location("foo", NULL, letters, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_oob" )) "With tibble columns" (expect_error( with_tibble_cols(vec_slice(set_names(letters), "foo")), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_cols(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_cols(vec_slice(set_names(letters), -30)), class = "vctrs_error_subscript_oob" )) "With tibble rows" (expect_error( with_tibble_rows(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_rows(vec_slice(set_names(letters), 1:30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_rows(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob" )) "With tidyselect select" (expect_error( with_tidyselect_select(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_select(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_select(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob" )) "With tidyselect relocate" (expect_error( with_tidyselect_relocate(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_relocate(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_relocate(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob" )) }) }) test_that("num_as_location() requires non-S3 inputs", { expect_error(num_as_location(factor("foo"), 2), "must be a numeric vector") }) test_that("vec_as_location() checks dimensionality", { expect_snapshot({ (expect_error(vec_as_location(matrix(TRUE, nrow = 1), 3L), class = "vctrs_error_subscript_type")) (expect_error(vec_as_location(array(TRUE, dim = c(1, 1, 1)), 3L), class = "vctrs_error_subscript_type")) (expect_error(with_tibble_rows(vec_as_location(matrix(TRUE, nrow = 1), 3L)), class = "vctrs_error_subscript_type")) }) }) test_that("vec_as_location() works with vectors of dimensionality 1", { expect_identical(vec_as_location(array(TRUE, dim = 1), 3L), 1:3) }) test_that("vec_as_location() UI", { expect_snapshot(error = TRUE, vec_as_location(1, 1L, missing = "bogus")) }) test_that("num_as_location() UI", { expect_snapshot(error = TRUE, num_as_location(1, 1L, missing = "bogus")) expect_snapshot(error = TRUE, num_as_location(1, 1L, negative = "bogus")) expect_snapshot(error = TRUE, num_as_location(1, 1L, oob = "bogus")) expect_snapshot(error = TRUE, num_as_location(1, 1L, zero = "bogus")) }) test_that("vec_as_location2() UI", { expect_snapshot(error = TRUE, vec_as_location2(1, 1L, missing = "bogus")) }) test_that("vec_as_location() evaluates arg lazily", { expect_silent(vec_as_location(1L, 1L, arg = print("oof"))) }) test_that("vec_as_location2() evaluates arg lazily", { expect_silent(vec_as_location2(1L, 1L, arg = print("oof"))) expect_silent(vec_as_location2_result(1L, 1L, names = NULL, arg = print("oof"), missing = "error", negative = "error", call = NULL)) }) vctrs/tests/testthat/test-type-bare.R0000644000176200001440000004100114511320527017401 0ustar liggesusers test_that("ptype2 base methods are not inherited", { ptypes <- vec_remove(base_empty_types, c("null", "dataframe")) for (ptype in ptypes) { x <- new_vctr(ptype, class = "foobar", inherit_base_type = TRUE) expect_s3_class(vec_ptype2(x, x), "foobar") expect_error(vec_ptype2(x, ptype), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(ptype, x), class = "vctrs_error_incompatible_type") } }) test_that("cast base methods are not inherited", { ptypes <- vec_remove(base_empty_types, c("null", "dataframe")) for (ptype in ptypes) { x <- new_vctr(ptype, class = "foobar", inherit_base_type = TRUE) expect_s3_class(vec_cast(ptype, x), "foobar") expect_error(vec_cast(x, ptype), class = "vctrs_error_incompatible_type") } }) test_that("default cast allows objects with the same type", { x <- structure(1, class = c("foo", "double")) expect_equal(vec_cast(x, x), x) }) # vec_shaped_ptype ------------------------------------------------------- test_that("array dimensions are preserved", { mat1 <- matrix(lgl(), nrow = 1, ncol = 1) mat2 <- matrix(lgl(), nrow = 2, ncol = 2) mat3 <- matrix(lgl(), nrow = 2, ncol = 3) expect_equal(vec_ptype2(mat1, mat1), matrix(lgl(), nrow = 0, ncol = 1)) expect_equal(vec_ptype2(mat1, mat2), matrix(lgl(), nrow = 0, ncol = 2)) expect_error(vec_ptype2(mat2, mat3), class = "vctrs_error_incompatible_type") }) test_that("vec_shaped_ptype()", { int <- function(...) array(NA_integer_, c(...)) expect_identical(vec_shaped_ptype(integer(), int(5), int(10)), new_shape(integer())) expect_identical(vec_shaped_ptype(integer(), int(5, 1), int(10, 1)), new_shape(integer(), 1)) expect_identical(vec_shaped_ptype(integer(), int(5, 1, 2), int(10, 1, 2)), new_shape(integer(), 1:2)) }) test_that("vec_shaped_ptype() evaluates arg lazily", { expect_silent(vec_shaped_ptype(integer(), int(5), int(10), x_arg = print("oof"))) expect_silent(vec_shaped_ptype(integer(), int(5), int(10), y_arg = print("oof"))) }) # vec_cast() -------------------------------------------------------------- # NULL test_that("NULL is idempotent", { expect_equal(vec_cast(NULL, NULL), NULL) expect_equal(vec_cast(list(1:3), NULL), list(1:3)) }) # Logical test_that("safe casts work as expected", { exp <- lgl(TRUE, FALSE) expect_equal(vec_cast(NULL, logical()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), logical()), exp) expect_equal(vec_cast(int(1L, 0L), logical()), exp) expect_equal(vec_cast(dbl(1, 0), logical()), exp) # These used to be allowed expect_error(vec_cast(chr("T", "F"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("TRUE", "FALSE"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("true", "false"), logical()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(1, 0), logical()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- lgl(NA) to <- lgl() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(lgl(NA)) to_mat <- matrix(lgl()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("lossy casts generate warning", { expect_lossy(vec_cast(int(2L, 1L), lgl()), lgl(TRUE, TRUE), x = int(), to = lgl()) expect_lossy(vec_cast(dbl(2, 1), lgl()), lgl(TRUE, TRUE), x = dbl(), to = lgl()) # These used to be allowed expect_error(vec_cast(chr("x", "TRUE"), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("t", "T"), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(chr("f", "F"), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(c(TRUE, FALSE), TRUE), lgl()), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), logical()), class = "vctrs_error_incompatible_type") }) test_that("dimensionality matches output" ,{ x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) x <- matrix(1, nrow = 2, ncol = 2) expect_error(vec_cast(x, logical()), class = "vctrs_error_incompatible_type") }) test_that("the common type of two `NA` vectors is unspecified", { expect_equal(vec_ptype2(NA, NA), unspecified()) }) # Integer test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, integer()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), integer()), int(1L, 0L)) expect_equal(vec_cast(int(1L, 2L), integer()), int(1L, 2L)) expect_equal(vec_cast(dbl(1, 2), integer()), int(1L, 2L)) # These used to be allowed expect_error(vec_cast(chr("1", "2"), integer()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(1L, 2L), integer()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- int(NA) to <- int() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(int(NA)) to_mat <- matrix(int()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("lossy casts generate error", { expect_lossy(vec_cast(c(2.5, 2), int()), int(2, 2), x = dbl(), to = int()) expect_lossy(vec_cast(c(.Machine$integer.max + 1, 1), int()), int(NA, 1L), x = dbl(), to = int()) expect_lossy(vec_cast(c(-.Machine$integer.max - 1, 1), int()), int(NA, 1L), x = dbl(), to = int()) # These used to be allowed expect_error(vec_cast(c("2.5", "2"), int()), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), integer()), class = "vctrs_error_incompatible_type") }) # Double test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, double()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), double()), dbl(1, 0)) expect_equal(vec_cast(int(1, 0), double()), dbl(1, 0)) expect_equal(vec_cast(dbl(1, 1.5), double()), dbl(1, 1.5)) # These used to be allowed expect_error(vec_cast(chr("1", "1.5"), double()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(1, 1.5), double()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- dbl(NA) to <- dbl() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(dbl(NA)) to_mat <- matrix(dbl()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(factor("a"), double()), class = "vctrs_error_incompatible_type") }) # Complex test_that("safe casts to complex works", { expect_identical(vec_cast(NULL, cpl()), NULL) expect_identical(vec_cast(lgl(TRUE, FALSE), cpl()), cpl(1, 0)) expect_identical(vec_cast(int(1, 0), cpl()), cpl(1, 0)) expect_identical(vec_cast(dbl(1, 1.5), cpl()), cpl(1, 1.5)) # This used to be allowed expect_error(vec_cast(list(1, 1.5), cpl()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { # This goes through a special path for expect_equal(vec_cast(lgl(NA), cpl()), NA_complex_) # TODO: Use our own cast routines here? # It isn't great that this logical `NA` cast returns a different `NA` # than the one above with just `lgl(NA)` (which is seen as unspecified). i.e. # check the `Im()` slot between the two in R >=4.4.0. We can fix this with our # own cast routines rather than using `vec_coerce_bare()`. expect_type(vec_cast(lgl(NA, TRUE), cpl()), "complex") expect_identical(is.na(vec_cast(lgl(NA, TRUE), cpl())), c(TRUE, FALSE)) # TODO: Use our own cast routines here? # `as.complex(NA/NA_real_/NA_integer_)` and `Rf_CoerceVector(NA/NA_real_/NA_integer_)` # have gone back and forth about what they return in the `Im()` slot. In some # R versions they return `0` and in others they return `NA_real_`. # https://stat.ethz.ch/pipermail/r-devel/2023-April/082545.html # https://stat.ethz.ch/pipermail/r-devel/2023-September/082864.html # expect_equal(vec_cast(int(NA), cpl()), NA_complex_) expect_type(vec_cast(int(NA), cpl()), "complex") expect_identical(is.na(vec_cast(int(NA), cpl())), TRUE) # expect_equal(vec_cast(dbl(NA), cpl()), NA_complex_) expect_type(vec_cast(dbl(NA), cpl()), "complex") expect_identical(is.na(vec_cast(dbl(NA), cpl())), TRUE) # This used to be allowed expect_error(vec_cast(list(NA), cpl()), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(NA_complex_) to_mat <- matrix(cpl()) # TODO: Use our own cast routines here? # `as.complex(NA/NA_real_/NA_integer_)` and `Rf_CoerceVector(NA/NA_real_/NA_integer_)` # have gone back and forth about what they return in the `Im()` slot. In some # R versions they return `0` and in others they return `NA_real_`. # https://stat.ethz.ch/pipermail/r-devel/2023-April/082545.html # https://stat.ethz.ch/pipermail/r-devel/2023-September/082864.html # expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_type(vec_cast(mat(lgl(NA)), to_mat), "complex") expect_identical(is.na(vec_cast(mat(lgl(NA)), to_mat)), matrix(TRUE)) # expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_type(vec_cast(mat(int(NA)), to_mat), "complex") expect_identical(is.na(vec_cast(mat(int(NA)), to_mat)), matrix(TRUE)) # expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) expect_type(vec_cast(mat(dbl(NA)), to_mat), "complex") expect_identical(is.na(vec_cast(mat(dbl(NA)), to_mat)), matrix(TRUE)) # This used to be allowed expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("complex is coercible to numeric types", { expect_identical(vec_ptype2(cpl(), NULL), cpl()) expect_identical(vec_ptype2(NULL, cpl()), cpl()) expect_identical(vec_ptype2(cpl(), int()), cpl()) expect_identical(vec_ptype2(int(), cpl()), cpl()) expect_identical(vec_ptype2(cpl(), dbl()), cpl()) expect_identical(vec_ptype2(dbl(), cpl()), cpl()) expect_identical(vec_ptype2(cpl(), cpl()), cpl()) expect_identical(vec_c(0, 1i), cpl(0i, 1i)) }) test_that("complex is not coercible to logical", { expect_error(vec_ptype2(cpl(), lgl()), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(lgl(), cpl()), class = "vctrs_error_incompatible_type") }) # Character test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, character()), NULL) expect_equal(vec_cast(NA, character()), NA_character_) # These used to be allowed expect_error(vec_cast(lgl(TRUE, FALSE), character()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list("x", "y"), character()), class = "vctrs_error_incompatible_type") }) test_that("NA casts work as expected", { exp <- chr(NA) to <- chr() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(chr(NA), to), exp) # These used to be allowed expect_error(vec_cast(int(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(dbl(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(chr(NA)) to_mat <- matrix(chr()) expect_equal(vec_cast(mat(chr(NA)), to_mat), exp_mat) # These used to be allowed expect_error(vec_cast(mat(lgl(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(int(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(dbl(NA)), to_mat), class = "vctrs_error_incompatible_type") expect_error(vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type") }) test_that("difftime does not get special treatment", { dt1 <- as.difftime(600, units = "secs") # This used to be allowed expect_error(vec_cast(dt1, character()), class = "vctrs_error_incompatible_type") }) # Raw test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, raw()), NULL) # This used to be allowed expect_error(vec_cast(list(raw(1)), raw()), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { expect_error(vec_cast(raw(1), double()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(double(1), raw()), class = "vctrs_error_incompatible_type") }) test_that("can sort raw", { x <- as.raw(c(3, 1, 2, 4)) expect_identical(vec_order(x), int(2, 3, 1, 4)) expect_identical(x[vec_order(x)], as.raw(1:4)) }) test_that("raw has informative type summaries", { expect_equal(vec_ptype_abbr(raw()), "raw") expect_equal(vec_ptype_full(raw()), "raw") }) test_that("can provide common type with raw", { local_methods( vec_ptype2.raw.vctrs_foobar = function(...) "dispatched-left", vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.raw = function(...) "dispatched-right" ) expect_identical(vec_ptype2(raw(), foobar("")), "dispatched-left") expect_identical(vec_ptype2(foobar(""), raw()), "dispatched-right") }) # Lists test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, list()), NULL) expect_equal(vec_cast(NA, list()), list(NULL)) expect_equal(vec_cast(list(1L, 2L), list()), list(1L, 2L)) # This used to be allowed expect_error(vec_cast(1:2, list()), class = "vctrs_error_incompatible_type") }) test_that("dimensionality matches to" ,{ x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1L, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) }) test_that("data frames are cast to list row wise (#639)", { x <- data.frame(x = 1:2, row.names = c("a", "b")) expect <- list(data.frame(x = 1L), data.frame(x = 2L)) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) test_that("data frames can be cast to shaped lists", { to <- array(list(), dim = c(0, 2, 1)) x <- data.frame(x = 1:2, y = 3:4) expect <- list(vec_slice(x, 1), vec_slice(x, 2)) expect <- array(expect, dim = c(2, 2, 1)) # This used to be allowed expect_error(vec_cast(x, to), class = "vctrs_error_incompatible_type") }) test_that("Casting atomic `NA` values to list results in a `NULL`", { x <- c(NA, 1) expect <- list(NULL, 1) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) test_that("Casting data frame `NA` rows to list results in a `NULL`", { x <- data.frame(x = c(NA, NA, 1), y = c(NA, 1, 2)) expect <- list(NULL, vec_slice(x, 2), vec_slice(x, 3)) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) # Unspecified test_that("unspecified can be cast to bare methods", { for (x in vectors[-4]) { expect_identical(vec_cast(unspecified(3), x), vec_init(x, 3)) } }) vctrs/tests/testthat/test-print-str.R0000644000176200001440000000026014276722575017476 0ustar liggesusers test_that("show attributes", { x <- structure(1:100, x = "a string", y = 1:20, z = data.frame(x = 1:3)) expect_snapshot(obj_str(x)) expect_snapshot(obj_str(mtcars)) }) vctrs/tests/testthat/helper-order.R0000644000176200001440000000123514276722575017152 0ustar liggesusers# Keep in sync with macros in `order.c` GROUP_DATA_SIZE_DEFAULT <- 100000L ORDER_INSERTION_BOUNDARY <- 128L INT_ORDER_COUNTING_RANGE_BOUNDARY <- 100000L # Force radix method for character comparisons base_order <- function(x, na.last = TRUE, decreasing = FALSE) { if (is.data.frame(x)) { x <- unname(x) } else { x <- list(x) } args <- list(na.last = na.last, decreasing = decreasing) # `method` didn't exist on R < 3.3. # It would sometimes use radix sorting automatically. if (getRversion() < "3.3.0") { method <- list() } else { method <- list(method = "radix") } args <- c(x, args, method) rlang::exec("order", !!!args) } vctrs/tests/testthat/test-lifecycle-deprecated.R0000644000176200001440000000275614401377400021564 0ustar liggesusers test_that("vec_as_index() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical(vec_as_index(-2, 10), vec_as_location(-2, 10)) expect_identical( vec_as_index("cyl", length(mtcars), names(mtcars)), vec_as_location("cyl", length(mtcars), names(mtcars)) ) }) test_that("vec_repeat() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical(vec_repeat(1:2, times = 2), vec_rep(1:2, 2)) expect_identical(vec_repeat(1:2, each = 2), vec_rep_each(1:2, 2)) }) test_that("vec_unchop() is soft-deprecated", { local_options(lifecycle_verbosity = "warning") expect_snapshot(vec_unchop(list(1), indices = list(1))) }) test_that("vec_unchop() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical( vec_unchop(list(1L, 2:3), indices = list(2, c(3, 1))), c(3L, 1L, 2L) ) }) test_that("vec_equal_na() is soft-deprecated", { local_options(lifecycle_verbosity = "warning") expect_snapshot(vec_equal_na(c(1, NA))) }) test_that("vec_equal_na() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical( vec_equal_na(c(1, NA, 2, NA)), c(FALSE, TRUE, FALSE, TRUE) ) }) test_that("vec_is_list() still works", { expect_false(vec_is_list(1)) expect_true(vec_is_list(list())) }) test_that("vec_check_list() still works", { my_check <- function(x) vec_check_list(x) expect_snapshot(error = TRUE, { vec_check_list(1) }) expect_snapshot(error = TRUE, { my_check(1) }) }) vctrs/tests/testthat/helper-memory.R0000644000176200001440000000022714276722575017347 0ustar liggesusersmaybe_shared_col <- function(x, i) { .Call(vctrs_maybe_shared_col, x, i) } new_df_unshared_col <- function() { .Call(vctrs_new_df_unshared_col) } vctrs/tests/testthat/helper-expectations.R0000644000176200001440000000472614362266120020536 0ustar liggesusersexpect_dim <- function(x, shape) { dim <- dim2(x) expect_equal(dim, !!shape) } expect_lossy <- function(expr, result, x = NULL, to = NULL) { expr <- enquo(expr) expect_error(eval_tidy(expr), class = "vctrs_error_cast_lossy") out <- allow_lossy_cast(eval_tidy(expr), x_ptype = x, to_ptype = to) expect_identical(!!out, !!result) } expect_args <- function(x, y, x_arg, y_arg) { err <- catch_cnd(vec_ptype2(x, y, x_arg = x_arg, y_arg = y_arg), classes = "vctrs_error_incompatible_type") expect_true(!is_null(err)) expect_true(grepl(paste0("combine `", x_arg, "`"), err$message, fixed = TRUE)) expect_true(grepl(paste0("and `", y_arg, "`"), err$message, fixed = TRUE)) expect_identical(list(err$x_arg, err$y_arg), list(x_arg, y_arg)) } # Work around deparsing of !! on old versions of R as_label2 <- function(expr) { expr <- duplicate(expr, shallow = FALSE) label <- as_label(fix_bang(expr)) label <- gsub("+++", "!!!", label, fixed = TRUE) label <- gsub("++", "!!", label, fixed = TRUE) label } fix_bang <- function(expr) { curr <- expr while (!is_null(curr)) { car <- node_car(curr) if (is_triple_bang(car)) { replace_triple_bang(car) } else if (is_double_bang(car)) { replace_double_bang(car) } else if (is_call(car)) { node_poke_car(curr, fix_bang(car)) } curr <- node_cdr(curr) } expr } is_double_bang <- function(expr) { is_call(expr, "!") && is_call(node_cadr(expr), "!") } is_triple_bang <- function(expr) { is_double_bang(expr) && is_call(node_cadr(node_cadr(expr)), "!") } replace_double_bang <- function(expr) { node_poke_car(expr, sym("+")) node_poke_car(node_cadr(expr), sym("+")) } replace_triple_bang <- function(expr) { replace_double_bang(expr) node_poke_car(node_cadr(node_cadr(expr)), sym("+")) } expect_syntactic <- function(name, exp_syn_name) { expect_identical( syn_name <- make_syntactic(name), exp_syn_name ) expect_identical(syn_name, make.names(syn_name)) } expect_error_cnd <- function(object, class, message = NULL, ..., .fixed = TRUE) { cnd <- expect_error(object, regexp = message, class = class[[1]], fixed = .fixed) expect_true(inherits_all(cnd, class)) exp_fields <- list2(...) expect_true(is_empty(setdiff(!!names(exp_fields), names(cnd)))) expect_equal(cnd[names(exp_fields)], exp_fields) } scrub_internal_error_line_number <- function(x) { # Because it varies by OS sub(pattern = "at line [[:digit:]]+", replacement = "at line ", x = x) } vctrs/tests/testthat/helper-size.R0000644000176200001440000000014114520724751016772 0ustar liggesusers expect_size <- function(object, n) { expect_identical(vec_size(object), vec_cast(n, int())) } vctrs/tests/testthat/test-assert.R0000644000176200001440000003577614404336165017046 0ustar liggesusers test_that("basic assert is idempotent", { x <- new_vctr(1:4) expect_true(vec_is(x)) expect_identical(vec_assert(x), x) expect_identical(vec_assert(x), new_vctr(1:4)) expect_false(withVisible(vec_assert(x))$visible) expect_true(vec_is(1:4)) expect_identical(vec_assert(1:4), 1:4) }) test_that("asserting ptype", { x <- new_vctr(1:4) good <- new_vctr(integer()) expect_true(vec_is(x, good)) expect_error(vec_assert(x, good), NA) # Is this the correct error message? bad <- new_vctr(double()) expect_false(vec_is(x, bad)) expect_error(vec_assert(x, bad), class = "vctrs_error_assert_ptype") }) test_that("asserting size", { x <- new_vctr(1:4) expect_true(vec_is(x, size = 4)) expect_error(vec_assert(x, size = 4), NA) expect_false(vec_is(x, size = 5)) expect_error(vec_assert(x, size = 5), class = "vctrs_error_assert_size") }) test_that("vec_assert() labels input", { expect_error( vec_assert(new_vctr(1:4), size = 5), regexp = "`new_vctr\\(1:4\\)` must have", class = "vctrs_error_assert_size" ) expect_error( vec_assert(new_vctr(1:4), size = 5, arg = "foobar"), regexp = "`foobar` must have", class = "vctrs_error_assert_size" ) }) test_that("bare atomic vectors are vectors but not recursive", { expect_true(obj_is_vector(TRUE)) expect_true(obj_is_vector(1L)) expect_true(obj_is_vector(1)) expect_true(obj_is_vector(1i)) expect_true(obj_is_vector("foo")) expect_true(obj_is_vector(as.raw(1))) }) test_that("S3 atomic vectors are vectors", { expect_true(obj_is_vector(foobar(TRUE))) expect_true(obj_is_vector(foobar(1L))) expect_true(obj_is_vector(foobar(1))) expect_true(obj_is_vector(foobar(1i))) expect_true(obj_is_vector(foobar("foo"))) expect_true(obj_is_vector(foobar(as.raw(1)))) }) test_that("bare lists are vectors", { expect_true(obj_is_vector(list())) }) test_that("S3 lists are not vectors by default", { expect_false(obj_is_vector(foobar())) expect_false(obj_is_list(foobar())) local_foobar_proxy() # TODO: These seem inconsistent. # Should we require that S3 list proxies satisfy `obj_is_list()`? # (i.e. unclass themselves or explicitly inherit from `"list"`?) expect_true(obj_is_vector(foobar())) expect_false(obj_is_list(foobar())) }) test_that("data frames and records are vectors", { expect_true(obj_is_vector(mtcars)) expect_true(obj_is_vector(new_rcrd(list(x = 1, y = 2)))) }) test_that("non-vector base types are scalars", { expect_identical(vec_typeof(quote(foo)), "scalar") expect_identical(vec_typeof(pairlist("")), "scalar") expect_identical(vec_typeof(function() NULL), "scalar") expect_identical(vec_typeof(env()), "scalar") expect_identical(vec_typeof(quote(foo)), "scalar") expect_identical(vec_typeof(~foo), "scalar") expect_identical(vec_typeof(base::`{`), "scalar") expect_identical(vec_typeof(base::c), "scalar") expect_identical(vec_typeof(expression()), "scalar") expect_false(obj_is_vector(quote(foo))) expect_false(obj_is_vector(pairlist(""))) expect_false(obj_is_vector(function() NULL)) expect_false(obj_is_vector(env())) expect_false(obj_is_vector(~foo)) expect_false(obj_is_vector(base::`{`)) expect_false(obj_is_vector(base::c)) expect_false(obj_is_vector(expression())) expect_false(vec_is(quote(foo))) expect_false(vec_is(pairlist(""))) expect_false(vec_is(function() NULL)) expect_false(vec_is(env())) expect_false(vec_is(~foo)) expect_false(vec_is(base::`{`)) expect_false(vec_is(base::c)) expect_false(vec_is(expression())) expect_error(vec_assert(quote(foo)), class = "vctrs_error_scalar_type") expect_error(vec_assert(pairlist("")), class = "vctrs_error_scalar_type") expect_error(vec_assert(function() NULL), class = "vctrs_error_scalar_type") expect_error(vec_assert(env()), class = "vctrs_error_scalar_type") expect_error(vec_assert(~foo), class = "vctrs_error_scalar_type") expect_error(vec_assert(base::`{`), class = "vctrs_error_scalar_type") expect_error(vec_assert(base::c), class = "vctrs_error_scalar_type") expect_error(vec_assert(expression()), class = "vctrs_error_scalar_type") }) test_that("non-vector types can be proxied", { x <- new_proxy(1:3) expect_identical(vec_typeof(x), "scalar") expect_false(obj_is_vector(x)) expect_false(vec_is(x)) expect_error(vec_assert(x), class = "vctrs_error_scalar_type") local_env_proxy() expect_identical(vec_typeof(x), "integer") expect_true(obj_is_vector(x)) expect_true(vec_is(x)) expect_error(regexp = NA, vec_assert(x)) }) test_that("obj_check_vector() is silent on vectors", { expect_null(obj_check_vector(1)) expect_null(obj_check_vector(data_frame())) }) test_that("obj_check_vector() errors on scalars", { expect_snapshot(error = TRUE, { obj_check_vector(quote(foo)) }) expect_snapshot(error = TRUE, { obj_check_vector(foobar()) }) }) test_that("obj_check_vector() error respects `arg` and `call`", { my_check_vector <- function(foo) { obj_check_vector(foo) } expect_snapshot(error = TRUE, { my_check_vector(foobar()) }) }) test_that("vec_assert() uses friendly type in error messages", { # Friendly type will be generated in rlang in the future. Upstream # changes should not cause CRAN failures. skip_on_cran() expect_error(vec_assert(function() NULL), class = "vctrs_error_scalar_type") }) test_that("vec_typeof() handles all types", { for (i in seq_along(empty_types)) { expect_identical(vec_typeof(!!empty_types[[i]]), !!names(empty_types)[[i]]) } }) test_that("bare prototypes don't act as partial types", { expect_false(vec_is(foobar(1), dbl())) expect_error(vec_assert(foobar(1), dbl()), class = "vctrs_error_assert_ptype") }) test_that("data frames are always classified as such even when dispatch is off", { expect_identical(vec_typeof_bare(mtcars), "dataframe") }) test_that("assertion is not applied on proxy", { local_methods( vec_proxy.vctrs_foobar = function(x, ...) unclass(x), vec_restore.vctrs_foobar = function(x, ...) foobar(x), `[.vctrs_foobar` = function(x, i) vec_slice(x, i) ) x <- foobar(list()) expect_true(vec_is(x, x)) expect_false(vec_is(x, list())) expect_error(vec_assert(x, list()), class = "vctrs_error_assert_ptype") expect_error(vec_assert(x, x), regexp = NA) }) test_that("attributes of unclassed vectors are asserted", { x <- structure(FALSE, foo = "bar") y <- structure(TRUE, foo = "bar") expect_false(vec_is(x, FALSE)) expect_false(vec_is(FALSE, x)) expect_true(vec_is(y, x)) expect_true(vec_is(x, y)) }) test_that("unspecified is finalised before assertion", { expect_true(vec_is(NA, TRUE)) expect_true(vec_is(data.frame(x = NA), data.frame(x = lgl()))) expect_error(regexp = NA, vec_assert(NA, TRUE)) expect_error(regexp = NA, vec_assert(data.frame(x = NA), data.frame(x = lgl()))) }) test_that("assertion failures are explained", { local_no_stringsAsFactors() local_options(rlang_backtrace_on_error = "none") expect_snapshot(error = TRUE, vec_assert(lgl(), chr())) expect_snapshot(error = TRUE, vec_assert(lgl(), factor())) expect_snapshot(error = TRUE, vec_assert(lgl(), factor(levels = "foo"))) expect_snapshot(error = TRUE, vec_assert(factor(levels = "bar"), factor(levels = "foo"))) expect_snapshot(error = TRUE, vec_assert(factor(), chr())) expect_snapshot(error = TRUE, vec_assert(lgl(), data.frame())) expect_snapshot(error = TRUE, vec_assert(lgl(), data.frame(x = 1))) expect_snapshot(error = TRUE, vec_assert(lgl(), data.frame(x = 1, y = 2))) expect_snapshot(error = TRUE, vec_assert(data.frame(), chr())) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1), chr())) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1), data.frame(x = "foo"))) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1), data.frame(x = "foo", y = 2))) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1, y = 2), chr())) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo"))) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo", y = 2))) }) test_that("vec_assert() validates `size` (#1470)", { expect_snapshot({ (expect_error(vec_assert(1, size = c(2, 3)))) (expect_error(vec_assert(1, size = 1.5))) (expect_error(vec_assert(1, size = "x"))) }) }) test_that("NULL is not a vector", { expect_false(obj_is_vector(NULL)) expect_false(vec_is(NULL)) }) test_that("names and row names do not influence type identity (#707)", { expect_true(vec_is(c(a = TRUE), logical())) expect_true(vec_is(TRUE, c(a = TRUE))) expect_true(vec_is(structure(mtcars, row.names = 1:32), mtcars)) expect_true(vec_is(mtcars, structure(mtcars, row.names = 1:32))) }) # vec_check_size -------------------------------------------------------- test_that("vec_check_size() is silent if the size is right", { expect_null(vec_check_size(1:5, size = 5L)) expect_null(vec_check_size(data_frame(.size = 10L), size = 10L)) }) test_that("vec_check_size() errors on the wrong size", { expect_snapshot(error = TRUE, { vec_check_size(1:5, size = 1L) }) expect_snapshot(error = TRUE, { vec_check_size(1:5, size = 10L) }) }) test_that("vec_check_size() errors on scalars", { expect_snapshot(error = TRUE, { vec_check_size(quote(foo), size = 1L) }) expect_snapshot(error = TRUE, { vec_check_size(foobar(), size = 1L) }) }) test_that("vec_check_size() error respects `arg` and `call`", { my_check_size <- function(foo, size) { vec_check_size(foo, size) } expect_snapshot(error = TRUE, { my_check_size(1L, size = 5L) }) expect_snapshot(error = TRUE, { my_check_size(foobar(), size = 5L) }) }) test_that("vec_check_size() validates `size`", { expect_snapshot(error = TRUE, { vec_check_size(1, size = "x") }) expect_snapshot(error = TRUE, { vec_check_size(1, size = c(1L, 2L)) }) expect_snapshot(error = TRUE, { vec_check_size(1, size = 1.5) }) }) # obj_is_list ----------------------------------------------------------- test_that("bare lists are lists", { expect_true(obj_is_list(list())) }) test_that("AsIs lists are lists (#1463)", { expect_true(obj_is_list(I(list()))) expect_true(obj_is_list(I(list_of(1)))) expect_false(obj_is_list(I(double()))) }) test_that("list_of are lists", { expect_true(obj_is_list(new_list_of())) }) test_that("Vectors with a non-VECSXP type are not lists", { expect_false(obj_is_list(1)) expect_false(obj_is_list("a")) expect_false(obj_is_list(quote(name))) }) test_that("explicitly classed lists are lists", { x <- structure(list(), class = "list") expect_true(obj_is_list(x)) expect_true(obj_is_list(subclass(x))) }) test_that("explicit inheritance must be in the base class", { x <- structure(1:2, class = c("list", "foobar")) expect_false(obj_is_list(x)) }) test_that("POSIXlt are not considered a list", { expect_false(obj_is_list(as.POSIXlt(new_datetime()))) }) test_that("rcrd types are not lists", { expect_false(obj_is_list(new_rcrd(list(x = 1)))) }) test_that("scalars are not lists", { expect_false(obj_is_list(foobar())) }) test_that("S3 types can't lie about their internal representation", { x <- structure(1:2, class = c("foobar", "list")) expect_false(obj_is_list(x)) }) test_that("data frames of all types are not lists", { expect_false(obj_is_list(data.frame())) expect_false(obj_is_list(subclass(data.frame()))) expect_false(obj_is_list(tibble::tibble())) }) test_that("S3 list with non-list proxy is still a list (#1208)", { x <- structure(list(), class = c("foobar", "list")) local_methods(vec_proxy.foobar = function(x) 1) # This used to be an error (#1003) # expect_error(obj_is_list(x), "`x` inherits") expect_true(obj_is_list(x)) }) test_that("list-rcrds with data frame proxies are considered lists (#1208)", { x <- structure( list(1:2, "x"), special = c("a", "b"), class = c("list_rcrd", "list") ) local_methods( vec_proxy.list_rcrd = function(x) { special <- attr(x, "special") data <- unstructure(x) new_data_frame(list(data = data, special = special)) } ) expect_true(obj_is_list(x)) }) test_that("list_all_vectors() works", { expect_true(list_all_vectors(list(1))) expect_true(list_all_vectors(list_of(1))) expect_false(list_all_vectors(list(1, env()))) expect_snapshot((expect_error(list_all_vectors(env())))) }) test_that("obj_check_list() works", { expect_null(obj_check_list(list(1))) expect_null(obj_check_list(list_of(1))) expect_snapshot({ my_function <- function(my_arg) obj_check_list(my_arg) (expect_error(my_function(env()))) }) }) test_that("obj_check_list() uses a special error when `arg` is the empty string (#1604)", { expect_snapshot(error = TRUE, { obj_check_list(1, arg = "") }) }) test_that("obj_check_list() and list_check_all_vectors() work", { expect_null(list_check_all_vectors(list())) expect_null(list_check_all_vectors(list(1, mtcars))) expect_snapshot({ my_function <- function(my_arg) list_check_all_vectors(my_arg) (expect_error(my_function(env()))) (expect_error(my_function(list(1, env())))) (expect_error(my_function(list(1, name = env())))) (expect_error(my_function(list(1, foo = env())))) }) }) test_that("list_all_size() works", { expect_true(list_all_size(list(), 2)) expect_true(list_all_size(list(integer()), 0)) expect_true(list_all_size(list(NULL), 0)) expect_true(list_all_size(list(1:2, 2:3), 2)) expect_false(list_all_size(list(1:2, 1:3), 2)) expect_false(list_all_size(list(NULL, 1:2), 2)) expect_true(list_all_size(list_of(1:3, 2:4), 3)) expect_false(list_all_size(list_of(1:3, 2:4), 4)) }) test_that("list_check_all_size() works", { expect_null(list_check_all_size(list(), 2)) expect_null(list_check_all_size(list(integer()), 0)) expect_null(list_check_all_size(list(NULL), 0)) expect_null(list_check_all_size(list(1:2, 2:3), 2)) expect_snapshot({ my_function <- function(my_arg, size) list_check_all_size(my_arg, size) # Validates sizes (expect_error(list_check_all_size(list(1:2, 1:3), 2))) (expect_error(my_function(list(1:2, 1:3), 2))) # `NULL` is not ignored (expect_error(my_function(list(NULL, 1:2), 2))) }) }) test_that("list_all_size() and list_check_all_size() error on scalars", { x <- list(env()) expect_snapshot({ # Error considered internal to `list_all_size()` (expect_error(list_all_size(x, 2))) my_function <- function(my_arg, size) list_check_all_size(my_arg, size) (expect_error(my_function(x, 2))) }) }) test_that("list_all_size() and list_check_all_size() throw error using internal call on non-list input", { expect_snapshot({ (expect_error(list_all_size(1, 2))) # `arg` and `call` are ignored (expect_error(list_check_all_size(1, 2, arg = "arg", call = call("foo")))) }) }) test_that("list_all_size() and list_check_all_size() validate `size`", { expect_snapshot({ (expect_error(list_all_size(list(), size = "x"))) (expect_error(list_check_all_size(list(), size = "x"))) }) }) test_that("informative messages when 1d array doesn't match vector", { x <- array(1:3) expect_snapshot((expect_error(vec_assert(x, int())))) }) vctrs/tests/testthat/test-slice-assign.R0000644000176200001440000005556614511320527020117 0ustar liggesusers test_that("slice-assign throws error with non-vector inputs", { x <- environment() expect_error(vec_slice(x, 1L) <- 1L, class = "vctrs_error_scalar_type") }) test_that("slice-assign throws error with non-vector `value`", { x <- 1L expect_error(vec_slice(x, 1L) <- NULL, class = "vctrs_error_scalar_type") expect_error(vec_slice(x, 1L) <- environment(), class = "vctrs_error_scalar_type") }) test_that("can slice-assign NULL", { x <- NULL vec_slice(x, 1L) <- 1 expect_identical(x, NULL) }) test_that("can slice-assign base vectors", { x <- rep(FALSE, 3) vec_slice(x, 2) <- TRUE expect_identical(x, lgl(FALSE, TRUE, FALSE)) x <- rep(0L, 3) vec_slice(x, 2) <- 1L expect_identical(x, int(0L, 1L, 0L)) x <- rep(0., 3) vec_slice(x, 2) <- 1 expect_identical(x, dbl(0, 1, 0)) x <- rep(0i, 3) vec_slice(x, 2) <- 1i expect_identical(x, cpl(0i, 1i, 0i)) x <- rep("", 3) vec_slice(x, 2) <- "foo" expect_identical(x, chr("", "foo", "")) x <- as.raw(rep(0, 3)) vec_slice(x, 2) <- as.raw(1) expect_identical(x, as.raw(c(0, 1, 0))) }) test_that("can assign base vectors", { x <- rep(FALSE, 3) expect_identical(vec_assign(x, 2, TRUE), lgl(FALSE, TRUE, FALSE)) expect_identical(x, rep(FALSE, 3)) x <- rep(0L, 3) expect_identical(vec_assign(x, 2, 1L), int(0L, 1L, 0L)) expect_identical(x, rep(0L, 3)) x <- rep(0., 3) expect_identical(vec_assign(x, 2, 1), dbl(0, 1, 0)) expect_identical(x, rep(0., 3)) x <- rep(0i, 3) expect_identical(vec_assign(x, 2, 1i), cpl(0i, 1i, 0i)) expect_identical(x, rep(0i, 3)) x <- rep("", 3) expect_identical(vec_assign(x, 2, "foo"), chr("", "foo", "")) expect_identical(x, rep("", 3)) x <- as.raw(rep(0, 3)) expect_identical(vec_assign(x, 2, as.raw(1)), as.raw(c(0, 1, 0))) expect_identical(x, as.raw(rep(0, 3))) }) test_that("can assign shaped base vectors", { mat <- as.matrix x <- mat(rep(FALSE, 3)) expect_identical(vec_assign(x, 2, TRUE), mat(lgl(FALSE, TRUE, FALSE))) expect_identical(x, mat(rep(FALSE, 3))) x <- mat(rep(0L, 3)) expect_identical(vec_assign(x, 2, 1L), mat(int(0L, 1L, 0L))) expect_identical(x, mat(rep(0L, 3))) x <- mat(rep(0, 3)) expect_identical(vec_assign(x, 2, 1), mat(dbl(0, 1, 0))) expect_identical(x, mat(rep(0, 3))) x <- mat(rep(0i, 3)) expect_identical(vec_assign(x, 2, 1i), mat(cpl(0i, 1i, 0i))) expect_identical(x, mat(rep(0i, 3))) x <- mat(rep("", 3)) expect_identical(vec_assign(x, 2, "foo"), mat(chr("", "foo", ""))) expect_identical(x, mat(rep("", 3))) x <- mat(as.raw(rep(0, 3))) expect_identical(vec_assign(x, 2, as.raw(1)), mat(as.raw(c(0, 1, 0)))) expect_identical(x, mat(as.raw(rep(0, 3)))) }) test_that("can slice-assign lists", { x <- rep(list(NULL), 3) vec_slice(x, 2) <- list(NA) expect_identical(x, list(NULL, NA, NULL)) }) test_that("can slice-assign shaped lists", { mat <- as.matrix x <- mat(rep(list(NULL), 3)) vec_slice(x, 2) <- list(NA) expect_identical(x, mat(list(NULL, NA, NULL))) }) test_that("can assign lists", { x <- rep(list(NULL), 3) expect_identical(vec_assign(x, 2, list(NA)), list(NULL, NA, NULL)) expect_identical(x, rep(list(NULL), 3)) }) test_that("can assign shaped lists", { mat <- as.matrix x <- mat(rep(list(NULL), 3)) expect_identical(vec_assign(x, 2, list(NA)), mat(list(NULL, NA, NULL))) expect_identical(x, mat(rep(list(NULL), 3))) }) test_that("can assign object of any dimensionality", { x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_identical(vec_assign(x1, 1L, 2L), array(rep(c(2, 1), 1), dim = 2)) expect_identical(vec_assign(x2, 1L, 2L), array(rep(c(2, 1), 3), dim = c(2, 3))) expect_identical(vec_assign(x3, 1L, 2L), array(rep(c(2, 1), 12), dim = c(2, 3, 4))) expect_identical(vec_assign(x4, 1L, 2L), array(rep(c(2, 1), 60), dim = c(2, 3, 4, 5))) }) test_that("atomics can't be assigned in lists", { x <- list(NULL) expect_error(vec_slice(x, 1) <- 1, class = "vctrs_error_incompatible_type") expect_error(vec_assign(x, 1, 2), class = "vctrs_error_incompatible_type") expect_error(vec_slice(x, 1) <- "foo", class = "vctrs_error_incompatible_type") expect_error(vec_assign(x, 1, "foo"), class = "vctrs_error_incompatible_type") }) test_that("Unspecified `NA` vector can be assigned into lists", { x <- list(1, 2) vec_slice(x, 1) <- NA expect_identical(x, list(NULL, 2)) }) test_that("monitoring test - unspecified() can be assigned in lists", { x <- list(1, 2) expect_error(vec_slice(x, 1) <- unspecified(1), NA) expect_equal(x, list(NULL, 2)) }) test_that("can assign and slice-assign data frames", { df <- data.frame(x = 1:2) df$y <- data.frame(a = 2:1) orig <- duplicate(df, shallow = FALSE) other <- data.frame(x = 3) other$y <- data.frame(a = 3) exp <- data.frame(x = int(3, 2)) exp$y <- data.frame(a = int(3, 1)) expect_identical(vec_assign(df, 1, other), exp) expect_identical(df, orig) vec_slice(df, 1) <- other expect_identical(df, exp) }) test_that("can slice-assign using logical index", { x <- c(2, 1) vec_slice(x, TRUE) <- 3 expect_equal(x, c(3, 3)) vec_slice(x, c(TRUE, FALSE)) <- 4 expect_equal(x, c(4, 3)) expect_error( vec_assign(x, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size" ) expect_error( vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size" ) }) test_that("slice-assign ignores NA in logical subsetting", { x <- c(NA, 1, 2) expect_equal(`vec_slice<-`(x, x > 0, 1), c(NA, 1, 1)) expect_equal(`vec_slice<-`(x, x > 0, c(NA, 2:1)), c(NA, 2, 1)) }) test_that("slice-assign with arrays ignores NA in logical subsetting", { mat <- as.matrix x <- c(NA, 1, 2) expect_equal(`vec_slice<-`(mat(x), x > 0, 1), mat(c(NA, 1, 1))) expect_equal(`vec_slice<-`(mat(x), x > 0, c(NA, 2:1)), mat(c(NA, 2, 1))) }) test_that("slice-assign ignores NA in integer subsetting", { x <- 0:2 expect_equal(`vec_slice<-`(x, c(NA, 2:3), 1), c(0, 1, 1)) expect_equal(`vec_slice<-`(x, c(NA, 2:3), c(NA, 2:1)), c(0, 2, 1)) }) test_that("slice-assign with arrays ignores NA in integer subsetting", { mat <- as.matrix x <- mat(0:2) expect_equal(`vec_slice<-`(x, c(NA, 2:3), 1), mat(c(0, 1, 1))) expect_equal(`vec_slice<-`(x, c(NA, 2:3), c(NA, 2:1)), mat(c(0, 2, 1))) }) test_that("can't modify subset with missing argument", { x <- 1:3 expect_error(vec_slice(x, ) <- 2L) }) test_that("can modify subset with recycled NA argument", { x <- 1:3 vec_slice(x, NA) <- 2L expect_identical(x, 1:3) }) test_that("can modify subset with recycled TRUE argument", { x <- 1:3 vec_slice(x, TRUE) <- 2L expect_identical(x, rep(2L, 3)) }) test_that("can modify subset with recycled FALSE argument", { x <- 1:3 vec_slice(x, FALSE) <- 2L expect_identical(x, 1:3) }) test_that("can modify subset with NULL argument", { x <- 1:3 vec_slice(x, NULL) <- 2L expect_identical(x, 1:3) }) test_that("can slice-assign with missing indices", { x <- 1:3 y <- 4:6 test <- c(NA, TRUE, FALSE) vec_slice(x, test) <- vec_slice(y, test) expect_identical(x, int(1, 5, 3)) }) test_that("slice-assign checks vectorness", { x <- foobar(list(1)) expect_error(vec_slice(x, 1) <- 10, class = "vctrs_error_scalar_type") }) test_that("a coercible RHS is cast to LHS before assignment (#140)", { x <- 1:2 expect_error(vec_slice(x, 1) <- "1", class = "vctrs_error_incompatible_type") x <- c("foo", "bar") expect_error(vec_slice(x, 1) <- 1, class = "vctrs_error_incompatible_type") x <- 1:2 expect_error(vec_slice(x, 1) <- 3.5, class = "vctrs_error_cast_lossy") allow_lossy_cast(vec_slice(x, 1) <- 3.5) expect_identical(x, int(3, 2)) x <- matrix(1:4, 2) vec_slice(x, 1) <- matrix(c(FALSE, FALSE), 1) expect_identical(x, matrix(int(0, 2, 0, 4), 2)) expect_error(vec_assign(x, 1, matrix(c("", ""), 1)), class = "vctrs_error_incompatible_type") }) test_that("slice-assign takes the proxy", { local_proxy() x <- new_proxy(1:3) y <- new_proxy(20:21) vec_slice(x, 2:3) <- y expect_identical(proxy_deref(x), int(1, 20, 21)) }) test_that("can use names to vec_slice<-() a named object", { x0 <- c(a = 1, b = 2) x1 <- c(a = 1, a = 2) vec_slice(x0, "b") <- 3 expect_identical(x0, c(a = 1, b = 3)) vec_slice(x1, "a") <- 3 expect_identical(x1, c(a = 3, a = 2)) }) test_that("can't use names to vec_slice<-() an unnamed object", { x0 <- 1:3 expect_error( vec_slice(x0, letters[1]) <- 4L, "Can't use character names to index an unnamed vector.", fixed = TRUE ) expect_error( vec_slice(x0, letters[25:27]) <- 5L, "Can't use character names to index an unnamed vector.", fixed = TRUE ) }) test_that("slice-assign falls back to `[<-` when proxy is not implemented", { obj <- foobar(c("foo", "bar", "baz")) expect_error(vec_slice(obj, 1:2) <- TRUE, class = "vctrs_error_incompatible_type") vec_slice(obj, 1:2) <- foobar("quux") vec_ptype2(foobar(""), foobar("")) vec_cast(foobar(""), foobar("")) #> Error: Can't cast to local_methods( `[<-.vctrs_foobar` = function(x, i, value) { x <- unclass(x) x[i] <- "dispatched" x }, vec_ptype2.logical.vctrs_foobar = function(...) foobar(""), vec_ptype2.vctrs_foobar = function(...) foobar(""), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.logical = function(x, to, ...) foobar(rep("", length(x))) ) obj <- foobar(c("foo", "bar", "baz")) vec_slice(obj, 1:2) <- TRUE expect_identical(obj, foobar(c("dispatched", "dispatched", "baz"))) }) test_that("vec_assign() can always assign unspecified values into foreign vector types", { obj <- foobar(c("foo", "bar", "baz")) expect <- foobar(c(NA, "bar", "baz")) expect_identical(vec_assign(obj, 1, NA), expect) expect_identical(vec_assign(obj, 1, unspecified(1)), expect) }) test_that("slice-assign casts to `to` before falling back to `[<-` (#443)", { called <- FALSE local_methods( vec_proxy.vctrs_proxy = proxy_deref, vec_ptype2.vctrs_proxy = function(...) NULL, vec_ptype2.vctrs_proxy.vctrs_foobar = function(...) new_proxy(NA), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_proxy = function(x, ...) foobar(proxy_deref(x)), `[<-.vctrs_foobar` = function(x, i, value) { called <<- TRUE expect_identical(value, foobar(10)) } ) x <- foobar(1) y <- new_proxy(10) vec_slice(x, 1) <- y expect_true(called) }) test_that("index and value are sliced before falling back", { # Work around a bug in base R `[<-` lhs <- foobar(c(NA, 1:4)) rhs <- foobar(int(0L, 10L)) exp <- foobar(int(10L, 1:4)) expect_identical(vec_assign(lhs, c(NA, 1), rhs), exp) }) test_that("can assign to data frame", { x <- data_frame(x = 1:3) y <- data_frame(x = 20) expect_identical(vec_assign(x, 2, y), data_frame(x = int(1, 20, 3))) }) test_that("can assign to a data frame with matrix columns (#625)", { df <- tibble(x = 1:2, y = matrix(1:4, nrow = 2)) expect_identical(vec_assign(df, 2L, df[1,]), vec_slice(df, c(1, 1))) }) test_that("assigning to a factor doesn't produce corrupt levels (#853)", { x <- factor(c("a", NA), levels = c("a", "b")) value <- factor("b", levels = "b") res <- vec_assign(x, 2, value) expect_identical(res, factor(c("a", "b"))) res <- vec_assign(x, 1:2, value) expect_identical(res, factor(c("b", "b"), levels = c("a", "b"))) }) test_that("can slice-assign unspecified vectors with default type2 method", { local_rational_class() x <- rational(1:2, 2:3) x[[1]] <- NA expect_identical(x, rational(c(NA, 2L), c(NA, 3L))) }) test_that("`vec_assign()` evaluates arg lazily", { expect_silent(vec_assign(1L, 1L, 1L, x_arg = print("oof"))) expect_silent(vec_assign(1L, 1L, 1L, value_arg = print("oof"))) }) test_that("`vec_assign()` requires recyclable value", { expect_snapshot({ (expect_error( vec_assign(1:3, 1:3, 1:2), class = "vctrs_error_recycle_incompatible_size" )) }) }) test_that("logical subscripts must match size of indexed vector", { expect_snapshot({ (expect_error( vec_assign(1:2, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size" )) }) expect_snapshot( (expect_error( vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size" )) ) }) test_that("must assign existing elements", { expect_snapshot({ (expect_error( vec_assign(1:3, 5, 10), class = "vctrs_error_subscript_oob" )) (expect_error( vec_assign(1:3, "foo", 10), "unnamed vector" )) (expect_error( vec_slice(letters, -100) <- "foo", class = "vctrs_error_subscript_oob" )) (expect_error( vec_assign(set_names(letters), "foo", "bar"), class = "vctrs_error_subscript_oob" )) }) }) test_that("must assign with proper negative locations", { expect_snapshot({ (expect_error( vec_assign(1:3, c(-1, 1), 1:2), class = "vctrs_error_subscript_type" )) (expect_error( vec_assign(1:3, c(-1, NA), 1:2), class = "vctrs_error_subscript_type" )) }) }) test_that("`vec_assign()` error args can be overridden", { expect_snapshot({ (expect_error( vec_assign(1:2, 1L, "x", x_arg = "foo", value_arg = "bar"), class = "vctrs_error_incompatible_type" )) (expect_error( vec_assign(1:2, 1L, 1:2, value_arg = "bar"), class = "vctrs_error_recycle_incompatible_size" )) }) }) test_that("names are not assigned by default", { vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) vec_out <- c(a = 1L, b = 4L, c = 3L) expect_identical( vec_assign(vec_x, 2, vec_y), vec_out ) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "FOO") df_out <- new_data_frame(list(x = c(1L, 4L, 3L)), row.names = letters[1:3]) expect_identical( vec_assign(df_x, 2, df_y), df_out ) mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("FOO")) mat_out <- matrix(c(1L, 4L, 3L), dimnames = list(letters[1:3])) expect_identical( vec_assign(mat_x, 2, mat_y), mat_out ) nested_x <- new_data_frame(list(df = df_x, mat = mat_x, vec = vec_x), row.names = c("foo", "bar", "baz")) nested_y <- new_data_frame(list(df = df_y, mat = mat_y, vec = vec_y), row.names = c("quux")) nested_out <- new_data_frame(list(df = df_out, mat = mat_out, vec = vec_out), row.names = c("foo", "bar", "baz")) expect_identical( vec_assign(nested_x, 2, nested_y), nested_out ) }) test_that("can optionally assign names", { vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) vec_out <- c(a = 1L, FOO = 4L, c = 3L) expect_identical( vec_assign_params(vec_x, 2, vec_y, assign_names = TRUE), vec_out ) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "FOO") df_out <- new_data_frame(list(x = c(1L, 4L, 3L)), row.names = c("a", "FOO", "c")) expect_identical( vec_assign_params(df_x, 2, df_y, assign_names = TRUE), df_out ) mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("FOO")) mat_out <- matrix(c(1L, 4L, 3L), dimnames = list(c("a", "FOO", "c"))) expect_identical( vec_assign_params(mat_x, 2, mat_y, assign_names = TRUE), mat_out ) nested_x <- new_data_frame(list(df = df_x, mat = mat_x, vec = vec_x), row.names = c("foo", "bar", "baz")) nested_y <- new_data_frame(list(df = df_y, mat = mat_y, vec = vec_y), row.names = c("quux")) nested_out <- new_data_frame(list(df = df_out, mat = mat_out, vec = vec_out), row.names = c("foo", "quux", "baz")) expect_identical( vec_assign_params(nested_x, 2, nested_y, assign_names = TRUE), nested_out ) }) test_that("can optionally assign names (OO case)", { # In case upstream attributes handling changes skip_on_cran() # `set_names()` must be on the inside, otherwise the POSIXlt object # gets a `balanced` attribute of `NA` oo_x <- as_posixlt(set_names(c("2020-01-01", "2020-01-02", "2020-01-03"), letters[1:3])) oo_y <- as_posixlt(c(FOO = "2020-01-04")) oo_out <- as_posixlt(c(a = "2020-01-01", FOO = "2020-01-04", c = "2020-01-03")) expect_identical( vec_assign_params(oo_x, 2, oo_y, assign_names = TRUE), oo_out ) nested_x <- new_data_frame(list(oo = oo_x), row.names = c("foo", "bar", "baz")) nested_y <- new_data_frame(list(oo = oo_y), row.names = c("quux")) nested_out <- new_data_frame(list(oo = oo_out), row.names = c("foo", "quux", "baz")) expect_identical( vec_assign_params(nested_x, 2, nested_y, assign_names = TRUE), nested_out ) }) test_that("assignment requires that the value proxy is the same type as the output proxy", { x <- foobar(1) y <- foobar("a") local_foobar_proxy() local_methods( vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) x ) expect_error( vec_assign(x, 1, y), "`double` incompatible with `value` proxy of type `character`" ) }) test_that("assignment allows a df `value`'s column to be a different type than its proxy (#1082)", { x <- new_data_frame(list(x = foobar(1))) y <- new_data_frame(list(x = foobar(2))) local_methods( # proxying foobar wraps it in a 1 col df vec_proxy.vctrs_foobar = function(x, ...) { attributes(x) <- NULL new_data_frame(list(vec = x)) }, # restoring extracts the column vec_restore.vctrs_foobar = function(x, to, ...) { foobar(x$vec) }, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x ) expect1 <- new_data_frame(list(x = foobar(c(1, 1)))) expect2 <- new_data_frame(list(x = foobar(2))) expect_identical(vec_rbind(x, x), expect1) expect_identical(vec_assign(x, 1, y), expect2) }) test_that("monitoring: assignment to a data frame with unshared columns doesn't overwrite (#986)", { x <- new_df_unshared_col() value <- new_data_frame(list(x = 2)) expect <- new_data_frame(list(x = 1L)) # - On R < 4.0.0, the NAMED value of the column is 0. # - On R >= 4.0.0, the refcnt of the column is 1 from the call to # `SET_VECTOR_ELT()` in `new_df_unshared_col()`. expect_false(maybe_shared_col(x, 1L)) new <- vec_assign(x, 1, value) # On R < 4.0.0, `vec_assign()` shallow duplicates `x`, which recursively # bumps the NAMED-ness of each column of `x` to the max value of 7 by # calling `ENSURE_NAMEDMAX()` on it. So the columns of `x` are all considered # shared from that. # On R >= 4.0.0, references are tracked more precisely. # - `new_df_unshared_col()` calls `SET_VECTOR_ELT()` when setting the # column into `x`, bumping the column's namedness to 1. # - Then, at the start of `df_assign()`, `x` is shallow duplicated and # assigned to `out`. This calls `ENSURE_NAMEDMAX()` on each column, # however this does nothing on R 4.0.0. The refcnt of each column is instead # incremented by 1 by calls to `SET_VECTOR_ELT()` in `duplicate1()`. # So now it is at 2. # - But then in `df_assign()` we use `SET_VECTOR_ELT()` on `out`, overwriting # each column. This actually decrements the refcnt on the value that was # in `out` before the column was overwritten. The column of `out` that it # decrements the refcnt for is the same SEXP as that column in `x`, so now # it is back to 1, and it is not considered shared. if (getRversion() >= "4.0.0") { expect_false(maybe_shared_col(x, 1L)) } else { expect_true(maybe_shared_col(x, 1L)) } # Expect no changes to `x`! expect_identical(x, expect) }) test_that("monitoring: assignment to atomic vectors doesn't modify by reference", { x <- c(1, 2, 3) expect <- c(1, 2, 3) vec_assign(x, 2, 3) expect_identical(x, expect) }) # vec_assign + compact_seq ------------------------------------------------- # `start` is 0-based test_that("can assign shaped base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical(vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(lgl(1, NA, NA))) expect_identical(vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, NA, NA))) expect_identical(vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, NA, NA))) expect_identical(vec_assign_seq(mat(cpl2(1, 2, 3)), NA, start, size, increasing), mat(cpl2(1, NA, NA))) expect_identical(vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", NA, NA))) expect_identical(vec_assign_seq(mat(raw2(1, 2, 3)), raw2(1), start, size, increasing), mat(raw2(1, 1, 1))) expect_identical(vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, NULL, NULL))) }) test_that("can assign shaped base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE mat <- as.matrix expect_identical(vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(lgl(1, NA, NA))) expect_identical(vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, NA, NA))) expect_identical(vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, NA, NA))) expect_identical(vec_assign_seq(mat(cpl2(1, 2, 3)), NA, start, size, increasing), mat(cpl2(1, NA, NA))) expect_identical(vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", NA, NA))) expect_identical(vec_assign_seq(mat(raw2(1, 2, 3)), raw2(1), start, size, increasing), mat(raw2(1, 1, 1))) expect_identical(vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, NULL, NULL))) }) test_that("can assign shaped base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE mat <- as.matrix expect_identical(vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(mat(lgl(1, 0, 1)))) expect_identical(vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, 2, 3))) expect_identical(vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, 2, 3))) expect_identical(vec_assign_seq(mat(cpl(1, 2, 3)), NA, start, size, increasing), mat(cpl(1, 2, 3))) expect_identical(vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", "2", "3"))) expect_identical(vec_assign_seq(mat(raw2(1, 2, 3)), raw2(1), start, size, increasing), mat(raw2(1, 2, 3))) expect_identical(vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, 2, 3))) }) test_that("can assign object of any dimensionality with compact seqs", { x1 <- ones(3) x2 <- ones(3, 4) x3 <- ones(3, 4, 5) x4 <- ones(3, 4, 5, 6) start <- 0L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical(vec_assign_seq(x1, 2, start, size, increasing), array(rep(c(2, 2, 1), 1), dim = 3)) expect_identical(vec_assign_seq(x2, 2, start, size, increasing), array(rep(c(2, 2, 1), 4), dim = c(3, 4))) expect_identical(vec_assign_seq(x3, 2, start, size, increasing), array(rep(c(2, 2, 1), 20), dim = c(3, 4, 5))) expect_identical(vec_assign_seq(x4, 2, start, size, increasing), array(rep(c(2, 2, 1), 120), dim = c(3, 4, 5, 6))) }) vctrs/tests/testthat/_snaps/0000755000176200001440000000000014520211521015671 5ustar liggesusersvctrs/tests/testthat/_snaps/slice-chop.md0000644000176200001440000003324714532371037020266 0ustar liggesusers# `indices` are validated Code vec_chop(1, indices = 1) Condition Error: ! `indices` must be a list of index values, or `NULL`. --- Code (expect_error(vec_chop(1, indices = list(1.5)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Can't convert from to due to loss of precision. --- Code (expect_error(vec_chop(1, indices = list(2)), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. # `sizes` are validated Code vec_chop("a", sizes = "a") Condition Error: ! Can't convert `sizes` to . --- Code vec_chop("a", sizes = 2) Condition Error: ! `sizes` can't contain sizes larger than 1. --- Code vec_chop("a", sizes = -1) Condition Error: ! `sizes` can't contain negative sizes. --- Code vec_chop("a", sizes = NA_integer_) Condition Error: ! `sizes` can't contain missing values. --- Code vec_chop("a", sizes = c(1, 1)) Condition Error: ! `sizes` must sum to size 1, not size 2. # can't use both `indices` and `sizes` Code vec_chop(1, indices = list(1), sizes = 1) Condition Error: ! Can't supply both `indices` and `sizes`. # `vec_chop(x, indices)` backwards compatible behavior works Code vec_chop(1:2, 1) Condition Error: ! `indices` must be a list of index values, or `NULL`. --- Code vec_chop(1, list(1), sizes = 1) Condition Error: ! Can't supply both `indices` and `sizes`. --- Code vec_chop(1, list(1), 2) Condition Error in `vec_chop()`: ! `...` must be empty. x Problematic arguments: * ..1 = list(1) * ..2 = 2 i Did you forget to name an argument? --- Code vec_chop(1, list(1), indices = list(1)) Condition Error in `vec_chop()`: ! `...` must be empty. x Problematic argument: * ..1 = list(1) i Did you forget to name an argument? # `x` must be a list Code list_unchop(1, indices = list(1)) Condition Error in `list_unchop()`: ! `x` must be a list, not the number 1. --- Code list_unchop(1, indices = list(1), error_call = call("foo"), error_arg = "arg") Condition Error in `foo()`: ! `arg` must be a list, not the number 1. --- Code list_unchop(data.frame(x = 1), indices = list(1)) Condition Error in `list_unchop()`: ! `x` must be a list, not a object. # `indices` must be a list Code list_unchop(list(1), indices = 1) Condition Error in `list_unchop()`: ! `indices` must be a list, not the number 1. --- Code list_unchop(list(1), indices = 1, error_call = call("foo")) Condition Error in `foo()`: ! `indices` must be a list, not the number 1. --- Code list_unchop(list(1), indices = data.frame(x = 1)) Condition Error in `list_unchop()`: ! `indices` must be a list, not a object. # unchopping recycles elements of x to the size of the index Code (expect_error(list_unchop(x, indices = indices))) Output Error in `list_unchop()`: ! Can't recycle `x[[1]]` (size 2) to size 3. Code (expect_error(list_unchop(x, indices = indices, error_call = call("foo"), error_arg = "arg"))) Output Error in `foo()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. # unchopping takes the common type Code (expect_error(list_unchop(x, indices = indices), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . Code (expect_error(list_unchop(x, indices = indices, error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . # common type failure uses positional errors Code x <- list(1, a = "x", 2) (expect_error(list_unchop(x))) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x$a` . Code (expect_error(list_unchop(x, indices = list(2, 1, 3)))) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x$a` . Code (expect_error(list_unchop(x, ptype = double()))) Output Error in `list_unchop()`: ! Can't convert `x$a` to . Code (expect_error(list_unchop(x, indices = list(2, 1, 3), ptype = double()))) Output Error in `list_unchop()`: ! Can't convert `x$a` to . Code y <- list(1, a = 2.5) (expect_error(list_unchop(y, ptype = integer()))) Output Error in `list_unchop()`: ! Can't convert from `x$a` to due to loss of precision. * Locations: 1 Code (expect_error(list_unchop(y, indices = list(2, 1), ptype = integer()))) Output Error in `list_unchop()`: ! Can't convert from `x$a` to due to loss of precision. * Locations: 1 # can specify a ptype to override common type Code (expect_error(list_unchop(x, indices = indices, ptype = integer()))) Output Error in `list_unchop()`: ! Can't convert from `x[[1]]` to due to loss of precision. * Locations: 1 Code (expect_error(list_unchop(x, indices = indices, ptype = integer(), error_call = call( "foo"), error_arg = "arg"))) Output Error in `foo()`: ! Can't convert from `arg[[1]]` to due to loss of precision. * Locations: 1 # list_unchop() can repair names quietly Code res <- list_unchop(vec_chop(x, indices = indices), indices = indices, name_repair = "unique_quiet") --- Code res <- list_unchop(vec_chop(x, indices = indices), indices = indices, name_repair = "universal_quiet") # list_unchop() errors on unsupported location values Code (expect_error(list_unchop(list(1, 2), indices = list(c(1, 2), 0)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript can't contain `0` values. i It has a `0` value at location 1. Code (expect_error(list_unchop(list(1), indices = list(-1)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript can't contain negative locations. # list_unchop() fails with complex foreign S3 classes Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(list_unchop(list(x, y)), class = "vctrs_error_incompatible_type")) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(list_unchop(list(x, y), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # list_unchop() fails with complex foreign S4 classes Code joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(list_unchop(list(joe, jane)), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(list_unchop(list(joe, jane), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # list_unchop() falls back to c() if S3 method is available Code (expect_error(list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 3), integer())), class = "vctrs_error_subscript_oob")) Output Error: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. --- Code x <- list(foobar(1:2)) indices <- list(1:3) (expect_error(list_unchop(x, indices = indices))) Output Error in `list_unchop()`: ! Can't recycle `x[[1]]` (size 2) to size 3. Code (expect_error(list_unchop(x, indices = indices, error_arg = "arg", error_call = call( "foo")))) Output Error in `foo()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. # list_unchop() falls back for S4 classes with a registered c() method Code (expect_error(list_unchop(list(joe, 1, jane), indices = list(c(1, 2), 3, 4)), class = "vctrs_error_incompatible_type")) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . # list_unchop() fallback doesn't support `name_spec` or `ptype` Code foo <- structure(foobar(1), foo = "foo") bar <- structure(foobar(2), bar = "bar") (expect_error(with_c_foobar(list_unchop(list(foo, bar), name_spec = "{outer}_{inner}")), "name specification")) Output Error in `list_unchop()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . Code (expect_error(with_c_foobar(list_unchop(list(foo, bar), name_spec = "{outer}_{inner}", error_call = call("foo"))), "name specification")) Output Error in `foo()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . Code x <- list(foobar(1)) (expect_error(with_c_foobar(list_unchop(x, ptype = "")), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't convert `x[[1]]` to . # list_unchop() does not support non-numeric S3 indices Code (expect_error(list_unchop(list(1), indices = list(factor("x"))), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be numeric, not the string "x". Code (expect_error(list_unchop(list(1), indices = list(foobar(1L))), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be numeric, not a object. # can ignore names in `list_unchop()` by providing a `zap()` name-spec (#232) Code (expect_error(list_unchop(list(a = c(b = 1:2))))) Output Error in `list_unchop()`: ! Can't merge the outer name `a` with a vector of length > 1. Please supply a `.name_spec` specification. Code (expect_error(list_unchop(list(a = c(b = 1:2)), error_call = call("foo")))) Output Error in `list_unchop()`: ! Can't merge the outer name `a` with a vector of length > 1. Please supply a `.name_spec` specification. --- Code x <- list(a = c(b = letters), b = 3L) (expect_error(list_unchop(x, name_spec = zap()), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x$a` and `x$b` . Code x <- list(a = c(foo = 1:2), b = c(bar = "")) (expect_error(list_unchop(x, indices = list(2:1, 3), name_spec = zap()), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x$a` and `x$b` . vctrs/tests/testthat/_snaps/type-list-of.md0000644000176200001440000000363314532371047020571 0ustar liggesusers# print method gives human friendly output Code list_of(1, 2:3) Output [2]> [[1]] [1] 1 [[2]] [1] 2 3 --- Code tibble::tibble(x = list_of(1, 2:3)) Output # A tibble: 2 x 1 x > 1 [1] 2 [2] # str method is reasonably correct Code str(x) Output list [1:2] $ : num 1 $ : num [1:2] 2 3 @ ptype: num(0) --- Code str(list(list(x, y = 2:1))) Output List of 1 $ :List of 2 ..$ : list [1:2] .. ..$ : num 1 .. ..$ : num [1:2] 2 3 .. ..@ ptype: num(0) ..$ y: int [1:2] 2 1 --- Code str(x[0]) Output list [1:0] list() @ ptype: num(0) --- Code str(list(list(x[0], y = 2:1))) Output List of 1 $ :List of 2 ..$ : list [1:0] list() .. ..@ ptype: num(0) ..$ y: int [1:2] 2 1 # list coercions are symmetric and unchanging Code print(mat) Output list list_of list_of list_of list "list" "list" "list" "list" list_of "list" "list_of" "list_of" "list" list_of "list" "list_of" "list_of" "list" list_of "list" "list" "list" "list_of" # error call is passed to inner cast methods Code (expect_error(fn1())) Output Error in `fn1()`: ! Can't convert `..1` to . Code (expect_error(fn2())) Output Error in `fn2()`: ! Can't convert `..1` to . vctrs/tests/testthat/_snaps/partial-factor.md0000644000176200001440000000057014532371033021135 0ustar liggesusers# has ok print method Code partial Output partial_factor< bf275 {partial} > --- Code both Output partial_factor< bf275 {partial} fd1ad > --- Code empty Output partial_factor< > --- Code learned Output partial_factor< fd1ad > vctrs/tests/testthat/_snaps/order.md0000644000176200001440000000163514532371033017343 0ustar liggesusers# `direction` is recycled right with array columns (#1753) Code vec_order_radix(df, direction = c("asc", "desc", "desc")) Condition Error: ! `direction` should have length 1 or length equal to the number of columns of `x` when `x` is a data frame. # `na_value` is recycled right with array columns (#1753) Code vec_order_radix(df, direction = c("smallest", "largest", "largest")) Condition Error: ! `direction` must contain only "asc" or "desc". # dots must be empty (#1647) Code vec_order(1, 2) Condition Error in `vec_order()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? --- Code vec_sort(1, 2) Condition Error in `vec_sort()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? vctrs/tests/testthat/_snaps/size.md0000644000176200001440000000676714532371035017217 0ustar liggesusers# vec_as_short_length() checks inputs Code (expect_error(my_function(-1))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a positive number or zero. Code (expect_error(my_function(1:2))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not an integer vector. Code (expect_error(my_function(1.5))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a whole number, not a fractional number. Code (expect_error(my_function(NA))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not `NA`. Code (expect_error(my_function(na_int))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not an integer `NA`. Code (expect_error(my_function("foo"))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not the string "foo". Code (expect_error(my_function(foobar(1:2)))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not `NULL`. Code (expect_error(my_function(.Machine$double.xmax))) Output Error in `vec_as_short_length()`: ! `my_arg` is too large a number. # vec_as_short_length() has a special error about long vector support Code (expect_error(my_function(.Machine$integer.max + 1))) Output Error in `vec_as_short_length()`: ! `my_arg` is too large a number and long vectors are not supported. # vec_size_common() checks inputs Code (expect_error(vec_size_common(.size = "foo"))) Output Error in `vec_size_common()`: ! `.size` must be a single number, not the string "foo". Code (expect_error(vec_size_common(.size = 1:2))) Output Error in `vec_size_common()`: ! `.size` must be a single number, not an integer vector. # vec_size_common() mentions `arg` in errors Code (expect_error(my_function(this_arg = 1:2, that_arg = int()))) Output Error in `my_function()`: ! Can't recycle `my_arg$this_arg` (size 2) to match `my_arg$that_arg` (size 0). # `.absent` must be supplied when `...` is empty Code (expect_error(vec_size_common(.absent = NULL))) Output Error in `vec_size_common()`: ! `.absent` must be supplied when `...` is empty. # `.absent` must be a length 1 integer if provided Code (expect_error(vec_size_common(.absent = 1), "must be a single integer")) Output Error in `vec_size_common()`: ! `.absent` must be a single integer. Code (expect_error(vec_size_common(.absent = c(1L, 2L)), "must be a single integer")) Output Error in `vec_size_common()`: ! `.absent` must be a single integer. # argument tags are forwarded Code vec_size_common(1:2, 1, 1:4) Condition Error: ! Can't recycle `..1` (size 2) to match `..3` (size 4). --- Code vec_size_common(foo = 1:2, 1, bar = 1:4) Condition Error: ! Can't recycle `foo` (size 2) to match `bar` (size 4). vctrs/tests/testthat/_snaps/interval.md0000644000176200001440000000351614532371025020055 0ustar liggesusers# `missing` is validated Code (expect_error(vec_interval_locate_groups(1, 2, missing = "s"))) Output Error in `vec_interval_locate_groups()`: ! `missing` must be either "group" or "drop". --- Code (expect_error(vec_interval_locate_groups(1, 2, missing = c("group", "drop")))) Output Error in `vec_interval_locate_groups()`: ! `missing` must be a string. # common type is taken Code (expect_error(vec_interval_locate_groups(1, "x"))) Output Error: ! Can't combine `start` and `end` . --- Code (expect_error(vec_interval_locate_containers(1, "x"))) Output Error: ! Can't combine `start` and `end` . # `lower` and `upper` can't contain missing values Code (expect_error(vec_interval_complement(1, 2, lower = NA))) Output Error in `vec_interval_complement()`: ! `lower` can't contain missing values. Code (expect_error(vec_interval_complement(1, 2, upper = NA))) Output Error in `vec_interval_complement()`: ! `upper` can't contain missing values. Code start <- data_frame(x = 1, y = 2) end <- data_frame(x = 1, y = 3) (expect_error(vec_interval_complement(start, end, lower = data_frame(x = 1, y = NA))) ) Output Error in `vec_interval_complement()`: ! `lower` can't contain missing values. Code (expect_error(vec_interval_complement(start, end, upper = data_frame(x = 1, y = NA))) ) Output Error in `vec_interval_complement()`: ! `upper` can't contain missing values. vctrs/tests/testthat/_snaps/names.md0000644000176200001440000002001114532371032017317 0ustar liggesusers# vec_as_names() validates `repair` Code (expect_error(my_vec_as_names("x", my_repair = "foo"), "can't be \"foo\"")) Output Error in `my_vec_as_names()`: ! `my_repair` can't be "foo". See `?vctrs::vec_as_names`. Code (expect_error(my_vec_as_names(1, my_repair = 1), "string or a function")) Output Error in `my_vec_as_names()`: ! `my_repair` must be a string or a function. See `?vctrs::vec_as_names`. # vec_as_names() checks unique names Code (expect_error(my_vec_as_names(chr(NA), my_repair = "check_unique"))) Output Error: ! Names repair functions can't return `NA` values. Code (expect_error(my_vec_as_names(chr(""), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names can't be empty. x Empty name found at location 1. Code (expect_error(my_vec_as_names(chr("a", "a"), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `my_repair` to specify repair strategy. Code (expect_error(my_vec_as_names(chr("..1"), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at location 1. Code (expect_error(my_vec_as_names(chr("..."), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..." at location 1. # vec_as_names() accepts and checks repair function Code my_vec_as_names(c("", ""), my_repair = function(nms) "foo") Condition Error in `my_vec_as_names()`: ! Repaired names have length 1 instead of length 2. # vec_as_names() is noisy by default Code vec_as_names(c("x", "x"), repair = "unique") Message New names: * `x` -> `x...1` * `x` -> `x...2` Output [1] "x...1" "x...2" Code vec_as_names(c("x", "x"), repair = "unique", quiet = TRUE) Output [1] "x...1" "x...2" Code (expect_error(my_vec_as_names(c("x", "x"), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. i Use argument `my_repair` to specify repair strategy. Code vec_as_names(c("1", "1"), repair = "unique_quiet") Output [1] "1...1" "1...2" Code vec_as_names(c("1", "1"), repair = "universal_quiet") Output [1] "...1...1" "...1...2" Code vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = TRUE) Output [1] "1...1" "1...2" Code vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = TRUE) Output [1] "...1...1" "...1...2" Code vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = FALSE) Output [1] "1...1" "1...2" Code vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = FALSE) Output [1] "...1...1" "...1...2" # validate_minimal_names() checks names Code (expect_error(validate_minimal_names(1), "must return a character vector")) Output Error: ! Names repair functions must return a character vector. Code (expect_error(validate_minimal_names(NULL), "can't return `NULL`")) Output Error: ! Names repair functions can't return `NULL`. Code (expect_error(validate_minimal_names(chr(NA)), "can't return `NA` values")) Output Error: ! Names repair functions can't return `NA` values. # validate_unique() checks unique names Code (expect_error(validate_unique(chr(NA)), "`NA`")) Output Error: ! Names repair functions can't return `NA` values. Code (expect_error(validate_unique(chr("")), class = "vctrs_error_names_cannot_be_empty") ) Output Error: ! Names can't be empty. x Empty name found at location 1. Code (expect_error(validate_unique(chr("a", "a")), class = "vctrs_error_names_must_be_unique") ) Output Error: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. Code (expect_error(validate_unique(chr("..1")), class = "vctrs_error_names_cannot_be_dot_dot") ) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at location 1. Code (expect_error(validate_unique(chr("...")), class = "vctrs_error_names_cannot_be_dot_dot") ) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..." at location 1. # vec_set_names() errors with bad `names` Code (expect_error(vec_set_names(1, 1), "character vector, not a double")) Output Error in `vec_set_names()`: ! `names` must be a character vector, not a double. Code (expect_error(vec_set_names(1, c("x", "y")), "The size of `names`, 2")) Output Error in `vec_set_names()`: ! The size of `names`, 2, must be the same as the size of `x`, 1. # unique_names() and as_unique_names() are verbose or silent Code unique_names(1:2) Message New names: * `` -> `...1` * `` -> `...2` Output [1] "...1" "...2" --- Code as_unique_names(c("", "")) Message New names: * `` -> `...1` * `` -> `...2` Output [1] "...1" "...2" # message Code as_universal_names(c("a b", "b c")) Message New names: * `a b` -> `a.b` * `b c` -> `b.c` Output [1] "a.b" "b.c" # messages by default Code vec_repair_names(set_names(1, "a:b"), "universal") Message New names: * `a:b` -> `a.b` Output a.b 1 --- Code vec_repair_names(set_names(1, "a:b"), ~ make.names(.)) Message New names: * `a:b` -> `a.b` Output a.b 1 # NULL name specs works with scalars Code (expect_error(vec_c(foo = c(a = 1, b = 2)), "vector of length > 1")) Output Error in `vec_c()`: ! Can't merge the outer name `foo` with a vector of length > 1. Please supply a `.name_spec` specification. Code (expect_error(vec_c(foo = 1:2), "vector of length > 1")) Output Error in `vec_c()`: ! Can't merge the outer name `foo` with a vector of length > 1. Please supply a `.name_spec` specification. Code (expect_error(vec_c(x = c(xx = 1)), "named vector")) Output Error in `vec_c()`: ! Can't merge the outer name `x` with a named vector. Please supply a `.name_spec` specification. # vec_as_names() uses internal error if `repair_arg` is not supplied Code (expect_error(vec_as_names("", repair = "foobar", call = quote(tilt())))) Output Error in `vec_as_names()`: ! `repair` can't be "foobar". See `?vctrs::vec_as_names`. Code (expect_error(vec_as_names("", repair = env(), call = quote(tilt())))) Output Error in `vec_as_names()`: ! `repair` must be a string or a function. See `?vctrs::vec_as_names`. vctrs/tests/testthat/_snaps/rank.md0000644000176200001440000000147314532371033017163 0ustar liggesusers# `ties` is validated Code vec_rank(1, ties = "foo") Condition Error in `vec_rank()`: ! `ties` must be one of "min", "max", "sequential", or "dense", not "foo". --- Code vec_rank(1, ties = 1) Condition Error in `vec_rank()`: ! `ties` must be a string or character vector. # `incomplete` is validated Code vec_rank(1, incomplete = NA) Condition Error in `vec_rank()`: ! `incomplete` must be a string or character vector. --- Code vec_rank(1, incomplete = c(TRUE, FALSE)) Condition Error in `vec_rank()`: ! `incomplete` must be a string or character vector. --- Code vec_rank(1, incomplete = "foo") Condition Error in `vec_rank()`: ! `incomplete` must be one of "rank" or "na", not "foo". vctrs/tests/testthat/_snaps/type-asis.md0000644000176200001440000000110014532371045020134 0ustar liggesusers# AsIs objects throw ptype2 errors with their underlying types Code (expect_error(vec_ptype2(I(1), I("x")), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine `I(1)` and `I("x")` . # AsIs objects throw cast errors with their underlying types Code (expect_error(vec_cast(I(1), I(factor("x"))), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `I(1)` to >. vctrs/tests/testthat/_snaps/expand.md0000644000176200001440000000404114532371025017502 0ustar liggesusers# inputs must be named Code vec_expand_grid(1) Condition Error in `vec_expand_grid()`: ! All inputs must be named. --- Code vec_expand_grid(x = 1, 2, y = 3) Condition Error in `vec_expand_grid()`: ! All inputs must be named. # catches duplicate names by default Code vec_expand_grid(a = 1, a = 2) Condition Error in `vec_expand_grid()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. # errors on non vectors and mentions the element name Code vec_expand_grid(y = environment()) Condition Error in `vec_expand_grid()`: ! `y` must be a vector, not an environment. # can adjust the `.error_call` Code my_expand_grid() Condition Error in `my_expand_grid()`: ! `x` must be a vector, not an environment. # errors nicely when expansion results in a size larger than `R_len_t` Code vec_expand_grid(x = x, y = y) Condition Error in `vec_expand_grid()`: ! Long vectors are not yet supported. Expansion results in an allocation larger than 2^31-1 elements. Attempted allocation size was 3221225469. # errors nicely when expansion results in a size larger than `R_xlen_t` Code vec_expand_grid(x = x, y = x) Condition Error in `vec_expand_grid()`: ! Result too large for an `r_ssize`. i In file './rlang/c-utils.h' at line . i This is an internal error that was detected in the vctrs package. Please report it at with a reprex () and the full backtrace. # validates `.vary` Code vec_expand_grid(.vary = 1) Condition Error in `vec_expand_grid()`: ! `.vary` must be a string or character vector. --- Code vec_expand_grid(.vary = "x") Condition Error in `vec_expand_grid()`: ! `.vary` must be one of "slowest" or "fastest", not "x". vctrs/tests/testthat/_snaps/type-data-table.md0000644000176200001440000000040114532371046021177 0ustar liggesusers# data table has formatting methods Code dt <- data.table(x = 1, y = 2, z = 3) vec_ptype_abbr(dt) Output [1] "dt[,3]" Code vec_ptype_full(dt) Output [1] "data.table<\n x: double\n y: double\n z: double\n>" vctrs/tests/testthat/_snaps/bind.md0000644000176200001440000003147214532371021017143 0ustar liggesusers# incompatible columns throws common type error Code (expect_error(vec_rbind(x_int, x_chr), class = "vctrs_error_incompatible_type")) Output Error in `vec_rbind()`: ! Can't combine `..1$x` and `..2$x` . Code (expect_error(vec_rbind(x_int, x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type") ) Output Error in `foo()`: ! Can't combine `..1$x` and `..2$x` . Code (expect_error(vec_rbind(x_int, x_chr, .ptype = x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't convert `..1$x` to match type of `x` . # names are supplied if needed Code out <- vec_rbind(data_frame(...1 = 1), 1) Message New names: * `` -> `...1` # can repair names in `vec_rbind()` (#229) Code (expect_error(vec_rbind(.name_repair = "none"), "can't be `\"none\"`")) Output Error: ! `.name_repair` can't be `"none"`. It must be one of `"unique"`, `"universal"`, or `"check_unique"`. Code (expect_error(vec_rbind(.name_repair = "minimal"), "can't be `\"minimal\"`")) Output Error: ! `.name_repair` can't be `"minimal"`. It must be one of `"unique"`, `"universal"`, or `"check_unique"`. Code (expect_error(vec_rbind(list(a = 1, a = 2), .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) Output Error in `vec_rbind()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. # can repair names quietly Code res_unique <- vec_rbind(c(x = 1, x = 2), c(x = 3, x = 4), .name_repair = "unique_quiet") res_universal <- vec_rbind(c(`if` = 1, `in` = 2), c(`if` = 3, `for` = 4), .name_repair = "universal_quiet") --- Code res_unique <- vec_cbind(x = 1, x = 2, .name_repair = "unique_quiet") res_universal <- vec_cbind(`if` = 1, `in` = 2, .name_repair = "universal_quiet") # vec_rbind() fails with arrays of dimensionality > 3 Code (expect_error(vec_rbind(array(NA, c(1, 1, 1))))) Output Error in `vec_rbind()`: ! Can't bind arrays. Code (expect_error(vec_rbind(array(NA, c(1, 1, 1)), .error_call = call("foo")))) Output Error in `foo()`: ! Can't bind arrays. # can assign row names in vec_rbind() Code (expect_error(vec_rbind(foo = df1, df2, .names_to = NULL), "specification")) Output Error in `vec_rbind()`: ! Can't merge the outer name `foo` with a vector of length > 1. Please supply a `.name_spec` specification. # vec_cbind() reports error context Code (expect_error(vec_cbind(foobar(list())))) Output Error in `vec_cbind()`: ! `..1` must be a vector, not a object. Code (expect_error(vec_cbind(foobar(list()), .error_call = call("foo")))) Output Error in `foo()`: ! `..1` must be a vector, not a object. Code (expect_error(vec_cbind(a = 1:2, b = int()))) Output Error in `vec_cbind()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(vec_cbind(a = 1:2, b = int(), .error_call = call("foo")))) Output Error in `foo()`: ! Can't recycle `a` (size 2) to match `b` (size 0). # duplicate names are de-deduplicated Code (expect_named(vec_cbind(x = 1, x = 1), c("x...1", "x...2"))) Message New names: * `x` -> `x...1` * `x` -> `x...2` Output x...1 x...2 1 1 1 Code (expect_named(vec_cbind(data.frame(x = 1), data.frame(x = 1)), c("x...1", "x...2"))) Message New names: * `x` -> `x...1` * `x` -> `x...2` Output x...1 x...2 1 1 1 # can repair names in `vec_cbind()` (#227) Code (expect_error(vec_cbind(a = 1, a = 2, .name_repair = "none"), "can't be `\"none\"`")) Output Error: ! `.name_repair` can't be `"none"`. It must be one of `"unique"`, `"universal"`, `"check_unique"`, or `"minimal"`. Code (expect_error(vec_cbind(a = 1, a = 2, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") ) Output Error in `vec_cbind()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. # can supply `.names_to` to `vec_rbind()` (#229) Code (expect_error(vec_rbind(.names_to = letters))) Output Error in `vec_rbind()`: ! `.names_to` must be `NULL`, a string, or an `rlang::zap()` object. Code (expect_error(vec_rbind(.names_to = 10))) Output Error in `vec_rbind()`: ! `.names_to` must be `NULL`, a string, or an `rlang::zap()` object. Code (expect_error(vec_rbind(.names_to = letters, .error_call = call("foo")))) Output Error in `foo()`: ! `.names_to` must be `NULL`, a string, or an `rlang::zap()` object. # vec_cbind() fails with arrays of dimensionality > 3 Code (expect_error(vec_cbind(a))) Output Error in `vec_cbind()`: ! Can't bind arrays. Code (expect_error(vec_cbind(a, .error_call = call("foo")))) Output Error in `foo()`: ! Can't bind arrays. Code (expect_error(vec_cbind(x = a))) Output Error in `vec_cbind()`: ! Can't bind arrays. # vec_rbind() name repair messages are useful Code vec_rbind(1, 2) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 Code vec_rbind(1, 2, .names_to = NULL) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 Code vec_rbind(1, 2, ...10 = 3) Message New names: * `` -> `...1` New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 3 3 Code vec_rbind(1, 2, ...10 = 3, .names_to = NULL) Message New names: * `` -> `...1` New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 ...1 1 ...2 2 ...3 3 Code vec_rbind(a = 1, b = 2) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 Code vec_rbind(a = 1, b = 2, .names_to = NULL) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 a 1 b 2 Code vec_rbind(c(a = 1), c(b = 2)) Output a b 1 1 NA 2 NA 2 Code vec_rbind(c(a = 1), c(b = 2), .names_to = NULL) Output a b 1 1 NA 2 NA 2 # vec_rbind() is silent when assigning duplicate row names of df-cols Code vec_rbind(df, df) Output mpg 1 21.0 2 21.0 3 22.8 4 21.0 5 21.0 6 22.8 --- Code vec_rbind(mtcars[1:4, ], mtcars[1:3, ]) Output mpg cyl disp hp drat wt qsec vs am gear carb Mazda RX4...1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4 Wag...2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 Datsun 710...3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 Mazda RX4...5 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4 Wag...6 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 Datsun 710...7 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 # vec_cbind() name repair messages are useful Code vec_cbind(1, 2) Message New names: * `` -> `...1` * `` -> `...2` Output ...1 ...2 1 1 2 Code vec_cbind(1, 2, ...10 = 3) Message New names: * `` -> `...1` * `` -> `...2` * `...10` -> `...3` Output ...1 ...2 ...3 1 1 2 3 Code vec_cbind(a = 1, b = 2) Output a b 1 1 2 Code vec_cbind(c(a = 1), c(b = 2)) Message New names: * `` -> `...1` * `` -> `...2` Output ...1 ...2 1 1 2 # rbind repairs names of data frames (#704) Code (expect_error(vec_rbind(df, df, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") ) Output Error in `vec_rbind()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. Code (expect_error(vec_rbind(df, df, .name_repair = "check_unique", .error_call = call( "foo")), class = "vctrs_error_names_must_be_unique")) Output Error in `foo()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. # vec_rbind() fails with complex foreign S3 classes Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(vec_rbind(set_names(x, "x"), set_names(y, "x")), class = "vctrs_error_incompatible_type") ) Output Error in `vec_rbind()`: ! Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # vec_rbind() fails with complex foreign S4 classes Code joe <- .Counts(1L, name = "Joe") jane <- .Counts(2L, name = "Jane") (expect_error(vec_rbind(set_names(joe, "x"), set_names(jane, "y")), class = "vctrs_error_incompatible_type") ) Output Error in `vec_rbind()`: ! Can't combine `..1` and `..2` . # can't zap names when `.names_to` is supplied Code (expect_error(vec_rbind(foo = c(x = 1), .names_to = "id", .name_spec = zap()))) Output Error in `vec_rbind()`: ! Can't zap outer names when `.names_to` is supplied. Code (expect_error(vec_rbind(foo = c(x = 1), .names_to = "id", .name_spec = zap(), .error_call = call("foo")))) Output Error in `foo()`: ! Can't zap outer names when `.names_to` is supplied. # row-binding performs expected allocations Code ints <- rep(list(1L), 100) named_ints <- rep(list(set_names(1:3, letters[1:3])), 100) # Integers as rows suppressMessages(with_memory_prof(vec_rbind_list(ints))) Output [1] 2.79KB Code suppressMessages(with_memory_prof(vec_rbind_list(named_ints))) Output [1] 3.66KB Code # Data frame with named columns df <- data_frame(x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c( "A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb")))) dfs <- rep(list(df), 100) with_memory_prof(vec_rbind_list(dfs)) Output [1] 10.4KB Code # Data frame with rownames (non-repaired, non-recursive case) df <- data_frame(x = 1:2) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 7.68KB Code # Data frame with rownames (repaired, non-recursive case) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 13.8KB Code # Data frame with rownames (non-repaired, recursive case) (#1217) df <- data_frame(x = 1:2, y = data_frame(x = 1:2)) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 13KB Code # Data frame with rownames (repaired, recursive case) (#1217) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 25.3KB vctrs/tests/testthat/_snaps/group.md0000644000176200001440000000015714532371025017363 0ustar liggesusers# print method is useful Code x Output [1] 1x3 2x2 1x1 vctrs/tests/testthat/_snaps/conditions.md0000644000176200001440000001501614532371023020376 0ustar liggesusers# incompatible type error validates `action` Code (expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = "c")) ) Output Error in `stop_incompatible_type()`: ! `action` must be one of "combine" or "convert", not "c". Code (expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = 1))) Output Error in `stop_incompatible_type()`: ! `action` must be a character vector, not the number 1. # can override arg in OOB conditions Code (expect_error(with_subscript_data(vec_slice(set_names(letters), "foo"), NULL), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(with_subscript_data(vec_slice(set_names(letters), "foo"), quote( foo)), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(with_subscript_data(vec_slice(set_names(letters), "foo"), quote( foo(bar))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. # scalar type errors are informative Code (expect_error(vec_slice(foobar(list(1)), 1), class = "vctrs_error_scalar_type")) Output Error in `vec_slice()`: ! `x` must be a vector, not a object. Code (expect_error(stop_scalar_type(foobar(list(1)), arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a object. # empty names errors are informative Code (expect_error(vec_as_names(c("x", "", "y"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty") ) Output Error: ! Names can't be empty. x Empty name found at location 2. Code (expect_error(vec_as_names(c("x", "", "y", ""), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty")) Output Error: ! Names can't be empty. x Empty names found at locations 2 and 4. Code (expect_error(vec_as_names(rep("", 10), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty") ) Output Error: ! Names can't be empty. x Empty names found at locations 1, 2, 3, 4, 5, etc. # dot dot names errors are informative Code (expect_error(vec_as_names(c("..1", "..1", "..1", "...", "z"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot")) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at locations 1, 2, and 3. * "..." at location 4. Code (expect_error(vec_as_names(c(rep("..1", 20), rep(c("..2", "..3", "..4", "...", "..5"), 2)), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot") ) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at locations 1, 2, 3, 4, 5, etc. * "..2" at locations 21 and 26. * "..3" at locations 22 and 27. * "..4" at locations 23 and 28. * "..." at locations 24 and 29. * ... # unique names errors are informative Code (expect_error(vec_as_names(c("x", "x", "x", "y", "y", "z"), repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) Output Error: ! Names must be unique. x These names are duplicated: * "x" at locations 1, 2, and 3. * "y" at locations 4 and 5. Code (expect_error(vec_as_names(c(rep("x", 20), rep(c("a", "b", "c", "d", "e"), 2)), repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) Output Error: ! Names must be unique. x These names are duplicated: * "x" at locations 1, 2, 3, 4, 5, etc. * "a" at locations 21 and 26. * "b" at locations 22 and 27. * "c" at locations 23 and 28. * "d" at locations 24 and 29. * ... # lossy cast from character to factor mentions loss of generality Code (expect_error(vec_cast("a", factor("b")), class = "vctrs_error_cast_lossy")) Output Error: ! Can't convert from `"a"` to > due to loss of generality. * Locations: 1 # lossy cast `conditionMessage()` result matches `cnd_message()` (#1592) Code cat(conditionMessage(cnd)) Output Can't convert from `1.5` to due to loss of precision. * Locations: 1 # ordered cast failures mention conversion Code (expect_error(vec_cast(ordered("x"), ordered("y")), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `ordered("x")` > to >. # incompatible size errors Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = ""))) Output Error: ! Can't recycle input of size 2 to size 3. Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = ""))) Output Error: ! Can't recycle `foo` (size 2) to size 3. Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "bar")) ) Output Error: ! Can't recycle input of size 2 to match `bar` (size 3). Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = quote(bar)))) Output Error: ! Can't recycle `foo` (size 2) to match `bar` (size 3). vctrs/tests/testthat/_snaps/shape.md0000644000176200001440000000262714532371035017334 0ustar liggesusers# incompatible shapes throw errors Code (expect_error(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine and . x Incompatible sizes 0 and 5 along axis 2. Code (expect_error(vec_shape2(shaped_int(1, 5, 0), shaped_int(1, 1, 5)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine and . x Incompatible sizes 0 and 5 along axis 3. # can override error args Code (expect_error(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1), x_arg = "foo", y_arg = "bar"), class = "vctrs_error_incompatible_type")) Output Error: ! Can't combine `foo` and `bar` . x Incompatible sizes 0 and 5 along axis 2. # can combine shaped native classes (#1290, #1329) Code vec_c(x, y) Condition Error: ! Can't combine `..1` > and `..2` >. x Incompatible sizes 2 and 3 along axis 2. # factor casts support shape Code vec_cast(x, y) Condition Error: ! Can't convert `x` [,1]> to >. Can't decrease dimensionality from 2 to 1. vctrs/tests/testthat/_snaps/runs.md0000644000176200001440000000271614532371034017221 0ustar liggesusers# errors on scalars Code vec_identify_runs(foobar()) Condition Error in `vec_identify_runs()`: ! `x` must be a vector, not a object. --- Code vec_run_sizes(foobar()) Condition Error in `vec_run_sizes()`: ! `x` must be a vector, not a object. # vec_locate_run_bounds() validates `which` Code vec_locate_run_bounds(1, which = "x") Condition Error in `vec_locate_run_bounds()`: ! `which` must be one of "start" or "end", not "x". --- Code vec_locate_run_bounds(1, which = 1) Condition Error in `vec_locate_run_bounds()`: ! `which` must be a string or character vector. --- Code vec_locate_run_bounds(1, which = c("foo", "bar")) Condition Error in `vec_locate_run_bounds()`: ! `arg` must be length 1 or a permutation of `c("start", "end")`. # vec_detect_run_bounds() validates `which` Code vec_detect_run_bounds(1, which = "x") Condition Error in `vec_detect_run_bounds()`: ! `which` must be one of "start" or "end", not "x". --- Code vec_detect_run_bounds(1, which = 1) Condition Error in `vec_detect_run_bounds()`: ! `which` must be a string or character vector. --- Code vec_detect_run_bounds(1, which = c("foo", "bar")) Condition Error in `vec_detect_run_bounds()`: ! `arg` must be length 1 or a permutation of `c("start", "end")`. vctrs/tests/testthat/_snaps/type-rcrd.md0000644000176200001440000000324514532371047020145 0ustar liggesusers# na.fail() works Code na.fail(x) Condition Error in `na.fail()`: ! missing values in object # print and str use format Code r Output [1] (1,1) (1,2) (1,3) (1,4) (1,5) (1,6) (1,7) (1,8) (1,9) [10] (1,10) (1,11) (1,12) (1,13) (1,14) (1,15) (1,16) (1,17) (1,18) [19] (1,19) (1,20) (1,21) (1,22) (1,23) (1,24) (1,25) (1,26) (1,27) [28] (1,28) (1,29) (1,30) (1,31) (1,32) (1,33) (1,34) (1,35) (1,36) [37] (1,37) (1,38) (1,39) (1,40) (1,41) (1,42) (1,43) (1,44) (1,45) [46] (1,46) (1,47) (1,48) (1,49) (1,50) (1,51) (1,52) (1,53) (1,54) [55] (1,55) (1,56) (1,57) (1,58) (1,59) (1,60) (1,61) (1,62) (1,63) [64] (1,64) (1,65) (1,66) (1,67) (1,68) (1,69) (1,70) (1,71) (1,72) [73] (1,73) (1,74) (1,75) (1,76) (1,77) (1,78) (1,79) (1,80) (1,81) [82] (1,82) (1,83) (1,84) (1,85) (1,86) (1,87) (1,88) (1,89) (1,90) [91] (1,91) (1,92) (1,93) (1,94) (1,95) (1,96) (1,97) (1,98) (1,99) [100] (1,100) --- Code str(r[1:10]) Output vctrs_tp [1:10] (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7), (1,8), (1... --- Code str(list(list(list(r, 1:100)))) Output List of 1 $ :List of 1 ..$ :List of 2 .. ..$ : vctrs_tp [1:100] (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7)... .. ..$ : int [1:100] 1 2 3 4 5 6 7 8 9 10 ... # dots are an error (#1295) Code foo[1, 2] Condition Error in `foo[1, 2]`: ! Can't index record vectors on dimensions greater than 1. vctrs/tests/testthat/_snaps/rep.md0000644000176200001440000000741314532371034017017 0ustar liggesusers# `vec_rep()` validates `times` Code (expect_error(my_vec_rep(1, "x"), class = "vctrs_error_incompatible_type")) Output Error in `my_vec_rep()`: ! Can't convert `my_times` to . Code (expect_error(my_vec_rep(1, c(1, 2)))) Output Error in `my_vec_rep()`: ! `my_times` must be a single number. Code (expect_error(my_vec_rep(1, -1))) Output Error in `my_vec_rep()`: ! `my_times` must be a positive number. Code (expect_error(my_vec_rep(1, NA_integer_))) Output Error in `my_vec_rep()`: ! `my_times` can't be missing. --- Code my_vec_rep(1, "x") Condition Error in `my_vec_rep()`: ! Can't convert `my_times` to . --- Code my_vec_rep(1, c(1, 2)) Condition Error in `my_vec_rep()`: ! `my_times` must be a single number. --- Code my_vec_rep(1, -1) Condition Error in `my_vec_rep()`: ! `my_times` must be a positive number. --- Code my_vec_rep(1, NA_integer_) Condition Error in `my_vec_rep()`: ! `my_times` can't be missing. # `vec_rep_each()` validates `times` Code (expect_error(my_vec_rep_each(1, "x"), class = "vctrs_error_incompatible_type")) Output Error in `my_vec_rep_each()`: ! Can't convert `my_times` to . Code (expect_error(my_vec_rep_each(1, -1))) Output Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 1 is negative. Code (expect_error(my_vec_rep_each(c(1, 2), c(1, -1)))) Output Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 2 is negative. Code (expect_error(my_vec_rep_each(1, NA_integer_))) Output Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 1 is missing. Code (expect_error(my_vec_rep_each(c(1, 2), c(1, NA_integer_)))) Output Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 2 is missing. --- Code my_vec_rep_each(1, "x") Condition Error in `my_vec_rep_each()`: ! Can't convert `my_times` to . --- Code my_vec_rep_each(1, -1) Condition Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 1 is negative. --- Code my_vec_rep_each(c(1, 2), c(1, -1)) Condition Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 2 is negative. --- Code my_vec_rep_each(1, NA_integer_) Condition Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 1 is missing. --- Code my_vec_rep_each(c(1, 2), c(1, NA_integer_)) Condition Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 2 is missing. # `vec_rep_each()` uses recyclying errors Code (expect_error(my_vec_rep_each(1:2, 1:3), class = "vctrs_error_recycle_incompatible_size") ) Output Error in `my_vec_rep_each()`: ! Can't recycle `my_times` (size 3) to size 2. --- Code my_vec_rep_each(1:2, 1:3) Condition Error in `my_vec_rep_each()`: ! Can't recycle `my_times` (size 3) to size 2. # errors on scalars Code vec_unrep(environment()) Condition Error in `vec_unrep()`: ! `x` must be a vector, not an environment. vctrs/tests/testthat/_snaps/slice-assign.md0000644000176200001440000000624614532371036020617 0ustar liggesusers# `vec_assign()` requires recyclable value Code (expect_error(vec_assign(1:3, 1:3, 1:2), class = "vctrs_error_recycle_incompatible_size") ) Output Error in `vec_assign()`: ! Can't recycle input of size 2 to size 3. # logical subscripts must match size of indexed vector Code (expect_error(vec_assign(1:2, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size") ) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 2, not 3. --- Code (expect_error(vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size") ) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 32, not 2. # must assign existing elements Code (expect_error(vec_assign(1:3, 5, 10), class = "vctrs_error_subscript_oob")) Output Error: ! Can't assign to elements past the end. i Location 5 doesn't exist. i There are only 3 elements. Code (expect_error(vec_assign(1:3, "foo", 10), "unnamed vector")) Output Error in `vec_assign()`: ! Can't use character names to index an unnamed vector. Code (expect_error(vec_slice(letters, -100) <- "foo", class = "vctrs_error_subscript_oob") ) Output Error: ! Can't negate elements past the end. i Location 100 doesn't exist. i There are only 26 elements. Code (expect_error(vec_assign(set_names(letters), "foo", "bar"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't assign to elements that don't exist. x Element `foo` doesn't exist. # must assign with proper negative locations Code (expect_error(vec_assign(1:3, c(-1, 1), 1:2), class = "vctrs_error_subscript_type") ) Output Error: ! Can't assign elements. x Negative and positive locations can't be mixed. i Subscript has a positive value at location 2. Code (expect_error(vec_assign(1:3, c(-1, NA), 1:2), class = "vctrs_error_subscript_type") ) Output Error: ! Can't assign elements. x Negative locations can't have missing values. i Subscript has a missing value at location 2. # `vec_assign()` error args can be overridden Code (expect_error(vec_assign(1:2, 1L, "x", x_arg = "foo", value_arg = "bar"), class = "vctrs_error_incompatible_type")) Output Error in `vec_assign()`: ! Can't convert `bar` to match type of `foo` . Code (expect_error(vec_assign(1:2, 1L, 1:2, value_arg = "bar"), class = "vctrs_error_recycle_incompatible_size") ) Output Error in `vec_assign()`: ! Can't recycle `bar` (size 2) to size 1. vctrs/tests/testthat/_snaps/type2.md0000644000176200001440000000770314532371052017276 0ustar liggesusers# base coercions are symmetric and unchanging Code mat Output logical integer double character raw list logical "logical" "integer" "double" NA NA NA integer "integer" "integer" "double" NA NA NA double "double" "double" "double" NA NA NA character NA NA NA "character" NA NA raw NA NA NA NA "raw" NA list NA NA NA NA NA "list" # vec_ptype2() data frame methods builds argument tags Code vec_ptype2("foo", 10) Condition Error: ! Can't combine `"foo"` and `10` . --- Code df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) vec_ptype2(df1, df2) Condition Error: ! Can't combine `df1$x$y$z` and `df2$x$y$z` . # can override scalar vector error message for base scalar types Code (expect_error(vec_ptype2(NULL, quote(x), y_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a symbol. Code (expect_error(vec_ptype2(quote(x), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a symbol. # can override scalar vector error message for S3 types Code (expect_error(vec_ptype2(NULL, foobar(), y_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a object. Code (expect_error(vec_ptype2(foobar(), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a object. # ptype2 and cast errors when same class fallback is impossible are informative Code (expect_error(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `foobar(1, bar = TRUE)` to . Code (expect_error(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine `foobar(1, bar = TRUE)` and `foobar(2, baz = TRUE)` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # Incompatible attributes bullets are not show when methods are implemented Code with_foobar_cast <- (function(expr) { with_methods(vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) vec_default_cast(x, to, ...), expr) }) with_foobar_ptype2 <- (function(expr) { with_methods(vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) vec_default_ptype2( x, y, ...), expr) }) (expect_error(with_foobar_cast(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type")) Output Error: ! Can't convert `foobar(1, bar = TRUE)` to . Code (expect_error(with_foobar_ptype2(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type")) Output Error: ! Can't combine `foobar(1, bar = TRUE)` and `foobar(2, baz = TRUE)` . vctrs/tests/testthat/_snaps/partial-frame.md0000644000176200001440000000020114532371033020740 0ustar liggesusers# has ok print method Code pf Output partial_frame< x: integer {partial} y: double > vctrs/tests/testthat/_snaps/c.md0000644000176200001440000002044614532371022016451 0ustar liggesusers# common type failure uses error call and error arg (#1641, #1692) Code vec_c("x", 1, .error_call = call("foo"), .error_arg = "arg") Condition Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . --- Code vec_c("x", .ptype = integer(), .error_call = call("foo"), .error_arg = "arg") Condition Error in `foo()`: ! Can't convert `arg[[1]]` to . # common type failure uses positional errors Code (expect_error(vec_c(1, a = "x", 2))) Output Error in `vec_c()`: ! Can't combine `..1` and `a` . Code (expect_error(vec_c(1, a = "x", 2, .ptype = double(), .error_arg = "arg"))) Output Error in `vec_c()`: ! Can't convert `arg$a` to . Code (expect_error(vec_c(1, a = 2.5, .ptype = integer()))) Output Error in `vec_c()`: ! Can't convert from `a` to due to loss of precision. * Locations: 1 # vec_c() includes index in argument tag Code vec_c(df1, df2) Condition Error in `vec_c()`: ! Can't combine `..1$x$y$z` and `..2$x$y$z` . --- Code vec_c(df1, df1, df2) Condition Error in `vec_c()`: ! Can't combine `..1$x$y$z` and `..3$x$y$z` . --- Code vec_c(foo = df1, bar = df2) Condition Error in `vec_c()`: ! Can't combine `foo$x$y$z` and `bar$x$y$z` . # vec_c() can repair names quietly Code res_unique <- vec_c(x = TRUE, x = 0, .name_repair = "unique_quiet") res_universal <- vec_c(`if` = TRUE, `in` = 0, .name_repair = "universal_quiet") # vec_c() fails with complex foreign S3 classes Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type")) Output Error in `vec_c()`: ! Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(vec_c(x, y, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # vec_c() fails with complex foreign S4 classes Code joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(vec_c(joe, jane), class = "vctrs_error_incompatible_type")) Output Error in `vec_c()`: ! Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(vec_c(joe, jane, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # vec_c() fallback doesn't support `name_spec` or `ptype` Code (expect_error(with_c_foobar(vec_c(foobar(1), foobar(2), .name_spec = "{outer}_{inner}")), "name specification")) Output Error in `vec_c()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . Code (expect_error(with_c_foobar(vec_c(foobar(1), foobar(2), .ptype = "")), class = "vctrs_error_incompatible_type") ) Output Error in `vec_c()`: ! Can't convert `..1` to . Code (expect_error(with_c_foobar(vec_c(foobar(1), foobar(2), .error_call = call( "foo"), .name_spec = "{outer}_{inner}")))) Output Error in `foo()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . # can ignore names in `vec_c()` by providing a `zap()` name-spec (#232) Code (expect_error(vec_c(a = c(b = letters), b = 1, .name_spec = zap()), class = "vctrs_error_incompatible_type") ) Output Error in `vec_c()`: ! Can't combine `a` and `b` . # concatenation performs expected allocations Code ints <- rep(list(1L), 100) dbls <- rep(list(1), 100) # # `vec_c()` # Integers with_memory_prof(vec_c_list(ints)) Output [1] 1.96KB Code # Doubles with_memory_prof(vec_c_list(dbls)) Output [1] 2.35KB Code # Integers to integer with_memory_prof(vec_c_list(ints, ptype = int())) Output [1] 1.7KB Code # Doubles to integer with_memory_prof(vec_c_list(dbls, ptype = int())) Output [1] 1.7KB Code # # `list_unchop()` # Integers with_memory_prof(list_unchop(ints)) Output [1] 1.13KB Code # Doubles with_memory_prof(list_unchop(dbls)) Output [1] 1.52KB Code # Integers to integer with_memory_prof(list_unchop(ints, ptype = int())) Output [1] 896B Code # Doubles to integer with_memory_prof(list_unchop(dbls, ptype = int())) Output [1] 896B Code # # Concatenation with names # Named integers ints <- rep(list(set_names(1:3, letters[1:3])), 100) with_memory_prof(list_unchop(ints)) Output [1] 4.3KB Code # Named matrices mat <- matrix(1:4, 2, dimnames = list(c("foo", "bar"))) mats <- rep(list(mat), 100) with_memory_prof(list_unchop(mats)) Output [1] 5.52KB Code # Data frame with named columns df <- data_frame(x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c( "A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb")))) dfs <- rep(list(df), 100) with_memory_prof(list_unchop(dfs)) Output [1] 8.79KB Code # Data frame with rownames (non-repaired, non-recursive case) df <- data_frame(x = 1:2) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 6.02KB Code # Data frame with rownames (repaired, non-recursive case) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 12.2KB Code # Data frame with rownames (non-repaired, recursive case) (#1217) df <- data_frame(x = 1:2, y = data_frame(x = 1:2)) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 11.4KB Code # Data frame with rownames (repaired, recursive case) (#1217) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 23.7KB Code # list-ofs (#1496) make_list_of <- (function(n) { df <- tibble::tibble(x = new_list_of(vec_chop(1:n), ptype = integer())) vec_chop(df) }) with_memory_prof(list_unchop(make_list_of(1000))) Output [1] 103KB Code with_memory_prof(list_unchop(make_list_of(2000))) Output [1] 205KB Code with_memory_prof(list_unchop(make_list_of(4000))) Output [1] 408KB vctrs/tests/testthat/_snaps/set.md0000644000176200001440000001000614532371035017015 0ustar liggesusers# errors nicely if common type can't be taken Code vec_set_intersect(1, "x") Condition Error in `vec_set_intersect()`: ! Can't combine `x` and `y` . --- Code vec_set_difference(1, "x") Condition Error in `vec_set_difference()`: ! Can't combine `x` and `y` . --- Code vec_set_union(1, "x") Condition Error in `vec_set_union()`: ! Can't combine `x` and `y` . --- Code vec_set_symmetric_difference(1, "x") Condition Error in `vec_set_symmetric_difference()`: ! Can't combine `x` and `y` . # dots must be empty Code vec_set_intersect(1, 2, 3) Condition Error in `vec_set_intersect()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? --- Code vec_set_difference(1, 2, 3) Condition Error in `vec_set_difference()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? --- Code vec_set_union(1, 2, 3) Condition Error in `vec_set_union()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? --- Code vec_set_symmetric_difference(1, 2, 3) Condition Error in `vec_set_symmetric_difference()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? # `ptype` is respected Code vec_set_intersect(1, 1.5, ptype = integer()) Condition Error in `vec_set_intersect()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 --- Code vec_set_difference(1, 1.5, ptype = integer()) Condition Error in `vec_set_difference()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 --- Code vec_set_union(1, 1.5, ptype = integer()) Condition Error in `vec_set_union()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 --- Code vec_set_symmetric_difference(1, 1.5, ptype = integer()) Condition Error in `vec_set_symmetric_difference()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 # `x_arg` and `y_arg` can be adjusted Code vec_set_intersect(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_intersect()`: ! Can't combine `foo` and `bar` . --- Code vec_set_difference(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_difference()`: ! Can't combine `foo` and `bar` . --- Code vec_set_union(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_union()`: ! Can't combine `foo` and `bar` . --- Code vec_set_symmetric_difference(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_symmetric_difference()`: ! Can't combine `foo` and `bar` . --- Code vec_set_intersect(1, "2", x_arg = "", y_arg = "") Condition Error in `vec_set_intersect()`: ! Can't combine and . # `error_call` can be adjusted Code my_set_intersect() Condition Error in `my_set_intersect()`: ! Can't combine `x` and `y` . --- Code my_set_difference() Condition Error in `my_set_difference()`: ! Can't combine `x` and `y` . --- Code my_set_union() Condition Error in `my_set_union()`: ! Can't combine `x` and `y` . --- Code my_set_symmetric_difference() Condition Error in `my_set_symmetric_difference()`: ! Can't combine `x` and `y` . vctrs/tests/testthat/_snaps/type-data-frame.md0000644000176200001440000001364314532371046021216 0ustar liggesusers# data frames print nicely Code vec_ptype_show(mtcars) Output Prototype: data.frame< mpg : double cyl : double disp: double hp : double drat: double wt : double qsec: double vs : double am : double gear: double carb: double > --- Code vec_ptype_show(iris) Output Prototype: data.frame< Sepal.Length: double Sepal.Width : double Petal.Length: double Petal.Width : double Species : factor > # embedded data frames print nicely Code vec_ptype_show(df) Output Prototype: data.frame< x: integer a: data.frame< a: integer b: character > b: list_of c: list_of< data.frame< x: integer y: character > > > # `x` must be a list Code (expect_error(new_data_frame(1), "`x` must be a list")) Output Error: ! `x` must be a list # if supplied, `n` must be an integer of size 1 Code (expect_error(new_data_frame(n = c(1L, 2L)), "must be an integer of size 1")) Output Error in `new_data_frame()`: ! `n` must be an integer of size 1. Code (expect_error(new_data_frame(n = "x"), "must be an integer of size 1")) Output Error in `new_data_frame()`: ! `n` must be an integer of size 1. # if supplied, `n` can't be negative or missing (#1477) Code (expect_error(new_data_frame(n = -1L))) Output Error in `new_data_frame()`: ! `n` can't be negative. Code (expect_error(new_data_frame(n = NA_integer_))) Output Error in `new_data_frame()`: ! `n` can't be missing. # `class` must be a character vector Code (expect_error(new_data_frame(class = 1), "must be NULL or a character vector")) Output Error: ! `class` must be NULL or a character vector # data_frame() and df_list() report error context Code (expect_error(data_frame(a = 1, a = 1))) Output Error in `data_frame()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(data_frame(a = 1, a = 1, .error_call = call("foo")))) Output Error in `foo()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(data_frame(a = 1:2, b = int()))) Output Error in `data_frame()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(data_frame(a = 1:2, b = int(), .error_call = call("foo")))) Output Error in `foo()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(df_list(a = 1, a = 1))) Output Error in `df_list()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(df_list(a = 1, a = 1, .error_call = call("foo")))) Output Error in `foo()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(df_list(a = 1:2, b = int()))) Output Error in `df_list()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(df_list(a = 1:2, b = int(), .error_call = call("foo")))) Output Error in `foo()`: ! Can't recycle `a` (size 2) to match `b` (size 0). # input is tidy recycled Code expect_error(data_frame(1:2, 1:3), class = "vctrs_error_incompatible_size") # `.unpack` is validated Code df_list(.unpack = 1) Condition Error in `df_list()`: ! `.unpack` must be `TRUE` or `FALSE`. --- Code df_list(.unpack = c(TRUE, FALSE)) Condition Error in `df_list()`: ! `.unpack` must be `TRUE` or `FALSE`. # `.name_repair` can be quiet Code dfl_unique <- df_list(1, 2, .name_repair = "unique_quiet") dfl_universal <- df_list(`if` = 1, `in` = 2, .name_repair = "universal_quiet") df_unique <- data_frame(1, 2, .name_repair = "unique_quiet") df_universal <- data_frame(`if` = 1, `in` = 2, .name_repair = "universal_quiet") # data frame fallback handles column types (#999) Code local_error_call(call("my_function")) (expect_error(vec_ptype2(df1, df3), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `df1$x` and `df3$x` . Code (expect_error(vec_ptype2(df3, df1), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `df3$x` and `df1$x` . Code (expect_error(vec_cast(df2, df1), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't convert from `df2` > to > due to loss of precision. vctrs/tests/testthat/_snaps/type-vctr.md0000644000176200001440000000075414532371051020166 0ustar liggesusers# na.fail() works Code na.fail(x) Condition Error in `na.fail()`: ! missing values in object # default print and str methods are useful Code h Output [1] xxx xxx xxx xxx --- Code h[0] Output --- Code str(h) Output hidden [1:4] xxx, xxx, xxx, xxx # default print method shows names Code h Output A B C xxx xxx xxx vctrs/tests/testthat/_snaps/cast.md0000644000176200001440000000426014532371022017155 0ustar liggesusers# Casting to named argument mentions 'match type ' Code vec_cast(1, "", x_arg = "foo", to_arg = "bar") Condition Error: ! Can't convert `foo` to match type of `bar` . --- Code vec_cast(1, "", x_arg = "foo") Condition Error: ! Can't convert `foo` to . # cast errors create helpful messages (#57, #225) Code vec_cast(1.5, 10L) Condition Error: ! Can't convert from `1.5` to due to loss of precision. * Locations: 1 --- Code vec_cast(factor("foo"), 10) Condition Error: ! Can't convert `factor("foo")` > to . --- Code x <- tibble(a = tibble(b = 1.5)) y <- tibble(a = tibble(b = 10L)) vec_cast(x, y) Condition Error: ! Can't convert from `x$a$b` to `a$b` due to loss of precision. * Locations: 1 --- Code x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast(x, y) Condition Error: ! Can't convert `x$a$b` > to match type of `a$b` . --- Code x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast_common(x, y) Condition Error: ! Can't combine `..1$a$b` > and `..2$a$b` . # vec_cast() only attempts to fall back if `to` is a data frame (#1568) Code (expect_error(vec_cast(foobar(mtcars), 1), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `foobar(mtcars)` to . # can signal deprecation warnings for lossy casts Code (expect_warning(expect_true(lossy_cast()))) Output Warning: Coercion with lossy casts was deprecated in vctrs 0.2.0. i Please use `allow_lossy_cast()` instead. i We detected a lossy transformation from `x` to `to` . The result will contain lower-resolution values or missing values. To suppress this warning, wrap your code with `allow_lossy_cast()`. vctrs/tests/testthat/_snaps/type-misc.md0000644000176200001440000000031214532371047020136 0ustar liggesusers# `numeric_version` proxy can handle at most 8 components Code vec_proxy_equal(x) Condition Error in `vec_proxy_equal()`: ! `x` can't contain more than 8 version components. vctrs/tests/testthat/_snaps/slice.md0000644000176200001440000001031414532371040017317 0ustar liggesusers# vec_slice throws error with non-vector subscripts Code (expect_error(vec_slice(1:3, Sys.Date()), class = "vctrs_error_subscript_type")) Output Error in `vec_slice()`: ! Can't subset elements with `i`. x `i` must be logical, numeric, or character, not a object. Code (expect_error(vec_slice(1:3, matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type") ) Output Error in `vec_slice()`: ! Can't subset elements with `i`. x Subscript `i` must be a simple vector, not a matrix. # can't index beyond the end of a vector Code (expect_error(vec_slice(1:2, 3L), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. Code (expect_error(vec_slice(1:2, -3L), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't negate elements past the end. i Location 3 doesn't exist. i There are only 2 elements. # can slice with double indices Code (expect_error(vec_as_location(2^31, 3L), class = "vctrs_error_subscript_type")) Output Error: ! Can't subset elements with `2^31`. x Can't convert from `2^31` to due to loss of precision. # Unnamed vector with character subscript is caught Code vec_slice(1:3, letters[1]) Condition Error in `vec_slice()`: ! Can't use character names to index an unnamed vector. # Negative subscripts are checked Code vec_slice(1:3, -c(1L, NA)) Condition Error in `vec_slice()`: ! Can't subset elements with `i`. x Negative locations can't have missing values. i Subscript `i` has a missing value at location 2. --- Code vec_slice(1:3, c(-1L, 1L)) Condition Error in `vec_slice()`: ! Can't subset elements with `i`. x Negative and positive locations can't be mixed. i Subscript `i` has a positive value at location 2. # oob error messages are properly constructed Code vec_slice(c(bar = 1), "foo") Condition Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. --- Code vec_slice(letters, c(100, 1000)) Condition Error in `vec_slice()`: ! Can't subset elements past the end. i Locations 100 and 1000 don't exist. i There are only 26 elements. --- Code vec_slice(letters, c(1, 100:103, 2, 104:110)) Condition Error in `vec_slice()`: ! Can't subset elements past the end. i Locations 100, 101, 102, ..., 109, and 110 don't exist. i There are only 26 elements. --- Code vec_slice(set_names(letters), c("foo", "bar")) Condition Error in `vec_slice()`: ! Can't subset elements that don't exist. x Elements `foo` and `bar` don't exist. --- Code vec_slice(set_names(letters), toupper(letters)) Condition Error in `vec_slice()`: ! Can't subset elements that don't exist. x Elements `A`, `B`, `C`, `D`, `E`, etc. don't exist. # vec_init() validates `n` Code (expect_error(vec_init(1L, 1.5))) Output Error in `vec_init()`: ! `n` must be a whole number, not a fractional number. Code (expect_error(vec_init(1L, c(1, 2)))) Output Error in `vec_init()`: ! `n` must be a single number, not a double vector. Code (expect_error(vec_init(1L, -1L))) Output Error in `vec_init()`: ! `n` must be a positive number or zero. Code (expect_error(vec_init(1L, NA))) Output Error in `vec_init()`: ! `n` must be a single number, not `NA`. Code (expect_error(vec_init(1L, NA_integer_))) Output Error in `vec_init()`: ! `n` must be a single number, not an integer `NA`. vctrs/tests/testthat/_snaps/recycle.md0000644000176200001440000000531414532371033017654 0ustar liggesusers# vec_recycle_common() reports error context Code (expect_error(my_function(this_arg = 1:2, that_arg = int()))) Output Error in `my_function()`: ! Can't recycle `this_arg` (size 2) to match `that_arg` (size 0). Code (expect_error(my_function(this_arg = 1:2, that_arg = int(), .size = 2))) Output Error in `my_function()`: ! Can't recycle `that_arg` (size 0) to size 2. Code (expect_error(my_function(this_arg = 1:2, that_arg = int(), .arg = "my_arg"))) Output Error in `my_function()`: ! Can't recycle `my_arg$this_arg` (size 2) to match `my_arg$that_arg` (size 0). Code (expect_error(my_function(this_arg = 1:2, that_arg = int(), .size = 2, .arg = "my_arg")) ) Output Error in `my_function()`: ! Can't recycle `my_arg$that_arg` (size 0) to size 2. # vec_recycle(): incompatible lengths get error messages Code (expect_error(vec_recycle(x2, 1), class = "vctrs_error_recycle_incompatible_size") ) Output Error: ! Can't recycle input of size 2 to size 1. # vec_recycle_common(): incompatible lengths get error messages Code (expect_error(vec_recycle_common(1:2, 1:3), class = "vctrs_error_incompatible_size") ) Output Error: ! Can't recycle `..1` (size 2) to match `..2` (size 3). # recycling matrices respects incompatible sizes Code (expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size") ) Output Error: ! Can't recycle `..1` (size 2) to match `..2` (size 4). # recycling data frames respects incompatible sizes Code (expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size") ) Output Error: ! Can't recycle `..1` (size 2) to match `..2` (size 3). # recycling to size 1 has informative error Code (expect_error(vec_recycle(1:2, 1), class = "vctrs_error_recycle_incompatible_size") ) Output Error: ! Can't recycle input of size 2 to size 1. # incompatible recycling size has informative error Code vec_recycle(1:2, 4) Condition Error: ! Can't recycle input of size 2 to size 4. --- Code vec_recycle(1:2, 4, x_arg = "foo") Condition Error: ! Can't recycle `foo` (size 2) to size 4. vctrs/tests/testthat/_snaps/type-factor.md0000644000176200001440000000046114532371047020466 0ustar liggesusers# factor/character coercions are symmetric and unchanging Code print(mat) Output ordered<> factor<> character ordered<> "ordered<>" NA "character" factor<> NA "factor<>" "character" character "character" "character" "character" vctrs/tests/testthat/_snaps/type-date-time.md0000644000176200001440000000221014532371046021052 0ustar liggesusers# datetime coercions are symmetric and unchanging Code print(mat) Output date datetime datetime POSIXlt duration duration date "date" "datetime" "datetime" "datetime" NA NA datetime "datetime" "datetime" "datetime" "datetime" NA NA datetime "datetime" "datetime" "datetime" "datetime" NA NA POSIXlt "datetime" "datetime" "datetime" "datetime" NA NA duration NA NA NA NA "duration" "duration" duration NA NA NA NA "duration" "duration" vctrs/tests/testthat/_snaps/subscript-loc.md0000644000176200001440000011325514532371044021025 0ustar liggesusers# vec_as_location2() requires integer or character inputs Code (expect_error(vec_as_location2(TRUE, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `TRUE`. x `TRUE` must be numeric or character, not `TRUE`. Code (expect_error(vec_as_location2(mtcars, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `mtcars`. x `mtcars` must be numeric or character, not a object. Code (expect_error(vec_as_location2(env(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `env()`. x `env()` must be numeric or character, not an environment. Code (expect_error(vec_as_location2(foobar(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `foobar()`. x `foobar()` must be numeric or character, not a object. Code (expect_error(vec_as_location2(2.5, 10L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `2.5`. x Can't convert from `2.5` to due to loss of precision. Code (expect_error(vec_as_location2(Inf, 10L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `Inf`. x Can't convert from `Inf` to due to loss of precision. Code (expect_error(vec_as_location2(-Inf, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `-Inf`. x Can't convert from `-Inf` to due to loss of precision. Code # Idem with custom `arg` (expect_error(vec_as_location2(foobar(), 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x `foo` must be numeric or character, not a object. Code (expect_error(vec_as_location2(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Can't convert from `foo` to due to loss of precision. Code (expect_error(with_tibble_rows(vec_as_location2(TRUE)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't remove row with `foo(bar)`. x `foo(bar)` must be numeric or character, not `TRUE`. # vec_as_location() requires integer, character, or logical inputs Code (expect_error(vec_as_location(mtcars, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `mtcars`. x `mtcars` must be logical, numeric, or character, not a object. Code (expect_error(vec_as_location(env(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `env()`. x `env()` must be logical, numeric, or character, not an environment. Code (expect_error(vec_as_location(foobar(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `foobar()`. x `foobar()` must be logical, numeric, or character, not a object. Code (expect_error(vec_as_location(2.5, 10L), class = "vctrs_error_subscript_type")) Output Error: ! Can't subset elements with `2.5`. x Can't convert from `2.5` to due to loss of precision. Code (expect_error(vec_as_location(list(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `list()`. x `list()` must be logical, numeric, or character, not an empty list. Code (expect_error(vec_as_location(function() NULL, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `function() NULL`. x `function() NULL` must be logical, numeric, or character, not a function. Code (expect_error(vec_as_location(Sys.Date(), 3L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `Sys.Date()`. x `Sys.Date()` must be logical, numeric, or character, not a object. Code # Idem with custom `arg` (expect_error(vec_as_location(env(), 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x `foo` must be logical, numeric, or character, not an environment. Code (expect_error(vec_as_location(foobar(), 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x `foo` must be logical, numeric, or character, not a object. Code (expect_error(vec_as_location(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Can't convert from `foo` to due to loss of precision. # vec_as_location() and variants check for OOB elements (#1605) Code # Numeric indexing (expect_error(vec_as_location(10L, 2L), class = "vctrs_error_subscript_oob")) Output Error: ! Can't subset elements past the end. i Location 10 doesn't exist. i There are only 2 elements. Code (expect_error(vec_as_location(-10L, 2L), class = "vctrs_error_subscript_oob")) Output Error: ! Can't negate elements past the end. i Location 10 doesn't exist. i There are only 2 elements. Code (expect_error(vec_as_location2(10L, 2L), class = "vctrs_error_subscript_oob")) Output Error: ! Can't extract elements past the end. i Location 10 doesn't exist. i There are only 2 elements. Code # Character indexing (expect_error(vec_as_location("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(vec_as_location2("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't extract elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(vec_as_location2("foo", 1L, names = "bar", call = call("baz")), class = "vctrs_error_subscript_oob")) Output Error in `baz()`: ! Can't extract elements that don't exist. x Element `foo` doesn't exist. # vec_as_location2() requires length 1 inputs Code (expect_error(vec_as_location2(1:2, 2L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `1:2`. x Subscript `1:2` must be size 1, not 2. Code (expect_error(vec_as_location2(c("foo", "bar"), 2L, c("foo", "bar")), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `c("foo", "bar")`. x Subscript `c("foo", "bar")` must be size 1, not 2. Code # Idem with custom `arg` (expect_error(vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be size 1, not 2. Code (expect_error(vec_as_location2(mtcars, 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x `foo` must be numeric or character, not a object. Code (expect_error(vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be size 1, not 2. # vec_as_location2() requires positive integers Code (expect_error(vec_as_location2(0, 2L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `0`. x Subscript `0` must be a positive location, not 0. Code (expect_error(vec_as_location2(-1, 2L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `-1`. x Subscript `-1` must be a positive location, not -1. Code # Idem with custom `arg` (expect_error(vec_as_location2(0, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a positive location, not 0. # vec_as_location2() fails with NA Code (expect_error(vec_as_location2(na_int, 2L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `na_int`. x Subscript `na_int` must be a location, not an integer `NA`. Code (expect_error(vec_as_location2(na_chr, 1L, names = "foo"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `na_chr`. x Subscript `na_chr` must be a location, not a character `NA`. Code # Idem with custom `arg` (expect_error(vec_as_location2(na_int, 2L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a location, not an integer `NA`. # num_as_location() optionally forbids negative indices Code (expect_error(num_as_location(dbl(1, -1), 2L, negative = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `dbl(1, -1)`. x Subscript `dbl(1, -1)` can't contain negative locations. # num_as_location() optionally forbids zero indices Code (expect_error(num_as_location(0L, 1L, zero = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `0L`. x Subscript `0L` can't contain `0` values. i It has a `0` value at location 1. Code (expect_error(num_as_location(c(0, 0, 0, 0, 0, 0), 1, zero = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `c(0, 0, 0, 0, 0, 0)`. x Subscript `c(0, 0, 0, 0, 0, 0)` can't contain `0` values. i It has 6 `0` values at locations 1, 2, 3, 4, 5, etc. # vec_as_location() checks for mix of negative and missing locations Code (expect_error(vec_as_location(-c(1L, NA), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `-c(1L, NA)`. x Negative locations can't have missing values. i Subscript `-c(1L, NA)` has a missing value at location 2. Code (expect_error(vec_as_location(-c(1L, rep(NA, 10)), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `-c(1L, rep(NA, 10))`. x Negative locations can't have missing values. i Subscript `-c(1L, rep(NA, 10))` has 10 missing values at locations 2, 3, 4, 5, 6, etc. # vec_as_location() checks for mix of negative and positive locations Code (expect_error(vec_as_location(c(-1L, 1L), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `c(-1L, 1L)`. x Negative and positive locations can't be mixed. i Subscript `c(-1L, 1L)` has a positive value at location 2. Code (expect_error(vec_as_location(c(-1L, rep(1L, 10)), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `c(-1L, rep(1L, 10))`. x Negative and positive locations can't be mixed. i Subscript `c(-1L, rep(1L, 10))` has 10 positive values at locations 2, 3, 4, 5, 6, etc. # logical subscripts must match size of indexed vector Code (expect_error(vec_as_location(c(TRUE, FALSE), 3), class = "vctrs_error_subscript_size") ) Output Error: ! Can't subset elements with `c(TRUE, FALSE)`. x Logical subscript `c(TRUE, FALSE)` must be size 1 or 3, not 2. # character subscripts require named vectors Code (expect_error(vec_as_location(letters[1], 3), "unnamed vector")) Output Error in `vec_as_location()`: ! Can't use character names to index an unnamed vector. # can optionally extend beyond the end Code (expect_error(num_as_location(3, 1, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript `3` contains non-consecutive location 3. Code (expect_error(num_as_location(c(1, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript `c(1, 3)` contains non-consecutive location 3. Code (expect_error(num_as_location(c(1:5, 7), 3, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript `c(1:5, 7)` contains non-consecutive locations 4 and 7. Code (expect_error(num_as_location(c(1:5, 7, 1), 3, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript `c(1:5, 7, 1)` contains non-consecutive locations 4 and 7. Code (expect_error(class = "vctrs_error_subscript_oob", num_as_location(c(1:5, 7, 1, 10), 3, oob = "extend"))) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript `c(1:5, 7, 1, 10)` contains non-consecutive locations 4, 7, and 10. # num_as_location() errors when inverting oob negatives unless `oob = 'remove'` (#1630) Code num_as_location(-4, 3, oob = "error", negative = "invert") Condition Error: ! Can't negate elements past the end. i Location 4 doesn't exist. i There are only 3 elements. --- Code num_as_location(c(-4, 4, 5), 3, oob = "extend", negative = "invert") Condition Error: ! Can't negate elements past the end. i Location 4 doesn't exist. i There are only 3 elements. # num_as_location() errors on disallowed zeros when inverting negatives (#1612) Code num_as_location(c(0, -1), n = 2L, negative = "invert", zero = "error") Condition Error: ! Can't subset elements with `c(0, -1)`. x Subscript `c(0, -1)` can't contain `0` values. i It has a `0` value at location 1. --- Code num_as_location(c(-1, 0), n = 2L, negative = "invert", zero = "error") Condition Error: ! Can't subset elements with `c(-1, 0)`. x Subscript `c(-1, 0)` can't contain `0` values. i It has a `0` value at location 2. # num_as_location() with `oob = 'extend'` doesn't allow ignored oob negative values (#1614) Code num_as_location(-6L, 5L, oob = "extend", negative = "ignore") Condition Error: ! Can't negate elements past the end. i Location 6 doesn't exist. i There are only 5 elements. --- Code num_as_location(c(-7L, 6L), 5L, oob = "extend", negative = "ignore") Condition Error: ! Can't negate elements past the end. i Location 7 doesn't exist. i There are only 5 elements. --- Code num_as_location(c(-7L, NA), 5L, oob = "extend", negative = "ignore") Condition Error: ! Can't negate elements past the end. i Location 7 doesn't exist. i There are only 5 elements. # num_as_location() with `oob = 'error'` reports negative and positive oob values Code num_as_location(c(-6L, 7L), n = 5L, oob = "error", negative = "ignore") Condition Error: ! Can't subset elements past the end. i Locations 6 and 7 don't exist. i There are only 5 elements. # missing values are supported in error formatters Code (expect_error(num_as_location(c(1, NA, 2, 3), 1), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements past the end. i Locations 2 and 3 don't exist. i There is only 1 element. Code (expect_error(num_as_location(c(1, NA, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript `c(1, NA, 3)` contains non-consecutive location 3. # can disallow missing values Code (expect_error(vec_as_location(c(1, NA), 2, missing = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript can't contain missing values. x It has a missing value at location 2. Code (expect_error(vec_as_location(c(1, NA, 2, NA), 2, missing = "error", arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 2 and 4. Code (expect_error(with_tibble_cols(vec_as_location(c(1, NA, 2, NA), 2, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has missing values at locations 2 and 4. Code (expect_error(with_tibble_cols(vec_as_location(NA, 1, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 1. Code (expect_error(with_tibble_cols(vec_as_location(NA, 3, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 1. Code (expect_error(with_tibble_cols(vec_as_location(c(TRUE, NA, FALSE), 3, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 2. Code (expect_error(with_tibble_cols(vec_as_location(NA_character_, 2, missing = "error", names = c("x", "y"))), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 1. # can alter logical missing value handling (#1595) Code vec_as_location(x, n = 4L, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 2 and 4. --- Code vec_as_location(x, n = 2L, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has a missing value at location 1. # can alter character missing value handling (#1595) Code vec_as_location(x, n = 2L, names = names, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 1 and 3. # can alter integer missing value handling (#1595) Code vec_as_location(x, n = 4L, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 1 and 3. # can alter negative integer missing value handling (#1595) Code num_as_location(x, n = 4L, missing = "propagate", negative = "invert") Condition Error: ! Can't subset elements with `x`. x Negative locations can't have missing values. i Subscript `x` has 2 missing values at locations 2 and 3. --- Code num_as_location(x, n = 4L, missing = "error", negative = "invert") Condition Error: ! Can't subset elements with `x`. x Negative locations can't have missing values. i Subscript `x` has 2 missing values at locations 2 and 3. # empty string character indices never match empty string names (#1489) Code vec_as_location("", n = 2L, names = names) Condition Error: ! Can't subset elements. x Subscript can't contain the empty string. x It has an empty string at location 1. --- Code vec_as_location(c("", "y", ""), n = 2L, names = names) Condition Error: ! Can't subset elements. x Subscript can't contain the empty string. x It has an empty string at locations 1 and 3. # can customise subscript type errors Code # With custom `arg` (expect_error(num_as_location(-1, 2, negative = "error", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Subscript `foo` can't contain negative locations. Code (expect_error(num_as_location2(-1, 2, negative = "error", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a positive location, not -1. Code (expect_error(vec_as_location2(0, 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a positive location, not 0. Code (expect_error(vec_as_location2(na_dbl, 2, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a location, not an integer `NA`. Code (expect_error(vec_as_location2(c(1, 2), 2, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be size 1, not 2. Code (expect_error(vec_as_location(c(TRUE, FALSE), 3, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_size")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Logical subscript `foo` must be size 1 or 3, not 2. Code (expect_error(vec_as_location(c(-1, NA), 3, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Negative locations can't have missing values. i Subscript `foo` has a missing value at location 2. Code (expect_error(vec_as_location(c(-1, 1), 3, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Negative and positive locations can't be mixed. i Subscript `foo` has a positive value at location 2. Code (expect_error(num_as_location(c(1, 4), 2, oob = "extend", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_oob")) Output Error in `my_function()`: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 2. x Subscript `foo` contains non-consecutive location 4. Code (expect_error(num_as_location(0, 1, zero = "error", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Subscript `foo` can't contain `0` values. i It has a `0` value at location 1. Code # With tibble columns (expect_error(with_tibble_cols(num_as_location(-1, 2, negative = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain negative locations. Code (expect_error(with_tibble_cols(num_as_location2(-1, 2, negative = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be a positive location, not -1. Code (expect_error(with_tibble_cols(vec_as_location2(0, 2)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be a positive location, not 0. Code (expect_error(with_tibble_cols(vec_as_location2(na_dbl, 2)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be a location, not an integer `NA`. Code (expect_error(with_tibble_cols(vec_as_location2(c(1, 2), 2)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be size 1, not 2. Code (expect_error(with_tibble_cols(vec_as_location(c(TRUE, FALSE), 3)), class = "vctrs_error_subscript_size") ) Output Error: ! Can't rename columns with `foo(bar)`. x Logical subscript `foo(bar)` must be size 1 or 3, not 2. Code (expect_error(with_tibble_cols(vec_as_location(c(-1, NA), 3)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x Negative locations can't have missing values. i Subscript `foo(bar)` has a missing value at location 2. Code (expect_error(with_tibble_cols(vec_as_location(c(-1, 1), 3)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x Negative and positive locations can't be mixed. i Subscript `foo(bar)` has a positive value at location 2. Code (expect_error(with_tibble_cols(num_as_location(c(1, 4), 2, oob = "extend")), class = "vctrs_error_subscript_oob")) Output Error: ! Can't rename columns beyond the end with non-consecutive locations. i Input has size 2. x Subscript `foo(bar)` contains non-consecutive location 4. Code (expect_error(with_tibble_cols(num_as_location(0, 1, zero = "error")), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain `0` values. i It has a `0` value at location 1. # can customise OOB errors Code (expect_error(vec_slice(set_names(letters), "foo"), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code # With custom `arg` (expect_error(vec_as_location(30, length(letters), arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_oob")) Output Error in `my_function()`: ! Can't subset elements past the end. i Location 30 doesn't exist. i There are only 26 elements. Code (expect_error(vec_as_location("foo", NULL, letters, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_oob")) Output Error in `my_function()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code # With tibble columns (expect_error(with_tibble_cols(vec_slice(set_names(letters), "foo")), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't rename columns that don't exist. x Column `foo` doesn't exist. Code (expect_error(with_tibble_cols(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't rename columns that don't exist. i Location 30 doesn't exist. i There are only 26 columns. Code (expect_error(with_tibble_cols(vec_slice(set_names(letters), -30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't rename columns that don't exist. i Location 30 doesn't exist. i There are only 26 columns. Code # With tibble rows (expect_error(with_tibble_rows(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't remove rows that don't exist. x Rows `foo` and `bar` don't exist. Code (expect_error(with_tibble_rows(vec_slice(set_names(letters), 1:30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't remove rows past the end. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 rows. Code (expect_error(with_tibble_rows(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't remove rows past the end. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 rows. Code # With tidyselect select (expect_error(with_tidyselect_select(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't select columns that don't exist. x Columns `foo` and `bar` don't exist. Code (expect_error(with_tidyselect_select(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't select columns past the end. i Location 30 doesn't exist. i There are only 26 columns. Code (expect_error(with_tidyselect_select(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't select columns past the end. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 columns. Code # With tidyselect relocate (expect_error(with_tidyselect_relocate(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't relocate columns that don't exist. x Columns `foo` and `bar` don't exist. Code (expect_error(with_tidyselect_relocate(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't relocate columns that don't exist. i Location 30 doesn't exist. i There are only 26 columns. Code (expect_error(with_tidyselect_relocate(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't relocate columns that don't exist. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 columns. # vec_as_location() checks dimensionality Code (expect_error(vec_as_location(matrix(TRUE, nrow = 1), 3L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `matrix(TRUE, nrow = 1)`. x Subscript `matrix(TRUE, nrow = 1)` must be a simple vector, not a matrix. Code (expect_error(vec_as_location(array(TRUE, dim = c(1, 1, 1)), 3L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `array(TRUE, dim = c(1, 1, 1))`. x Subscript `array(TRUE, dim = c(1, 1, 1))` must be a simple vector, not an array. Code (expect_error(with_tibble_rows(vec_as_location(matrix(TRUE, nrow = 1), 3L)), class = "vctrs_error_subscript_type")) Output Error: ! Can't remove rows with `foo(bar)`. x Subscript `foo(bar)` must be a simple vector, not a matrix. # vec_as_location() UI Code vec_as_location(1, 1L, missing = "bogus") Condition Error in `vec_as_location()`: ! `missing` must be one of "propagate", "remove", or "error". # num_as_location() UI Code num_as_location(1, 1L, missing = "bogus") Condition Error in `num_as_location()`: ! `missing` must be one of "propagate", "remove", or "error". --- Code num_as_location(1, 1L, negative = "bogus") Condition Error in `num_as_location()`: ! `negative` must be one of "invert", "error", or "ignore". --- Code num_as_location(1, 1L, oob = "bogus") Condition Error in `num_as_location()`: ! `oob` must be one of "error", "remove", or "extend". --- Code num_as_location(1, 1L, zero = "bogus") Condition Error in `num_as_location()`: ! `zero` must be one of "remove", "error", or "ignore". # vec_as_location2() UI Code vec_as_location2(1, 1L, missing = "bogus") Condition Error in `vec_as_location2_result()`: ! `missing` must be one of "error" or "propagate", not "bogus". vctrs/tests/testthat/_snaps/type-unspecified.md0000644000176200001440000000013414532371050021475 0ustar liggesusers# has useful print method Code unspecified() Output [0] vctrs/tests/testthat/_snaps/assert.md0000644000176200001440000002353414532371017017535 0ustar liggesusers# obj_check_vector() errors on scalars Code obj_check_vector(quote(foo)) Condition Error: ! `quote(foo)` must be a vector, not a symbol. --- Code obj_check_vector(foobar()) Condition Error: ! `foobar()` must be a vector, not a object. # obj_check_vector() error respects `arg` and `call` Code my_check_vector(foobar()) Condition Error in `my_check_vector()`: ! `foo` must be a vector, not a object. # assertion failures are explained Code vec_assert(lgl(), chr()) Condition Error: ! `lgl()` must be a vector with type . Instead, it has type . --- Code vec_assert(lgl(), factor()) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(lgl(), factor(levels = "foo")) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(factor(levels = "bar"), factor(levels = "foo")) Condition Error: ! `factor(levels = "bar")` must be a vector with type >. Instead, it has type >. --- Code vec_assert(factor(), chr()) Condition Error: ! `factor()` must be a vector with type . Instead, it has type >. --- Code vec_assert(lgl(), data.frame()) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(lgl(), data.frame(x = 1)) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(lgl(), data.frame(x = 1, y = 2)) Condition Error: ! `lgl()` must be a vector with type: > Instead, it has type . --- Code vec_assert(data.frame(), chr()) Condition Error: ! `data.frame()` must be a vector with type . Instead, it has type >. --- Code vec_assert(data.frame(x = 1), chr()) Condition Error: ! `data.frame(x = 1)` must be a vector with type . Instead, it has type >. --- Code vec_assert(data.frame(x = 1), data.frame(x = "foo")) Condition Error: ! `data.frame(x = 1)` must be a vector with type >. Instead, it has type >. --- Code vec_assert(data.frame(x = 1), data.frame(x = "foo", y = 2)) Condition Error: ! `data.frame(x = 1)` must be a vector with type: > Instead, it has type >. --- Code vec_assert(data.frame(x = 1, y = 2), chr()) Condition Error: ! `data.frame(x = 1, y = 2)` must be a vector with type . Instead, it has type: > --- Code vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo")) Condition Error: ! `data.frame(x = 1, y = 2)` must be a vector with type >. Instead, it has type: > --- Code vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo", y = 2)) Condition Error: ! `data.frame(x = 1, y = 2)` must be a vector with type: > Instead, it has type: > # vec_assert() validates `size` (#1470) Code (expect_error(vec_assert(1, size = c(2, 3)))) Output Error in `vec_assert()`: ! `size` must be length 1, not length 2. Code (expect_error(vec_assert(1, size = 1.5))) Output Error in `vec_assert()`: ! Can't convert from `size` to due to loss of precision. * Locations: 1 Code (expect_error(vec_assert(1, size = "x"))) Output Error in `vec_assert()`: ! Can't convert `size` to . # vec_check_size() errors on the wrong size Code vec_check_size(1:5, size = 1L) Condition Error: ! `1:5` must have size 1, not size 5. --- Code vec_check_size(1:5, size = 10L) Condition Error: ! `1:5` must have size 10, not size 5. # vec_check_size() errors on scalars Code vec_check_size(quote(foo), size = 1L) Condition Error: ! `quote(foo)` must be a vector, not a symbol. --- Code vec_check_size(foobar(), size = 1L) Condition Error: ! `foobar()` must be a vector, not a object. # vec_check_size() error respects `arg` and `call` Code my_check_size(1L, size = 5L) Condition Error in `my_check_size()`: ! `foo` must have size 5, not size 1. --- Code my_check_size(foobar(), size = 5L) Condition Error in `my_check_size()`: ! `foo` must be a vector, not a object. # vec_check_size() validates `size` Code vec_check_size(1, size = "x") Condition Error in `vec_check_size()`: ! `size` must be a scalar integer or double. --- Code vec_check_size(1, size = c(1L, 2L)) Condition Error in `vec_check_size()`: ! `size` must be a scalar integer or double. --- Code vec_check_size(1, size = 1.5) Condition Error in `vec_check_size()`: ! `size` must be a whole number, not a decimal number. # list_all_vectors() works Code (expect_error(list_all_vectors(env()))) Output Error in `list_all_vectors()`: ! `x` must be a list, not an environment. # obj_check_list() works Code my_function <- (function(my_arg) obj_check_list(my_arg)) (expect_error(my_function(env()))) Output Error in `my_function()`: ! `my_arg` must be a list, not an environment. # obj_check_list() uses a special error when `arg` is the empty string (#1604) Code obj_check_list(1, arg = "") Condition Error: ! Input must be a list, not the number 1. # obj_check_list() and list_check_all_vectors() work Code my_function <- (function(my_arg) list_check_all_vectors(my_arg)) (expect_error(my_function(env()))) Output Error in `list_check_all_vectors()`: ! `x` must be a list, not an environment. Code (expect_error(my_function(list(1, env())))) Output Error in `my_function()`: ! `my_arg[[2]]` must be a vector, not an environment. Code (expect_error(my_function(list(1, name = env())))) Output Error in `my_function()`: ! `my_arg$name` must be a vector, not an environment. Code (expect_error(my_function(list(1, foo = env())))) Output Error in `my_function()`: ! `my_arg$foo` must be a vector, not an environment. # list_check_all_size() works Code my_function <- (function(my_arg, size) list_check_all_size(my_arg, size)) (expect_error(list_check_all_size(list(1:2, 1:3), 2))) Output Error: ! `list(1:2, 1:3)[[2]]` must have size 2, not size 3. Code (expect_error(my_function(list(1:2, 1:3), 2))) Output Error in `my_function()`: ! `my_arg[[2]]` must have size 2, not size 3. Code (expect_error(my_function(list(NULL, 1:2), 2))) Output Error in `my_function()`: ! `my_arg[[1]]` must have size 2, not size 0. # list_all_size() and list_check_all_size() error on scalars Code (expect_error(list_all_size(x, 2))) Output Error in `list_all_size()`: ! `x[[1]]` must be a vector, not an environment. Code my_function <- (function(my_arg, size) list_check_all_size(my_arg, size)) (expect_error(my_function(x, 2))) Output Error in `my_function()`: ! `my_arg[[1]]` must be a vector, not an environment. # list_all_size() and list_check_all_size() throw error using internal call on non-list input Code (expect_error(list_all_size(1, 2))) Output Error in `list_all_size()`: ! `x` must be a list, not the number 1. Code (expect_error(list_check_all_size(1, 2, arg = "arg", call = call("foo")))) Output Error in `list_check_all_size()`: ! `x` must be a list, not the number 1. # list_all_size() and list_check_all_size() validate `size` Code (expect_error(list_all_size(list(), size = "x"))) Output Error in `list_all_size()`: ! `size` must be a scalar integer or double. Code (expect_error(list_check_all_size(list(), size = "x"))) Output Error in `list_check_all_size()`: ! `size` must be a scalar integer or double. # informative messages when 1d array doesn't match vector Code (expect_error(vec_assert(x, int()))) Output Error: ! `x` must be a vector with type . Instead, it has type . vctrs/tests/testthat/_snaps/slice-interleave.md0000644000176200001440000000166714532371037021474 0ustar liggesusers# allows for name repair Code vec_interleave(x, x, .name_repair = "unique") Message New names: * `x` -> `x...1` * `x` -> `x...2` Output x...1 x...2 1 1 # can repair names quietly Code res_unique <- vec_interleave(c(x = 1), c(x = 2), .name_repair = "unique_quiet") res_universal <- vec_interleave(c(`if` = 1), c(`in` = 2), .name_repair = "universal_quiet") # uses recycling errors Code vec_interleave(1:2, 1:3) Condition Error in `vec_interleave()`: ! Can't recycle `..1` (size 2) to match `..2` (size 3). # errors if the result would be a long vector Code vec_interleave_indices(3L, 1000000000L) Condition Error in `vec_interleave_indices()`: ! Long vectors are not yet supported in `vec_interleave()`. Result from interleaving would have size 3000000000, which is larger than the maximum supported size of 2^31 - 1. vctrs/tests/testthat/_snaps/error-call.md0000644000176200001440000002467514532371024020303 0ustar liggesusers# failing common type reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't combine `2` and `chr()` . # failing cast reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert `2` to . --- Code (expect_error(my_function(df1, df2))) Output Error in `my_function()`: ! Can't convert `lhs$y` to match type of `y` . --- Code (expect_error(my_function(df1, df2))) Output Error in `my_function()`: ! Can't convert `lhs$y` to match type of `y` . # lossy cast reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `2` to due to loss of precision. * Locations: 1 # failing common size reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't recycle input of size 2 to size 10. --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't recycle `..1` (size 2) to match `..2` (size 10). # unsupported error reports correct error call Code (expect_error(my_function())) Output Error in `dim<-`: ! `dim<-.vctrs_vctr()` not supported. --- Code (expect_error(my_function())) Output Error in `median()`: ! `median.vctrs_vctr()` not implemented. # scalar error reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! `foobar()` must be a vector, not a object. # size error reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! `1:2` must have size 1, not size 2. # bare casts report correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `1.5` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `1.5` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `2L` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert `matrix(TRUE)` to . Can't decrease dimensionality from 2 to 1. # base S3 casts report correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `"a"` to > due to loss of generality. * Locations: 1 # names validation reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Names can't be empty. x Empty name found at location 2. --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. i Use argument `repair` to specify repair strategy. --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..." at location 1. # subscript validation reports correct error calls Code (expect_error(my_function())) Output Error in `vctrs::num_as_location()`: ! `missing` must be one of "propagate", "remove", or "error". --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't subset elements past the end. i Location 10 doesn't exist. i There are only 2 elements. --- Code (expect_error(my_function(1.5))) Output Error in `my_function()`: ! Can't subset elements with `my_arg`. x Can't convert from `my_arg` to due to loss of precision. --- Code (expect_error(my_function(1.5))) Output Error in `my_function()`: ! Can't subset elements. x Can't convert from to due to loss of precision. --- Code (expect_error(my_function(list()))) Output Error in `my_function()`: ! Can't subset elements with `my_arg`. x `my_arg` must be logical, numeric, or character, not an empty list. --- Code (expect_error(my_function(1.5))) Output Error in `vec_as_location()`: ! Can't convert from `n` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function(NA))) Output Error in `my_function()`: ! Can't subset elements. x Subscript can't contain missing values. x It has a missing value at location 1. # `vec_ptype()` reports correct error call Code (expect_error(my_function(env()))) Output Error in `my_function()`: ! Input must be a vector, not an environment. Code (expect_error(my_function(foobar(list())))) Output Error in `my_function()`: ! Input must be a vector, not a object. # `vec_slice()` uses `error_call` Code (expect_error(my_function(env(), 1))) Output Error in `my_function()`: ! `x` must be a vector, not an environment. Code (expect_error(my_function(1, 2))) Output Error in `my_function()`: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. # vec_slice() reports self in error context Code (expect_error(vec_slice(foobar(list()), 1))) Output Error in `vec_slice()`: ! `x` must be a vector, not a object. Code (expect_error(vec_slice(list(), env()))) Output Error in `vec_slice()`: ! Can't subset elements with `i`. x `i` must be logical, numeric, or character, not an environment. # list_sizes() reports error context Code (expect_error(list_sizes(foobar(list())))) Output Error in `list_sizes()`: ! `x` must be a list, not a object. Code (expect_error(list_sizes(list(env())))) Output Error in `list_sizes()`: ! `x[[1]]` must be a vector, not an environment. Code (expect_error(list_sizes(list(1, 2, env())))) Output Error in `list_sizes()`: ! `x[[3]]` must be a vector, not an environment. Code (expect_error(list_sizes(list(1, 2, foo = env())))) Output Error in `list_sizes()`: ! `x$foo` must be a vector, not an environment. # vec_size() reports error context Code (expect_error(vec_size(env()))) Output Error in `vec_size()`: ! `x` must be a vector, not an environment. # vec_cast_common() reports error context Code (expect_error(my_function(my_arg = 1.5, .to = int()))) Output Error in `my_function()`: ! Can't convert from `my_arg` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function(my_arg = 1.5, .to = int(), .arg = "my_arg"))) Output Error in `my_function()`: ! Can't convert from `my_arg$my_arg` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg$this_arg` and `my_arg$that_arg` . --- Code (expect_error(my_function(1, "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg[[1]]` and `my_arg[[2]]` . --- Code (expect_error(my_function(this_arg = x, that_arg = y))) Output Error in `my_function()`: ! Can't combine `this_arg$x` and `that_arg$x` . # vec_ptype_common() reports error context Code (expect_error(my_function(this_arg = 1, that_arg = "foo"))) Output Error in `my_function()`: ! Can't combine `this_arg` and `that_arg` . --- Code (expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg$this_arg` and `my_arg$that_arg` . --- Code (expect_error(my_function(1, "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg[[1]]` and `my_arg[[2]]` . vctrs/tests/testthat/_snaps/ptype-abbr-full.md0000644000176200001440000000045314532371033021232 0ustar liggesusers# data.frames have good default abbr and full methods Code df <- foobar(data.frame(x = 1, y = "", z = TRUE)) vec_ptype_abbr(df) Output [1] "vctrs_fb[,3]" Code vec_ptype_full(df) Output [1] "vctrs_foobar<\n x: double\n y: character\n z: logical\n>" vctrs/tests/testthat/_snaps/subscript.md0000644000176200001440000001111514532371044020242 0ustar liggesusers# can customise subscript errors Code (expect_error(with_tibble_cols(vec_as_subscript(env())), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x `foo(bar)` must be logical, numeric, or character, not an environment. --- Code (expect_error(with_dm_tables(vec_as_subscript(env())), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract tables with `foo(bar)`. x `foo(bar)` must be logical, numeric, or character, not an environment. # vec_as_subscript() checks dimensionality Code (expect_error(vec_as_subscript(matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be a simple vector, not a matrix. Code (expect_error(vec_as_subscript(array(TRUE, dim = c(1, 1, 1))), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be a simple vector, not an array. Code (expect_error(with_tibble_rows(vec_as_subscript(matrix(TRUE, nrow = 1))), class = "vctrs_error_subscript_type")) Output Error: ! Can't remove rows with `foo(bar)`. x Subscript `foo(bar)` must be a simple vector, not a matrix. # vec_as_subscript() forbids subscript types Code vec_as_subscript(1L, logical = "error", numeric = "error") Condition Error: ! Can't subset elements. x Subscript must be character, not the number 1. --- Code vec_as_subscript("foo", logical = "error", character = "error") Condition Error: ! Can't subset elements. x Subscript must be numeric, not the string "foo". --- Code vec_as_subscript(TRUE, logical = "error") Condition Error: ! Can't subset elements. x Subscript must be numeric or character, not `TRUE`. --- Code vec_as_subscript("foo", character = "error") Condition Error: ! Can't subset elements. x Subscript must be logical or numeric, not the string "foo". --- Code vec_as_subscript(NULL, numeric = "error") Condition Error: ! Can't subset elements. x Subscript must be logical or character, not `NULL`. --- Code vec_as_subscript(quote(foo), character = "error") Condition Error: ! Can't subset elements. x Subscript must be logical or numeric, not a symbol. # vec_as_subscript2() forbids subscript types Code vec_as_subscript2(1L, numeric = "error") Condition Error: ! Can't extract element. x Subscript must be character, not the number 1. --- Code vec_as_subscript2("foo", character = "error") Condition Error: ! Can't extract element. x Subscript must be numeric, not the string "foo". --- Code vec_as_subscript2(TRUE) Condition Error: ! Can't extract element. x Subscript must be numeric or character, not `TRUE`. # vec_as_subscript2() retains the call when throwing vec_as_subscript() errors (#1605) Code vec_as_subscript2(1L, numeric = "error", call = call("foo")) Condition Error in `foo()`: ! Can't extract element. x Subscript must be character, not the number 1. --- Code vec_as_subscript2(1.5, call = call("foo")) Condition Error in `foo()`: ! Can't extract element. x Can't convert from to due to loss of precision. # vec_as_subscript2() retains the call when erroring on logical input (#1605) Code vec_as_subscript2(TRUE, call = call("foo")) Condition Error in `foo()`: ! Can't extract element. x Subscript must be numeric or character, not `TRUE`. # `logical = 'cast'` is deprecated Code vec_as_subscript2(TRUE, logical = "cast") Condition Error in `vec_as_subscript2()`: ! `vctrs::vec_as_subscript2(logical = 'cast')` is deprecated. --- Code vec_as_subscript2(TRUE, logical = "error") Condition Error: ! Can't extract element. x Subscript must be numeric or character, not `TRUE`. # lossy cast errors for scalar subscripts work (#1606) Code vec_as_subscript2(1.5) Condition Error: ! Can't extract element. x Can't convert from to due to loss of precision. vctrs/tests/testthat/_snaps/type-table.md0000644000176200001440000000043214532371050020267 0ustar liggesusers# cannot decrease dimensionality Code (expect_error(vec_cast(x, y), class = "vctrs_error_incompatible_type")) Output Error: ! Can't convert `x` to . Can't decrease dimensionality from 3 to 2. vctrs/tests/testthat/_snaps/equal.md0000644000176200001440000000103314532371023017326 0ustar liggesusers# `na_equal` is validated Code vec_equal(1, 1, na_equal = 1) Condition Error in `vec_equal()`: ! `na_equal` must be `TRUE` or `FALSE`, not the number 1. --- Code vec_equal(1, 1, na_equal = c(TRUE, FALSE)) Condition Error in `vec_equal()`: ! `na_equal` must be `TRUE` or `FALSE`, not a logical vector. # can't supply NA as `na_equal` Code vec_equal(NA, NA, na_equal = NA) Condition Error in `vec_equal()`: ! `na_equal` must be `TRUE` or `FALSE`, not `NA`. vctrs/tests/testthat/_snaps/hash.md0000644000176200001440000001042214532371025017146 0ustar liggesusers# hashes are consistent from run to run Code hash Output $lgl [1] 70 a2 85 ef b9 79 37 9e 59 df 73 0b $int [1] 70 a2 85 ef bf 3c 2c cf e0 2d 28 24 3e 2c d4 c2 86 cd 44 6a c1 c6 22 fb 7d [26] 28 01 b7 c4 de 70 e7 cc a2 b3 60 49 7e 5c 87 bd 77 d8 31 2c 27 c0 1a 4b f7 [51] f2 33 aa 4d 23 5e fe 51 52 6a 75 cd 5c f3 a1 18 08 77 d0 cc 3b 74 cd ce 28 [76] 80 da 82 82 70 1e db 49 88 ef 6a 83 16 45 b6 ef 6f a8 63 fc 59 f3 50 ab 1d [101] 56 e3 06 58 61 10 57 e3 99 3e c1 e0 53 77 22 cd 86 b9 75 87 14 33 2e e9 31 [126] 21 82 48 cb 87 24 99 c7 b1 e2 a0 e3 2a 76 8c 99 50 f3 0c 08 81 8d 36 aa b5 [151] 72 f0 41 78 9f 1a fc 7b 81 05 51 8d 37 0d 15 47 b7 a6 7b c3 7b 7b ce e5 26 [176] 1d 43 60 95 97 d2 f2 a8 41 23 3c 26 82 d1 13 cb 66 41 e5 1d 8b 2e 28 1f 9d [201] f1 6f 67 ae 0c 1b 2f a0 3a 45 d9 12 0a 93 29 eb 4d a4 f9 d8 72 99 a2 b1 2e [226] 0e 12 21 ee 74 0d fc 0f f3 a9 bf 4c f9 bf c1 ee b1 42 35 70 ec 24 34 41 bf [251] 2e 12 41 72 24 81 68 83 4e 10 76 9a c5 56 d1 1d 56 95 9d 60 e6 31 5f d2 48 [276] 68 0d dc b3 7a 3a b1 0c 83 22 aa b9 cc 3b 72 d4 6c 58 88 e4 ce 6d 3f 7b f9 [301] 13 5b 06 d1 b5 d1 03 4e 4b 89 b8 59 fe ca aa 94 e7 03 74 de fe d4 11 15 e9 [326] a0 37 7c e0 61 8b 3c da f6 91 e3 3d 55 02 7f 33 24 73 1b bb 11 65 50 e3 51 [351] 16 9c a2 dc 9b 79 e0 27 fc e1 fe c4 5a 7e 4c e4 cd a7 43 f9 d4 82 f5 8e 6b [376] f4 1a a5 4f 41 4b 7a 10 f2 d2 98 6e 7e 7f 11 0f c9 93 eb 84 3a a3 d6 05 9c $dbl1 [1] 6b 2c 06 3b 59 03 e1 f3 6a 26 91 9e dd 27 af 6c d0 c2 58 6f a7 71 b0 a8 c2 [26] f7 fe 63 40 5a f1 9d 92 c5 0e c6 05 c8 d4 68 2f cf 98 c6 69 41 ad 1d 2a 6f [51] bb a3 b1 f9 09 f0 e3 49 63 d1 0a af 42 9d 59 31 8b 1e db 3f f1 61 a8 d7 94 [76] 6e fd ec bf 5c fc 20 d8 cb 7a bb c7 ba 8b 60 53 00 d6 8c 9e 2b fc 76 73 e7 [101] f0 c9 4b ad da 6b 9e 41 9f 88 3f 20 ba 6a f2 99 56 48 e0 57 c0 ca 3d 7b ce [126] 54 60 0e 5b ad 1b 94 a3 cb 2f c3 e0 cb f9 67 f5 ae e1 39 73 17 5d 6d 70 0a [151] a5 bc 01 08 f3 9d 8c de 10 d3 f6 72 2d e8 19 ff fc c6 24 4e 95 b4 90 5e 7b [176] da e2 12 4e f4 b0 4a ed 85 af 2f e3 fc 48 33 5d aa 7f 78 05 2f d3 d2 44 c4 [201] 78 2e c8 e7 65 45 5d 15 af 8b 5e 5c 49 48 fb 55 d1 4e 09 d0 f6 19 7b 98 20 [226] 67 7c 2f 2e ba 70 2a 0a ad c8 48 3d 69 7b c5 99 67 d9 2e f4 5a e2 84 24 9c [251] 00 22 1d 75 e7 c6 fc 9c a3 6a dd 1d 96 b0 53 67 35 59 51 b7 8f a7 3f 78 39 [276] ed fa 73 2d 07 24 3b 9e 97 83 06 0d 2a d4 e0 f2 43 75 c3 6f 09 94 28 25 40 [301] f8 c1 9e 13 41 50 c3 d2 65 6f 01 b2 26 fb 1f d2 a8 5c 11 db b4 e6 4d e1 1d [326] 7d 43 c3 17 cd 2e ca ad 05 b4 bd 74 8d 37 9a 5a 1e 85 d4 0a f9 03 8f a7 6d [351] 23 c5 7b e7 54 ee e1 33 d1 8e a3 5d a4 cb 0d 80 3e c7 80 5c 77 d8 36 fb 94 [376] a5 a5 a2 72 8a 95 ab f3 da 47 90 da c7 49 a1 b1 81 01 19 29 96 b3 5c ca ba $dbl2 [1] b9 79 37 9e ea 40 52 d2 fa 14 1e 2f c1 55 60 05 35 18 46 24 ba 76 ed 56 75 [26] 78 db ad 6c 77 76 c5 67 0a 4b b9 d2 19 61 03 f4 69 d5 5d db d2 f9 7d 55 6a [51] d5 af 33 2d 76 69 9d f6 f8 de 6f d3 54 05 97 a0 7f 08 f0 19 33 e0 e4 4a 20 [76] 61 67 40 d0 00 b5 ad 11 fb 94 d6 28 78 ef 95 69 a6 e5 9f ed cf 10 69 4f 59 [101] bc 28 55 9c e7 bd ea 9d c2 cb 77 e7 9d 22 f6 ee ad 0c 46 0d 6d 15 f2 26 c0 [126] 7e 70 df 9c 1b ce cc cc 52 4f 75 87 f9 6a fa 5f b8 4d 42 e7 1b 72 ea 69 8a [151] 6b 7b ab 97 7b 86 e6 9a 7c 62 ac eb 08 df 3f 39 e8 a1 ec ce 45 f9 26 c5 27 [176] 47 b2 4c 65 ae 11 07 52 d4 6c 74 3d f4 62 1f 67 51 37 e4 1f eb b7 3e 51 26 [201] 5b da 32 b7 d5 fc 9d 73 8d 98 42 fa 90 23 50 bf 06 61 2f ac 54 8c 87 0e 53 [226] 3d 98 5f 44 55 57 9d 69 b7 59 9f 87 fe 1e 00 d7 1c 0c 34 ba 25 ce a1 77 68 [251] cc 7a f5 cf 2f a4 34 2f 60 a7 a0 c7 e9 cd 90 29 c5 55 06 c6 6a 99 e4 bc 46 [276] a4 c0 43 8c 3f f8 8c ee 17 d7 9f 8d 03 48 64 fa d1 55 85 81 c0 cd ac bc 6e [301] d5 59 0a 28 94 df 2a 7d ea ca 7f 09 5e 5e 47 0d 02 ad 8c 67 0a b8 52 e4 17 [326] 3a 25 5d 5b 17 34 01 09 18 7e ca 83 28 f9 f0 c1 b0 10 bc 30 aa 4b 5c 85 68 [351] 75 71 2e 3c c6 e8 a7 9f a9 36 16 81 cb 4b d7 94 88 15 6e f4 f9 a9 03 ac 21 [376] 65 43 cb 1a ca cd 69 96 b6 8e e0 9f 70 be b1 af c9 73 f2 4b c9 6b 2c 06 3b vctrs/tests/testthat/_snaps/match.md0000644000176200001440000006177714532371031017337 0ustar liggesusers# must have at least 1 column to match Code vec_locate_matches(data_frame(), data_frame()) Condition Error in `vec_locate_matches()`: ! Must have at least 1 column to match on. --- Code vec_locate_matches(data_frame(), data_frame(), error_call = call("foo")) Condition Error in `foo()`: ! Must have at least 1 column to match on. # common type of `needles` and `haystack` is taken Code vec_locate_matches(x, y) Condition Error in `vec_locate_matches()`: ! Can't combine `needles` and `haystack` . --- Code vec_locate_matches(x, y, needles_arg = "x", error_call = call("foo")) Condition Error in `foo()`: ! Can't combine `x` and `haystack` . # `incomplete` can error informatively Code (expect_error(vec_locate_matches(NA, 1, incomplete = "error"))) Output Error in `vec_locate_matches()`: ! `needles` can't contain missing values. x Location 1 contains missing values. Code (expect_error(vec_locate_matches(NA, 1, incomplete = "error", needles_arg = "foo")) ) Output Error in `vec_locate_matches()`: ! `foo` can't contain missing values. x Location 1 contains missing values. Code (expect_error(vec_locate_matches(NA, 1, incomplete = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! `foo` can't contain missing values. x Location 1 contains missing values. # `incomplete` is validated Code (expect_error(vec_locate_matches(1, 2, incomplete = 1.5))) Output Error in `vec_locate_matches()`: ! Can't convert from `incomplete` to due to loss of precision. * Locations: 1 Code (expect_error(vec_locate_matches(1, 2, incomplete = c("match", "drop")))) Output Error in `vec_locate_matches()`: ! `incomplete` must be length 1, not length 2. Code (expect_error(vec_locate_matches(1, 2, incomplete = "x"))) Output Error in `vec_locate_matches()`: ! `incomplete` must be one of: "compare", "match", "drop", or "error". Code (expect_error(vec_locate_matches(1, 2, incomplete = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `incomplete` must be one of: "compare", "match", "drop", or "error". # `multiple` is validated Code (expect_error(vec_locate_matches(1, 2, multiple = 1.5))) Output Error in `vec_locate_matches()`: ! `multiple` must be a string. Code (expect_error(vec_locate_matches(1, 2, multiple = c("first", "last")))) Output Error in `vec_locate_matches()`: ! `multiple` must be a string. Code (expect_error(vec_locate_matches(1, 2, multiple = "x"))) Output Error in `vec_locate_matches()`: ! `multiple` must be one of "all", "any", "first", or "last". Code (expect_error(vec_locate_matches(1, 2, multiple = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `multiple` must be one of "all", "any", "first", or "last". # `multiple` can error informatively Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo"))) Output Error in `vec_locate_matches()`: ! Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo", haystack_arg = "bar"))) Output Error in `vec_locate_matches()`: ! Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. # `multiple` can warn informatively Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning"))) Output Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo"))) Output Warning in `vec_locate_matches()`: Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", error_call = call("fn")))) Output Warning in `fn()`: Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", haystack_arg = "bar"))) Output Warning in `vec_locate_matches()`: Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. # errors on multiple matches that come from different nesting containers Code vec_locate_matches(df, df2, condition = c("<=", "<="), multiple = "error") Condition Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # errors when a match from a different nesting container is processed early on Code vec_locate_matches(needles, haystack, condition = "<", multiple = "error") Condition Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `multiple = 'error' / 'warning'` throw correctly when combined with `relationship` Code (expect_error(vec_locate_matches(x, y, relationship = "one-to-one", multiple = "error")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code (expect_error(vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "warning") Condition Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 2 of `needles` matches multiple values. x Location 1 of `haystack` matches multiple values. Output needles haystack 1 1 2 2 2 1 3 2 3 4 3 1 5 3 3 --- Code vec_locate_matches(x, y, relationship = "one-to-one", multiple = "warning") Condition Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code (expect_error(vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "warning") Condition Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Output needles haystack 1 1 2 2 2 1 3 2 3 # `relationship` handles one-to-one case Code (expect_error(vec_locate_matches(c(2, 1), c(1, 1), relationship = "one-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Code (expect_error(vec_locate_matches(c(1, 1), c(1, 2), relationship = "one-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 1 of `haystack` matches multiple values. # `relationship` handles one-to-many case Code (expect_error(vec_locate_matches(c(1, 2, 2), c(2, 1), relationship = "one-to-many")) ) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 1 of `haystack` matches multiple values. # `relationship` handles many-to-one case Code (expect_error(vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "many-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. # `relationship` handles warn-many-to-many case Code (expect_warning(vec_locate_matches(c(1, 2, 1), c(1, 2, 2), relationship = "warn-many-to-many")) ) Output Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 2 of `needles` matches multiple values. x Location 1 of `haystack` matches multiple values. Code (expect_warning(vec_locate_matches(c(1, 1, 2), c(2, 2, 1), relationship = "warn-many-to-many")) ) Output Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 3 of `needles` matches multiple values. x Location 3 of `haystack` matches multiple values. # `relationship` considers `incomplete` matches as possible multiple matches Code (expect_error(vec_locate_matches(x, y, relationship = "one-to-many"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 1 of `haystack` matches multiple values. # `relationship` errors on multiple matches that come from different nesting containers Code (expect_error(vec_locate_matches(df, df2, condition = c("<=", "<="), relationship = "many-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` errors when a match from a different nesting container is processed early on Code (expect_error(vec_locate_matches(needles, haystack, condition = "<", relationship = "many-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` can still detect problematic `haystack` relationships when `multiple = first/last` are used Code (expect_error(vec_locate_matches(c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 2 of `haystack` matches multiple values. Code (expect_error(vec_locate_matches(c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-many"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 2 of `haystack` matches multiple values. # `relationship` and `remaining` work properly together Code out <- vec_locate_matches(c(1, 2, 2), c(2, 3, 1, 1, 4), relationship = "warn-many-to-many", remaining = NA_integer_) Condition Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 1 of `needles` matches multiple values. x Location 1 of `haystack` matches multiple values. # `relationship` errors if `condition` creates multiple matches Code (expect_error(vec_locate_matches(1, c(1, 2), condition = "<=", relationship = "many-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` still errors if `filter` hasn't removed all multiple matches Code (expect_error(vec_locate_matches(1, c(1, 2, 1), condition = "<=", filter = "min", relationship = "many-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` errors when we have >1 size 1 matches across containers (tidyverse/dplyr#6835) Code vec_locate_matches(x, y, condition = c("<=", ">="), filter = c("none", "none"), relationship = "one-to-one") Condition Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` errors respect argument tags and error call Code (expect_error(vec_locate_matches(1L, c(1L, 1L), relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. Code (expect_error(vec_locate_matches(c(1L, 1L), 1L, relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `bar` can match at most 1 value from `foo`. x Location 1 of `bar` matches multiple values. Code (expect_error(vec_locate_matches(c(1L, 1L), 1L, relationship = "one-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `bar` can match at most 1 value from `foo`. x Location 1 of `bar` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), relationship = "many-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. # `relationship` warnings respect argument tags and error call Code (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Warning in `fn()`: Detected an unexpected many-to-many relationship between `foo` and `bar`. x Location 1 of `foo` matches multiple values. x Location 1 of `bar` matches multiple values. Code (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", error_call = call("fn")))) Output Warning in `fn()`: Detected an unexpected many-to-many relationship between `foo` and `haystack`. x Location 1 of `foo` matches multiple values. x Location 1 of `haystack` matches multiple values. Code (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", haystack_arg = "bar", error_call = call("fn")))) Output Warning in `fn()`: Detected an unexpected many-to-many relationship between `needles` and `bar`. x Location 1 of `needles` matches multiple values. x Location 1 of `bar` matches multiple values. # `relationship` is validated Code (expect_error(vec_locate_matches(1, 2, relationship = 1.5))) Output Error in `vec_locate_matches()`: ! `relationship` must be a string. Code (expect_error(vec_locate_matches(1, 2, relationship = c("one-to-one", "one-to-many")))) Output Error in `vec_locate_matches()`: ! `relationship` must be a string. Code (expect_error(vec_locate_matches(1, 2, relationship = "x"))) Output Error in `vec_locate_matches()`: ! `relationship` must be one of "none", "one-to-one", "one-to-many", "many-to-one", "many-to-many", or "warn-many-to-many". Code (expect_error(vec_locate_matches(1, 2, relationship = "x", error_call = call( "fn")))) Output Error in `vec_locate_matches()`: ! `relationship` must be one of "none", "one-to-one", "one-to-many", "many-to-one", "many-to-many", or "warn-many-to-many". # `no_match` can error informatively Code (expect_error(vec_locate_matches(1, 2, no_match = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` must have a match in `haystack`. x Location 1 of `needles` does not have a match. Code (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo")) ) Output Error in `vec_locate_matches()`: ! Each value of `foo` must have a match in `haystack`. x Location 1 of `foo` does not have a match. Code (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` must have a match in `haystack`. x Location 1 of `foo` does not have a match. Code (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo", haystack_arg = "bar"))) Output Error in `vec_locate_matches()`: ! Each value of `foo` must have a match in `bar`. x Location 1 of `foo` does not have a match. # errors with the right location on unmatched needles when different nesting containers are present Code (expect_error(vec_locate_matches(df, df2, condition = ">=", no_match = "error")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` must have a match in `haystack`. x Location 2 of `needles` does not have a match. # `no_match` is validated Code (expect_error(vec_locate_matches(1, 2, no_match = 1.5))) Output Error in `vec_locate_matches()`: ! Can't convert from `no_match` to due to loss of precision. * Locations: 1 Code (expect_error(vec_locate_matches(1, 2, no_match = c(1L, 2L)))) Output Error in `vec_locate_matches()`: ! `no_match` must be length 1, not length 2. Code (expect_error(vec_locate_matches(1, 2, no_match = "x"))) Output Error in `vec_locate_matches()`: ! `no_match` must be either "drop" or "error". Code (expect_error(vec_locate_matches(1, 2, no_match = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `no_match` must be either "drop" or "error". # `remaining` can error informatively Code (expect_error(vec_locate_matches(1, 2, remaining = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` must be matched by `needles`. x Location 1 of `haystack` was not matched. Code (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo")) ) Output Error in `vec_locate_matches()`: ! Each value of `haystack` must be matched by `foo`. x Location 1 of `haystack` was not matched. Code (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `haystack` must be matched by `foo`. x Location 1 of `haystack` was not matched. Code (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo", haystack_arg = "bar"))) Output Error in `vec_locate_matches()`: ! Each value of `bar` must be matched by `foo`. x Location 1 of `bar` was not matched. # `remaining` is validated Code (expect_error(vec_locate_matches(1, 2, remaining = 1.5))) Output Error in `vec_locate_matches()`: ! Can't convert from `remaining` to due to loss of precision. * Locations: 1 Code (expect_error(vec_locate_matches(1, 2, remaining = c(1L, 2L)))) Output Error in `vec_locate_matches()`: ! `remaining` must be length 1, not length 2. Code (expect_error(vec_locate_matches(1, 2, remaining = "x"))) Output Error in `vec_locate_matches()`: ! `remaining` must be either "drop" or "error". Code (expect_error(vec_locate_matches(1, 2, remaining = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `remaining` must be either "drop" or "error". # potential overflow on large output size is caught informatively Code (expect_error(vec_locate_matches(1:1e+07, 1:1e+07, condition = ">="))) Output Error in `vec_locate_matches()`: ! Match procedure results in an allocation larger than 2^31-1 elements. i Attempted allocation size was 50000005000000. i This is an internal error that was detected in the vctrs package. Please report it at with a reprex () and the full backtrace. vctrs/tests/testthat/_snaps/print-str.md0000644000176200001440000000203614532371033020166 0ustar liggesusers# show attributes Code obj_str(x) Output int [1:100] 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16... @ x: chr "a string" @ y: int [1:20] 1 2 3 4 5 6 7 8 9 10 ... @ z:'data.frame': 3 obs. of 1 variable: ..$ x: int [1:3] 1 2 3 --- Code obj_str(mtcars) Output df[,11] [1:32] 'data.frame': 32 obs. of 11 variables: $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... $ cyl : num 6 6 4 6 8 6 8 4 4 6 ... $ disp: num 160 160 108 258 360 ... $ hp : num 110 110 93 110 175 105 245 62 95 123 ... $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... $ wt : num 2.62 2.88 2.32 3.21 3.44 ... $ qsec: num 16.5 17 18.6 19.4 17 ... $ vs : num 0 0 1 1 0 1 0 1 1 1 ... $ am : num 1 1 1 0 0 0 0 0 0 0 ... $ gear: num 4 4 4 3 3 3 3 4 4 4 ... $ carb: num 4 4 1 1 2 1 4 2 2 4 ... @ row.names: chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ... vctrs/tests/testthat/_snaps/compare.md0000644000176200001440000000151314532371022017647 0ustar liggesusers# error is thrown when comparing complexes (#1655) Code (expect_error(vec_compare(complex(), complex()))) Output Error in `vec_compare()`: ! Can't compare complexes. # `na_equal` is validated Code (expect_error(vec_compare(1, 1, na_equal = 1))) Output Error in `vec_compare()`: ! `na_equal` must be `TRUE` or `FALSE`, not the number 1. Code (expect_error(vec_compare(1, 1, na_equal = c(TRUE, FALSE)))) Output Error in `vec_compare()`: ! `na_equal` must be `TRUE` or `FALSE`, not a logical vector. # can't supply NA as `na_equal` Code vec_compare(NA, NA, na_equal = NA) Condition Error in `vec_compare()`: ! `na_equal` must be `TRUE` or `FALSE`, not `NA`. vctrs/tests/testthat/_snaps/type-tibble.md0000644000176200001440000000324314532371050020444 0ustar liggesusers# can't cast vector to tibble Code local_error_call(call("my_function")) (expect_error(vec_ptype2(v, dt), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `v` and `dt` . Code (expect_error(vec_ptype2(dt, v), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `dt` and `v` . Code (expect_error(vec_cast(v, dt), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't convert `v` to . # can use ptype2 and cast with tibble that has incorrect class vector Code local_error_call(call("my_function")) (expect_error(vec_cast(tib1, tib2), class = "vctrs_error_cast")) Output Error in `my_function()`: ! Can't convert from `tib1` > to > due to loss of precision. Code (expect_error(vec_cast(tib1, data.frame(y = 2)), class = "vctrs_error_cast")) Output Error in `my_function()`: ! Can't convert from `tib1` > to > due to loss of precision. Code (expect_error(vec_cast(data.frame(x = 1), tib2), class = "vctrs_error_cast")) Output Error in `my_function()`: ! Can't convert from `data.frame(x = 1)` > to > due to loss of precision. vctrs/tests/testthat/_snaps/lifecycle-deprecated.md0000644000176200001440000000134614532371025022265 0ustar liggesusers# vec_unchop() is soft-deprecated Code vec_unchop(list(1), indices = list(1)) Condition Warning: `vec_unchop()` was deprecated in vctrs 0.5.0. i Please use `list_unchop()` instead. Output [1] 1 # vec_equal_na() is soft-deprecated Code vec_equal_na(c(1, NA)) Condition Warning: `vec_equal_na()` was deprecated in vctrs 0.5.0. i Please use `vec_detect_missing()` instead. Output [1] FALSE TRUE # vec_check_list() still works Code vec_check_list(1) Condition Error: ! `1` must be a list, not the number 1. --- Code my_check(1) Condition Error in `my_check()`: ! `x` must be a list, not the number 1. vctrs/tests/testthat/_snaps/type-sf.md0000644000176200001440000000023114532371050017605 0ustar liggesusers# `crs` attributes of `sfc` vectors must be the same Code vctrs::vec_c(x, y) Condition Error: ! arguments have different crs vctrs/tests/testthat/_snaps/type.md0000644000176200001440000000733314532371051017212 0ustar liggesusers# output tests Code vec_ptype_show() Output Prototype: NULL --- Code vec_ptype_show(integer()) Output Prototype: integer --- Code vec_ptype_show(integer(), double()) Output Prototype: 0. ( , ) = 1. ( , ) = --- Code vec_ptype_show(logical(), integer(), double()) Output Prototype: 0. ( , ) = 1. ( , ) = 2. ( , ) = # vec_ptype_common() includes index in argument tag Code vec_ptype_common(df1, df2) Condition Error: ! Can't combine `..1$x$y$z` and `..2$x$y$z` . --- Code vec_ptype_common(df1, df1, df2) Condition Error: ! Can't combine `..1$x$y$z` and `..3$x$y$z` . --- Code vec_ptype_common(large_df1, large_df2) Condition Error: ! Can't combine `..1$foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar$y$z` and `..2$foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar$y$z` . --- Code vec_ptype_common(foo = TRUE, bar = "foo") Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = TRUE, baz = FALSE, bar = "foo") Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = df1, bar = df2) Condition Error: ! Can't combine `foo$x$y$z` and `bar$x$y$z` . --- Code vec_ptype_common(df1, df1, bar = df2) Condition Error: ! Can't combine `..1$x$y$z` and `bar$x$y$z` . --- Code vec_ptype_common(TRUE, !!!list(1, "foo")) Condition Error: ! Can't combine `..2` and `..3` . --- Code vec_ptype_common(TRUE, !!!list(1, 2), "foo") Condition Error: ! Can't combine `..2` and `..4` . --- Code vec_ptype_common(1, !!!list(TRUE, FALSE), "foo") Condition Error: ! Can't combine `..1` and `..4` . --- Code vec_ptype_common(foo = TRUE, !!!list(FALSE, FALSE), bar = "foo") Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = 1, "foo")) Condition Error: ! Can't combine `foo` and `..3` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = "foo")) Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), baz = "chr") Condition Error: ! Can't combine `foo` and `baz` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), !!!list(baz = "chr")) Condition Error: ! Can't combine `foo` and `baz` . # vec_ptype_common() handles spliced names consistently (#1570) Code vec_ptype_common(a = "foo", b = "bar", y = NULL, z = 1) Condition Error: ! Can't combine `a` and `z` . Code vec_ptype_common(!!!args1, !!!args2) Condition Error: ! Can't combine `a` and `z` . Code vec_ptype_common(!!!args1, "{y_name}" := NULL, "{z_name}" := 1) Condition Error: ! Can't combine `a` and `z` . vctrs/tests/testthat/_snaps/dictionary.md0000644000176200001440000000217314532371023020372 0ustar liggesusers# vec_match() and vec_in() check types Code df1 <- data_frame(x = data_frame(foo = 1)) df2 <- data_frame(x = data_frame(foo = "")) (expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type")) Output Error in `vec_match()`: ! Can't combine `x$foo` and `x$foo` . Code (expect_error(vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type")) Output Error in `vec_match()`: ! Can't combine `n$x$foo` and `h$x$foo` . Code (expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type")) Output Error in `vec_in()`: ! Can't combine `x$foo` and `x$foo` . Code (expect_error(vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type") ) Output Error in `vec_in()`: ! Can't combine `n$x$foo` and `h$x$foo` . vctrs/tests/testthat/helper-restart.R0000644000176200001440000000300214362266120017476 0ustar liggesusers# Example usage of ptype2 and cast restart. This handler treats any # input that inherits from as a . In other words, it # allows incompatible inputs to benefit from all # coercion methods. with_ordered_restart <- function(expr) { withCallingHandlers( expr, vctrs_error_incompatible_type = function(cnd) { x <- cnd[["x"]] y <- cnd[["y"]] restart <- FALSE if (is.ordered(x)) { restart <- TRUE x <- factor(as.character(x), levels = levels(x)) } if (is.ordered(y)) { restart <- TRUE y <- factor(as.character(y), levels = levels(y)) } # Don't recurse and let ptype2 error keep its course if (!restart) { return(zap()) } x_arg <- cnd[["x_arg"]] y_arg <- cnd[["y_arg"]] call <- cnd[["call"]] # Recurse with factor methods and restart with the result if (inherits(cnd, "vctrs_error_ptype2")) { out <- vec_ptype2(x, y, x_arg = x_arg, y_arg = y_arg, call = call) restart <- "vctrs_restart_ptype2" } else if (inherits(cnd, "vctrs_error_cast")) { out <- vec_cast(x, y, x_arg = x_arg, to_arg = y_arg, call = call) restart <- "vctrs_restart_cast" } else { return(zap()) } # Old-R compat for `tryInvokeRestart()` try_restart <- function(restart, ...) { if (!is_null(findRestart(restart))) { invokeRestart(restart, ...) } } try_restart(restart, out) } ) } vctrs/tests/testthat/test-slice-interleave.R0000644000176200001440000000431614362266120020756 0ustar liggesuserstest_that("interleaving is working as expected", { expect_identical( vec_interleave(1:3, 4:6), c(1L, 4L, 2L, 5L, 3L, 6L) ) expect_identical( vec_interleave(1:3, 4:6, 7:9), c(1L, 4L, 7L, 2L, 5L, 8L, 3L, 6L, 9L) ) }) test_that("data frames can be interleaved", { x <- data_frame(x = 1:2, y = c("a", "b")) y <- data_frame(x = 3:4, y = c("c", "d")) expect_identical( vec_interleave(x, y), vec_slice(vec_c(x, y), c(1, 3, 2, 4)) ) }) test_that("works with `NULL` inputs", { expect_identical( vec_interleave(1:3, NULL, 4:6), vec_interleave(1:3, 4:6) ) }) test_that("allows for name repair", { x <- c(x = 1) expect_identical( vec_interleave(x, x), c(x = 1, x = 1) ) expect_snapshot(vec_interleave(x, x, .name_repair = "unique")) }) test_that("can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_interleave(c(x = 1), c(x = 2), .name_repair = "unique_quiet") res_universal <- vec_interleave(c("if" = 1), c("in" = 2), .name_repair = "universal_quiet") }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in")) }) test_that("works with name specs", { x <- c(x = 1) y <- 1 expect_named( vec_interleave(x = x, y = y, .name_spec = "{outer}_{inner}"), c("x_x", "y") ) }) test_that("recycles inputs", { expect_identical( vec_interleave(1:3, NA), c(1L, NA, 2L, NA, 3L, NA) ) expect_identical( vec_interleave(integer(), NA), integer() ) }) test_that("works with no inputs", { expect_identical(vec_interleave(), NULL) }) test_that("works with length zero input", { expect_identical(vec_interleave(integer(), integer()), integer()) }) test_that("respects ptype", { expect_identical(vec_interleave(.ptype = character()), character()) expect_identical(vec_interleave(1L, 2L, .ptype = numeric()), c(1, 2)) }) test_that("uses recycling errors", { expect_snapshot(error = TRUE, vec_interleave(1:2, 1:3)) }) test_that("errors if the result would be a long vector", { # Internal multiplication overflows `r_ssize` resulting in a different error skip_on_os("windows") expect_snapshot( error = TRUE, vec_interleave_indices(3L, 1e9L) ) }) vctrs/tests/testthat/test-size.R0000644000176200001440000001307314315060310016464 0ustar liggesuserstest_that("vec_as_short_length() checks inputs", { expect_equal(vec_as_short_length(0), 0) expect_equal(vec_as_short_length(1L), 1) my_function <- function(my_arg) vec_as_short_length(my_arg) expect_snapshot({ (expect_error(my_function(-1))) (expect_error(my_function(1:2))) (expect_error(my_function(1.5))) (expect_error(my_function(NA))) (expect_error(my_function(na_int))) (expect_error(my_function("foo"))) (expect_error(my_function(foobar(1:2)))) (expect_error(my_function(.Machine$double.xmax))) }) }) test_that("vec_as_short_length() has a special error about long vector support", { # In particular, skips on 32-bit Windows where `r_ssize == int` skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") my_function <- function(my_arg) vec_as_short_length(my_arg) expect_snapshot({ (expect_error(my_function(.Machine$integer.max + 1))) }) }) # vec_size ----------------------------------------------------------------- test_that("vec_size must be called with vector", { expect_error(vec_size(mean), class = "vctrs_error_scalar_type") }) test_that("length is number of rows", { expect_equal(vec_size(integer()), 0) expect_equal(vec_size(array(integer())), 0) expect_equal(vec_size(1:2), 2) expect_equal(vec_size(array(dim = 2)), 2) expect_equal(vec_size(matrix(nrow = 2, ncol = 3)), 2) expect_equal(vec_size(array(dim = c(2, 1, 5))), 2) }) test_that("length of record is number of rows, not fields", { r <- new_rcrd(list(x = 1:10)) expect_equal(vec_size(r), 10) }) test_that("handles three types of data frame rownames", { df1 <- df2 <- df3 <- data.frame(x = 1:3) rownames(df1) <- NULL rownames(df2) <- 3:1 rownames(df3) <- letters[1:3] expect_equal(vec_size(df1), 3) expect_equal(vec_size(df2), 3) expect_equal(vec_size(df3), 3) }) test_that("handles positive short row names (#220)", { data <- structure(mtcars, row.names = c(NA, 32)) expect_identical(vec_size(data), 32L) }) test_that("size is proxied", { local_env_proxy() expect_size(new_proxy(1:3), 3) expect_size(new_proxy(list(1, 2, 3)), 3) expect_size(new_proxy(foobar(list(1, 2, 3))), 3) }) test_that("`NULL` has size zero", { expect_identical(vec_size(NULL), 0L) }) test_that("can take the size of unspecified objects", { expect_size(NA, 1) expect_size(c(NA, NA), 2) expect_size(unspecified(2), 2) }) # vec_size_common --------------------------------------------------------- test_that("vec_size_common() checks inputs", { expect_snapshot({ (expect_error(vec_size_common(.size = "foo"))) (expect_error(vec_size_common(.size = 1:2))) }) }) test_that("vec_size_common() mentions `arg` in errors", { my_function <- function(...) vec_size_common(..., .arg = "my_arg") expect_snapshot({ (expect_error(my_function(this_arg = 1:2, that_arg = int()))) }) }) test_that("vec_size_common with no input is 0L unless `.absent` is provided", { expect_identical(vec_size_common(), 0L) expect_identical(vec_size_common(NULL), 0L) expect_equal(vec_size_common(.absent = na_int), na_int) }) test_that("`.absent` must be supplied when `...` is empty", { expect_snapshot({ (expect_error(vec_size_common(.absent = NULL))) }) }) test_that("`.absent` must be a length 1 integer if provided", { expect_snapshot({ (expect_error(vec_size_common(.absent = 1), "must be a single integer")) (expect_error(vec_size_common(.absent = c(1L, 2L)), "must be a single integer")) }) }) test_that("`NULL` is treated as the absence of input", { expect_equal(vec_size_common(1:5, NULL), vec_size_common(1:5)) }) test_that("size 1 is overshadowed by any other size", { expect_equal(vec_size_common(1, integer()), 0) expect_equal(vec_size_common(1, 1:5), 5) }) test_that("if not size 1, sizes must be identical", { expect_equal(vec_size_common(integer(), integer()), 0) expect_error(vec_size_common(1:2, integer()), class = "vctrs_error_incompatible_size") expect_error(vec_size_common(1:2, 1:3), class = "vctrs_error_incompatible_size") }) test_that("argument tags are forwarded", { expect_snapshot(error = TRUE, vec_size_common(1:2, 1, 1:4)) expect_snapshot(error = TRUE, vec_size_common(foo = 1:2, 1, bar = 1:4)) }) test_that("can pass size", { expect_identical(vec_size_common(1:2, 1:3, .size = 5L), 5L) }) test_that("provided size is cast to an integer", { expect_identical(vec_size_common(.size = 1), 1L) }) # list_sizes -------------------------------------------------------------- test_that("only lists are allowed", { expect_error(list_sizes(mtcars), "must be a list") expect_error(list_sizes(1), "must be a list") }) test_that("computes element sizes", { expect_identical(list_sizes(list(1, 1:3, c("a", "b"))), c(1L, 3L, 2L)) }) test_that("retains list names", { x <- list(1, x = 2, a = 3) expect_named(list_sizes(x), c("", "x", "a")) x <- list_of(y = 1, x = 2, a = 3) expect_named(list_sizes(x), c("y", "x", "a")) }) test_that("retains names of empty lists", { x <- structure(list(), names = character()) expect_named(list_sizes(x), character()) }) # sequences --------------------------------------------------------------- test_that("vec_seq_along returns size-0 output for size-0 input", { expect_equal(vec_seq_along(character()), integer()) expect_equal(vec_seq_along(data.frame()), integer()) }) test_that("vec_init_along can be called with single argument", { expect_equal(vec_init_along(1:3), rep(NA_integer_, 3)) }) # %0% -------------------------------------------------------------------- test_that("uses y when x is empty", { expect_equal(1 %0% 2, 1) expect_equal(1[0] %0% 2, 2) }) vctrs/tests/testthat/test-type-unspecified.R0000644000176200001440000000650714276722575021023 0ustar liggesusers test_that("unknown type is idempotent", { types <- list( unspecified(), logical(), integer(), double(), character(), list(), new_list_of(ptype = integer()), new_factor(), new_ordered(), new_date(), new_datetime(), new_duration(), vec_ptype(matrix(1:4, 2)), vec_ptype(array(1:5, c(1, 5))) ) lhs <- map(types, vec_ptype2, x = unspecified()) expect_identical(types, lhs) lhs <- map(types, vec_ptype2, x = NA) expect_identical(types, lhs) rhs <- map(types, vec_ptype2, y = unspecified()) expect_identical(types, rhs) rhs <- map(types, vec_ptype2, y = NA) expect_identical(types, rhs) }) test_that("common type of unspecified and NULL is unspecified", { expect_identical(vec_ptype2(unspecified(), NULL), unspecified()) expect_identical(vec_ptype2(NULL, unspecified()), unspecified()) expect_identical(vec_ptype2(NA, NULL), unspecified()) expect_identical(vec_ptype2(NULL, NA), unspecified()) }) test_that("cannot take the common type of unspecified and a scalar list", { expect_error(vec_ptype2(unspecified(), foobar()), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(foobar(), unspecified()), class = "vctrs_error_scalar_type") }) test_that("subsetting works", { expect_identical(unspecified(4)[2:3], unspecified(2)) }) test_that("has useful print method", { expect_snapshot(unspecified()) }) test_that("can finalise data frame containing unspecified columns", { df <- data.frame(y = NA, x = c(1, 2, NA)) ptype <- vec_ptype(df) expect_identical(ptype$y, unspecified()) finalised <- vec_ptype_finalise(ptype) expect_identical(finalised$y, lgl()) common <- vec_ptype_common(df, df) expect_identical(common$y, lgl()) }) test_that("can cast to common type data frame containing unspecified columns", { df <- data.frame(y = NA, x = c(1, 2, NA)) expect_identical(vec_cast_common(df, df), list(df, df)) }) test_that("unspecified vectors are always unspecified (#222)", { expect_true(is_unspecified(unspecified())) expect_true(is_unspecified(unspecified(1))) }) test_that("S3 vectors and shaped vectors are never unspecified", { expect_false(is_unspecified(foobar(NA))) expect_false(is_unspecified(foobar(lgl(NA, NA)))) expect_false(is_unspecified(matrix(NA, 2))) }) test_that("can finalise lengthy unspecified vectors", { expect_identical(vec_ptype_finalise(unspecified(3)), rep(NA, 3)) expect_identical(ununspecify(unspecified(3)), rep(NA, 3)) }) test_that("unspecified() validates input", { expect_identical(unspecified(1), unspecified(1L)) expect_error(unspecified(1:3), "must be a single number") }) test_that("tibble::type_sum() knows about unspecified", { expect_identical(tibble::type_sum(unspecified(3)), "???") }) test_that("casting to a scalar type errors", { expect_error(vec_cast(NA, quote(x)), class = "vctrs_error_scalar_type") expect_error(vec_cast(unspecified(1), quote(x)), class = "vctrs_error_scalar_type") }) test_that("monitoring test - can cast to unspecified from unspecified", { expect_identical(vec_cast(NA, unspecified()), unspecified(1)) expect_identical(vec_cast(unspecified(1), unspecified()), unspecified(1)) }) test_that("monitoring test - casting unspecified input to NA unspecified results in NA vector", { expect_identical(vec_cast(unspecified(1), NA), NA) expect_identical(vec_cast(NA, NA), NA) }) vctrs/tests/testthat/test-empty.R0000644000176200001440000000200514315060310016641 0ustar liggesuserstest_that("can compact missing elements", { x <- list(NULL, 1, NULL) expect_identical(list_drop_empty(x), list(1)) }) test_that("can compact empty elements", { x <- list(1, NULL, integer(), NULL) expect_identical(list_drop_empty(x), list(1)) }) test_that("emptyness works with data frames", { x <- data_frame() y <- data_frame(.size = 2L) lst <- list(x, y) expect_identical(list_drop_empty(lst), list(y)) }) test_that("emptyness works with rcrd types", { x <- new_rcrd(list(foo = integer(), bar = numeric())) y <- new_rcrd(list(foo = 1L, bar = 1)) lst <- list(x, y) expect_identical(list_drop_empty(lst), list(y)) }) test_that("works with empty lists", { expect_identical(list_drop_empty(list()), list()) }) test_that("retains list type", { x <- list_of(NULL, integer()) expect_identical(list_drop_empty(x), list_of(.ptype = integer())) }) test_that("validates `x`", { expect_error(list_drop_empty(1), "must be a list") expect_error(list_drop_empty(data_frame()), "must be a list") }) vctrs/tests/testthat/test-shape.R0000644000176200001440000000747514315060310016623 0ustar liggesusers # common shape ------------------------------------------------------------ test_that("vec_shape2() applies recycling rules", { expect_equal(vec_shape2(shaped_int(1, 5, 5), shaped_int(1)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1, 5), shaped_int(1, 5, 1)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 5, 1), shaped_int(1, 1, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 1, 1)), c(0L, 0L, 5L)) }) test_that("incompatible shapes throw errors", { expect_snapshot({ (expect_error(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1)), class = "vctrs_error_incompatible_type")) (expect_error(vec_shape2(shaped_int(1, 5, 0), shaped_int(1, 1, 5)), class = "vctrs_error_incompatible_type")) }) }) test_that("can override error args", { expect_snapshot({ (expect_error( vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1), x_arg = "foo", y_arg = "bar"), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_shape2() evaluates arg lazily", { expect_silent(vec_shape2(shaped_int(1, 5, 5), shaped_int(1), x_arg = print("oof"))) expect_silent(vec_shape2(shaped_int(1, 5, 5), shaped_int(1), y_arg = print("oof"))) }) # broadcasting ------------------------------------------------------------- test_that("can broadcast to higher dimension, but not lower", { expect_identical(shape_broadcast_(1, NULL), 1) expect_null(shape_broadcast_(NULL, 1)) expect_equal( shape_broadcast_(1, shaped_int(0, 4)), array(1, c(1, 4)) ) expect_error( shape_broadcast_(shaped_int(1, 1, 1), shaped_int(4, 4)), class = "vctrs_error_incompatible_type" ) expect_error( shape_broadcast_(shaped_int(3, 2), shaped_int(3, 3)), class = "vctrs_error_incompatible_type" ) }) test_that("shape_broadcast_() applies recycling rules", { expect_equal( shape_broadcast_(array(1:4, c(1, 1, 4)), shaped_int(0, 4, 4))[1, , ], matrix(1:4, 4, 4, byrow = TRUE) ) expect_equal( shape_broadcast_(array(1:4, c(1, 4, 1)), shaped_int(0, 4, 4))[1, , ], matrix(1:4, 4, 4) ) expect_equal( shape_broadcast_(array(1L, c(1, 1)), shaped_int(1, 0)), matrix(integer(), nrow = 1) ) expect_error( shape_broadcast_(array(1L, c(1, 2)), shaped_int(1, 0)), "Non-recyclable dimensions", class = "vctrs_error_incompatible_type" ) expect_error( shape_broadcast_(array(1L, c(1, 0)), shaped_int(1, 1)), "Non-recyclable dimensions", class = "vctrs_error_incompatible_type" ) }) test_that("can combine shaped native classes (#1290, #1329)", { x <- new_datetime(c(1, 1e6)) dim(x) <- c(1, 2) out <- vec_c(x, x) expect_s3_class(out, c("POSIXct", "POSIXt")) expect_dim(out, c(2, 2)) y <- new_datetime(1:3 + 0.0) dim(y) <- c(1, 3) expect_snapshot(error = TRUE, vec_c(x, y)) d <- structure(Sys.Date(), dim = 1) expect_equal( vec_rbind(data.frame(d), data.frame(d)), data.frame(d = structure(rep(Sys.Date(), 2), dim = 2)) ) }) test_that("factor casts support shape", { x <- factor(c("x", "y", "z")) dim(x) <- c(3, 1) dimnames(x) <- list(c("r1", "r2", "r3"), "c1") y <- factor(c("w", "x", "y", "z")) dim(y) <- c(2, 2) exp <- factor( c("x", "y", "z", "x", "y", "z"), levels = c("w", "x", "y", "z") ) dim(exp) <- c(3, 2) dimnames(exp) <- list(c("r1", "r2", "r3"), c("c1", "c1")) expect_equal(vec_cast(x, y), exp) x <- factor(c("x", "y", "z")) dim(x) <- c(3, 1) y <- factor(c("x", "y", "z")) expect_snapshot(error = TRUE, vec_cast(x, y)) }) vctrs/tests/testthat/test-subscript.R0000644000176200001440000001012614315060310017524 0ustar liggesusers test_that("vec_as_subscript() coerces unspecified vectors", { expect_identical( vec_as_subscript(NA), NA ) expect_identical( vec_as_subscript(NA, logical = "error"), na_int ) expect_identical( vec_as_subscript(NA, logical = "error", numeric = "error"), na_chr ) }) test_that("vec_as_subscript() coerces subtypes and supertypes", { expect_identical(vec_as_subscript(factor("foo")), "foo") with_lgl_subtype({ expect_identical(vec_as_subscript(new_lgl_subtype(TRUE)), TRUE) }) with_lgl_supertype({ expect_identical(vec_as_subscript(new_lgl_supertype(TRUE)), TRUE) }) }) test_that("vec_as_subscript() handles NULL", { expect_identical(vec_as_subscript(NULL), int()) expect_error( vec_as_subscript(NULL, numeric = "error"), class = "vctrs_error_subscript_type" ) }) test_that("vec_as_subscript() handles symbols", { expect_identical(vec_as_subscript(quote(foo)), "foo") expect_identical(vec_as_subscript(quote(``)), "\u5e78") expect_error( vec_as_subscript(quote(foo), character = "error"), class = "vctrs_error_subscript_type" ) }) test_that("can customise subscript errors", { expect_snapshot({ (expect_error( with_tibble_cols(vec_as_subscript(env())), class = "vctrs_error_subscript_type" )) }) expect_snapshot({ (expect_error( with_dm_tables(vec_as_subscript(env())), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_subscript() checks dimensionality", { expect_snapshot({ (expect_error(vec_as_subscript(matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type")) (expect_error(vec_as_subscript(array(TRUE, dim = c(1, 1, 1))), class = "vctrs_error_subscript_type")) (expect_error(with_tibble_rows(vec_as_subscript(matrix(TRUE, nrow = 1))), class = "vctrs_error_subscript_type")) }) }) test_that("vec_as_subscript() works with vectors of dimensionality 1", { arr <- array(TRUE, dim = 1) expect_identical(vec_as_subscript(arr), arr) }) test_that("vec_as_subscript() forbids subscript types", { expect_snapshot(error = TRUE, vec_as_subscript(1L, logical = "error", numeric = "error")) expect_snapshot(error = TRUE, vec_as_subscript("foo", logical = "error", character = "error")) expect_snapshot(error = TRUE, vec_as_subscript(TRUE, logical = "error")) expect_snapshot(error = TRUE, vec_as_subscript("foo", character = "error")) expect_snapshot(error = TRUE, vec_as_subscript(NULL, numeric = "error")) expect_snapshot(error = TRUE, vec_as_subscript(quote(foo), character = "error")) }) test_that("vec_as_subscript2() forbids subscript types", { expect_snapshot(error = TRUE, vec_as_subscript2(1L, numeric = "error")) expect_snapshot(error = TRUE, vec_as_subscript2("foo", character = "error")) expect_snapshot(error = TRUE, vec_as_subscript2(TRUE)) }) test_that("vec_as_subscript2() retains the call when throwing vec_as_subscript() errors (#1605)", { expect_snapshot(error = TRUE, vec_as_subscript2(1L, numeric = "error", call = call("foo"))) expect_snapshot(error = TRUE, vec_as_subscript2(1.5, call = call("foo"))) }) test_that("vec_as_subscript2() retains the call when erroring on logical input (#1605)", { expect_snapshot(error = TRUE, vec_as_subscript2(TRUE, call = call("foo"))) }) test_that("vec_as_subscript() evaluates arg lazily", { expect_silent(vec_as_subscript(1L, arg = print("oof"))) expect_silent(vec_as_subscript_result(1L, arg = print("oof"), NULL, logical = "error", numeric = "cast", character = "error")) }) test_that("vec_as_subscript2() evaluates arg lazily", { expect_silent(vec_as_subscript2(1L, arg = print("oof"))) expect_silent(vec_as_subscript2_result(1L, arg = print("oof"), NULL, numeric = "cast", character = "error")) }) test_that("`logical = 'cast'` is deprecated", { expect_snapshot( error = TRUE, vec_as_subscript2(TRUE, logical = "cast") ) # `logical = "error"` still works expect_snapshot( error = TRUE, vec_as_subscript2(TRUE, logical = "error") ) }) test_that("lossy cast errors for scalar subscripts work (#1606)", { expect_snapshot( error = TRUE, vec_as_subscript2(1.5) ) }) vctrs/tests/testthat/test-utils.R0000644000176200001440000000431714276722575016703 0ustar liggesusers test_that("names preserved if outer name is missing", { x <- c("a", "z", "") expect_equal(outer_names(x, NULL, 3), x) expect_equal(outer_names(x, "", 3), x) expect_equal(outer_names(x, na_chr, 3), x) }) test_that("outer name vectorised if needed", { expect_equal(outer_names(NULL, "x", 1L), c("x")) expect_equal(outer_names(NULL, "x", 2L), c("x1", "x2")) }) test_that("outer and inner names are combined", { expect_equal(outer_names("x", "y", 1), c("y..x")) }) test_that("options are created", { expect_identical( unclass(new_opts(c("a", "c"), letters[1:4])), c(a = TRUE, b = FALSE, c = TRUE, d = FALSE) ) }) test_that("can't supply unknown option", { expect_error( new_opts(c("a", "foo"), letters[1:4]), "Argument must be one of \"a\", \"b\", \"c\" or \"d\"" ) expect_error( new_opts(c("a", "foo"), letters[1:4], arg = "foo"), "`foo` must be one of \"a\", \"b\", \"c\" or \"d\"" ) }) test_that("`has_dim()` doesn't partial match on the `dim` attribute (#948)", { x <- structure(1, dimB = 1) expect_false(has_dim(x)) }) test_that("df_has_base_subset() detects `[` methods", { expect_true(df_has_base_subset(foobar(mtcars))) out <- with_methods( `[.vctrs_foobar` = function(x, i, ...) structure(NextMethod(), dispatched = TRUE), df_has_base_subset(foobar(mtcars)) ) expect_false(out) }) test_that("vec_common_suffix() finds common suffix", { x <- c("foo", "bar", "baz") y <- c("quux", "foo", "hop", "baz") expect_identical(vec_common_suffix(x, y), "baz") x <- c("foo", "bar", "baz") y <- c("quux", "foo", "bar", "baz") expect_identical(vec_common_suffix(x, y), x) x <- letters y <- chr() expect_identical(vec_common_suffix(x, y), chr()) x <- data.frame(x = 1:3, y = c("foo", "bar", "baz")) y <- data.frame(x = 0:3, y = c("foo", "hop", "bar", "baz")) exp <- data.frame(x = 2:3, y = c("bar", "baz")) expect_identical(vec_common_suffix(x, y), exp) }) test_that("fast_c() concatenates", { expect_identical(fast_c(character(), "foo"), "foo") expect_identical(fast_c("foo", character()), "foo") expect_identical(fast_c("foo", c("bar", "baz")), c("foo", "bar", "baz")) expect_identical(fast_c(c("bar", "baz"), "foo"), c("bar", "baz", "foo")) }) vctrs/tests/testthat/test-runs.R0000644000176200001440000002066614363774756016544 0ustar liggesusers# vec_identify_runs ------------------------------------------------------------ test_that("vec_identify_runs() works with size zero input", { expect <- structure(integer(), n = 0L) expect_identical(vec_identify_runs(integer()), expect) expect_identical(vec_identify_runs(data.frame()), expect) }) test_that("works with atomic input of various types", { expect <- structure(c(1L, 1L, 2L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(c(TRUE, TRUE, FALSE, FALSE, TRUE)), expect) expect_identical(vec_identify_runs(c(1L, 1L, 2L, 2L, 3L)), expect) expect_identical(vec_identify_runs(c(1, 1, 2, 2, 3)), expect) expect_identical(vec_identify_runs(complex(real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3))), expect) expect_identical(vec_identify_runs(c("a", "a", "b", "b", "c")), expect) expect_identical(vec_identify_runs(as.raw(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_identify_runs(list(1, 1, 2, 2, 3)), expect) }) test_that("NA values are identical", { expect <- structure(c(1L, 1L), n = 1L) expect_identical(vec_identify_runs(c(NA, NA)), expect) expect_identical(vec_identify_runs(c(NA_integer_, NA_integer_)), expect) expect_identical(vec_identify_runs(c(NA_real_, NA_real_)), expect) expect_identical(vec_identify_runs(c(NA_complex_, NA_complex_)), expect) expect_identical(vec_identify_runs(c(NA_character_, NA_character_)), expect) # No NA type for raw expect_identical(vec_identify_runs(list(NULL, NULL)), expect) }) test_that("NA and NaN are different", { expect <- structure(c(1L, 2L), n = 2L) expect_identical(vec_identify_runs(c(NA_real_, NaN)), expect) }) test_that("normalizes character encodings", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1) expect_identical(vec_identify_runs(x), structure(rep(1L, 3), n = 1L)) }) test_that("errors on scalars", { expect_snapshot(error = TRUE, { vec_identify_runs(foobar()) }) }) test_that("works with data frames rowwise", { df <- data_frame(x = c(1, 1, 1, 2), y = c(1, 1, 2, 3)) expect <- structure(c(1L, 1L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(df), expect) df <- data_frame(x = c(1, 1, 1), y = c(2, 2, 2), z = c("b", "a", "a")) expect <- structure(c(1L, 2L, 2L), n = 2L) expect_identical(vec_identify_runs(df), expect) }) test_that("works with data frames with rows but no columns", { expect <- structure(rep(1L, 5), n = 1L) expect_identical(vec_identify_runs(new_data_frame(n = 5L)), expect) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, 1, 2, 2), b = c(1, 2, 3, 3)) df <- data_frame(x = rep(1, 4), y = col) expect <- structure(c(1L, 2L, 3L, 3L), n = 3L) expect_identical(vec_identify_runs(df), expect) }) test_that("works with columns of various types", { # Use two columns to keep the data frame from being squashed to a vector add_col <- function(col) { x <- rep(1L, 5) data_frame(x = x, y = col) } expect <- structure(c(1L, 1L, 2L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(add_col(c(TRUE, TRUE, FALSE, FALSE, TRUE))), expect) expect_identical(vec_identify_runs(add_col(c(1L, 1L, 2L, 2L, 3L))), expect) expect_identical(vec_identify_runs(add_col(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_identify_runs(add_col(complex(real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_identify_runs(add_col(c("a", "a", "b", "b", "c"))), expect) expect_identical(vec_identify_runs(add_col(as.raw(c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_identify_runs(add_col(list(1, 1, 2, 2, 3))), expect) }) # vec_run_sizes ---------------------------------------------------------------- test_that("vec_run_sizes() works with size zero input", { expect_identical(vec_run_sizes(integer()), integer()) expect_identical(vec_run_sizes(data.frame()), integer()) }) test_that("works with atomic input of various types", { expect <- c(2L, 2L, 1L) expect_identical(vec_run_sizes(c(TRUE, TRUE, FALSE, FALSE, TRUE)), expect) expect_identical(vec_run_sizes(c(1L, 1L, 2L, 2L, 3L)), expect) expect_identical(vec_run_sizes(c(1, 1, 2, 2, 3)), expect) expect_identical(vec_run_sizes(complex(real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3))), expect) expect_identical(vec_run_sizes(c("a", "a", "b", "b", "c")), expect) expect_identical(vec_run_sizes(as.raw(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_run_sizes(list(1, 1, 2, 2, 3)), expect) }) test_that("NA values are identical", { expect <- 2L expect_identical(vec_run_sizes(c(NA, NA)), expect) expect_identical(vec_run_sizes(c(NA_integer_, NA_integer_)), expect) expect_identical(vec_run_sizes(c(NA_real_, NA_real_)), expect) expect_identical(vec_run_sizes(c(NA_complex_, NA_complex_)), expect) expect_identical(vec_run_sizes(c(NA_character_, NA_character_)), expect) # No NA type for raw expect_identical(vec_run_sizes(list(NULL, NULL)), expect) }) test_that("NA and NaN are different", { expect_identical(vec_run_sizes(c(NA_real_, NaN)), c(1L, 1L)) }) test_that("normalizes character encodings", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1) expect_identical(vec_run_sizes(x), 3L) }) test_that("errors on scalars", { expect_snapshot(error = TRUE, { vec_run_sizes(foobar()) }) }) test_that("works with data frames rowwise", { df <- data_frame(x = c(1, 1, 1, 2), y = c(1, 1, 2, 3)) expect_identical(vec_run_sizes(df), c(2L, 1L, 1L)) df <- data_frame(x = c(1, 1, 1), y = c(2, 2, 2), z = c("b", "a", "a")) expect_identical(vec_run_sizes(df), c(1L, 2L)) }) test_that("works with data frames with rows but no columns", { expect_identical(vec_run_sizes(new_data_frame(n = 5L)), 5L) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, 1, 2, 2), b = c(1, 2, 3, 3)) df <- data_frame(x = rep(1, 4), y = col) expect_identical(vec_run_sizes(df), c(1L, 1L, 2L)) }) test_that("works with columns of various types", { # Use two columns to keep the data frame from being squashed to a vector add_col <- function(col) { x <- rep(1L, 5) data_frame(x = x, y = col) } expect <- c(2L, 2L, 1L) expect_identical(vec_run_sizes(add_col(c(TRUE, TRUE, FALSE, FALSE, TRUE))), expect) expect_identical(vec_run_sizes(add_col(c(1L, 1L, 2L, 2L, 3L))), expect) expect_identical(vec_run_sizes(add_col(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_run_sizes(add_col(complex(real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_run_sizes(add_col(c("a", "a", "b", "b", "c"))), expect) expect_identical(vec_run_sizes(add_col(as.raw(c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_run_sizes(add_col(list(1, 1, 2, 2, 3))), expect) }) # vec_locate_run_bounds -------------------------------------------------------- test_that("can locate run starts", { expect_identical( vec_locate_run_bounds(c(1, 3, 3, 1, 5, 5, 6)), c(1L, 2L, 4L, 5L, 7L) ) }) test_that("can locate run ends", { expect_identical( vec_locate_run_bounds(c(1, 3, 3, 1, 5, 5, 6), which = "end"), c(1L, 3L, 4L, 6L, 7L) ) }) test_that("vec_locate_run_bounds() works with size zero input", { expect_identical(vec_locate_run_bounds(integer(), which = "start"), integer()) expect_identical(vec_locate_run_bounds(integer(), which = "end"), integer()) }) test_that("vec_locate_run_bounds() validates `which`", { expect_snapshot(error = TRUE, { vec_locate_run_bounds(1, which = "x") }) expect_snapshot(error = TRUE, { vec_locate_run_bounds(1, which = 1) }) expect_snapshot(error = TRUE, { vec_locate_run_bounds(1, which = c("foo", "bar")) }) }) # vec_detect_run_bounds -------------------------------------------------------- test_that("can detect run starts", { expect_identical( vec_detect_run_bounds(c(1, 3, 3, 1, 5, 5, 6)), c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE) ) }) test_that("can detect run ends", { expect_identical( vec_detect_run_bounds(c(1, 3, 3, 1, 5, 5, 6), which = "end"), c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE) ) }) test_that("vec_detect_run_bounds() works with size zero input", { expect_identical(vec_detect_run_bounds(integer(), which = "start"), logical()) expect_identical(vec_detect_run_bounds(integer(), which = "end"), logical()) }) test_that("vec_detect_run_bounds() validates `which`", { expect_snapshot(error = TRUE, { vec_detect_run_bounds(1, which = "x") }) expect_snapshot(error = TRUE, { vec_detect_run_bounds(1, which = 1) }) expect_snapshot(error = TRUE, { vec_detect_run_bounds(1, which = c("foo", "bar")) }) }) vctrs/tests/testthat/test-equal.R0000644000176200001440000002570514376223321016640 0ustar liggesusers # vectorised -------------------------------------------------------------- test_that("throws error for unsuported type", { expect_error(.Call(vctrs_equal, expression(x), expression(x), TRUE), class = "vctrs_error_scalar_type") }) test_that("C wrapper throws error if length or type doesn't match", { expect_error(.Call(vctrs_equal, 1:2, 1L, TRUE), "same types and lengths") expect_error(.Call(vctrs_equal, 1, 1L, TRUE), "same types and lengths") }) test_that("correct behaviour for basic vectors", { expect_equal(vec_equal(c(TRUE, FALSE), TRUE), c(TRUE, FALSE)) expect_equal(vec_equal(c(1L, 2L), 1L), c(TRUE, FALSE)) expect_equal(vec_equal(c(1, 2), 1), c(TRUE, FALSE)) expect_equal(vec_equal(c("1", "2"), "1"), c(TRUE, FALSE)) expect_equal(vec_equal(as.raw(1:2), as.raw(1L)), c(TRUE, FALSE)) expect_equal(vec_equal(list(1:3, 1:2), list(1:3)), c(TRUE, FALSE)) expect_equal(vec_equal(list(1:3, 1.5), list(1:3)), c(TRUE, FALSE)) expect_equal(vec_equal(list(as.raw(1:3), as.raw(1.5)), list(as.raw(1:3))), c(TRUE, FALSE)) expect_equal(vec_equal(list(1+1i, 1+0i), list(1+1i)), c(TRUE, FALSE)) expect_equal(vec_equal(c(1, 2) + 1i, 1+1i), c(TRUE, FALSE)) }) test_that("NAs are equal", { expect_true(vec_equal(NA, NA, na_equal = TRUE)) expect_true(vec_equal(NA_integer_, NA_integer_, na_equal = TRUE)) expect_true(vec_equal(NA_real_, NA_real_, na_equal = TRUE)) expect_true(vec_equal(NA_character_, NA_character_, na_equal = TRUE)) expect_true(vec_equal(list(NULL), list(NULL), na_equal = TRUE)) }) test_that("double special values", { expect_equal(vec_equal(c(NaN, NA), NaN, na_equal = TRUE), c(TRUE, FALSE)) expect_equal(vec_equal(c(NA, NaN), NA, na_equal = TRUE), c(TRUE, FALSE)) expect_true(vec_equal(Inf, Inf)) expect_true(vec_equal(-Inf, -Inf)) }) test_that("`list(NULL)` is considered a missing value (#653)", { expect_equal(vec_equal(list(NULL), list(NULL)), NA) expect_equal(vec_equal(list(NULL), list(1)), NA) }) test_that("can compare data frames", { df <- data.frame(x = 1:2, y = letters[2:1], stringsAsFactors = FALSE) expect_equal(vec_equal(df, df[1, ]), c(TRUE, FALSE)) }) test_that("can compare data frames with various types of columns", { x1 <- data_frame(x = 1, y = 2) y1 <- data_frame(x = 2, y = 1) x2 <- data_frame(x = "a") y2 <- data_frame(x = "b") x3 <- data_frame(x = FALSE) y3 <- data_frame(x = TRUE) x4 <- data_frame(x = 1L) y4 <- data_frame(x = 2L) x5 <- data_frame(x = as.raw(0)) y5 <- data_frame(x = as.raw(1)) x6 <- data_frame(x = 1+0i) y6 <- data_frame(x = 1+1i) expect_false(vec_equal(x1, y1)) expect_false(vec_equal(x2, y2)) expect_false(vec_equal(x3, y3)) expect_false(vec_equal(x4, y4)) expect_false(vec_equal(x5, y5)) expect_false(vec_equal(x6, y6)) }) test_that("can compare data frames with data frame columns", { df1 <- data_frame(x = data_frame(a = 1)) df2 <- data_frame(x = data_frame(a = 2)) expect_true(vec_equal(df1, df1)) expect_false(vec_equal(df1, df2)) }) test_that("can compare data frames with list columns", { df1 <- data_frame(x = list(a = 1, b = 2), y = c(1, 1)) df2 <- data_frame(x = list(a = 0, b = 2), y = c(1, 1)) expect_equal(vec_equal(df1, df2), c(FALSE, TRUE)) }) test_that("data frames must have same size and columns", { expect_error(.Call(vctrs_equal, data.frame(x = 1), data.frame(x = 1, y = 2), TRUE ), "must have same types and lengths" ) expect_error(.Call(vctrs_equal, data.frame(x = 1, y = 2, z = 2), data.frame(x = 1, y = 2), TRUE ), "must have the same number of columns" ) # Names are not checked, as `vec_cast_common()` should take care of the type. # So if `vec_cast_common()` is not called, or is improperly specified, then # this could result in false equality. expect_true(.Call(vctrs_equal, data.frame(x = 1), data.frame(y = 1), TRUE )) expect_error(.Call(vctrs_equal, data.frame(x = 1:2, y = 3:4), data.frame(x = 1, y = 2), TRUE ), "must have same types and lengths" ) expect_false(.Call(vctrs_equal, data.frame(x = 1), data.frame(x = 2), TRUE )) expect_false(.Call(vctrs_equal, list(data.frame(x = 1)), list(10), TRUE )) }) test_that("can compare data frames with 0 columns", { x <- new_data_frame(n = 1L) expect_true(vec_equal(x, x)) }) test_that("can compare lists of scalars (#643)", { lst <- list(new_sclr(x = 1)) expect_true(vec_equal(lst, lst)) # NA does not propagate lst <- list(new_sclr(y = NA)) expect_true(vec_equal(lst, lst)) df <- data.frame(x = c(1, 4, 3), y = c(2, 8, 9)) model <- lm(y ~ x, df) lst <- list(model) expect_true(vec_equal(lst, lst)) }) test_that("can determine equality of strings with different encodings (#553)", { for (x_encoding in encodings()) { for (y_encoding in encodings()) { expect_equal(vec_equal(x_encoding, y_encoding), TRUE) expect_equal(vec_equal(x_encoding, y_encoding), x_encoding == y_encoding) } } }) test_that("equality can be determined when strings have identical encodings", { encs <- encodings() for (enc in encs) { expect_true(vec_equal(enc, enc)) expect_equal(vec_equal(enc, enc), enc == enc) } }) test_that("equality is known to always fail with bytes", { enc <- encoding_bytes() error <- "translating strings with \"bytes\" encoding" expect_error(vec_equal(enc, enc), error) }) test_that("equality is known to fail when comparing bytes to other encodings", { error <- "translating strings with \"bytes\" encoding" for (enc in encodings()) { expect_error(vec_equal(encoding_bytes(), enc), error) expect_error(vec_equal(enc, encoding_bytes()), error) } }) test_that("`na_equal` is validated", { expect_snapshot(error = TRUE, { vec_equal(1, 1, na_equal = 1) }) expect_snapshot(error = TRUE, { vec_equal(1, 1, na_equal = c(TRUE, FALSE)) }) }) test_that("can compare lists of expressions", { x <- list(expression(x), expression(y)) y <- list(expression(x)) expect_equal(vec_equal(x, y), c(TRUE, FALSE)) }) test_that("vec_equal() silently falls back to base data frame", { expect_silent(expect_identical( vec_equal(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(TRUE, 32) )) }) # object ------------------------------------------------------------------ test_that("can compare NULL",{ expect_true(obj_equal(NULL, NULL)) }) test_that("can compare objects with reference semantics", { expect_true(obj_equal(globalenv(), globalenv())) expect_false(obj_equal(globalenv(), environment())) expect_true(obj_equal(quote(x), quote(x))) expect_false(obj_equal(quote(x), quote(y))) }) test_that("can compare pairlists", { expect_true(obj_equal(quote(x + y), quote(x + y))) expect_true(obj_equal(pairlist(x = 1, y = 2), pairlist(x = 1, y = 2))) }) test_that("can compare functions", { f1 <- function(x, y) x + y f2 <- function(x, y) x + y expect_false(obj_equal(f2, f1)) attr(f1, "srcref") <- NULL attr(f2, "srcref") <- NULL expect_true(obj_equal(f2, f1)) f3 <- f1 formals(f3) <- alist(x = 1) expect_false(obj_equal(f3, f1)) f4 <- f1 body(f4) <- quote(x) expect_false(obj_equal(f4, f2)) }) test_that("not equal if different types or lengths", { expect_false(obj_equal(1, 2)) expect_false(obj_equal(1:2, 1)) }) test_that("not equal if attributes not equal", { x1 <- structure(1:10, x = 1, y = 2) x2 <- structure(1:10, x = 1, y = 3) expect_false(obj_equal(x1, x2)) }) test_that("can compare expressions", { expect_true(obj_equal(expression(x), expression(x))) expect_false(obj_equal(expression(x), expression(y))) }) # na ---------------------------------------------------------------------- test_that("NA propagate symmetrically (#204)", { exp <- c(NA, NA) expect_identical(vec_equal(c(TRUE, FALSE), NA), exp) expect_identical(vec_equal(1:2, NA), exp) expect_identical(vec_equal(c(1, 2), NA), exp) expect_identical(vec_equal(letters[1:2], NA), exp) expect_identical(vec_equal(NA, c(TRUE, FALSE)), exp) expect_identical(vec_equal(NA, 1:2), exp) expect_identical(vec_equal(NA, c(1, 2)), exp) expect_identical(vec_equal(NA, letters[1:2]), exp) }) test_that("NA propagate from data frames columns", { x <- data.frame(x = 1:3) y <- data.frame(x = c(1L, NA, 2L)) expect_identical(vec_equal(x, y), c(TRUE, NA, FALSE)) expect_identical(vec_equal(y, x), c(TRUE, NA, FALSE)) expect_identical(vec_equal(x, y, na_equal = TRUE), c(TRUE, FALSE, FALSE)) expect_identical(vec_equal(y, x, na_equal = TRUE), c(TRUE, FALSE, FALSE)) x <- data.frame(x = 1:3, y = 1:3) y <- data.frame(x = c(1L, NA, 2L), y = c(NA, 2L, 3L)) expect_identical(vec_equal(x, y), c(NA, NA, FALSE)) expect_identical(vec_equal(y, x), c(NA, NA, FALSE)) expect_identical(vec_equal(x, y, na_equal = TRUE), c(FALSE, FALSE, FALSE)) expect_identical(vec_equal(y, x, na_equal = TRUE), c(FALSE, FALSE, FALSE)) }) test_that("NA do not propagate from list components (#662)", { expect_true(obj_equal(NA, NA)) expect_true(vec_equal(list(NA), list(NA))) }) test_that("NA do not propagate from names when comparing objects", { x <- set_names(1:3, c("a", "b", NA)) y <- set_names(1:3, c("a", NA, NA)) expect_true(obj_equal(x, x)) expect_false(obj_equal(x, y)) expect_equal(vec_equal(list(x, x, y), list(x, y, y)), c(TRUE, FALSE, TRUE)) }) test_that("NA do not propagate from attributes", { x <- structure(1:3, foo = NA) y <- structure(1:3, foo = "") expect_true(obj_equal(x, x)) expect_false(obj_equal(x, y)) }) test_that("NA do not propagate from function bodies or formals", { fn <- other <- function() NA body(other) <- TRUE expect_true(vec_equal(list(fn), list(fn))) expect_false(vec_equal(list(fn), list(other))) expect_true(obj_equal(fn, fn)) expect_false(obj_equal(fn, other)) fn <- other <- function(x = NA) NULL formals(other) <- list(x = NULL) expect_true(vec_equal(list(fn), list(fn))) expect_false(vec_equal(list(fn), list(other))) }) test_that("can check equality of unspecified objects", { expect_equal(vec_equal(NA, NA), NA) expect_true(vec_equal(NA, NA, na_equal = TRUE)) expect_equal(vec_equal(unspecified(1), unspecified(1)), NA) expect_true(vec_equal(unspecified(1), unspecified(1), na_equal = TRUE)) expect_equal(vec_equal(NA, unspecified(1)), NA) expect_true(vec_equal(NA, unspecified(1), na_equal = TRUE)) }) test_that("can't supply NA as `na_equal`", { expect_snapshot(error = TRUE, { vec_equal(NA, NA, na_equal = NA) }) }) # proxy ------------------------------------------------------------------- test_that("vec_equal() takes vec_proxy() by default", { local_env_proxy() x <- new_proxy(1:3) y <- new_proxy(3:1) expect_identical(vec_equal(x, y), lgl(FALSE, TRUE, FALSE)) }) test_that("vec_equal() takes vec_proxy_equal() if implemented", { local_comparable_tuple() x <- tuple(1:3, 1:3) y <- tuple(1:3, 4:6) expect_identical(x == y, rep(TRUE, 3)) expect_identical(vec_equal(x, y), rep(TRUE, 3)) # Recursive case foo <- data_frame(x = x) bar <- data_frame(x = y) expect_identical(vec_equal(foo, bar), rep(TRUE, 3)) }) vctrs/tests/testthat/helper-s3.R0000644000176200001440000001234014362266120016344 0ustar liggesusers new_ctor <- function(base_class) { function(x = list(), ..., class = NULL) { if (inherits(x, "tbl_df")) { tibble::new_tibble(x, class = c(class, base_class), nrow = nrow(x)) } else if (is.data.frame(x)) { structure(x, class = c(class, base_class, "data.frame"), ...) } else { structure(x, class = c(class, base_class), ...) } } } foobar <- new_ctor("vctrs_foobar") foobaz <- new_ctor("vctrs_foobaz") quux <- new_ctor("vctrs_quux") expect_foobar <- function(x) expect_s3_class({{ x }}, "vctrs_foobar") expect_foobaz <- function(x) expect_s3_class({{ x }}, "vctrs_foobaz") expect_quux <- function(x) expect_s3_class({{ x }}, "vctrs_quux") with_c_foobar <- function(expr) { with_methods( expr, c.vctrs_foobar = function(...) foobar(NextMethod()) ) } unrownames <- function(x) { row.names(x) <- NULL x } local_methods <- function(..., .frame = caller_env()) { local_bindings(..., .env = global_env(), .frame = .frame) } with_methods <- function(.expr, ...) { local_methods(...) .expr } local_proxy <- function(frame = caller_env()) { local_methods(.frame = frame, vec_proxy.vctrs_proxy = function(x, ...) proxy_deref(x), vec_restore.vctrs_proxy = function(x, to, ...) new_proxy(x), vec_ptype2.vctrs_proxy = function(x, y, ...) UseMethod("vec_ptype2.vctrs_proxy"), vec_ptype2.vctrs_proxy.vctrs_proxy = function(x, y, ...) new_proxy(vec_ptype(proxy_deref(x))), vec_cast.vctrs_proxy = function(x, to, ...) UseMethod("vec_cast.vctrs_proxy"), vec_cast.vctrs_proxy.vctrs_proxy = function(x, to, ...) x ) } new_proxy <- function(x) { structure(list(env(x = x)), class = "vctrs_proxy") } proxy_deref <- function(x, ...) { x[[1]]$x } local_env_proxy <- function(frame = caller_env()) { local_methods(.frame = frame, vec_proxy.vctrs_proxy = proxy_deref, vec_restore.vctrs_proxy = function(x, ...) new_proxy(x), vec_cast.vctrs_proxy = function(x, to, ...) UseMethod("vec_cast.vctrs_proxy"), vec_cast.vctrs_proxy.vctrs_proxy = function(x, to, ...) x, vec_ptype2.vctrs_proxy = function(x, y, ...) UseMethod("vec_ptype2.vctrs_proxy"), vec_ptype2.vctrs_proxy.vctrs_proxy = function(x, y, ...) new_proxy(proxy_deref(x)[0]) ) } local_no_stringsAsFactors <- function(frame = caller_env()) { local_options(.frame = frame, stringsAsFactors = FALSE) } tibble <- function(...) { tibble::tibble(...) } local_foobar_proxy <- function(frame = caller_env()) { local_methods(.frame = frame, vec_proxy.vctrs_foobar = function(x, ...) x) } subclass <- function(x) { class(x) <- c("vctrs_foo", "vctrs_foobar", class(x)) x } # Subclass promoted to logical new_lgl_subtype <- function(x) { stopifnot(is_logical(x)) structure(x, class = "vctrs_lgl_subtype") } local_lgl_subtype <- function(frame = caller_env()) { local_methods(.frame = frame, vec_ptype2.vctrs_lgl_subtype = function(x, y, ...) UseMethod("vec_ptype2.vctrs_lgl_subtype"), vec_ptype2.vctrs_lgl_subtype.vctrs_lgl_subtype = function(x, y, ...) x, vec_ptype2.vctrs_lgl_subtype.logical = function(x, y, ...) y, vec_ptype2.logical.vctrs_lgl_subtype = function(x, y, ...) x, vec_cast.vctrs_lgl_subtype = function(x, to, ...) UseMethod("vec_cast.vctrs_lgl_subtype"), vec_cast.vctrs_lgl_subtype.vctrs_lgl_subtype = function(x, to, ...) x, vec_cast.vctrs_lgl_subtype.logical = function(x, to, ...) new_lgl_subtype(x), vec_cast.logical.vctrs_lgl_subtype = function(x, to, ...) unstructure(x) ) } with_lgl_subtype <- function(expr) { local_lgl_subtype() expr } # Logical promoted to subclass new_lgl_supertype <- function(x) { stopifnot(is_logical(x)) structure(x, class = "vctrs_lgl_supertype") } local_lgl_supertype <- function(frame = caller_env()) { local_methods(.frame = frame, vec_ptype2.vctrs_lgl_supertype = function(x, y, ...) UseMethod("vec_ptype2.vctrs_lgl_supertype"), vec_ptype2.vctrs_lgl_supertype.vctrs_lgl_supertype = function(x, y, ...) x, vec_ptype2.vctrs_lgl_supertype.logical = function(x, y, ...) x, vec_ptype2.logical.vctrs_lgl_supertype = function(x, y, ...) y, vec_cast.vctrs_lgl_supertype = function(x, to, ...) UseMethod("vec_cast.vctrs_lgl_supertype"), vec_cast.vctrs_lgl_supertype.vctrs_lgl_supertype = function(x, to, ...) x, vec_cast.vctrs_lgl_supertype.logical = function(x, to, ...) new_lgl_subtype(x), vec_cast.logical.vctrs_lgl_supertype = function(x, to, ...) unstructure(x) ) } with_lgl_supertype <- function(expr) { local_lgl_supertype() expr } foobar_df_ptype2 <- function(x, y, ...) { foobar(df_ptype2(x, y, ...)) } foobar_df_cast <- function(x, y, ...) { foobar(df_cast(x, y, ...)) } local_foobar_df_methods <- function(expr, frame = caller_env()) { local_methods( .frame = frame, vec_ptype2.vctrs_foobar.vctrs_foobar = foobar_df_ptype2, vec_ptype2.data.frame.vctrs_foobar = foobar_df_ptype2, vec_ptype2.vctrs_foobar.data.frame = foobar_df_ptype2, vec_cast.vctrs_foobar.vctrs_foobar = foobar_df_cast, vec_cast.data.frame.vctrs_foobar = foobar_df_cast, vec_cast.vctrs_foobar.data.frame = foobar_df_cast ) } with_foobar_df_methods <- function(expr) { local_foobar_df_methods() expr } set_tibble <- function(x) { base <- class(x)[-length(class(x))] class(x) <- c(base, "tbl_df", "tbl", "data.frame") x } vctrs/tests/testthat/helper-vctrs.R0000644000176200001440000000433614511320527017164 0ustar liggesuserstestthat_import_from <- function(ns, names, env = caller_env()) { skip_if_not_installed(ns) import_from(ns, names, env = env) } shaped_int <- function(...) { array(NA_integer_, c(...)) } set_rownames_recursively <- function(x, i = NULL) { n <- vec_size(x) stopifnot(n <= length(letters)) for (j in seq_along(x)) { if (is.data.frame(x[[j]])) { x[[j]] <- set_rownames_recursively(x[[j]], i = i) } } row.names(x) <- paste0(letters[seq_len(n)], i) x } expect_waldo_equal <- function(type, act, exp, info, ...) { comp <- waldo::compare(act$val, exp$val, ..., x_arg = "actual", y_arg = "expected") expect( length(comp) == 0, sprintf( "`actual` (%s) not %s to `expected` (%s).\n\n%s", act$lab, type, exp$lab, paste0(comp, collapse = "\n\n") ), info = info ) invisible(act$val) } expect_identical <- function(object, expected, info = NULL, label = NULL, expected.label = NULL, ...) { act <- quasi_label(enquo(object), label, arg = "object") exp <- quasi_label(enquo(expected), expected.label, arg = "expected") expect_waldo_equal("identical", act, exp, info, ...) } expect_equal <- function(object, expected, ..., tolerance = .Machine$double.eps ^ 0.5, info = NULL, label = NULL, expected.label = NULL) { act <- quasi_label(enquo(object), label, arg = "object") exp <- quasi_label(enquo(expected), expected.label, arg = "expected") expect_waldo_equal("equal", act, exp, info, ..., tolerance = tolerance) } raw2 <- function(...) { as.raw(list_unchop(list2(...), ptype = integer())) } cpl2 <- function(...) { # R 4.4.0 changed `as.complex(NA_real/integer/logical)` so that it always uses # a `0` in the imaginary slot. While this is reasonable, it is annoying for # comparison purposes in tests, where we typically propagate the `NA`. As of # rlang 1.1.1, `cpl()` inherits this behavior change so we have a custom version # here that works the same on all R versions. # https://github.com/wch/r-source/commit/1a2aea9ac3c216fea718f33f712764afc34f6ee8 out <- list2(...) out <- as.complex(out) out[is.na(out)] <- complex(real = NA_real_, imaginary = NA_real_) out } vctrs/tests/testthat/helper-rational.R0000644000176200001440000000364614376223321017642 0ustar liggesusers # Rational record class from the S3 vector vignette new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer.") } if (!is_integer(d)) { abort("`d` must be an integer.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } rational <- function(n, d) { args <- vec_cast_common(n, d, .to = integer()) args <- vec_recycle_common(!!! args) new_rational(args[[1L]], args[[2L]]) } format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_proxy_equal.vctrs_rational <- function(x) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational_methods <- list( vec_ptype_abbr.vctrs_rational = function(x, ...) "rtnl", vec_ptype_full.vctrs_rational = function(x, ...) "rational", vec_ptype2.vctrs_rational = function(x, y, ...) UseMethod("vec_ptype2.vctrs_rational"), vec_ptype2.vctrs_rational.vctrs_rational = function(x, y, ...) new_rational(), vec_ptype2.vctrs_rational.integer = function(x, y, ...) new_rational(), vec_ptype2.integer.vctrs_rational = function(x, y, ...) new_rational(), vec_cast.vctrs_rational = function(x, to, ...) UseMethod("vec_cast.vctrs_rational"), vec_cast.vctrs_rational.vctrs_rational = function(x, to, ...) x, vec_cast.double.vctrs_rational = function(x, to, ...) field(x, "n") / field(x, "d"), vec_cast.vctrs_rational.integer = function(x, to, ...) rational(x, 1), vec_proxy_equal.vctrs_rational = vec_proxy_equal.vctrs_rational, vec_proxy_compare.vctrs_rational = vec_proxy_compare.vctrs_rational ) local_rational_class <- function(frame = caller_env()) { local_methods(.frame = frame, !!!rational_methods) } vctrs/tests/testthat/test-proxy-restore.R0000644000176200001440000001147614376223322020374 0ustar liggesusers test_that("default vec_restore() restores attributes except names", { to <- structure(NA, foo = "foo", bar = "bar") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) to <- structure(NA, names = "a", foo = "foo", bar = "bar") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) to <- structure(NA, foo = "foo", names = "a", bar = "bar") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) to <- structure(NA, foo = "foo", bar = "bar", names = "a") expect_identical(vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar")) }) test_that("default vec_restore() restores objectness", { to <- structure(NA, class = "foo") x <- vec_restore.default(NA, to) expect_true(is.object(x)) expect_s3_class(x, "foo") }) test_that("data frame vec_restore() checks type", { expect_error(vec_restore(NA, mtcars), "Attempt to restore data frame from a logical") }) test_that("data frame restore forces character column names", { df <- new_data_frame(list(1)) expect_named(vec_restore(df, df), "") }) test_that("can use vctrs primitives from vec_restore() without inflooping", { local_methods( vec_restore.vctrs_foobar = function(x, to, ...) { vec_ptype(x) vec_init(x) obj_check_vector(x) vec_slice(x, 0) "woot" } ) foobar <- new_vctr(1:3, class = "vctrs_foobar") expect_identical(vec_slice(foobar, 2), "woot") }) test_that("dimensions are preserved by default restore method", { x <- foobar(1:4) dim(x) <- c(2, 2) dimnames(x) <- list(a = c("foo", "bar"), b = c("quux", "hunoz")) exp <- foobar(c(1L, 3L)) dim(exp) <- c(1, 2) dimnames(exp) <- list(a = "foo", b = c("quux", "hunoz")) expect_identical(vec_slice(x, 1), exp) }) test_that("names attribute isn't set when restoring 1D arrays using 2D+ objects", { x <- foobar(1:2) dim(x) <- c(2) nms <- c("foo", "bar") dimnames(x) <- list(nms) res <- vec_restore(x, matrix(1)) expect_null(attributes(res)$names) expect_equal(attr(res, "names"), nms) expect_equal(names(res), nms) }) test_that("arguments are not inlined in the dispatch call (#300)", { local_methods( vec_restore.vctrs_foobar = function(x, to, ...) sys.call(), vec_proxy.vctrs_foobar = unclass ) call <- vec_restore(foobar(list(1)), foobar(list(1))) expect_equal(call, quote(vec_restore.vctrs_foobar(x = x, to = to))) }) test_that("restoring to non-bare data frames calls `vec_bare_df_restore()` before dispatching", { x <- list(x = numeric()) to <- new_data_frame(x, class = "tbl_foobar") local_methods( vec_restore.tbl_foobar = function(x, to, ..., n) { if (is.data.frame(x)) { abort(class = "error_df_restore_was_called") } } ) expect_error(vec_restore(x, to), class = "error_df_restore_was_called") }) test_that("row names are not restored if target is not a data frame", { proxy <- data.frame(x = 1) out <- vec_restore(proxy, to = foobar("")) exp <- list(names = "x", class = "vctrs_foobar") expect_identical(attributes(out), exp) }) test_that("attributes are properly restored when they contain special attributes", { exp <- list(foo = TRUE, bar = TRUE) x <- structure(list(), foo = TRUE, names = chr(), bar = TRUE) out <- vec_restore_default(list(), x) expect_identical(attributes(out), exp) # Was broken by #943 x <- structure(list(), foo = TRUE, names = chr(), row.names = int(), bar = TRUE) out <- vec_restore_default(list(), x) expect_identical(attributes(out), exp) }) test_that("names<- is not called with partial data (#1108)", { x <- set_names(foobar(1:2), c("a", "b")) values <- list() local_methods( `names<-.vctrs_foobar` = function(x, value) { if (!is_null(value)) { values <<- c(values, list(value)) } NextMethod() } ) vec_c(x, x) expect_equal(values, list(c("a", "b", "a", "b"))) }) test_that("recursive proxy and restore work with recursive records", { new_recursive_rcrd <- function(x) { new_rcrd( list(field = x), class = "my_recursive_rcrd" ) } internal <- new_rcrd(list(internal_field = 1:2)) x <- new_recursive_rcrd(data_frame(col = internal)) proxy <- vec_proxy_recurse(x) exp <- data_frame(field = data_frame(col = data_frame(internal_field = 1:2))) expect_equal(proxy, exp) expect_equal(vec_restore_recurse(proxy, x), x) # Non-recursive case doesn't proxy `internal` proxy <- vec_proxy(x) exp <- data_frame(field = data_frame(col = internal)) expect_equal(proxy, exp) expect_equal(vec_restore(proxy, x), x) x_exp <- new_recursive_rcrd(data_frame(col = vec_rep(internal, 2))) expect_equal( list_unchop(list(x, x)), x_exp ) df <- data_frame(x = x) df_exp <- data_frame(x = x_exp) expect_equal(vec_rbind(df, df), df_exp) expect_equal(vec_c(df, df), df_exp) }) vctrs/tests/testthat/helper-shape.R0000644000176200001440000000017614276722575017142 0ustar liggesusers shape_broadcast_ <- function(x, to, x_arg = "x", to_arg = "to") { shape_broadcast(x, to, x_arg = x_arg, to_arg = to_arg) } vctrs/tests/testthat/test-type-sclr.R0000644000176200001440000000536114276722575017465 0ustar liggesusers test_that("sclr is a named list", { x <- new_sclr(x = 1, y = 2) expect_type(x, "list") expect_s3_class(x, "vctrs_sclr") expect_named(x, c("x", "y")) }) test_that("scalar must have unique names", { expect_error(new_sclr(x = 1, x = 2), "not TRUE") }) test_that("can get and set existing fields", { x <- new_sclr(x = 1, y = 2) x$x <- 3 expect_equal(x$x, 3) x[["y"]] <- 4 expect_equal(x[["y"]], 4) expect_error(x$z, "Invalid index") expect_error(x$z <- 1, "Invalid index") }) test_that("as.list strips attributes apart from names", { x <- new_sclr(x = 1, y = 2) y <- as.list(x) expect_type(y, "list") expect_equal(attributes(y), list(names = names(x))) }) test_that("as.data.frame works", { # #167: Not sure if this is the correct behavior. x <- new_sclr(x = 1, y = 2) expect_equal( as.data.frame(x, nm = "a"), new_data_frame(list(a = list(x))) ) }) test_that("putting in a data frame makes a list-col", { x <- new_sclr(x = 1, y = 2) df <- data.frame(x) expect_s3_class(df, "data.frame") expect_equal(df$x, list(x)) }) test_that("vector operations are unsupported", { x <- new_sclr(x = 1, y = 2) expect_error(x["a"], class = "vctrs_error_unsupported") expect_error(x["a"] <- 1, class = "vctrs_error_unsupported") expect_error(names(x) <- "x", class = "vctrs_error_unsupported") expect_error(dim(x) <- 1, class = "vctrs_error_unsupported") expect_error(dimnames(x) <- 1, class = "vctrs_error_unsupported") expect_error(levels(x) <- 1, class = "vctrs_error_unsupported") expect_error(is.na(x) <- 1, class = "vctrs_error_unsupported") expect_error(c(x), class = "vctrs_error_unsupported") expect_error(abs(x), class = "vctrs_error_unsupported") expect_error(x + 1, class = "vctrs_error_unsupported") expect_error(Arg(x), class = "vctrs_error_unsupported") expect_error(sum(x), class = "vctrs_error_unsupported") expect_error(order(x), class = "vctrs_error_unsupported") expect_error(levels(x), class = "vctrs_error_unsupported") expect_error(t(x), class = "vctrs_error_unsupported") expect_error(unique(x), class = "vctrs_error_unsupported") expect_error(duplicated(x), class = "vctrs_error_unsupported") expect_error(anyDuplicated(x), class = "vctrs_error_unsupported") expect_error(as.logical(x), class = "vctrs_error_unsupported") expect_error(as.integer(x), class = "vctrs_error_unsupported") expect_error(as.double(x), class = "vctrs_error_unsupported") expect_error(as.character(x), class = "vctrs_error_unsupported") expect_error(as.Date(x), class = "vctrs_error_unsupported") expect_error(as.POSIXct(x), class = "vctrs_error_unsupported") }) test_that("summary is unimplemented", { x <- new_sclr(x = 1, y = 2) expect_error(summary(x), class = "vctrs_error_unimplemented") }) vctrs/tests/testthat/test-dim.R0000644000176200001440000000126714276722575016315 0ustar liggesusers # vec_dim ----------------------------------------------------------------- test_that("dim is dimensions", { expect_equal(vec_dim(array(dim = c(1))), c(1)) expect_equal(vec_dim(array(dim = c(1, 1))), c(1, 1)) expect_equal(vec_dim(array(dim = c(1, 1, 1))), c(1, 1, 1)) }) test_that("dim_n is number of dimensions", { expect_equal(vec_dim_n(array(dim = c(1))), 1) expect_equal(vec_dim_n(array(dim = c(1, 1))), 2) expect_equal(vec_dim_n(array(dim = c(1, 1, 1))), 3) }) test_that("vector and 1-d array are equivalent", { x1 <- 1:5 x2 <- array(x1) expect_equal(vec_dim(x1), 5) expect_equal(vec_dim(x2), 5) expect_equal(vec_size(x1), 5) expect_equal(vec_size(x2), 5) }) vctrs/tests/testthat/test-conditions.R0000644000176200001440000001304214315060310017657 0ustar liggesusers test_that("conditions inherit from `vctrs_error`", { expect_error(stop_incompatible(NULL, NULL), class = "vctrs_error") expect_error(stop_incompatible_type(NULL, NULL, x_arg = "x", y_arg = "y"), class = "vctrs_error") expect_error(stop_incompatible_cast(NULL, NULL, x_arg = "x", to_arg = "to"), class = "vctrs_error") expect_error(stop_incompatible_op("", NULL, NULL), class = "vctrs_error") expect_error(stop_incompatible_size(NULL, NULL, 0, 0, x_arg = "x", y_arg = "y"), class = "vctrs_error") expect_error(maybe_lossy_cast(NULL, NULL, NULL, TRUE, x_arg = "x", to_arg = "to"), class = "vctrs_error") expect_error(stop_unsupported("", ""), class = "vctrs_error") expect_error(stop_unimplemented("", ""), class = "vctrs_error") expect_error(stop_scalar_type(NULL), class = "vctrs_error") expect_error(stop_names(), class = "vctrs_error") expect_error(stop_names_cannot_be_empty(""), class = "vctrs_error") expect_error(stop_names_cannot_be_dot_dot("..1"), class = "vctrs_error") expect_error(stop_names_must_be_unique("x"), class = "vctrs_error") }) test_that("incompatible cast throws an incompatible type error", { err <- expect_error( stop_incompatible_cast(1, 2, x_arg = "x", to_arg = "to"), class = "vctrs_error_incompatible_type" ) expect_equal(err$x, 1) expect_equal(err$y, 2) expect_equal(err$x_arg, "x") expect_equal(err$y_arg, "to") # Convenience aliases expect_equal(err$to, err$y) expect_equal(err$to_arg, err$y_arg) }) test_that("incompatible type error validates `action`", { expect_snapshot({ (expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = "c"))) (expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = 1))) }) }) test_that("can override arg in OOB conditions", { expect_snapshot({ (expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), NULL ), class = "vctrs_error_subscript_oob" )) (expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), quote(foo) ), class = "vctrs_error_subscript_oob" )) (expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), quote(foo(bar)) ), class = "vctrs_error_subscript_oob" )) }) }) test_that("scalar type errors are informative", { expect_snapshot({ (expect_error( vec_slice(foobar(list(1)), 1), class = "vctrs_error_scalar_type" )) (expect_error( stop_scalar_type(foobar(list(1)), arg = "foo"), class = "vctrs_error_scalar_type" )) }) }) test_that("empty names errors are informative", { expect_snapshot({ (expect_error( vec_as_names(c("x", "", "y"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" )) (expect_error( vec_as_names(c("x", "", "y", ""), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" )) (expect_error( vec_as_names(rep("", 10), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" )) }) }) test_that("dot dot names errors are informative", { expect_snapshot({ (expect_error( vec_as_names(c("..1", "..1", "..1", "...", "z"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot" )) (expect_error( vec_as_names(c(rep("..1", 20), rep(c("..2", "..3", "..4", "...", "..5"), 2)), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot" )) }) }) test_that("unique names errors are informative", { expect_snapshot({ (expect_error( vec_as_names(c("x", "x", "x", "y", "y", "z"), repair = "check_unique"), class = "vctrs_error_names_must_be_unique" )) (expect_error( vec_as_names(c(rep("x", 20), rep(c("a", "b", "c", "d", "e"), 2)), repair = "check_unique"), class = "vctrs_error_names_must_be_unique" )) }) }) test_that("can't supply both `message` and `details`", { expect_error( stop_incompatible_type(1, 2, message = "my message", x_arg = "x", y_arg = "y"), "my message", class = "vctrs_error_incompatible_type" ) expect_error( stop_incompatible_type(1, 2, message = "my message", details = "my details", x_arg = "x", y_arg = "y"), "Can't supply both `message` and `details`." ) }) test_that("lossy cast errors are internal", { # Should not trigger testthat warnings about untested class expect_error(vec_cast(mtcars, mtcars[1:3]), "convert") expect_error(vec_cast(1.5, int()), "convert") }) test_that("lossy cast from character to factor mentions loss of generality", { expect_snapshot({ (expect_error(vec_cast("a", factor("b")), class = "vctrs_error_cast_lossy")) }) }) test_that("lossy cast `conditionMessage()` result matches `cnd_message()` (#1592)", { cnd <- catch_cnd(vec_cast(1.5, to = integer())) expect_identical(conditionMessage(cnd), cnd_message(cnd)) expect_snapshot({ cat(conditionMessage(cnd)) }) }) test_that("ordered cast failures mention conversion", { expect_snapshot({ (expect_error( vec_cast(ordered("x"), ordered("y")), class = "vctrs_error_incompatible_type" )) }) }) test_that("incompatible size errors", { expect_snapshot({ (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = ""))) (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = ""))) (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "bar"))) (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = quote(bar)))) }) }) vctrs/tests/testthat/test-dictionary.R0000644000176200001440000002652214315060310017662 0ustar liggesusers # counting ---------------------------------------------------------------- test_that("vec_count counts number observations", { x <- vec_count(rep(1:3, 1:3), sort = "key") expect_equal(x, data.frame(key = 1:3, count = 1:3)) }) test_that("vec_count(sort = 'count') uses a stable sort when there are ties (#1588)", { x <- c("a", "b", "b", "a", "d") expect_identical( vec_count(x, sort = "count"), data_frame(key = c("a", "b", "d"), count = c(2L, 2L, 1L)) ) }) test_that("vec_count works with matrices", { x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2)) out <- vec_count(x) exp <- data_frame(key = c(NA, NA), count = int(2L, 1L)) exp$key <- vec_slice(x, c(1, 3)) expect_identical(out, exp) }) test_that("vec_count works with arrays", { x <- array(c(rep(1, 3), rep(2, 3)), dim = c(3, 2, 1)) expect <- data.frame(key = NA, count = 3) expect$key <- vec_slice(x, 1L) expect_equal(vec_count(x), expect) }) test_that("vec_count works for zero-length input", { x <- vec_count(integer(), sort = "none") expect_equal(x, data.frame(key = integer(), count = integer())) }) test_that("vec_count works with different encodings", { x <- vec_count(encodings()) expect_equal(x, new_data_frame(list(key = encodings()[1], count = 3L))) }) test_that("vec_count recursively takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect <- data_frame(key = vec_slice(df, c(1, 3)), count = c(2L, 1L)) expect_equal(vec_count(df), expect) }) # duplicates and uniques -------------------------------------------------- test_that("vec_duplicated reports on duplicates regardless of position", { x <- c(1, 1, 2, 3, 4, 4) expect_equal(vec_duplicate_detect(x), c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE)) }) test_that("vec_duplicate_any returns single TRUE/FALSE", { expect_false(vec_duplicate_any(c(1:10))) expect_true(vec_duplicate_any(c(1:10, 1))) }) test_that("vec_duplicate_id gives position of first found", { x <- c(1, 2, 3, 1, 4) expect_equal(vec_duplicate_id(x), c(1, 2, 3, 1, 5)) }) test_that("vec_unique matches unique", { x <- sample(100, 1000, replace = TRUE) expect_equal(vec_unique(x), unique(x)) expect_equal(vec_unique(c("x", "x")), "x") }) test_that("vec_unique matches unique for matrices", { x <- matrix(c(1, 1, 2, 2), ncol = 2) expect_equal(vec_unique(x), unique(x)) }) test_that("vec_unique_count matches length + unique", { x <- sample(100, 1000, replace = TRUE) expect_equal(vec_unique_count(x), length(unique(x))) }) test_that("also works for data frames", { df <- data.frame(x = 1:3, y = letters[3:1], stringsAsFactors = FALSE) idx <- c(1L, 1L, 1L, 2L, 2L, 3L) df2 <- df[idx, , drop = FALSE] rownames(df2) <- NULL expect_equal(vec_duplicate_detect(df2), vec_duplicate_detect(idx)) expect_equal(vec_unique(df2), vec_slice(df, vec_unique(idx))) count <- vec_count(df2, sort = "key") expect_equal(count$key, df) expect_equal(count$count, vec_count(idx)$count) exp <- tibble(x = c(1, 1, 2), y = c(1, 2, 3)) expect_identical(vec_unique(vec_slice(exp, c(1, 1, 2, 3))), exp) }) test_that("vec_unique() handles matrices (#327)", { x <- matrix(c(1, 2, 3, 4), c(2, 2)) y <- matrix(c(1, 2, 3, 5), c(2, 2)) expect_identical(vec_unique(list(x, x)), list(x)) expect_identical(vec_unique(list(x, y)), list(x, y)) x <- matrix(c(1, 2, 1, 1, 2, 1), nrow = 3) expect_identical(vec_unique(x), vec_slice(x, 1:2)) }) test_that("vec_unique() works with 1D arrays", { # 1D arrays are dispatched to `as.data.frame.vector()` which # currently does not strip dimensions. This caused an infinite # recursion. expect_identical(vec_unique(array(1:2)), array(1:2)) x <- new_vctr(c(1, 1, 1, 2, 1, 2), dim = c(3, 2)) expect_identical(vec_unique(x), new_vctr(c(1, 1, 2, 1), dim = c(2, 2))) }) test_that("unique functions take the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_true(vec_in(tuple(2, 100), x)) expect_identical(vec_match(tuple(2, 100), x), 2L) }) test_that("unique functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect_equal(vec_unique(df), vec_slice(df, c(1, 3))) expect_equal(vec_unique_count(df), 2L) expect_equal(vec_unique_loc(df), c(1, 3)) }) test_that("duplicate functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect_equal(vec_duplicate_any(df), TRUE) expect_equal(vec_duplicate_detect(df), c(TRUE, TRUE, FALSE)) expect_equal(vec_duplicate_id(df), c(1, 1, 3)) }) test_that("unique functions treat positive and negative 0 as equivalent (#637)", { expect_equal(vec_unique(c(0, -0)), 0) expect_equal(vec_unique_count(c(0, -0)), 1) expect_equal(vec_unique_loc(c(0, -0)), 1) }) test_that("unique functions work with different encodings", { encs <- encodings() expect_equal(vec_unique(encs), encs[1]) expect_equal(vec_unique_count(encs), 1L) expect_equal(vec_unique_loc(encs), 1L) }) test_that("unique functions can handle scalar types in lists", { x <- list(a ~ b, a ~ b, a ~ c) expect_equal(vec_unique(x), vec_slice(x, c(1, 3))) x <- list(call("x"), call("y"), call("x")) expect_equal(vec_unique(x), vec_slice(x, c(1, 2))) }) test_that("duplicate functions works with different encodings", { encs <- encodings() expect_equal(vec_duplicate_id(encs), rep(1, 3)) expect_equal(vec_duplicate_detect(encs), rep(TRUE, 3)) expect_equal(vec_duplicate_any(encs), TRUE) }) test_that("vec_unique() returns differently encoded strings in the order they appear", { encs <- encodings() x <- c(encs$unknown, encs$utf8) y <- c(encs$utf8, encs$unknown) expect_equal_encoding(vec_unique(x), encs$unknown) expect_equal_encoding(vec_unique(y), encs$utf8) }) test_that("vec_unique() works on lists containing expressions", { x <- list(expression(x), expression(y), expression(x)) expect_equal(vec_unique(x), x[1:2]) }) test_that("vec_unique() works with glm objects (#643)", { # class(model$family$initialize) == "expression" model <- glm(mpg ~ wt, data = mtcars) expect_equal(vec_unique(list(model, model)), list(model)) }) test_that("can take the unique locations of dfs with list-cols", { df <- tibble(x = list(1, 2, 1, 3), y = list(1, 2, 1, 3)) expect_identical(vec_unique_loc(df), c(1L, 2L, 4L)) }) # matching ---------------------------------------------------------------- test_that("vec_match() matches match()", { n <- c(1:3, NA) h <- c(4, 2, 1, NA) expect_equal(vec_match(n, h), match(n, h)) expect_equal(vec_match(1.5, c(2, 1.5, NA)), match(1.5, c(2, 1.5, NA))) expect_equal(vec_match("x", "x"), match("x", "x")) }) test_that("vec_match() and vec_in() check types", { expect_snapshot({ df1 <- data_frame(x = data_frame(foo = 1)) df2 <- data_frame(x = data_frame(foo = "")) (expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type")) (expect_error(vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type")) (expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type")) (expect_error(vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type")) }) }) test_that("vec_in() matches %in%", { n <- c(1:3, NA) h <- c(4, 2, 1, NA) expect_equal(vec_in(n, h), n %in% h) }) test_that("can opt out of NA matching", { n <- c(1, NA) h <- c(1:3, NA) expect_equal(vec_in(n, h, na_equal = FALSE), c(TRUE, NA)) }) test_that("vec_match works with empty data frame", { out <- vec_match( new_data_frame(n = 3L), new_data_frame(n = 0L) ) expect_equal(out, vec_init(integer(), 3)) }) test_that("matching functions take the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_identical(vec_unique_loc(x), 1:2) expect_identical(unique(x), tuple(c(1, 2), 1:2)) expect_true(vec_duplicate_any(x)) expect_identical(vec_duplicate_id(x), c(1L, 2L, 1L)) expect_identical(vec_unique_count(x), 2L) expect_identical(vec_duplicate_detect(x), c(TRUE, FALSE, TRUE)) }) test_that("can take the unique loc of 1d arrays (#461)", { x <- array(c(1, 1, 2, 2, 3)) y <- array(c(1, 1, 2, 2, 3), dimnames = list(NULL)) expect_identical(vctrs::vec_unique_loc(x), int(1, 3, 5)) expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5)) z <- array(c(1, 1, 2, 2, 3, 4), c(3, 2)) expect_silent(expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5))) }) test_that("matching functions work with different encodings", { encs <- encodings() expect_equal(vec_match(encs, encs[1]), rep(1, 3)) expect_equal(vec_in(encs, encs[1]), rep(TRUE, 3)) }) test_that("matching functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2), 1:2) df <- data_frame(x = x) y <- tuple(c(2, 3), c(3, 3)) df2 <- data_frame(x = y) expect_equal(vec_match(df, df2), c(NA, 1)) expect_equal(vec_in(df, df2), c(FALSE, TRUE)) }) test_that("can propagate missing values while matching", { exp <- c(NA, 3L, NA, 1L) expect_identical(vec_match(lgl(NA, TRUE, NA, FALSE), lgl(FALSE, NA, TRUE), na_equal = FALSE), exp) expect_identical(vec_match(int(NA, 1L, NA, 2L), int(2L, NA, 1L), na_equal = FALSE), exp) expect_identical(vec_match(dbl(NA, 1, NA, 2), dbl(2, NA, 1), na_equal = FALSE), exp) expect_identical(vec_match(cpl(NA, 1, NA, 2), cpl(2, NA, 1), na_equal = FALSE), exp) expect_identical(vec_match(chr(NA, "1", NA, "2"), chr("2", NA, "1"), na_equal = FALSE), exp) expect_identical(vec_match(list(NULL, 1, NULL, 2), list(2, NULL, 1), na_equal = FALSE), exp) # No missing values for raw vectors expect_identical(vec_match(raw2(0, 1, 0, 2), raw2(2, 0, 1), na_equal = FALSE), c(2L, 3L, 2L, 1L)) }) test_that("can propagate missingness of incomplete rcrd observations (#1386)", { x <- new_rcrd(list(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA))) expect_identical(vec_match(x, x, na_equal = FALSE), c(1L, NA, NA, NA)) # Matches `vec_detect_complete()` results expect_identical(vec_detect_complete(x), c(TRUE, FALSE, FALSE, FALSE)) }) test_that("can propagate NaN as a missing value (#1252)", { expect_identical(vec_match(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), int(NA, NA)) expect_identical(vec_in(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), lgl(NA, NA)) }) test_that("missing values are propagated across columns", { for (na_value in list(NA, na_int, na_dbl, na_cpl, na_chr, list(NULL))) { df <- data_frame(x = 1, y = data_frame(foo = 2, bar = na_value), z = 3) expect_identical(vec_match(df, df), 1L) expect_identical(vec_match(df, df, na_equal = FALSE), na_int) } }) test_that("can't supply NA as `na_equal`", { expect_error(vec_match(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`") }) test_that("vec_match() and vec_in() silently fall back to base data frame", { expect_silent(expect_identical( vec_match(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), 1:32 )) expect_silent(expect_identical( vec_in(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(TRUE, 32) )) }) test_that("vec_in() evaluates arg lazily", { expect_silent(vec_in(1L, 1L, needles_arg = print("oof"))) expect_silent(vec_in(1L, 1L, haystack_arg = print("oof"))) }) test_that("vec_match() evaluates arg lazily", { expect_silent(vec_match(1L, 1L, needles_arg = print("oof"))) expect_silent(vec_match(1L, 1L, haystack_arg = print("oof"))) }) vctrs/tests/testthat/test-fields.R0000644000176200001440000000445414276722575017013 0ustar liggesusers test_that("n_fields captures number of fields", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(n_fields(r), 2) }) # get --------------------------------------------------------------------- test_that("can extract valid field", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(field(r, "x"), 1) expect_equal(field(r, 1L), 1) }) test_that("can extract field even if encoding is different", { x1 <- "fa\u00e7ile" skip_if_not(Encoding(x1) == "UTF-8") x2 <- iconv(x1, from = "UTF-8", to = "latin1") skip_if_not(Encoding(x2) == "latin1") r <- new_rcrd(setNames(list(1), x1)) expect_equal(field(r, x1), 1) expect_equal(field(r, x2), 1) }) test_that("invalid indices throw error", { r <- new_rcrd(list(x = 1, y = 2)) expect_error(field(r, "z"), "Invalid index") expect_error(field(r, NA_character_), "Invalid index") expect_error(field(r, ""), "Invalid index") expect_error(field(r, letters), "Invalid index") expect_error(field(r, 0L), "Invalid index") expect_error(field(r, NA_integer_), "Invalid index") expect_error(field(r, 0), "Invalid index") expect_error(field(r, NA_real_), "Invalid index") expect_error(field(r, Inf), "Invalid index") expect_error(field(r, mean), "Invalid index") }) test_that("corrupt rcrd throws error", { r <- new_rcrd(list(x = 1, y = 2)) expect_error(field(1:10, 1L), "Corrupt rcrd") expect_error(field(list(), 1L), "Corrupt rcrd") expect_error(field(list(1), "x"), "Corrupt x") expect_error(field(setNames(list(1, 1), "y"), "x"), "Corrupt x") }) # set --------------------------------------------------------------------- test_that("field<- modifies a copy", { r1 <- new_rcrd(list(x = 1, y = 2)) r2 <- r1 field(r1, "x") <- 3 expect_equal(field(r1, "x"), 3) expect_equal(field(r2, "x"), 1) }) test_that("field<- checks inputs", { x <- list() expect_error(field(x, "x") <- 1, "Corrupt rcrd") r <- new_rcrd(list(x = 1)) expect_error(field(r, "x") <- 1:3, "Invalid value") expect_error(field(r, "x") <- environment(), "Invalid value") }) test_that("field<- respects size, not length (#450)", { r1 <- new_rcrd(list(df = new_data_frame(n = 2L))) new_df <- data.frame(x = 1:2) field(r1, 'df') <- new_df expect_equal(field(r1, "df"), new_df) expect_error(field(r1, 'df') <- new_data_frame(n = 3L), "Invalid value") }) vctrs/tests/testthat/test-type-rcrd.R0000644000176200001440000002333214315060310017422 0ustar liggesusers # constructor and accessors ----------------------------------------------- test_that("can construct and access components", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(length(r), 1) expect_equal(n_fields(r), 2) expect_equal(names(r), NULL) expect_equal(fields(r), c("x", "y")) expect_error(r$x, class = "vctrs_error_unsupported") expect_equal(field(r, "x"), 1) }) test_that("requires format method", { x <- new_rcrd(list(x = 1)) expect_error(format(x), class = "vctrs_error_unimplemented") }) test_that("vec_proxy() transforms records to data frames", { expect_identical( vec_proxy(new_rcrd(list(a = "1"))), new_data_frame(list(a = "1")) ) }) test_that("equality, comparison, and order proxies are recursive and fall through (#1503, #1664)", { base <- new_rcrd(list(a = 1), class = "custom") x <- new_rcrd(list(x = base)) expect_identical(vec_proxy_equal(x), 1) expect_identical(vec_proxy_compare(x), 1) expect_identical(vec_proxy_order(x), 1) local_methods(vec_proxy_equal.custom = function(x, ...) rep("equal", length(x))) expect_identical(vec_proxy_equal(x), "equal") expect_identical(vec_proxy_compare(x), "equal") expect_identical(vec_proxy_order(x), "equal") local_methods(vec_proxy_compare.custom = function(x, ...) rep("compare", length(x))) expect_identical(vec_proxy_equal(x), "equal") expect_identical(vec_proxy_compare(x), "compare") expect_identical(vec_proxy_order(x), "compare") local_methods(vec_proxy_order.custom = function(x, ...) rep("order", length(x))) expect_identical(vec_proxy_equal(x), "equal") expect_identical(vec_proxy_compare(x), "compare") expect_identical(vec_proxy_order(x), "order") y <- new_rcrd(list(a = 1), class = "custom2") local_methods(vec_proxy_compare.custom2 = function(x, ...) rep("compare2", length(x))) z <- data_frame(x = x, y = y) # Each column falls back independently expect_identical(vec_proxy_equal(z), data_frame(x = "equal", y = 1)) expect_identical(vec_proxy_compare(z), data_frame(x = "compare", y = "compare2")) expect_identical(vec_proxy_order(z), data_frame(x = "order", y = "compare2")) }) # base methods ------------------------------------------------------------ test_that("has no names", { x <- new_rcrd(list(a = 1, b = 2L)) expect_null(names(x)) expect_null(vec_names(x)) }) test_that("removing names with `NULL` is a no-op (#1419)", { x <- new_rcrd(list(a = 1, b = 2L)) expect_identical(`names<-`(x, NULL), x) expect_identical(vec_set_names(x, NULL), x) }) test_that("setting character names is an error (#1419)", { x <- new_rcrd(list(a = 1, b = 2L)) expect_error(`names<-`(x, "x"), "Can't assign names") expect_error(vec_set_names(x, "x"), "Can't assign names") }) test_that("na.omit() works and retains metadata (#1413)", { x <- new_rcrd(list(a = c(1, 1, NA, NA), b = c(1, NA, 1, NA))) result <- na.omit(x) expect <- vec_slice(x, 1:3) attr(expect, "na.action") <- structure(4L, class = "omit") expect_identical(result, expect) }) test_that("na.fail() works", { # Only considered missing if all fields are missing x <- new_rcrd(list(a = c(1, 1, NA), b = c(1, NA, 1))) expect_identical(na.fail(x), x) x <- new_rcrd(list(a = c(1, 1, NA, NA), b = c(1, NA, 1, NA))) expect_snapshot(error = TRUE, na.fail(x)) }) # coercion ---------------------------------------------------------------- test_that("can't cast list to rcrd", { l <- list( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(b = "4", a = 2)) ) expect_error( vec_cast(l, new_rcrd(list(a = 1L, b = 2L))), class = "vctrs_error_incompatible_type" ) }) test_that("can recast rcrd from list", { r <- new_rcrd(list(x = integer(), y = numeric())) expect_equal( vec_restore(list(x = 1L, y = 1), r), new_rcrd(list(x = 1L, y = 1)) ) }) test_that("can't cast rcrd to list", { r <- new_rcrd(list(x = 1:2, y = 2:3)) expect_error(vec_cast(r, list()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(r, list()), class = "vctrs_error_incompatible_type") }) test_that("default casts are implemented correctly", { r <- new_rcrd(list(x = 1, y = 1)) expect_error(vec_cast(1, r), class = "vctrs_error_incompatible_type") expect_equal(vec_cast(NULL, r), NULL) }) test_that("can't cast incompatible rcrd", { expect_error( vec_cast( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(a = "1")) ), class = "vctrs_error_cast_lossy" ) expect_error( vec_cast( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(a = "1", c = 3L)) ), class = "vctrs_error_cast_lossy" ) expect_error( vec_cast( new_rcrd(list(a = "a", b = 3L)), new_rcrd(list(a = 1, b = 3L)) ), class = "vctrs_error_incompatible_type" ) }) # input validation -------------------------------------------------------- test_that("must be list of equal length vectors", { expect_error(new_rcrd(list()), "list of length 1") expect_error(new_rcrd(list(x = environment())), class = "vctrs_error_scalar_type") expect_error(new_rcrd(list(x = 1:2, y = 1:3)), "same size") }) test_that("names must be unique", { expect_error(new_rcrd(list(1, 2)), class = "vctrs_error_names_cannot_be_empty") expect_error(new_rcrd(list(x = 1, 2)), class = "vctrs_error_names_cannot_be_empty") expect_error(new_rcrd(list(x = 1, x = 2)), class = "vctrs_error_names_must_be_unique") expect_error(new_rcrd(setNames(list(1, 2), "x")), "can't return `NA`") }) test_that("subset assignment throws error", { x <- new_rcrd(list(x = 1)) expect_error( x$y <- 2, class = "vctrs_error_unsupported" ) }) test_that("can supply data frame as fields", { expect_identical( new_rcrd(list(x = 1)), new_rcrd(tibble(x = 1)) ) }) test_that("fields are not recycled", { expect_error( new_rcrd(list(x = 1, y = 1:2)), "must be the same size" ) }) # tuple class ---------------------------------------------------------- # use simple class to test essential features of rcrds test_that("print and str use format", { local_tuple_methods() r <- tuple(1, 1:100) expect_snapshot(r) expect_snapshot(str(r[1:10])) expect_snapshot(str(list(list(list(r, 1:100))))) }) test_that("subsetting methods applied to each field", { local_tuple_methods() x <- tuple(1:2, 1) expect_equal(x[1], tuple(1, 1)) expect_equal(x[[1]], tuple(1, 1)) expect_equal(rep(tuple(1, 1), 2), tuple(c(1, 1), 1)) length(x) <- 1 expect_equal(x, tuple(1, 1)) }) test_that("subset assignment modifies each field", { local_tuple_methods() x <- tuple(c(1, 1), c(2, 2)) expect_error(x[[]] <- tuple(), "missing") x[[1]] <- tuple(3, 3) expect_equal(x, tuple(c(3, 1), c(3, 2))) x[1] <- tuple(4, 4) expect_equal(x, tuple(c(4, 1), c(4, 2))) }) test_that("subset assignment recycles", { local_tuple_methods() x <- tuple(c(1, 1), c(2, 2)) x[1:2] <- tuple(1, 1) expect_equal(x, tuple(c(1, 1), c(1, 1))) x[] <- tuple(2, 2) expect_equal(x, tuple(c(2, 2), c(2, 2))) }) test_that("can sort rcrd", { local_tuple_methods() x <- tuple(c(1, 2, 1), c(3, 1, 2)) expect_equal(xtfrm(x), c(2, 3, 1)) expect_equal(order(x), c(3, 1, 2)) expect_equal(sort(x), tuple(c(1, 1, 2), c(2, 3, 1))) }) test_that("can use dictionary methods on a rcrd", { local_tuple_methods() x <- tuple(c(1, 2, 1), c(3, 1, 3)) expect_equal(unique(x), x[1:2]) expect_equal(duplicated(x), c(FALSE, FALSE, TRUE)) expect_equal(anyDuplicated(x), TRUE) }) test_that("cannot round trip through list", { local_tuple_methods() t <- tuple(1:2, 3:4) # Used to be allowed expect_error(vec_cast(t, list()), class = "vctrs_error_incompatible_type") }) test_that("can convert to list using as.list() or vec_chop() (#1113)", { local_tuple_methods() t <- tuple(1:2, 3:4) expect <- list(tuple(1L, 3L), tuple(2L, 4L)) expect_identical(as.list(t), expect) expect_identical(vec_chop(t), expect) }) test_that("dangerous methods marked as unimplemented", { local_tuple_methods() t <- tuple() expect_error(mean(t), class = "vctrs_error_unsupported") expect_error(abs(t), class = "vctrs_error_unsupported") expect_error(is.finite(t), class = "vctrs_error_unsupported") expect_error(is.nan(t), class = "vctrs_error_unsupported") }) # slicing ----------------------------------------------------------------- test_that("dots are an error (#1295)", { foo <- new_rcrd(list(foo = "foo")) expect_snapshot(error = TRUE, foo[1, 2]) }) test_that("records are restored after slicing the proxy", { expect_identical(new_rcrd(list(x = 1:2))[1], new_rcrd(list(x = 1L))) }) test_that("can slice with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:2))) out <- vec_slice(x, 2) expect_identical( out, new_rcrd(data_frame(x = data_frame(y = 2L))) ) expect_identical( x[2], out ) expect_identical( x[[2]], out ) }) test_that("can rep with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:2))) expect_identical( rep(x, length.out = 4), vec_slice(x, c(1:2, 1:2)) ) }) test_that("can assign with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:3))) y <- new_rcrd(data_frame(x = data_frame(y = FALSE))) exp <- new_rcrd(data_frame(x = data_frame(y = c(1L, 2L, 0L)))) expect_identical(vec_assign(x, 3, y), exp) out <- x out[[3]] <- y expect_identical(out, exp) }) test_that("can resize with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:3))) length(x) <- 2 expect_identical(x, new_rcrd(data_frame(x = data_frame(y = 1:2)))) length(x) <- 4 expect_identical(x, new_rcrd(data_frame(x = data_frame(y = c(1:2, NA, NA))))) }) test_that("`[[` preserves type of record fields (#1205)", { x <- new_rcrd(list(x = 1:3, a = list(1, 2:3, 4:6))) expect_identical(field(x[3], "a"), list(4:6)) expect_identical(field(x[[3]], "a"), list(4:6)) }) vctrs/tests/testthat/helper-names.R0000644000176200001440000000041414315060310017110 0ustar liggesuserslocal_name_repair_quiet <- function(frame = caller_env()) { local_options(rlib_name_repair_verbosity = "quiet", .frame = frame) } local_name_repair_verbose <- function(frame = caller_env()) { local_options(rlib_name_repair_verbosity = "verbose", .frame = frame) } vctrs/tests/testthat/test-type-date-time.R0000644000176200001440000005333614315060310020350 0ustar liggesusers test_that("date-times have informative types", { expect_identical(vec_ptype_abbr(Sys.Date()), "date") expect_identical(vec_ptype_full(Sys.Date()), "date") expect_identical(vec_ptype_abbr(Sys.time()), "dttm") expect_identical(vec_ptype_full(Sys.time()), "datetime") expect_identical(vec_ptype_abbr(new_duration(10)), "drtn") expect_identical(vec_ptype_full(new_duration(10)), "duration") }) test_that("vec_ptype() returns a double date for integer dates", { x <- structure(0L, class = "Date") expect_true(is.double(vec_ptype(x))) }) test_that("dates and times are vectors", { expect_true(vec_is(Sys.Date())) expect_true(vec_is(as.POSIXct("2020-01-01"))) expect_true(vec_is(as.POSIXlt("2020-01-01"))) }) test_that("vec_cast() converts POSIXct with int representation to double when converting zones", { x <- structure(integer(), class = c("POSIXct", "POSIXt"), tzone = "UTC") y <- structure(numeric(), class = c("POSIXct", "POSIXt"), tzone = "America/Los_Angeles") expect_true(is.double(vec_cast(x, y))) }) test_that("vec_c() converts POSIXct with int representation to double representation (#540)", { time1 <- seq(as.POSIXct("2015-12-01", tz = "UTC"), length.out = 2, by = "days") time2 <- vec_c(time1) expect_true(is.double(time2)) time3 <- vec_c(time1, time1) expect_true(is.double(time3)) }) test_that("vec_c() and vec_rbind() convert Dates with int representation to double representation (#396)", { x <- structure(0L, class = "Date") df <- data.frame(x = x) expect_true(is.double(vec_c(x))) expect_true(is.double(vec_c(x, x))) expect_true(is.double(vec_rbind(df)$x)) expect_true(is.double(vec_rbind(df, df)$x)) }) test_that("vec_c() and vec_ptype() standardize missing `tzone` attributes (#561)", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_ptype(x), "tzone"), "") expect_identical(attr(vec_c(x, x), "tzone"), "") }) # constructor ------------------------------------------------------------- test_that("can create a date", { expect_identical(new_date(), structure(double(), class = "Date")) expect_identical(new_date(0), structure(0, class = "Date")) }) test_that("retains input names", { expect_named(new_date(c(x = 0)), "x") }) test_that("drops attributes except names", { expect_identical(new_date(structure(1, foo = "bar")), new_date(1)) }) test_that("only allows doubles", { expect_error(new_date(1L), "must be a double vector") expect_error(new_date("x"), "must be a double vector") }) test_that("can create a datetime", { expect_identical(new_datetime(), structure(double(), class = c("POSIXct", "POSIXt"), tzone = "")) expect_identical(new_datetime(0), structure(0, class = c("POSIXct", "POSIXt"), tzone = "")) }) test_that("retains input names", { expect_named(new_datetime(c(x = 0)), "x") }) test_that("drops attributes except names", { expect_identical(new_datetime(structure(1, foo = "bar")), new_datetime(1)) }) test_that("only allows doubles", { expect_error(new_datetime(1L), "must be a double vector") expect_error(new_datetime("x"), "must be a double vector") }) test_that("tzone is allowed to be `NULL`", { expect_identical(new_datetime(tzone = NULL), new_datetime(tzone = "")) }) test_that("tzone must be character or `NULL`", { expect_error(new_datetime(tzone = 1), "character vector or `NULL`") }) # coerce ------------------------------------------------------------------ test_that("datetime coercions are symmetric and unchanging", { types <- list( new_date(), new_datetime(), new_datetime(tzone = "US/Central"), as.POSIXlt(character(), tz = "US/Central"), difftime(Sys.time() + 1000, Sys.time()), difftime(Sys.time() + 1, Sys.time()) ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) local_options(width = 200) expect_snapshot(print(mat)) }) test_that("tz comes from first non-empty", { # On the assumption that if you've set the time zone explicitly it # should win x <- as.POSIXct("2020-01-01") y <- as.POSIXct("2020-01-01", tz = "America/New_York") expect_identical(vec_ptype2(x, y), y[0]) expect_identical(vec_ptype2(y, x), y[0]) z <- as.POSIXct("2020-01-01", tz = "Pacific/Auckland") expect_identical(vec_ptype2(y, z), y[0]) expect_identical(vec_ptype2(z, y), z[0]) }) test_that("POSIXlt always steered towards POSIXct", { dtc <- as.POSIXct("2020-01-01", tz = "UTC") dtl <- as.POSIXlt("2020-01-01", tz = "UTC") expect_identical(vec_ptype2(dtc, dtl), dtc[0]) expect_identical(vec_ptype2(dtl, dtc), dtc[0]) expect_identical(vec_ptype2(dtl, dtl), dtc[0]) }) test_that("vec_ptype2() on a POSIXlt with multiple time zones returns the first", { x <- as.POSIXlt(new_datetime(), tz = "Pacific/Auckland") expect_identical(attr(x, "tzone"), c("Pacific/Auckland", "NZST", "NZDT")) expect_identical(attr(vec_ptype2(x, new_date()), "tzone"), "Pacific/Auckland") }) test_that("vec_ptype2(, NA) is symmetric (#687)", { date <- new_date() expect_identical( vec_ptype2(date, NA), vec_ptype2(NA, date) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { time <- Sys.time() expect_identical( vec_ptype2(time, NA), vec_ptype2(NA, time) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { dtime <- Sys.time() - Sys.time() expect_identical( vec_ptype2(dtime, NA), vec_ptype2(NA, dtime) ) }) test_that("vec_ptype2() standardizes duration storage type to double", { x <- structure(1L, units = "secs", class = "difftime") expect <- new_duration(double(), units = "secs") expect_identical(vec_ptype2(x, x), expect) }) # cast: dates --------------------------------------------------------------- test_that("safe casts work as expected", { date <- as.Date("2018-01-01") datetime_ct <- as.POSIXct(as.character(date)) datetime_lt <- as.POSIXlt(datetime_ct) expect_identical(vec_cast(NULL, date), NULL) expect_identical(vec_cast(date, date), date) expect_identical(vec_cast(datetime_ct, date), date) expect_identical(vec_cast(datetime_lt, date), date) missing_date <- new_date(NA_real_) expect_identical(vec_cast(missing_date, missing_date), missing_date) expect_identical(vec_cast(as.POSIXct(missing_date), missing_date), missing_date) expect_identical(vec_cast(as.POSIXlt(missing_date), missing_date), missing_date) # These used to be allowed expect_error(vec_cast(17532, date), class = "vctrs_error_incompatible_type") expect_error(vec_cast("2018-01-01", date), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(date), date), class = "vctrs_error_incompatible_type") }) test_that("date - datetime cast can be roundtripped", { date <- as.Date("2018-01-01") datetime <- as.POSIXct("2018-01-01", tz = "America/New_York") expect_identical(vec_cast(vec_cast(date, datetime), date), date) expect_identical(vec_cast(vec_cast(datetime, date), datetime), datetime) }) test_that("lossy casts generate error", { date <- as.Date("2018-01-01") datetime <- as.POSIXct(as.character(date)) + c(0, 3600) expect_lossy(vec_cast(datetime, date), vec_c(date, date), x = datetime, to = date) }) test_that("invalid casts generate error", { date <- as.Date("2018-01-01") expect_error(vec_cast(integer(), date), class = "vctrs_error_incompatible_type") }) test_that("can cast NA and unspecified to Date", { expect_identical(vec_cast(NA, new_date()), new_date(NA_real_)) expect_identical(vec_cast(unspecified(2), new_date()), new_date(dbl(NA, NA))) }) test_that("casting an integer date to another date returns a double date", { x <- structure(0L, class = "Date") expect_true(is.double(vec_cast(x, x))) }) test_that("casting an integer POSIXct to a Date returns a double Date", { x <- .POSIXct(18000L, tz = "America/New_York") expect <- new_date(0) expect_identical(vec_cast(x, new_date()), expect) }) # cast: datetimes ----------------------------------------------------------- test_that("safe casts work as expected", { datetime_c <- as.POSIXct("1970-02-01", tz = "UTC") datetime_l <- as.POSIXlt("1970-02-01", tz = "UTC") expect_identical(vec_cast(NULL, datetime_c), NULL) expect_identical(vec_cast(datetime_c, datetime_c), datetime_c) expect_identical(vec_cast(datetime_l, datetime_c), datetime_c) expect_identical(vec_cast(as.Date(datetime_c), datetime_c), datetime_c) expect_identical(vec_cast(NULL, datetime_l), NULL) expect_identical(vec_cast(datetime_c, datetime_l), datetime_l) expect_identical(vec_cast(datetime_l, datetime_l), datetime_l) expect_identical(vec_cast(as.Date(datetime_l), datetime_l), datetime_l) expect_error(vec_cast(raw(), datetime_l), class = "vctrs_error_incompatible_type") missing_c <- new_datetime(NA_real_, tzone = "UTC") missing_l <- as.POSIXlt(missing_c) expect_identical(vec_cast(missing_c, missing_c), missing_c) expect_identical(vec_cast(missing_l, missing_c), missing_c) expect_identical(vec_cast(as.Date(missing_c), missing_c), missing_c) expect_identical(vec_cast(missing_l, missing_l), missing_l) expect_identical(vec_cast(missing_c, missing_l), missing_l) expect_identical(vec_cast(as.Date(missing_l), missing_l), missing_l) # These used to be allowed expect_error(vec_cast(2678400, datetime_c), class = "vctrs_error_incompatible_type") expect_error(vec_cast("1970-02-01", datetime_c), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(datetime_c), datetime_c), class = "vctrs_error_incompatible_type") expect_error(vec_cast(2678400, datetime_l), class = "vctrs_error_incompatible_type") expect_error(vec_cast("1970-02-01", datetime_l), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(datetime_l), datetime_l), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { datetime <- as.POSIXct("1970-02-01", tz = "UTC") expect_error(vec_cast(integer(), datetime), class = "vctrs_error_incompatible_type") }) test_that("dates become midnight in date-time tzone", { date1 <- as.Date("2010-01-01") datetime_c <- as.POSIXct(character(), tz = "Pacific/Auckland") datetime_l <- as.POSIXlt(character(), tz = "Pacific/Auckland") date2_c <- vec_cast(date1, datetime_c) expect_identical(tzone(date2_c), "Pacific/Auckland") expect_identical(format(date2_c, "%H:%M"), "00:00") date2_l <- vec_cast(date1, datetime_l) expect_identical(tzone(date2_l), "Pacific/Auckland") expect_identical(format(date2_l, "%H:%M"), "00:00") }) test_that("can cast NA and unspecified to POSIXct and POSIXlt", { dtc <- as.POSIXct("2020-01-01") dtl <- as.POSIXlt("2020-01-01") expect_identical(vec_cast(NA, dtc), vec_init(dtc)) expect_identical(vec_cast(NA, dtl), vec_init(dtl)) expect_identical(vec_cast(unspecified(2), dtc), vec_init(dtc, 2)) expect_identical(vec_cast(unspecified(2), dtl), vec_init(dtl, 2)) }) test_that("changing time zones retains the underlying moment in time", { x_ct <- as.POSIXct("2019-01-01", tz = "America/New_York") x_lt <- as.POSIXlt(x_ct) to_ct <- new_datetime(tzone = "America/Los_Angeles") to_lt <- as.POSIXlt(to_ct) expect_ct <- x_ct attr(expect_ct, "tzone") <- "America/Los_Angeles" expect_lt <- as.POSIXlt(expect_ct) expect_identical(vec_cast(x_ct, to_ct), expect_ct) expect_identical(vec_cast(x_ct, to_lt), expect_lt) expect_identical(vec_cast(x_lt, to_ct), expect_ct) expect_identical(vec_cast(x_lt, to_lt), expect_lt) }) test_that("casting to date always retains the zoned year-month-day value", { x <- as.POSIXct("2019-01-01", tz = "Asia/Shanghai") expect_identical(vec_cast(x, new_date()), as.Date("2019-01-01")) }) # cast: durations ------------------------------------------------------------ test_that("safe casts work as expected", { dt1 <- as.difftime(600, units = "secs") dt2 <- as.difftime(10, units = "mins") expect_identical(vec_cast(NULL, dt1), NULL) expect_identical(vec_cast(dt1, dt1), dt1) expect_identical(vec_cast(dt1, dt2), dt2) # These used to be allowed expect_error(vec_cast(600, dt1), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(dt1), dt1), class = "vctrs_error_incompatible_type") }) test_that("invalid casts generate error", { dt <- as.difftime(600, units = "secs") expect_error(vec_cast(integer(), dt), class = "vctrs_error_incompatible_type") }) test_that("can cast NA and unspecified to duration", { expect_identical(vec_cast(NA, new_duration()), new_duration(na_dbl)) expect_identical(vec_cast(unspecified(2), new_duration()), new_duration(dbl(NA, NA))) }) test_that("casting coerces corrupt integer storage durations to double (#1602)", { x <- structure(1L, units = "secs", class = "difftime") expect <- new_duration(1, units = "secs") expect_identical(vec_cast(x, x), expect) # Names are retained through the coercion names(x) <- "a" expect_named(vec_cast(x, x), "a") }) # proxy/restore: dates --------------------------------------------------- test_that("restoring an integer to an integer Date converts to double", { x <- structure(0L, class = "Date") expect_true(is.double(vec_restore(x, x))) }) test_that("vec_proxy() returns a double for Dates with int representation", { x <- structure(0L, class = "Date") expect_true(is.double(vec_proxy(x))) }) # proxy/restore: datetimes ------------------------------------------------ test_that("restoring an integer to an integer POSIXct converts to double", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_true(is.double(vec_restore(x, x))) }) test_that("restoring to a POSIXct with no time zone standardizes to an empty string (#561)", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_restore(x, x), "tzone"), "") }) test_that("restoring to a POSIXlt with no time zone standardizes to an empty string", { # Manually create a POSIXlt without a `tzone` attribute. # This is just: # `x <- as.POSIXlt("1970-01-01")` # which usually won't add a `tzone` attribute, but platforms where the local # time is UTC attach a `tzone` attribute automatically. x <- structure( list( sec = 0, min = 0L, hour = 0L, mday = 1L, mon = 0L, year = 70L, wday = 4L, yday = 0L, isdst = 0L, zone = "EST", gmtoff = NA_integer_ ), class = c("POSIXlt", "POSIXt") ) proxy <- vec_proxy(x) expect_identical(attr(vec_restore(proxy, x), "tzone"), "") }) test_that("proxying a POSIXct with no time zone standardizes to an empty string", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_proxy(x), "tzone"), "") }) test_that("vec_proxy() returns a double for POSIXct with int representation", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_true(is.double(vec_proxy(x))) }) test_that("POSIXlt roundtrips through proxy and restore", { x <- as_posixlt("2020-01-03") out <- vec_restore(vec_proxy(x), x) expect_identical(out, x) }) test_that("subclassed Dates / POSIXct / POSIXlt can be restored (#1015)", { x <- subclass(new_date(0)) proxy <- vec_proxy(x) expect_identical(vec_restore(proxy, x), x) y <- subclass(new_datetime(0)) proxy <- vec_proxy(y) expect_identical(vec_restore(proxy, y), y) z <- subclass(as.POSIXlt(new_datetime(0))) proxy <- vec_proxy(z) expect_identical(vec_restore(proxy, z), z) }) # arithmetic -------------------------------------------------------------- test_that("default is error", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-02 12:00") lt <- as.POSIXlt(dt) t <- as.difftime(12, units = "hours") f <- factor("x") expect_error(vec_arith("+", d, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", dt, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", lt, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", t, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", dt, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", lt, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", d, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("!", t, MISSING()), class = "vctrs_error_incompatible_op") }) test_that("date-time vs date-time", { d <- as.Date("2018-01-01") dt <- as.POSIXct(d) lt <- as.POSIXlt(dt) expect_error(vec_arith("+", d, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, d), d - d) expect_error(vec_arith("+", dt, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, dt), dt - dt) expect_error(vec_arith("+", lt, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, lt), lt - lt) expect_error(vec_arith("+", d, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, dt), difftime(d, dt)) expect_error(vec_arith("+", dt, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, d), difftime(dt, d)) expect_error(vec_arith("+", d, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, lt), difftime(d, lt)) expect_error(vec_arith("+", lt, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, d), difftime(lt, d)) expect_error(vec_arith("+", dt, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, lt), difftime(dt, lt)) expect_error(vec_arith("+", lt, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, dt), difftime(lt, dt)) }) test_that("date-time vs numeric", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-01", tz = "America/New_York") lt <- as.POSIXlt(dt) expect_identical(vec_arith("+", d, 1), d + 1) expect_identical(vec_arith("+", 1, d), d + 1) expect_identical(vec_arith("-", d, 1), d - 1) expect_error(vec_arith("-", 1, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("+", dt, 1), dt + 1) expect_identical(vec_arith("+", 1, dt), dt + 1) expect_identical(vec_arith("-", dt, 1), dt - 1) expect_error(vec_arith("-", 1, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("+", lt, 1), lt + 1) expect_identical(vec_arith("+", 1, lt), lt + 1) expect_identical(vec_arith("-", lt, 1), lt - 1) expect_error(vec_arith("-", 1, lt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, d), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", d, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, dt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", dt, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, lt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", lt, 1), class = "vctrs_error_incompatible_op") }) test_that("POSIXlt + numeric = POSIXct", { lt <- as.POSIXlt("2018-01-01", tz = "America/New_York") expect_s3_class(vec_arith("+", lt, 1), "POSIXct") expect_s3_class(vec_arith("+", 1, lt), "POSIXct") }) test_that("vec_arith() standardizes the `tzone` attribute", { dt <- structure(0, class = c("POSIXct", "POSIXt")) x <- vec_arith("+", dt, 1) expect_identical(attr(x, "tzone"), "") }) test_that("date-time vs difftime", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-01", tz = "UTC") lt <- as.POSIXlt(dt) t <- as.difftime(1, units = "days") th <- as.difftime(c(1, 24), units = "hours") expect_identical(vec_arith("+", d, t), d + t) expect_identical(vec_arith("+", t, d), t + d) expect_identical(vec_arith("+", dt, t), dt + t) expect_identical(vec_arith("+", t, dt), t + dt) expect_identical(vec_arith("+", lt, t), lt + t) expect_identical(vec_arith("+", t, lt), t + lt) expect_lossy(vec_arith("+", d, th), d + th, x = t, to = d) expect_lossy(vec_arith("+", th, d), th + d, x = t, to = d) expect_identical(vec_arith("+", dt, th), dt + th) expect_identical(vec_arith("+", th, dt), th + dt) expect_identical(vec_arith("+", lt, th), lt + th) expect_identical(vec_arith("+", th, lt), th + lt) expect_identical(vec_arith("-", d, t), d - t) expect_error(vec_arith("-", t, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, t), dt - t) expect_error(vec_arith("-", t, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, t), lt - t) expect_error(vec_arith("-", t, lt), class = "vctrs_error_incompatible_op") expect_lossy(vec_arith("-", d, th), d - th, x = t, to = d) expect_error(vec_arith("-", th, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, th), dt - th) expect_error(vec_arith("-", th, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, th), lt - th) expect_error(vec_arith("-", th, lt), class = "vctrs_error_incompatible_op") }) test_that("difftime vs difftime/numeric", { t <- as.difftime(12, units = "hours") expect_identical(vec_arith("-", t, MISSING()), -t) expect_identical(vec_arith("+", t, MISSING()), t) expect_identical(vec_arith("-", t, t), t - t) expect_identical(vec_arith("-", t, 1), t - 1) expect_identical(vec_arith("-", 1, t), 1 - t) expect_identical(vec_arith("+", t, t), 2 * t) expect_identical(vec_arith("+", t, 1), t + 1) expect_identical(vec_arith("+", 1, t), t + 1) expect_identical(vec_arith("*", 2, t), 2 * t) expect_identical(vec_arith("*", t, 2), 2 * t) expect_error(vec_arith("*", t, t), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("/", t, 2), t / 2) expect_error(vec_arith("/", 2, t), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("/", t, t), 1) expect_identical(vec_arith("%/%", t, t), 1) expect_identical(vec_arith("%%", t, t), 0) }) # Math -------------------------------------------------------------------- test_that("date and date times don't support math", { expect_error(vec_math("sum", new_date()), class = "vctrs_error_unsupported") expect_error(vec_math("sum", new_datetime()), class = "vctrs_error_unsupported") expect_error(vec_math("sum", as.POSIXlt(new_datetime())), class = "vctrs_error_unsupported") }) vctrs/tests/testthat/test-proxy.R0000644000176200001440000001121114315060310016663 0ustar liggesusers test_that("vec_data() preserves names (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_names(x), vec_names(vec_data(x))) x <- diag(2) rownames(x) <- letters[1:2] colnames(x) <- LETTERS[1:2] expect_identical(vec_names(x), vec_names(vec_data(x))) }) test_that("vec_data() preserves size (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_size(x), vec_size(vec_data(x))) x <- diag(2) expect_identical(vec_size(x), vec_size(vec_data(x))) }) test_that("vec_data() preserves dim and dimnames (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_dim(x), vec_dim(vec_data(x))) x <- diag(2) expect_identical(vec_dim(x), vec_dim(vec_data(x))) x <- diag(2) rownames(x) <- letters[1:2] colnames(x) <- LETTERS[1:2] expect_identical(dimnames(x), dimnames(vec_data(x))) }) test_that("strips vector attributes apart from names, dim and dimnames", { x <- new_vctr(1:10, a = 1, b = 2) expect_null(attributes(vec_data(x))) x <- new_vctr(c(x = 1, y = 2), a = 1, b = 2) expect_equal(names(attributes(vec_data(x))), "names") x <- new_vctr(1, a = 1, dim = c(1L, 1L)) expect_equal(names(attributes(vec_data(x))), "dim") x <- new_vctr(1, a = 1, dim = c(1L, 1L), dimnames = list("foo", "bar")) expect_equal(names(attributes(vec_data(x))), c("dim", "dimnames")) }) test_that("vec_proxy() is a no-op with data vectors", { for (x in vectors) { expect_identical(vec_proxy(!!x), !!x) } x <- structure(1:3, foo = "bar") expect_identical(vec_proxy(!!x), !!x) }) test_that("vec_proxy() transforms records to data frames", { for (x in records) { expect_identical(vec_proxy(x), new_data_frame(unclass(x))) } }) test_that("vec_proxy() is a no-op with non vectors", { x <- foobar(list()) expect_identical(vec_proxy(x), x) }) test_that("can take the proxy of non-vector objects", { local_env_proxy() expect_identical(vec_proxy(new_proxy(1:3)), 1:3) }) test_that("vec_data() asserts vectorness", { expect_error(vec_data(new_sclr()), class = "vctrs_error_scalar_type") expect_error(vec_data(~foo), class = "vctrs_error_scalar_type") }) test_that("vec_data() is proxied", { local_env_proxy() x <- new_proxy(mtcars) expect_identical(vec_data(x), vec_data(mtcars)) }) test_that("vec_proxy_equal() is recursive over data frames (#641)", { x <- new_data_frame(list(x = foobar(1:3), y = 41:43)) default <- vec_proxy_equal(x) expect_s3_class(default$x, "vctrs_foobar") local_methods(vec_proxy_equal.vctrs_foobar = function(...) c(0, 0, 0)) overridden <- vec_proxy_equal(x) expect_identical(overridden$x, c(0, 0, 0)) }) test_that("vec_proxy_equal() returns a POSIXct for POSIXlt objects (#901)", { x <- as.POSIXlt(new_date(0), tz = "UTC") expect_s3_class(vec_proxy_equal(x), "POSIXct") }) test_that("vec_proxy_equal() defaults to vec_proxy() and vec_proxy_compare() defaults to vec_proxy_equal() (#1140)", { foobar_proxy <- function(x, ...) data_frame(x = unclass(x), y = seq_along(x)) local_methods(vec_proxy.vctrs_foobar = foobar_proxy) x <- foobar(3:1) expect_identical(vec_proxy(x), foobar_proxy(x)) expect_identical(vec_proxy_equal(x), foobar_proxy(x)) expect_identical(vec_proxy_compare(x), foobar_proxy(x)) local_methods(vec_proxy_equal.vctrs_foobar = function(x, ...) foobar_proxy(letters[x])) expect_identical(vec_proxy_equal(x), data_frame(x = letters[3:1], y = 1:3)) expect_identical(vec_proxy_compare(x), data_frame(x = letters[3:1], y = 1:3)) }) test_that("equal/compare/order proxy methods that return data frames are automatically flattened", { x <- new_vctr(1:2, class = "custom") equal <- data_frame(a = 1:2, b = 3:4) order <- data_frame(a = 3:4, b = 4:5) local_methods( vec_proxy_equal.custom = function(x, ...) data_frame(col = equal), vec_proxy_order.custom = function(x, ...) data_frame(col = order) ) expect_identical(vec_proxy_equal(x), equal) expect_identical(vec_proxy_compare(x), equal) expect_identical(vec_proxy_order(x), order) }) test_that("equal/compare/order proxy methods that return 1 column data frames are automatically unwrapped", { x <- new_vctr(1:2, class = "custom") equal <- 1:2 order <- 3:4 local_methods( vec_proxy_equal.custom = function(x, ...) data_frame(a = equal), vec_proxy_order.custom = function(x, ...) data_frame(col = data_frame(a = order)) ) expect_identical(vec_proxy_equal(x), equal) expect_identical(vec_proxy_compare(x), equal) expect_identical(vec_proxy_order(x), order) }) test_that("vec_data() preserves data frames", { expect_identical( vec_data(tibble(x = 1)), data_frame(x = 1) ) # Rownames are preserved expect_identical( vec_data(mtcars), mtcars ) }) vctrs/tests/testthat/test-type.R0000644000176200001440000002202514315060310016470 0ustar liggesusers test_that("vec_ptype() is a no-op for NULL", { expect_null(vec_ptype(NULL)) }) test_that("vec_ptype() is a no-op for partial types", { expect_identical(vec_ptype(partial_factor("x")), partial_factor("x")) expect_identical(vec_ptype(partial_frame(x = 1)), partial_frame(x = 1)) }) test_that("vec_ptype() errors on scalars", { expect_error(vec_ptype(quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype(quote(fn())), class = "vctrs_error_scalar_type") }) test_that(".ptype argument overrides others", { expect_equal(vec_ptype_common(.ptype = 1:10), numeric()) }) test_that(".ptype required in strict mode", { old <- options(vctrs.no_guessing = TRUE) on.exit(options(old)) expect_error(vec_ptype_common(), "strict mode") }) test_that("can feed ptype into itself", { expect_equal(vec_ptype_common(vec_ptype_common(1:10)), numeric()) }) test_that("finalised prototypes created from under specified inputs", { expect_equal(vec_ptype_common(), NULL) expect_equal(vec_ptype_common(NULL), NULL) expect_equal(vec_ptype_common(NA), logical()) expect_equal(vec_ptype_common(NA, NULL), logical()) expect_equal(vec_ptype_common(NULL, NA), logical()) }) test_that("finalised prototypes created from under specified data frame cols", { df <- data.frame(x = NA) expect_equal(vec_ptype_common(df)$x, logical()) }) test_that("non-missing logical get correct type", { expect_equal(vec_ptype_common(TRUE), logical()) }) test_that("output tests", { expect_snapshot(vec_ptype_show()) expect_snapshot(vec_ptype_show(integer())) expect_snapshot(vec_ptype_show(integer(), double())) expect_snapshot(vec_ptype_show(logical(), integer(), double())) }) test_that("vec_ptype_common() handles matrices", { m <- matrix(1:4, nrow = 2) expect_identical(vec_ptype_common(m, m), matrix(int(), ncol = 2)) }) test_that("vec_ptype_common() includes index in argument tag", { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) # Create a column name too large for default buffer nm <- str_dup("foobarfoobar", 10) large_df1 <- set_names(df1, nm) large_df2 <- set_names(df2, nm) expect_snapshot(error = TRUE, vec_ptype_common(df1, df2)) expect_snapshot(error = TRUE, vec_ptype_common(df1, df1, df2)) expect_snapshot(error = TRUE, vec_ptype_common(large_df1, large_df2)) # Names expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, bar = "foo")) expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, baz = FALSE, bar = "foo")) expect_snapshot(error = TRUE, vec_ptype_common(foo = df1, bar = df2)) expect_snapshot(error = TRUE, vec_ptype_common(df1, df1, bar = df2)) # One splice box expect_snapshot(error = TRUE, vec_ptype_common(TRUE, !!!list(1, "foo"))) expect_snapshot(error = TRUE, vec_ptype_common(TRUE, !!!list(1, 2), "foo")) expect_snapshot(error = TRUE, vec_ptype_common(1, !!!list(TRUE, FALSE), "foo")) # One named splice box expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, !!!list(FALSE, FALSE), bar = "foo")) expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = 1, "foo"))) expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = "foo"))) expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), baz = "chr")) # Two splice boxes in next and current expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), !!!list(baz = "chr"))) }) test_that("proxied types are have s3 bare type", { for (x in proxied_empty_types) { expect_identical(vec_typeof_bare(x), "s3") } }) test_that("vec_ptype() preserves attributes of unproxied structures", { expect_identical(vec_ptype(foobar(dbl(1))), foobar(dbl())) }) test_that("vec_ptype() errors on scalar lists", { expect_error(vec_ptype(foobar(list())), class = "vctrs_error_scalar_type") }) test_that("can retrieve type info", { exp <- list(type = "integer", proxy_method = NULL) expect_identical(vec_type_info(1:3), exp) exp <- list(type = "s3", proxy_method = NULL) expect_identical(vec_type_info(~foo), exp) x <- as.POSIXlt(new_datetime(0)) exp <- list(type = "s3", proxy_method = vec_proxy.POSIXlt) expect_identical(vec_type_info(x), exp) }) test_that("can retrieve proxy info", { exp <- list(type = "integer", proxy_method = NULL, proxy = 1:3) expect_identical(vec_proxy_info(1:3), exp) exp <- list(type = "scalar", proxy_method = NULL, proxy = ~foo) expect_identical(vec_proxy_info(~foo), exp) x <- as.POSIXlt(new_datetime(0)) proxy <- new_data_frame(unclass(x)) exp <- list(type = "dataframe", proxy_method = vec_proxy.POSIXlt, proxy = proxy) expect_identical(vec_proxy_info(x), exp) }) test_that("class_type() detects classes", { expect_identical(class_type(list()), "none") expect_identical(class_type(foobar(list())), "unknown") expect_identical(class_type(structure(list(), class = "list")), "list") expect_identical(class_type(subclass(structure(list(), class = "list"))), "list") expect_identical(class_type(I(subclass(structure(list(), class = "list")))), "list") expect_identical(class_type(I(list())), "bare_asis") expect_identical(class_type(I(1)), "bare_asis") expect_identical(class_type(data.frame()), "bare_data_frame") expect_identical(class_type(tibble::tibble()), "bare_tibble") expect_identical(class_type(subclass(data.frame())), "data_frame") expect_identical(class_type(new_factor()), "bare_factor") expect_identical(class_type(new_ordered()), "bare_ordered") expect_identical(class_type(subclass(new_factor())), "unknown") expect_identical(class_type(subclass(new_ordered())), "unknown") expect_identical(class_type(new_date()), "bare_date") expect_identical(class_type(new_datetime()), "bare_posixct") expect_identical(class_type(as.POSIXlt(new_date())), "bare_posixlt") expect_identical(class_type(subclass(new_date())), "unknown") expect_identical(class_type(subclass(new_datetime())), "unknown") expect_identical(class_type(NA), "none") expect_identical(class_type(foobar()), "unknown") }) test_that("vec_ptype() handles class-less yet OBJECT gremlins", { gremlin <- stats::model.frame(freeny) expect_error(vec_ptype(gremlin), NA) expect_error(vec_c(gremlin), NA) expect_error(vec_init(gremlin), NA) expect_error(vec_slice(gremlin, 1), NA) }) test_that("explicit list subclasses are vectors", { list_subclass <- function(x) { structure(x, class = c("custom_subclass", "list")) } x <- list_subclass(list()) expect_true(vec_is(x)) df <- data.frame(x = 1:2) df$z <- list_subclass(list(1, 2)) expect_identical(vec_slice(df, 1)$z, list_subclass(list(1))) }) test_that("the type of a classed data frame with an unspecified column retains unspecifiedness", { df1 <- subclass(data_frame(x = 1, y = NA)) df2 <- subclass(data_frame(x = 1, y = unspecified(1))) expect <- subclass(data_frame(x = numeric(), y = unspecified())) expect_identical(vec_ptype(df1), expect) expect_identical(vec_ptype(df2), expect) }) test_that("vec_ptype() methods can be written", { local_methods( vec_ptype.vctrs_foobar = function(x, ...) "dispatch" ) expect_identical(vec_ptype(foobar()), "dispatch") }) test_that("vec_ptype_finalise() works with NULL", { expect_identical(vec_ptype_finalise(NULL), NULL) }) test_that("vec_ptype_finalise() works recursively over bare data frames", { df <- new_data_frame(list(x = numeric(), y = unspecified(), z = partial_factor())) expect <- data_frame(x = numeric(), y = logical(), z = factor()) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() works recursively over classed data frames", { df <- new_data_frame(list(x = numeric(), y = unspecified(), z = partial_factor())) df <- subclass(df) expect <- subclass(data_frame(x = numeric(), y = logical(), z = factor())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() can handle data frame columns", { df <- data_frame(x = numeric(), y = data_frame(z = unspecified())) expect <- data_frame(x = numeric(), y = data_frame(z = logical())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() requires vector types", { expect_error(vec_ptype_finalise(quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype_finalise(foobar()), class = "vctrs_error_scalar_type") }) # This might change in the future if we decide that prototypes don't # have names test_that("vec_ptype() preserves type of names and row names", { expect_identical(vec_ptype(c(foo = 1)), named(dbl())) expect_identical(vec_ptype(mtcars), mtcars[0, ]) expect_identical(vec_ptype(foobar(mtcars)), foobar(mtcars[0, ])) }) test_that("vec_ptype_common() handles spliced names consistently (#1570)", { args1 <- list(a = "foo", b = "bar") args2 <- list(y = NULL, z = 1) y_name <- "y" z_name <- "z" expect_snapshot(error = TRUE, { vec_ptype_common( a = "foo", b = "bar", y = NULL, z = 1 ) vec_ptype_common( !!!args1, !!!args2 ) vec_ptype_common( !!!args1, "{y_name}" := NULL, "{z_name}" := 1 ) }) }) vctrs/tests/testthat/test-names.R0000644000176200001440000006772114362266120016637 0ustar liggesuserslocal_name_repair_quiet() # vec_names() --------------------------------------------------------- test_that("vec_names() retrieves names", { expect_null(vec_names(letters)) expect_identical(vec_names(set_names(letters)), letters) expect_identical(vec_names(mtcars), row.names(mtcars)) expect_null(vec_names(unrownames(mtcars))) expect_identical(vec_names(Titanic), dimnames(Titanic)[[1]]) x <- matrix(1L, dimnames = list("row", "col")) expect_identical(vec_names(x), dimnames(x)[[1]]) }) test_that("vec_names() dispatches", { local_methods( names.vctrs_foobar = function(x) "dispatched!" ) expect_identical(vec_names(foobar()), "dispatched!") }) # vec_names2() ------------------------------------------------------------- test_that("vec_names2() repairs names", { expect_identical(vec_names2(1:2), c("", "")) expect_identical(vec_names2(1:2, repair = "unique"), c("...1", "...2")) expect_identical(vec_names2(set_names(1:2, c("_foo", "_bar")), repair = "universal"), c("._foo", "._bar")) }) test_that("vec_names2() treats data frames and arrays as vectors", { expect_identical(vec_names2(mtcars), row.names(mtcars)) expect_identical(vec_names2(as.matrix(mtcars)), row.names(mtcars)) df <- unrownames(mtcars) exp <- rep_len("", nrow(mtcars)) expect_identical(vec_names2(df), exp) expect_identical(vec_names2(as.matrix(df)), exp) }) test_that("vec_names2() accepts and checks repair function", { expect_identical(vec_names2(1:2, repair = function(nms) rep_along(nms, "foo")), c("foo", "foo")) expect_error(vec_names2(1:2, repair = function(nms) "foo"), "length 1 instead of length 2") }) test_that("vec_names2() repairs names before invoking repair function", { x <- set_names(1:2, c(NA, NA)) expect_identical(vec_names2(x, repair = identity), c("", "")) }) test_that("vec_names2() result is correct for *_quiet repair", { expect_identical(vec_names2(1:2, repair = "unique"), vec_names2(1:2, repair = "unique_quiet")) expect_identical(vec_names2(1:2, repair = "universal"), vec_names2(1:2, repair = "universal_quiet")) }) # vec_as_names() ----------------------------------------------------------- test_that("vec_as_names() requires character vector", { expect_error(vec_as_names(NULL), "`names` must be a character vector") }) test_that("vec_as_names() validates `repair`", { expect_snapshot({ (expect_error(my_vec_as_names("x", my_repair = "foo"), "can't be \"foo\"")) (expect_error(my_vec_as_names(1, my_repair = 1), "string or a function")) }) }) test_that("vec_as_names() repairs names", { expect_identical(vec_as_names(chr(NA, NA)), c("", "")) expect_identical(vec_as_names(chr(NA, NA), repair = "unique"), c("...1", "...2")) expect_identical(vec_as_names(chr("_foo", "_bar"), repair = "universal"), c("._foo", "._bar")) expect_identical(vec_as_names(chr("a", "b"), repair = "check_unique"), c("a", "b")) }) test_that("vec_as_names() checks unique names", { expect_snapshot({ (expect_error(my_vec_as_names(chr(NA), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr(""), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr("a", "a"), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr("..1"), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr("..."), my_repair = "check_unique"))) }) }) test_that("vec_as_names() result is correct for *_quiet repair", { expect_identical( vec_as_names(chr("_foo", "_bar"), repair = "unique"), vec_as_names(chr("_foo", "_bar"), repair = "unique_quiet") ) expect_identical( vec_as_names(chr("_foo", "_bar"), repair = "universal"), vec_as_names(chr("_foo", "_bar"), repair = "universal_quiet") ) }) test_that("vec_as_names() keeps the names of a named vector", { x_unnamed <- c(NA, "", "..1", "...2") x_names <- letters[1:4] x <- set_names(x_unnamed, x_names) expect_identical( set_names(vec_as_names(x_unnamed, repair = "minimal"), x_names), vec_as_names(x, repair = "minimal") ) expect_identical( set_names(vec_as_names(x_unnamed, repair = "unique"), x_names), vec_as_names(x, repair = "unique") ) expect_identical( set_names(vec_as_names(x_unnamed, repair = "universal"), x_names), vec_as_names(x, repair = "universal") ) }) test_that("vec_as_names() accepts and checks repair function", { f <- local({ local_obj <- "foo" ~ rep_along(.x, local_obj) }) expect_identical(vec_as_names(c("", ""), repair = f), c("foo", "foo")) expect_snapshot(error = TRUE, my_vec_as_names(c("", ""), my_repair = function(nms) "foo")) }) test_that("vec_as_names() repairs names before invoking repair function", { expect_identical(vec_as_names(chr(NA, NA), repair = identity), c("", "")) }) test_that("vec_as_names() is noisy by default", { local_name_repair_verbose() expect_snapshot({ # Noisy name repair vec_as_names(c("x", "x"), repair = "unique") # Quiet name repair vec_as_names(c("x", "x"), repair = "unique", quiet = TRUE) # Hint at repair argument, if known (expect_error( my_vec_as_names(c("x", "x"), my_repair = "check_unique") )) # request quiet via name repair string, don't specify `quiet` vec_as_names(c("1", "1"), repair = "unique_quiet") vec_as_names(c("1", "1"), repair = "universal_quiet") # request quiet via name repair string, specify `quiet` = TRUE vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = TRUE) vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = TRUE) # request quiet via name repair string, specify `quiet` = FALSE vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = FALSE) vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = FALSE) }) }) test_that("validate_minimal_names() checks names", { expect_snapshot({ (expect_error(validate_minimal_names(1), "must return a character vector")) (expect_error(validate_minimal_names(NULL), "can't return `NULL`")) (expect_error(validate_minimal_names(chr(NA)), "can't return `NA` values")) }) }) test_that("validate_unique() checks unique names", { expect_snapshot({ (expect_error(validate_unique(chr(NA)), "`NA`")) (expect_error(validate_unique(chr("")), class = "vctrs_error_names_cannot_be_empty")) (expect_error(validate_unique(chr("a", "a")), class = "vctrs_error_names_must_be_unique")) (expect_error(validate_unique(chr("..1")), class = "vctrs_error_names_cannot_be_dot_dot")) (expect_error(validate_unique(chr("...")), class = "vctrs_error_names_cannot_be_dot_dot")) }) }) test_that("vec_as_names_validate() validates repair arguments", { expect_identical( validate_name_repair_arg(c("unique", "check_unique")), "unique" ) expect_identical( validate_name_repair_arg(~ toupper(.))(letters), LETTERS ) }) test_that("vec_as_names() is quiet when function is supplied (#1018)", { expect_silent( vctrs::vec_as_names( c("a", "b"), repair = function(x) paste0(x, "a"), quiet = FALSE ) ) }) test_that("vec_as_names() evaluates repair_arg lazily", { expect_silent(vec_as_names(letters, repair_arg = print("oof"))) }) # vec_repair_names() ------------------------------------------------------- test_that("vec_repair_names() repairs names", { expect_identical(vec_repair_names(1:2), set_names(1:2, c("", ""))) expect_identical(vec_repair_names(1:2, "unique"), set_names(1:2, c("...1", "...2"))) expect_identical(vec_repair_names(set_names(1:2, c("_foo", "_bar")), "universal"), set_names(1:2, c("._foo", "._bar"))) }) test_that("vec_repair_names() handles data frames and arrays", { df <- data.frame(x = 1:2) expect_identical(vec_repair_names(df), df) expect_identical(row.names(vec_repair_names(as.matrix(df))), c("", "")) expect_identical(row.names(vec_repair_names(as.matrix(df), "unique")), c("...1", "...2")) }) # vec_set_names() ----------------------------------------------------------- test_that("vec_set_names() sets atomic names", { x <- 1:2 names <- c("x1", "x2") exp <- set_names(x, names) expect_equal(vec_set_names(x, names), exp) }) test_that("vec_set_names() sets matrix/array names", { x <- matrix(1:2) names <- c("x1", "x2") exp <- x rownames(exp) <- names expect_equal(vec_set_names(x, names), exp) y <- array(1:4, dim = c(2, 1, 2)) exp <- y rownames(exp) <- names expect_equal(vec_set_names(y, names), exp) }) test_that("vec_set_names() doesn't alter names", { x <- matrix(1, dimnames = list(rows = "a", cols = "x")) vec_set_names(x, "y") expect_equal(vec_names2(x), "a") expect_equal(colnames(x), "x") vec_set_names(x, NULL) expect_equal(vec_names2(x), "a") expect_equal(colnames(x), "x") y <- array(1:4, dim = c(1, 2, 2), dimnames = list(rows = "a", one = 1:2, two = 1:2)) vec_set_names(y, "y") expect_equal(vec_names2(y), "a") vec_set_names(y, NULL) expect_equal(vec_names2(y), "a") }) test_that("vec_set_names() sets row names on data frames", { expect_identical( vec_set_names(data_frame(x = 1), "foo"), new_data_frame(list(x = 1), row.names = "foo") ) expect_identical( vec_set_names(data_frame(x = 1:2), c("foo", "foo")), new_data_frame(list(x = 1:2), row.names = c("foo...1", "foo...2")) ) }) test_that("vec_set_names() correctly sets names on POSIXlt objects", { x <- as.POSIXlt(new_datetime(0)) exp <- set_names(x, "a") expect_equal(vec_set_names(x, "a"), exp) }) test_that("vec_set_names() falls back to `names<-` with proxied objects", { x <- structure(1, class = "foobar") exp <- set_names(x, "a") expect_equal(vec_set_names(x, "a"), exp) local_methods(`names<-.foobar` = function(x, value) "fallback!") expect_equal(vec_set_names(x, "a"), "fallback!") }) test_that("vec_set_names() falls back to `rownames<-` with shaped proxied objects", { x <- structure(1:2, dim = c(2L, 1L), class = "foobar") names <- c("r1", "r2") exp <- x rownames(exp) <- names expect_equal(vec_set_names(x, names), exp) # `rownames<-` is not generic, but eventually calls `dimnames<-` which is local_methods(`dimnames<-.foobar` = function(x, value) "fallback!") expect_equal(vec_set_names(x, names), "fallback!") }) test_that("vec_set_names() can set NULL names", { x <- 1:2 expect_equal(vec_set_names(x, NULL), x) x_named <- set_names(x) expect_equal(vec_set_names(x_named, NULL), x) x_mat <- as.matrix(x) expect_equal(vec_set_names(x_mat, NULL), x_mat) x_mat_named <- x_mat rownames(x_mat_named) <- c("1", "2") exp <- matrix(x_mat, dimnames = list(NULL, NULL)) expect_equal(vec_set_names(x_mat_named, NULL), exp) }) test_that("vec_set_names() errors with bad `names`", { expect_snapshot({ (expect_error(vec_set_names(1, 1), "character vector, not a double")) (expect_error(vec_set_names(1, c("x", "y")), "The size of `names`, 2")) }) }) test_that("vec_names() and vec_set_names() work with 1-dimensional arrays", { x <- array(1:2, dimnames = list(c("a", "b"))) expect_identical(vec_names(x), c("a", "b")) expect_identical(vec_names(vec_set_names(x, c("A", "B"))), c("A", "B")) }) # minimal names ------------------------------------------------------------- test_that("minimal names are made from `n` when `name = NULL`", { expect_identical(minimal_names(1:2), c("", "")) }) test_that("as_minimal_names() checks input", { expect_error(as_minimal_names(1:3), "must be a character vector") }) test_that("minimal names have '' instead of NAs", { expect_identical(as_minimal_names(c("", NA, "", NA)), c("", "", "", "")) }) test_that("repairing minimal names copes with NULL input names", { x <- 1:3 x_named <- vec_repair_names(x) expect_equal(names(x_named), rep("", 3)) }) test_that("as_minimal_names() is idempotent", { x <- c("", "", NA) expect_identical(as_minimal_names(x), as_minimal_names(as_minimal_names(x))) }) test_that("minimal_names() treats data frames and arrays as vectors", { expect_identical(minimal_names(mtcars), row.names(mtcars)) expect_identical(minimal_names(as.matrix(mtcars)), row.names(mtcars)) df <- unrownames(mtcars) exp <- rep_len("", nrow(mtcars)) expect_identical(minimal_names(df), exp) expect_identical(minimal_names(as.matrix(df)), exp) }) test_that("as_minimal_names() copies on write", { nms <- chr(NA, NA) as_minimal_names(nms) expect_identical(nms, chr(NA, NA)) nms <- c("a", "b") out <- as_minimal_names(nms) expect_true(is_reference(nms, out)) }) # unique names ------------------------------------------------------------- test_that("unique_names() handles unnamed vectors", { expect_identical(unique_names(1:3), c("...1", "...2", "...3")) }) test_that("as_unique_names() is a no-op when no repairs are needed", { x <- c("x", "y") out <- as_unique_names(x) expect_true(is_reference(out, x)) expect_identical(out, c("x", "y")) }) test_that("as_unique_names() eliminates emptiness and duplication", { x <- c("", "x", "y", "x") expect_identical(as_unique_names(x), c("...1", "x...2", "y", "x...4")) }) test_that("as_unique_names(): solo empty or NA gets suffix", { expect_identical(as_unique_names(""), "...1") expect_identical(as_unique_names(NA_character_), "...1") }) test_that("as_unique_names() treats ellipsis like empty string", { expect_identical(as_unique_names("..."), as_unique_names("")) }) test_that("two_three_dots() does its job and no more", { x <- c(".", ".1", "...1", "..1a") expect_identical(two_to_three_dots(x), x) expect_identical(two_to_three_dots(c("..1", "..22")), c("...1", "...22")) }) test_that("two dots then number treated like three dots then number", { expect_identical(as_unique_names("..2"), as_unique_names("...5")) }) test_that("as_unique_names() strips positional suffixes, re-applies as needed", { x <- c("...20", "a...1", "b", "", "a...2...34") expect_identical(as_unique_names(x), c("...1", "a...2", "b", "...4", "a...5")) expect_identical(as_unique_names("a...1"), "a") expect_identical(as_unique_names(c("a...2", "a")), c("a...1", "a...2")) expect_identical(as_unique_names(c("a...3", "a", "a")), c("a...1", "a...2", "a...3")) expect_identical(as_unique_names(c("a...2", "a", "a")), c("a...1", "a...2", "a...3")) expect_identical(as_unique_names(c("a...2", "a...2", "a...2")), c("a...1", "a...2", "a...3")) }) test_that("as_unique_names() is idempotent", { x <- c("...20", "a...1", "b", "", "a...2") expect_identical(as_unique_names(!!x), as_unique_names(as_unique_names(!!x))) }) test_that("unique-ification has an 'algebraic'-y property", { ## inspired by, but different from, this guarantee about base::make.unique() ## make.unique(c(A, B)) == make.unique(c(make.unique(A), B)) ## If A is already unique, then make.unique(c(A, B)) preserves A. ## I haven't formulated what we guarantee very well yet, but it's probably ## implicit in this test (?) x <- c("...20", "a...1", "b", "", "a...2", "d") y <- c("", "a...3", "b", "...3", "e") ## fix names on each, catenate, fix the whole z1 <- as_unique_names( c( as_unique_names(x), as_unique_names(y) ) ) ## fix names on x, catenate, fix the whole z2 <- as_unique_names( c( as_unique_names(x), y ) ) ## fix names on y, catenate, fix the whole z3 <- as_unique_names( c( x, as_unique_names(y) ) ) ## catenate, fix the whole z4 <- as_unique_names( c( x, y ) ) expect_identical(z1, z2) expect_identical(z1, z3) expect_identical(z1, z4) }) test_that("unique_names() and as_unique_names() are verbose or silent", { local_name_repair_verbose() expect_snapshot(unique_names(1:2)) expect_snapshot(as_unique_names(c("", ""))) expect_message(regexp = NA, unique_names(1:2, quiet = TRUE)) expect_message(regexp = NA, as_unique_names(c("", ""), quiet = TRUE)) }) test_that("names with only duplicates are repaired", { expect_identical(unique_names(list(x = NA, x = NA)), c("x...1", "x...2")) }) # Universal names ---------------------------------------------------------- test_that("zero-length input", { expect_equal(as_universal_names(character()), character()) }) test_that("universal names are not changed", { expect_equal(as_universal_names(letters), letters) }) test_that("as_universal_names() is idempotent", { x <- c(NA, "", "x", "x", "a1:", "_x_y}") expect_identical(as_universal_names(x), as_universal_names(as_universal_names(x))) }) test_that("dupes get a suffix", { expect_equal( as_universal_names(c("a", "b", "a", "c", "b")), c("a...1", "b...2", "a...3", "c", "b...5") ) }) test_that("as_universal_names(): solo empty or NA gets suffix", { expect_identical(as_universal_names(""), "...1") expect_identical(as_universal_names(NA_character_), "...1") }) test_that("as_universal_names() treats ellipsis like empty string", { expect_identical(as_universal_names("..."), as_universal_names("")) }) test_that("solo dot is unchanged", { expect_equal(as_universal_names("."), ".") }) test_that("dot, dot gets suffix", { expect_equal(as_universal_names(c(".", ".")), c("....1", "....2")) }) test_that("dot-dot, dot-dot gets suffix", { expect_equal(as_universal_names(c("..", "..")), c(".....1", ".....2")) }) test_that("empty, dot becomes suffix, dot", { expect_equal(as_universal_names(c("", ".")), c("...1", ".")) }) test_that("empty, empty, dot becomes suffix, suffix, dot", { expect_equal(as_universal_names(c("", "", ".")), c("...1", "...2", ".")) }) test_that("dot, dot, empty becomes suffix, suffix, suffix", { expect_equal(as_universal_names(c(".", ".", "")), c("....1", "....2", "...3")) }) test_that("dot, empty, dot becomes suffix, suffix, suffix", { expect_equal(as_universal_names(c(".", "", ".")), c("....1", "...2", "....3")) }) test_that("empty, dot, empty becomes suffix, dot, suffix", { expect_equal(as_universal_names(c("", ".", "")), c("...1", ".", "...3")) }) test_that("'...j' gets stripped then names are modified", { expect_equal(as_universal_names(c("...6", "...1...2")), c("...1", "...2")) expect_equal(as_universal_names("if...2"), ".if") }) test_that("complicated inputs", { expect_equal( as_universal_names(c("", ".", NA, "if...4", "if", "if...8", "for", "if){]1")), c("...1", ".", "...3", ".if...4", ".if...5", ".if...6", ".for", "if...1") ) }) test_that("message", { local_name_repair_verbose() expect_snapshot(as_universal_names(c("a b", "b c"))) }) test_that("quiet", { expect_message( as_universal_names("", quiet = TRUE), NA ) }) test_that("unique then universal is universal, with shuffling", { x <- c("", ".2", "..3", "...4", "....5", ".....6", "......7", "...") expect_identical(as_universal_names(as_unique_names(x)), as_universal_names(x)) x2 <- x[c(7L, 4L, 3L, 6L, 5L, 1L, 2L, 8L)] expect_identical(as_universal_names(as_unique_names(x2)), as_universal_names(x2)) x3 <- x[c(3L, 2L, 4L, 6L, 8L, 1L, 5L, 7L)] expect_identical(as_universal_names(as_unique_names(x3)), as_universal_names(x3)) }) test_that("zero-length inputs given character names", { out <- vec_repair_names(character(), "universal") expect_equal(names(out), character()) }) test_that("unnamed input gives uniquely named output", { out <- vec_repair_names(1:3, "universal") expect_equal(names(out), c("...1", "...2", "...3")) }) test_that("messages by default", { local_name_repair_verbose() expect_snapshot(vec_repair_names(set_names(1, "a:b"), "universal")) expect_snapshot(vec_repair_names(set_names(1, "a:b"), ~ make.names(.))) }) test_that("quiet = TRUE", { expect_message(vec_repair_names(set_names(1, ""), "universal", quiet = TRUE), NA) }) test_that("non-universal names", { out <- vec_repair_names(set_names(1, "a b"), "universal") expect_equal(names(out), "a.b") expect_equal(as_universal_names("a b"), "a.b") }) # make_syntactic() --------------------------------------------------------- test_that("make_syntactic(): empty or NA", { expect_syntactic( c("", NA_character_), c(".", ".") ) }) test_that("make_syntactic(): reserved words", { expect_syntactic( c("if", "TRUE", "Inf", "NA_real_", "normal"), c(".if", ".TRUE", ".Inf", ".NA_real_", "normal") ) }) test_that("make_syntactic(): underscore", { expect_syntactic( c( "_", "_1", "_a}"), c("._", "._1", "._a.") ) }) test_that("make_syntactic(): dots", { expect_syntactic( c(".", "..", "...", "...."), c(".", "..", "....", "....") ) }) test_that("make_syntactic(): number", { expect_syntactic( c( "0", "1", "22", "333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): number then character", { expect_syntactic( c( "0a", "1b", "22c", "333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): number then non-character", { expect_syntactic( c( "0)", "1&", "22*", "333@"), c("..0.", "..1.", "..22.", "..333.") ) }) test_that("make_syntactic(): dot then number", { expect_syntactic( c( ".0", ".1", ".22", ".333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot then number then character", { expect_syntactic( c( ".0a", ".1b", ".22c", ".333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): dot then number then non-character", { expect_syntactic( c( ".0)", ".1&", ".22*", ".333@"), c("..0.", "..1.", "..22.", "..333.") ) }) test_that("make_syntactic(): dot dot then number", { expect_syntactic( c( "..0", "..1", "..22", "..333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot dot dot then number", { expect_syntactic( c("...0", "...1", "...22", "...333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot dot dot dot then number", { expect_syntactic( c("....0", "....1", "....22", "....333"), c("....0", "....1", "....22", "....333") ) }) test_that("make_syntactic(): dot dot dot dot dot then number", { expect_syntactic( c(".....0", ".....1", ".....22", ".....333"), c(".....0", ".....1", ".....22", ".....333") ) }) test_that("make_syntactic(): dot dot then number then character", { expect_syntactic( c("..0a", "..1b", "..22c", "..333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): dot dot then number then non-character", { expect_syntactic( c("..0)", "..1&", "..22*", "..333@"), c("..0.", "..1.", "..22.", "..333.") ) }) # Duplication -------------------------------------------------------------- test_that("Minimal name repair duplicates if needed", { x1 <- NA_character_ x3 <- c(x1, x1) # Called to check absence of side effect vec_as_names(x3, repair = "minimal") expect_identical(x3, c(NA_character_, NA_character_)) }) test_that("Unique name repair duplicates if needed", { x1 <- "fa\u00e7ile" x3 <- c(x1, x1) # Called to check absence of side effect vec_as_names(x3, repair = "unique") expect_identical(x3, c("fa\u00e7ile", "fa\u00e7ile")) }) # Encoding ------------------------------------------------------------- test_that("Name repair works with non-UTF-8 names", { x1 <- "fa\u00e7ile" skip_if_not(Encoding(x1) == "UTF-8") x2 <- iconv(x1, from = "UTF-8", to = "latin1") skip_if_not(Encoding(x2) == "latin1") x3 <- c(x2, x2) expect_equal(vec_as_names(x3, repair = "unique"), paste0(x3, "...", 1:2)) }) # Conditions ----------------------------------------------------------- test_that("names cannot be empty", { expect_error_cnd( stop_names_cannot_be_empty(c("", "")), class = c("vctrs_error_names_cannot_be_empty", "vctrs_error_names", "vctrs_error"), message = "Names can't be empty.", names = c("", "") ) }) test_that("names cannot be dot dot", { expect_error_cnd( stop_names_cannot_be_dot_dot(c("..1", "..2")), class = c("vctrs_error_names_cannot_be_dot_dot", "vctrs_error_names", "vctrs_error"), message = "Names can't be of the form `...` or `..j`.", names = c("..1", "..2") ) }) test_that("names must be unique", { expect_error_cnd( stop_names_must_be_unique(c("x", "y", "y", "x")), class = c("vctrs_error_names_must_be_unique", "vctrs_error_names", "vctrs_error"), message = "Names must be unique.", names = c("x", "y", "y", "x") ) }) # Legacy repair -------------------------------------------------------- test_that("vec_as_names_legacy() works", { expect_identical(vec_as_names_legacy(chr()), chr()) expect_identical(vec_as_names_legacy(c("a", "a", "", "")), c("a", "a1", "V1", "V2")) expect_identical(vec_as_names_legacy(c("a", "a", "", ""), sep = "_"), c("a", "a_1", "V_1", "V_2")) expect_identical(vec_as_names_legacy(c("a", "a", "", ""), prefix = "foo"), c("a", "a1", "foo1", "foo2")) expect_identical(vec_as_names_legacy(c("a", "a", "", ""), prefix = "foo", sep = "_"), c("a", "a_1", "foo_1", "foo_2")) # From tibble expect_identical(vec_as_names_legacy(c("x", "x")), c("x", "x1")) expect_identical(vec_as_names_legacy(c("", "")), c("V1", "V2")) expect_identical(vec_as_names_legacy(c("", "V1")), c("V2", "V1")) expect_identical(vec_as_names_legacy(c("", "V", "V")), c("V2", "V", "V1")) }) # Name specification --------------------------------------------------- test_that("NULL name specs works with scalars", { expect_identical(apply_name_spec(NULL, "foo", NULL, 1L), "foo") expect_named(vec_c(foo = 1), "foo") expect_identical(apply_name_spec(NULL, "foo", chr(), 0L), chr()) expect_equal(vec_c(foo = dbl()), set_names(dbl(), "")) expect_named(vec_c(foo = set_names(dbl())), chr()) expect_named(vec_c(foo = set_names(dbl()), bar = set_names(dbl())), chr()) expect_error(apply_name_spec(NULL, "foo", c("a", "b")), "vector of length > 1") expect_error(apply_name_spec(NULL, "foo", NULL, 2L), "vector of length > 1") expect_snapshot({ (expect_error(vec_c(foo = c(a = 1, b = 2)), "vector of length > 1")) (expect_error(vec_c(foo = 1:2), "vector of length > 1")) (expect_error(vec_c(x = c(xx = 1)), "named vector")) }) }) test_that("function name spec is applied", { spec <- function(outer, inner) { sep <- if (is_character(inner)) "_" else ":" paste0(outer, sep, inner) } expect_identical(apply_name_spec(spec, "foo", NULL, 1L), "foo") expect_named(vec_c(foo = 1, .name_spec = spec), "foo") expect_identical(apply_name_spec(spec, "foo", c("a", "b")), c("foo_a", "foo_b")) expect_named(vec_c(foo = c(a = 1, b = 2), .name_spec = spec), c("foo_a", "foo_b")) expect_identical(apply_name_spec(spec, "foo", NULL, 2L), c("foo:1", "foo:2")) expect_named(vec_c(foo = 1:2, .name_spec = spec), c("foo:1", "foo:2")) }) test_that("can pass lambda formula as name spec", { expect_named(vec_c(foo = c(a = 1, b = 2), .name_spec = ~ paste(.x, .y, sep = "_")), c("foo_a", "foo_b")) expect_error(vec_c(foo = c(a = 1, b = 2), .name_spec = env()), "Can't convert `.name_spec`", fixed = TRUE) }) test_that("can pass glue string as name spec", { expect_named(vec_c(foo = c(a = 1, b = 2), .name_spec = "{outer}_{inner}"), c("foo_a", "foo_b")) expect_named(vec_c(foo = 1:2, .name_spec = "{outer}_{inner}"), c("foo_1", "foo_2")) expect_error(vec_c(foo = c(a = 1, b = 2), .name_spec = c("a", "b")), "single string") }) test_that("`outer` is recycled before name spec is invoked", { expect_identical(vec_c(outer = 1:2, .name_spec = "{outer}"), c(outer = 1L, outer = 2L)) }) test_that("apply_name_spec() recycles return value not arguments (#1099)", { out <- unstructure(apply_name_spec("foo", "outer", c("a", "b", "c"))) expect_identical(out, c("foo", "foo", "foo")) inner <- NULL outer <- NULL spec <- function(outer, inner) { inner <<- inner outer <<- outer } apply_name_spec(spec, "outer", c("a", "b", "c")) expect_identical(inner, c("a", "b", "c")) expect_identical(outer, "outer") }) test_that("r_chr_paste_prefix() works", { nms <- c("foo", "bar") expect_equal( .Call(ffi_chr_paste_prefix, nms, "baz", "."), c("baz.foo", "baz.bar") ) # Greater than `VCTRS_PASTE_BUFFER_MAX_SIZE` long_prefix <- strrep("a", 5000) expect_equal( .Call(ffi_chr_paste_prefix, nms, long_prefix, "."), paste0(long_prefix, ".", nms) ) }) test_that("vec_as_names() uses internal error if `repair_arg` is not supplied", { expect_snapshot({ (expect_error(vec_as_names("", repair = "foobar", call = quote(tilt())))) (expect_error(vec_as_names("", repair = env(), call = quote(tilt())))) }) }) vctrs/tests/testthat/helper-performance.R0000644000176200001440000000232614315060310020312 0ustar liggesusersskip_if_not_testing_performance <- function(x) { opt <- Sys.getenv("VCTRS_TEST_PERFORMANCE", unset = "false") testing <- identical(opt, "true") if (testing) { return() } skip("Not testing performance") } expect_time_lt <- function(expr, expect) { time <- time_of({{ expr }}) expect_lt(time, expect) } time_of <- function(expr) { expr <- enquo(expr) time <- system.time(eval_tidy(expr)) unclass(time)[["elapsed"]] } # From r-lib/bench with_memory_prof <- function(expr) { f <- tempfile() on.exit(unlink(f)) tryCatch( utils::Rprofmem(f, threshold = 1), error = function(...) skip("Can't profile memory on this system.") ) on.exit(utils::Rprofmem(NULL), add = TRUE) res <- force(expr) utils::Rprofmem(NULL) bytes <- parse_allocations(f)$bytes bytes <- sum(bytes, na.rm = TRUE) new_vctrs_bytes(bytes) } parse_allocations <- function(filename) { if (!is_installed("profmem")) { testthat::skip("profmem must be installed.") } readRprofmem <- env_get(ns_env("profmem"), "readRprofmem") tryCatch( readRprofmem(filename), error = function(cnd) { testthat::skip(sprintf( "Memory profiling failed: %s", conditionMessage(cnd) )) } ) } vctrs/tests/testthat.R0000644000176200001440000000006614276722575014563 0ustar liggesuserslibrary(testthat) library(vctrs) test_check("vctrs") vctrs/src/0000755000176200001440000000000014532404540012203 5ustar liggesusersvctrs/src/equal.h0000644000176200001440000001664014341702470013473 0ustar liggesusers#ifndef VCTRS_EQUAL_H #define VCTRS_EQUAL_H #include "vctrs-core.h" #include "missing.h" #include "utils.h" // equal_object() never propagates missingness, so // it can return a `bool` bool equal_object(SEXP x, SEXP y); bool equal_object_normalized(SEXP x, SEXP y); bool equal_names(SEXP x, SEXP y); // ----------------------------------------------------------------------------- static inline int lgl_equal_na_equal(int x, int y) { return x == y; } static inline int int_equal_na_equal(int x, int y) { return x == y; } static inline int dbl_equal_na_equal(double x, double y) { switch (dbl_classify(x)) { case VCTRS_DBL_number: break; case VCTRS_DBL_missing: return dbl_classify(y) == VCTRS_DBL_missing; case VCTRS_DBL_nan: return dbl_classify(y) == VCTRS_DBL_nan; } return isnan(y) ? false : x == y; } static inline int cpl_equal_na_equal(Rcomplex x, Rcomplex y) { return dbl_equal_na_equal(x.r, y.r) && dbl_equal_na_equal(x.i, y.i); } static inline int chr_equal_na_equal(SEXP x, SEXP y) { return x == y; } static inline int raw_equal_na_equal(Rbyte x, Rbyte y) { return x == y; } static inline int list_equal_na_equal(SEXP x, SEXP y) { return equal_object_normalized(x, y); } // ----------------------------------------------------------------------------- #define P_EQUAL_NA_EQUAL(CTYPE, EQUAL_NA_EQUAL) do { \ return EQUAL_NA_EQUAL(((const CTYPE*) p_x)[i], ((const CTYPE*) p_y)[j]); \ } while (0) static r_no_return inline int p_nil_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { r_stop_internal("Can't compare NULL for equality."); } static inline int p_lgl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(int, lgl_equal_na_equal); } static inline int p_int_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(int, int_equal_na_equal); } static inline int p_dbl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(double, dbl_equal_na_equal); } static inline int p_cpl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(Rcomplex, cpl_equal_na_equal); } static inline int p_chr_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(SEXP, chr_equal_na_equal); } static inline int p_raw_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(Rbyte, raw_equal_na_equal); } static inline int p_list_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(SEXP, list_equal_na_equal); } #undef P_EQUAL_NA_EQUAL static inline bool p_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_equal_na_equal(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_equal_na_equal", type); } } // ----------------------------------------------------------------------------- static inline int lgl_equal_na_propagate(int x, int y) { if (lgl_is_missing(x) || lgl_is_missing(y)) { return NA_LOGICAL; } else { return lgl_equal_na_equal(x, y); } } static inline int int_equal_na_propagate(int x, int y) { if (int_is_missing(x) || int_is_missing(y)) { return NA_LOGICAL; } else { return int_equal_na_equal(x, y); } } static inline int dbl_equal_na_propagate(double x, double y) { if (dbl_is_missing(x) || dbl_is_missing(y)) { return NA_LOGICAL; } else { // Faster than `dbl_equal_na_equal()`, // which has unneeded missing value checks return x == y; } } static inline int cpl_equal_na_propagate(Rcomplex x, Rcomplex y) { int real_equal = dbl_equal_na_propagate(x.r, y.r); int imag_equal = dbl_equal_na_propagate(x.i, y.i); if (real_equal == NA_LOGICAL || imag_equal == NA_LOGICAL) { return NA_LOGICAL; } else { return real_equal && imag_equal; } } static inline int chr_equal_na_propagate(SEXP x, SEXP y) { if (chr_is_missing(x) || chr_is_missing(y)) { return NA_LOGICAL; } else { return chr_equal_na_equal(x, y); } } static inline int raw_equal_na_propagate(Rbyte x, Rbyte y) { return raw_equal_na_equal(x, y); } static inline int list_equal_na_propagate(SEXP x, SEXP y) { if (list_is_missing(x) || list_is_missing(y)) { return NA_LOGICAL; } else { return list_equal_na_equal(x, y); } } // ----------------------------------------------------------------------------- #define P_EQUAL_NA_PROPAGATE(CTYPE, EQUAL_NA_PROPAGATE) do { \ return EQUAL_NA_PROPAGATE(((const CTYPE*) p_x)[i], ((const CTYPE*) p_y)[j]); \ } while (0) static r_no_return inline int p_nil_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { r_stop_internal("Can't compare NULL for equality."); } static inline int p_lgl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(int, lgl_equal_na_propagate); } static inline int p_int_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(int, int_equal_na_propagate); } static inline int p_dbl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(double, dbl_equal_na_propagate); } static inline int p_cpl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(Rcomplex, cpl_equal_na_propagate); } static inline int p_chr_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(SEXP, chr_equal_na_propagate); } static inline int p_raw_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(Rbyte, raw_equal_na_propagate); } static inline int p_list_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(SEXP, list_equal_na_propagate); } #undef P_EQUAL_NA_PROPAGATE static inline bool p_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_equal_na_propagate(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_equal_na_propagate", type); } } // ----------------------------------------------------------------------------- #endif vctrs/src/match-joint.c0000644000176200001440000003151714362266120014574 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/match-joint-decl.h" #define VEC_JOINT_XTFRM_LOOP(CMP) do { \ while (i < x_n_groups && j < y_n_groups) { \ const int x_group_size = v_x_group_sizes[i]; \ const int y_group_size = v_y_group_sizes[j]; \ \ const int x_loc = v_x_o[x_o_loc] - 1; \ const int y_loc = v_y_o[y_o_loc] - 1; \ \ const int cmp = CMP( \ p_x_vec, x_loc, \ p_y_vec, y_loc, \ nan_distinct \ ); \ \ if (cmp == -1) { \ for (int k = 0; k < x_group_size; ++k) { \ v_x_ranks[v_x_o[x_o_loc] - 1] = rank; \ ++x_o_loc; \ } \ ++i; \ } else if (cmp == 1) { \ for (int k = 0; k < y_group_size; ++k) { \ v_y_ranks[v_y_o[y_o_loc] - 1] = rank; \ ++y_o_loc; \ } \ ++j; \ } else { \ for (int k = 0; k < x_group_size; ++k) { \ v_x_ranks[v_x_o[x_o_loc] - 1] = rank; \ ++x_o_loc; \ } \ for (int k = 0; k < y_group_size; ++k) { \ v_y_ranks[v_y_o[y_o_loc] - 1] = rank; \ ++y_o_loc; \ } \ ++i; \ ++j; \ } \ \ ++rank; \ } \ } while(0) /* * `vec_joint_xtfrm()` takes two vectors of the same type and computes an * xtfrm-like integer proxy for each that takes into account the values between * the two columns. It is approximately equal to the idea of: * `vec_rank(vec_c(x, y), ties = "dense")` * followed by splitting the ranks back up into two vectors matching the sizes * of x and y. The reason we don't do that is because it limits the maximum size * that `vec_locate_matches()` can work on to * `vec_size(x) + vec_size(y) <= INT_MAX`, * since you have to combine the vectors together. * * The sole purpose of this function is to support `vec_locate_matches()`. * * # For example: * x <- c(2, 1.5, 1) * y <- c(3, 1.2, 2) * # vec_joint_xtfrm(x, y) theoretically results in: * x <- c(4L, 3L, 1L) * y <- c(5L, 2L, 4L) * # While the above result is the general idea, we actually start counting * # from `INT_MIN + 1` to maximally utilize the `int` space while still * # avoiding `INT_MIN == NA_INTEGER`. So the result is really: * x <- c(-2147483644L, -2147483645L, -2147483647L) * y <- c(-2147483643L, -2147483646L, -2147483644L) */ // [[ include("match-joint.h") ]] r_obj* vec_joint_xtfrm(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, bool nan_distinct, r_obj* chr_proxy_collate) { int n_prot = 0; r_obj* out = KEEP_N(r_alloc_list(2), &n_prot); // These aren't true ranks, but that name makes the most sense r_obj* x_ranks = r_alloc_integer(x_size); r_list_poke(out, 0, x_ranks); int* v_x_ranks = r_int_begin(x_ranks); r_obj* y_ranks = r_alloc_integer(y_size); r_list_poke(out, 1, y_ranks); int* v_y_ranks = r_int_begin(y_ranks); // Retain the results of applying the proxy, normalizing the encoding, and // doing the collation transform, since we will make comparisons directly // on these objects. // This also uses a special variant of `vec_proxy_order()` to support list // columns, which have proxies that can't be computed independently. r_obj* proxies = KEEP_N(vec_joint_proxy_order(x, y), &n_prot); r_obj* x_proxy = r_list_get(proxies, 0); x_proxy = KEEP_N(vec_normalize_encoding(x_proxy), &n_prot); x_proxy = KEEP_N(proxy_apply_chr_proxy_collate(x_proxy, chr_proxy_collate), &n_prot); r_obj* y_proxy = r_list_get(proxies, 1); y_proxy = KEEP_N(vec_normalize_encoding(y_proxy), &n_prot); y_proxy = KEEP_N(proxy_apply_chr_proxy_collate(y_proxy, chr_proxy_collate), &n_prot); // Called with `direction = "asc", na_value = "smallest"` to match the // comparison helpers in `match-compare.h` r_obj* x_info = KEEP_N(vec_order_info( x_proxy, chrs_asc, chrs_smallest, nan_distinct, r_null, true ), &n_prot); r_obj* y_info = KEEP_N(vec_order_info( y_proxy, chrs_asc, chrs_smallest, nan_distinct, r_null, true ), &n_prot); const int* v_x_o = r_int_cbegin(r_list_get(x_info, 0)); const int* v_x_group_sizes = r_int_cbegin(r_list_get(x_info, 1)); r_ssize x_n_groups = r_length(r_list_get(x_info, 1)); const int* v_y_o = r_int_cbegin(r_list_get(y_info, 0)); const int* v_y_group_sizes = r_int_cbegin(r_list_get(y_info, 1)); r_ssize y_n_groups = r_length(r_list_get(y_info, 1)); const enum vctrs_type type = vec_proxy_typeof(x_proxy); const struct poly_vec* p_x_poly = new_poly_vec(x_proxy, type); KEEP_N(p_x_poly->shelter, &n_prot); const void* p_x_vec = p_x_poly->p_vec; const struct poly_vec* p_y_poly = new_poly_vec(y_proxy, type); KEEP_N(p_y_poly->shelter, &n_prot); const void* p_y_vec = p_y_poly->p_vec; r_ssize i = 0; r_ssize j = 0; r_ssize x_o_loc = 0; r_ssize y_o_loc = 0; // Start rank as small as possible (while still different from NA), // to maximally utilize `int` storage int rank = INT_MIN + 1; // Now that we have the ordering of both vectors, // it is just a matter of merging two sorted arrays switch (type) { case VCTRS_TYPE_logical: VEC_JOINT_XTFRM_LOOP(p_lgl_order_compare_na_equal); break; case VCTRS_TYPE_integer: VEC_JOINT_XTFRM_LOOP(p_int_order_compare_na_equal); break; case VCTRS_TYPE_double: VEC_JOINT_XTFRM_LOOP(p_dbl_order_compare_na_equal); break; case VCTRS_TYPE_complex: VEC_JOINT_XTFRM_LOOP(p_cpl_order_compare_na_equal); break; case VCTRS_TYPE_character: VEC_JOINT_XTFRM_LOOP(p_chr_order_compare_na_equal); break; case VCTRS_TYPE_dataframe: VEC_JOINT_XTFRM_LOOP(p_df_order_compare_na_equal); break; default: stop_unimplemented_vctrs_type("vec_joint_xtfrm", type); } while (i < x_n_groups) { // Finish up remaining x groups const int x_group_size = v_x_group_sizes[i]; for (int k = 0; k < x_group_size; ++k) { v_x_ranks[v_x_o[x_o_loc] - 1] = rank; ++x_o_loc; } ++i; ++rank; } while (j < y_n_groups) { // Finish up remaining y groups const int y_group_size = v_y_group_sizes[j]; for (int k = 0; k < y_group_size; ++k) { v_y_ranks[v_y_o[y_o_loc] - 1] = rank; ++y_o_loc; } ++j; ++rank; } FREE(n_prot); return out; } #undef VEC_JOINT_XTFRM_LOOP // ----------------------------------------------------------------------------- /* * Specialized internal variant of `vec_proxy_order()` used in * `vec_joint_xtfrm()`. * * If we know that the `vec_proxy_order()` method of a type doesn't depend on * the data itself, then we just call `vec_proxy_order()` on `x` and `y` * separately. We know this is true for most base types (except lists) and * for the base R S3 types that we support natively in vctrs, so those get a * fast path. * * Otherwise, it is possible that the `vec_proxy_order()` method is dependent * on the data itself, like it is with lists and the bignum classes, so we need * to compute the order proxy "jointly" by combining `x` and `y` together. * * For example * x <- list(1.5, 2) * y <- list(2, 1.5) * vec_proxy_order(x) * # [1] 1 2 * vec_proxy_order(y) # can't compare proxies when taken individually * # [1] 1 2 * vec_proxy_order(c(x, y)) # jointly comparable * # [1] 1 2 2 1 * * Combining `x` and `y` has the downsides that it: * - Is slower than the independent proxy method * - Limits the maximum data size to `vec_size(x) + vec_size(y) <= INT_MAX` * * Data frames are analyzed one column at a time, so if one of the columns * requires a joint proxy, then we only have to combine those individual columns * together rather than the entire data frames. */ static inline r_obj* vec_joint_proxy_order(r_obj* x, r_obj* y) { if (r_typeof(x) != r_typeof(y)) { r_stop_internal("`x` and `y` should have the same type."); } switch (vec_typeof(x)) { case VCTRS_TYPE_unspecified: case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: { return vec_joint_proxy_order_independent(x, y); } case VCTRS_TYPE_list: { return vec_joint_proxy_order_dependent(x, y); } case VCTRS_TYPE_dataframe: { return df_joint_proxy_order(x, y); } case VCTRS_TYPE_s3: { return vec_joint_proxy_order_s3(x, y); } case VCTRS_TYPE_null: case VCTRS_TYPE_scalar: { stop_unimplemented_vctrs_type("vec_joint_proxy_order", vec_typeof(x)); } } r_stop_unreachable(); } static inline r_obj* vec_joint_proxy_order_independent(r_obj* x, r_obj* y) { r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, vec_proxy_order(x)); r_list_poke(out, 1, vec_proxy_order(y)); FREE(1); return out; } static inline r_obj* vec_joint_proxy_order_dependent(r_obj* x, r_obj* y) { r_ssize x_size = vec_size(x); r_ssize y_size = vec_size(y); r_obj* x_slicer = KEEP(compact_seq(0, x_size, true)); r_obj* y_slicer = KEEP(compact_seq(x_size, y_size, true)); r_obj* ptype = KEEP(vec_ptype(x, vec_args.empty, r_lazy_null)); r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, x); r_list_poke(out, 1, y); // Combine // NOTE: Without long vector support, this limits the maximum allowed // size of `vec_locate_matches()` input to // `vec_size(x) + vec_size(y) <= INT_MAX` // when foreign columns are used. r_obj* combined = KEEP(vec_c( out, ptype, r_null, p_no_repair_opts, vec_args.empty, r_lazy_null )); // Compute joint order-proxy r_obj* proxy = KEEP(vec_proxy_order(combined)); // Separate and store back in `out` r_list_poke(out, 0, vec_slice_unsafe(proxy, x_slicer)); r_list_poke(out, 1, vec_slice_unsafe(proxy, y_slicer)); FREE(6); return out; } static inline r_obj* vec_joint_proxy_order_s3(r_obj* x, r_obj* y) { const enum vctrs_class_type type = class_type(x); if (type != class_type(y)) { r_stop_internal("`x` and `y` should have the same class type."); } switch (type) { case VCTRS_CLASS_bare_factor: case VCTRS_CLASS_bare_ordered: case VCTRS_CLASS_bare_date: case VCTRS_CLASS_bare_posixct: case VCTRS_CLASS_bare_posixlt: { return vec_joint_proxy_order_independent(x, y); } case VCTRS_CLASS_bare_asis: case VCTRS_CLASS_list: case VCTRS_CLASS_unknown: { return vec_joint_proxy_order_dependent(x, y); } case VCTRS_CLASS_bare_tibble: case VCTRS_CLASS_data_frame: { return df_joint_proxy_order(x, y); } case VCTRS_CLASS_bare_data_frame: { r_stop_internal("Bare data frames should have been handled earlier."); } case VCTRS_CLASS_none: { r_stop_internal("Unclassed objects should have been handled earlier."); } } r_stop_unreachable(); } static inline r_obj* df_joint_proxy_order(r_obj* x, r_obj* y) { x = KEEP(r_clone_referenced(x)); y = KEEP(r_clone_referenced(y)); const r_ssize n_cols = r_length(x); if (n_cols != r_length(y)) { r_stop_internal("`x` and `y` must have the same number of columns."); } r_obj* const* v_x = r_list_cbegin(x); r_obj* const* v_y = r_list_cbegin(y); for (r_ssize i = 0; i < n_cols; ++i) { r_obj* proxies = vec_joint_proxy_order(v_x[i], v_y[i]); r_list_poke(x, i, r_list_get(proxies, 0)); r_list_poke(y, i, r_list_get(proxies, 1)); } x = KEEP(df_flatten(x)); x = KEEP(vec_proxy_unwrap(x)); y = KEEP(df_flatten(y)); y = KEEP(vec_proxy_unwrap(y)); r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, x); r_list_poke(out, 1, y); FREE(7); return out; } vctrs/src/rep.h0000644000176200001440000000067014362266120013146 0ustar liggesusers#ifndef VCTRS_REP_H #define VCTRS_REP_H r_obj* vec_rep(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg); r_obj* vec_rep_each(r_obj* x, r_obj* times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg); #endif vctrs/src/expand.c0000644000176200001440000000675314401377400013640 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/expand-decl.h" r_obj* ffi_vec_expand_grid(r_obj* xs, r_obj* ffi_vary, r_obj* ffi_name_repair, r_obj* frame) { struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; enum vctrs_expand_vary vary = parse_vary(ffi_vary); struct name_repair_opts name_repair_opts = new_name_repair_opts( ffi_name_repair, lazy_args.dot_name_repair, false, error_call ); KEEP(name_repair_opts.shelter); r_obj* out = vec_expand_grid(xs, vary, &name_repair_opts, error_call); FREE(1); return out; } r_obj* vec_expand_grid(r_obj* xs, enum vctrs_expand_vary vary, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call) { obj_check_list(xs, vec_args.empty, error_call); if (vec_any_missing(xs)) { // Drop `NULL`s before any other checks r_obj* complete = KEEP(vec_detect_complete(xs)); xs = vec_slice(xs, complete); FREE(1); } KEEP(xs); const r_ssize n = r_length(xs); r_obj* out = KEEP(r_alloc_list(n)); r_obj* names = KEEP(vec_names2(xs)); if (!r_is_minimal_names(names)) { r_abort_lazy_call(error_call, "All inputs must be named."); } names = vec_as_names(names, p_name_repair_opts); r_attrib_poke_names(out, names); const struct vec_error_opts error_opts = { .p_arg = vec_args.empty, .call = error_call }; r_obj* sizes = KEEP(list_sizes(xs, &error_opts)); const int* v_sizes = r_int_cbegin(sizes); r_obj* cumulative = KEEP(r_alloc_raw(n * sizeof(r_ssize))); r_ssize* v_cumulative = r_raw_begin(cumulative); r_ssize size = 1; for (r_ssize i = 0; i < n; ++i) { size = r_ssize_mult(size, v_sizes[i]); v_cumulative[i] = size; } // TODO: Support long vectors here if (size > R_LEN_T_MAX) { r_abort_lazy_call( error_call, "Long vectors are not yet supported. " "Expansion results in an allocation larger than 2^31-1 elements. " "Attempted allocation size was %.0lf.", (double) size ); } r_obj* const* v_xs = r_list_cbegin(xs); r_obj* ffi_times_each = KEEP(r_alloc_integer(1)); int* p_ffi_times_each = r_int_begin(ffi_times_each); for (r_ssize i = 0; i < n; ++i) { r_obj* x = v_xs[i]; r_ssize times_each = 0; r_ssize times = 0; if (size != 0) { switch (vary) { case VCTRS_EXPAND_VARY_slowest: { times_each = size / v_cumulative[i]; times = v_cumulative[i] / v_sizes[i]; break; }; case VCTRS_EXPAND_VARY_fastest: { times_each = v_cumulative[i] / v_sizes[i]; times = size / v_cumulative[i]; break; } } } *p_ffi_times_each = r_ssize_as_integer(times_each); x = KEEP(vec_rep_each(x, ffi_times_each, error_call, vec_args.x, vec_args.empty)); x = vec_rep(x, r_ssize_as_integer(times), error_call, vec_args.x, vec_args.empty); r_list_poke(out, i, x); FREE(1); } init_data_frame(out, size); FREE(6); return out; } static inline enum vctrs_expand_vary parse_vary(r_obj* vary) { if (!r_is_string(vary)) { r_stop_internal("`vary` must be a string."); } const char* c_vary = r_chr_get_c_string(vary, 0); if (!strcmp(c_vary, "slowest")) return VCTRS_EXPAND_VARY_slowest; if (!strcmp(c_vary, "fastest")) return VCTRS_EXPAND_VARY_fastest; r_stop_internal( "`vary` must be either \"slowest\" or \"fastest\"." ); } vctrs/src/empty.c0000644000176200001440000000164014401377400013505 0ustar liggesusers#include "vctrs.h" #include "decl/empty-decl.h" // [[ register() ]] r_obj* vctrs_list_drop_empty(r_obj* x) { return list_drop_empty(x); } static r_obj* list_drop_empty(r_obj* x) { if (!obj_is_list(x)) { r_abort("`x` must be a list."); } r_ssize i = 0; const r_ssize size = vec_size(x); r_obj* const* v_x = r_list_cbegin(x); // Locate first element to drop for (; i < size; ++i) { if (vec_size(v_x[i]) == 0) { break; } } if (i == size) { // Nothing to drop return x; } r_obj* keep = KEEP(r_alloc_logical(size)); int* v_keep = r_lgl_begin(keep); for (r_ssize j = 0; j < i; ++j) { // Keep everything before first element to drop v_keep[j] = true; } // `i` should be dropped so handle that here v_keep[i] = false; ++i; for (; i < size; ++i) { v_keep[i] = vec_size(v_x[i]) != 0; } r_obj* out = vec_slice(x, keep); FREE(1); return out; } vctrs/src/size-common.c0000644000176200001440000001127514373202700014612 0ustar liggesusers#include "vctrs.h" #include "decl/size-common-decl.h" // [[ register(external = TRUE) ]] r_obj* ffi_size_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); struct r_lazy internal_call = { .x = env, .env = r_null }; r_obj* size = r_node_car(args); args = r_node_cdr(args); r_obj* absent = r_node_car(args); if (size != r_null) { r_ssize out = vec_as_short_length(size, vec_args.dot_size, internal_call); return r_int(out); } if (absent != r_null && (r_typeof(absent) != R_TYPE_integer || r_length(absent) != 1)) { r_abort_lazy_call(internal_call, "%s must be a single integer.", r_c_str_format_error_arg(".absent")); } struct size_common_opts size_opts = { .p_arg = &arg, .call = call }; r_obj* xs = KEEP(rlang_env_dots_list(env)); r_ssize common = vec_size_common_opts(xs, -1, &size_opts); r_obj* out; if (common < 0) { if (absent == r_null) { r_abort_lazy_call(internal_call, "%s must be supplied when %s is empty.", r_c_str_format_error_arg(".absent"), r_c_str_format_error_arg("...")); } out = absent; } else { out = r_int(common); } FREE(1); return out; } r_ssize vec_size_common_opts(r_obj* xs, r_ssize absent, const struct size_common_opts* opts) { struct size_common_opts mut_opts = *opts; r_obj* common = KEEP(reduce(r_null, vec_args.empty, opts->p_arg, xs, &vctrs_size2_common, &mut_opts)); r_ssize out; if (common == r_null) { out = absent; } else { out = vec_size(common); } FREE(1); return out; } static r_obj* vctrs_size2_common(r_obj* x, r_obj* y, struct counters* counters, void* data) { struct size_common_opts* opts = data; if (x != r_null) { obj_check_vector(x, counters->curr_arg, opts->call); } if (y != r_null) { obj_check_vector(y, counters->next_arg, opts->call); } if (x == r_null) { counters_shift(counters); return y; } if (y == r_null) { return x; } r_ssize nx = vec_size(x); r_ssize ny = vec_size(y); if (nx == ny) { return x; } if (nx == 1) { counters_shift(counters); return y; } if (ny == 1) { return x; } stop_incompatible_size(x, y, nx, ny, counters->curr_arg, counters->next_arg, opts->call); } // [[ register(external = TRUE) ]] r_obj* ffi_recycle_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy internal_call = { .x = env, .env = r_null }; struct r_lazy arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); struct size_common_opts size_opts = { .p_arg = &arg, .call = call }; r_obj* size = r_node_car(args); args = r_node_cdr(args); r_obj* xs = KEEP(rlang_env_dots_list(env)); r_ssize common; if (size == r_null) { common = vec_size_common_opts(xs, -1, &size_opts); } else { common = vec_as_short_length(size, vec_args.dot_size, internal_call); } r_obj* out = vec_recycle_common_opts(xs, common, &size_opts); FREE(1); return out; } r_obj* vec_recycle_common_opts(r_obj* xs, r_ssize size, const struct size_common_opts* p_opts) { if (size < 0) { return xs; } xs = KEEP(r_clone_referenced(xs)); r_ssize n = vec_size(xs); r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg(p_opts->p_arg, r_names(xs), n, &i); KEEP(p_x_arg->shelter); for (; i < n; ++i) { r_obj* elt = r_list_get(xs, i); r_list_poke(xs, i, vec_check_recycle(elt, size, p_x_arg, p_opts->call)); } FREE(2); return xs; } vctrs/src/compare.h0000644000176200001440000002202514315060310013773 0ustar liggesusers#ifndef VCTRS_COMPARE_H #define VCTRS_COMPARE_H #include "vctrs-core.h" #include "equal.h" #include "missing.h" #include // ----------------------------------------------------------------------------- r_obj* vec_compare(r_obj* x, r_obj* y, bool na_equal); // ----------------------------------------------------------------------------- // https://stackoverflow.com/questions/10996418 static inline int int_compare_scalar(int x, int y) { return (x > y) - (x < y); } static inline int dbl_compare_scalar(double x, double y) { return (x > y) - (x < y); } static inline int chr_compare_scalar(r_obj* x, r_obj* y) { // Assume translation handled by `vec_normalize_encoding()` int cmp = strcmp(r_str_c_string(x), r_str_c_string(y)); return cmp / abs(cmp); } // ----------------------------------------------------------------------------- static inline int qsort_int_compare_scalar(const void* x, const void* y) { return int_compare_scalar(*((int*) x), *((int*) y)); } // ----------------------------------------------------------------------------- static inline r_no_return int nil_compare_na_equal(r_obj* x, r_obj* y) { r_stop_internal("Can't compare NULL values."); } static inline int lgl_compare_na_equal(int x, int y) { return int_compare_scalar(x, y); } static inline int int_compare_na_equal(int x, int y) { return int_compare_scalar(x, y); } static inline int dbl_compare_na_equal(double x, double y) { enum vctrs_dbl x_class = dbl_classify(x); enum vctrs_dbl y_class = dbl_classify(y); switch (x_class) { case VCTRS_DBL_number: { switch (y_class) { case VCTRS_DBL_number: return dbl_compare_scalar(x, y); case VCTRS_DBL_missing: return 1; case VCTRS_DBL_nan: return 1; } } case VCTRS_DBL_missing: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return 0; case VCTRS_DBL_nan: return 1; } } case VCTRS_DBL_nan: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return -1; case VCTRS_DBL_nan: return 0; } } } r_stop_unreachable(); } static inline r_no_return int cpl_compare_na_equal(Rcomplex x, Rcomplex y) { r_stop_internal("Can't compare complex types."); } static inline int chr_compare_na_equal(r_obj* x, r_obj* y) { if (chr_equal_na_equal(x, y)) { return 0; } else if (chr_is_missing(x)) { return -1; } else if (chr_is_missing(y)) { return 1; } else { return chr_compare_scalar(x, y); } } static inline r_no_return int raw_compare_na_equal(Rbyte x, Rbyte y) { r_stop_internal("Can't compare raw types."); } static inline r_no_return int list_compare_na_equal(r_obj* x, r_obj* y) { r_stop_internal("Can't compare list types."); } // ----------------------------------------------------------------------------- #define P_COMPARE_NA_EQUAL(CTYPE, COMPARE_NA_EQUAL) do { \ return COMPARE_NA_EQUAL(((CTYPE const*) p_x)[i], ((CTYPE const*) p_y)[j]); \ } while (0) static inline int p_nil_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(r_obj*, nil_compare_na_equal); } static inline int p_lgl_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(int, lgl_compare_na_equal); } static inline int p_int_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(int, int_compare_na_equal); } static inline int p_dbl_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(double, dbl_compare_na_equal); } static inline int p_cpl_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(Rcomplex, cpl_compare_na_equal); } static inline int p_chr_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(r_obj*, chr_compare_na_equal); } static inline int p_raw_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(Rbyte, raw_compare_na_equal); } static inline int p_list_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(r_obj*, list_compare_na_equal); } #undef P_COMPARE_NA_EQUAL static inline int p_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_logical: return p_lgl_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_compare_na_equal(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_compare_na_equal", type); } } // ----------------------------------------------------------------------------- static inline r_no_return int nil_compare_na_propagate(r_obj* x, r_obj* y) { r_stop_internal("Can't compare NULL values."); } static inline int lgl_compare_na_propagate(int x, int y) { if (lgl_is_missing(x) || lgl_is_missing(y)) { return r_globals.na_int; } else { return int_compare_scalar(x, y); } } static inline int int_compare_na_propagate(int x, int y) { if (int_is_missing(x) || int_is_missing(y)) { return r_globals.na_int; } else { return int_compare_scalar(x, y); } } static inline int dbl_compare_na_propagate(double x, double y) { if (dbl_is_missing(x) || dbl_is_missing(y)) { return r_globals.na_int; } else { return dbl_compare_scalar(x, y); } } static inline r_no_return int cpl_compare_na_propagate(Rcomplex x, Rcomplex y) { r_stop_internal("Can't compare complex types."); } static inline int chr_compare_na_propagate(r_obj* x, r_obj* y) { if (chr_is_missing(x) || chr_is_missing(y)) { return r_globals.na_int; } else if (chr_equal_na_equal(x, y)) { return 0; } else { return chr_compare_scalar(x, y); } } static inline r_no_return int raw_compare_na_propagate(Rbyte x, Rbyte y) { r_stop_internal("Can't compare raw types."); } static inline r_no_return int list_compare_na_propagate(r_obj* x, r_obj* y) { r_stop_internal("Can't compare list types."); } // ----------------------------------------------------------------------------- #define P_COMPARE_NA_PROPAGATE(CTYPE, COMPARE_NA_PROPAGATE) do { \ return COMPARE_NA_PROPAGATE(((CTYPE const*) p_x)[i], ((CTYPE const*) p_y)[j]); \ } while (0) static inline int p_nil_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(r_obj*, nil_compare_na_propagate); } static inline int p_lgl_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(int, lgl_compare_na_propagate); } static inline int p_int_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(int, int_compare_na_propagate); } static inline int p_dbl_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(double, dbl_compare_na_propagate); } static inline int p_cpl_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(Rcomplex, cpl_compare_na_propagate); } static inline int p_chr_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(r_obj*, chr_compare_na_propagate); } static inline int p_raw_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(Rbyte, raw_compare_na_propagate); } static inline int p_list_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(r_obj*, list_compare_na_propagate); } #undef P_COMPARE_NA_PROPAGATE static inline int p_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_logical: return p_lgl_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_compare_na_propagate(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_compare_na_propagate", type); } } // ----------------------------------------------------------------------------- #endif // VCTRS_COMPARE_H vctrs/src/type-info.h0000644000176200001440000000413214401377400014265 0ustar liggesusers#ifndef VCTRS_TYPE_INFO_H #define VCTRS_TYPE_INFO_H #include enum vctrs_type { VCTRS_TYPE_null = 0, VCTRS_TYPE_unspecified, VCTRS_TYPE_logical, VCTRS_TYPE_integer, VCTRS_TYPE_double, VCTRS_TYPE_complex, VCTRS_TYPE_character, VCTRS_TYPE_raw, VCTRS_TYPE_list, VCTRS_TYPE_dataframe, VCTRS_TYPE_scalar, VCTRS_TYPE_s3 = 255 }; /** * @member type The vector type of the original data. * @member proxy_method The function of the `vec_proxy()` method, if * any. This method is looked up with [vec_proxy_method()]. */ struct vctrs_type_info { r_obj* shelter; enum vctrs_type type; r_obj* proxy_method; }; /** * @inheritMembers vctrs_type_info * @member type If `proxy_method` was found, the vector type of the * proxy data. Otherwise, the vector type of the original data. * This is never `vctrs_type_s3`. * @member proxy If `proxy_method` was found, the result of invoking * the method. Otherwise, the original data. */ struct vctrs_proxy_info { r_obj* shelter; enum vctrs_type type; r_obj* proxy_method; r_obj* proxy; }; /** * Return the type information of a vector or its proxy * * `vec_type_info()` returns the vctrs type of `x`. `vec_proxy_info()` * returns the vctrs type of `x` or its proxy if it has one. The * former returns `vctrs_type_s3` with S3 objects (expect for native * types like bare data frames). The latter returns the bare type of * the proxy, if any. It never returns `vctrs_type_s3`. * * `vec_proxy_info()` returns both the proxy method and the proxy * data. `vec_type_info()` only returns the proxy method, which it * needs to determine whether S3 lists and non-vector base types are * scalars or proxied vectors. */ struct vctrs_type_info vec_type_info(r_obj* x); struct vctrs_proxy_info vec_proxy_info(r_obj* x); enum vctrs_type vec_typeof(r_obj* x); enum vctrs_type vec_proxy_typeof(r_obj* x); const char* vec_type_as_str(enum vctrs_type type); bool obj_is_list(r_obj* x); bool obj_is_vector(r_obj* x); bool list_all_vectors(r_obj* x); r_no_return void stop_unimplemented_vctrs_type(const char* fn, enum vctrs_type type); #endif vctrs/src/arg-counter.c0000644000176200001440000001573714315107243014611 0ustar liggesusers#include "arg-counter.h" #include "vctrs.h" #include "decl/arg-counter-decl.h" static struct counters* new_counters(r_obj* names, struct vctrs_arg* p_curr_arg, struct vctrs_arg* p_parent_arg, struct counters* prev_box_counters, struct counters* next_box_counters) { // This protects `shelter` and `names`. We leave space for // protecting `prev_box_counters` and `next_box_counters` later on. r_obj* shelter = KEEP(r_alloc_list(COUNTERS_SHELTER_N)); r_obj* data_shelter = r_alloc_raw(sizeof(struct counters)); r_list_poke(shelter, COUNTERS_SHELTER_data, data_shelter); // `names` might be from a splice box whose reduction has already // finished. We protect those from upstack. r_list_poke(shelter, COUNTERS_SHELTER_names, names); struct counters* p_counters = r_raw_begin(data_shelter); p_counters->shelter = shelter; p_counters->curr = 0; p_counters->next = 0; p_counters->names = names; p_counters->names_curr = 0; p_counters->names_next = 0; p_counters->curr_counter_arg_data = new_counter_arg_data(p_parent_arg, &p_counters->curr, &p_counters->names, &p_counters->names_curr); p_counters->next_counter_arg_data = new_counter_arg_data(p_parent_arg, &p_counters->next, &p_counters->names, &p_counters->names_next); p_counters->curr_counter = new_counter_arg(p_parent_arg, (void*) &p_counters->curr_counter_arg_data); p_counters->next_counter = new_counter_arg(p_parent_arg, (void*) &p_counters->next_counter_arg_data); p_counters->curr_arg = p_curr_arg; p_counters->next_arg = (struct vctrs_arg*) &p_counters->next_counter; p_counters->prev_box_counters = prev_box_counters; p_counters->next_box_counters = next_box_counters; FREE(1); return p_counters; } static void init_next_box_counters(struct vctrs_arg* p_parent_arg, struct counters* p_counters, r_obj* names) { p_counters->prev_box_counters = p_counters->next_box_counters; r_list_poke(p_counters->shelter, COUNTERS_SHELTER_prev, r_list_get(p_counters->shelter, COUNTERS_SHELTER_next)); struct counters* p_next = new_counters(names, p_counters->curr_arg, p_parent_arg, NULL, NULL); r_list_poke(p_counters->shelter, COUNTERS_SHELTER_next, p_next->shelter); p_counters->next_box_counters = p_next; p_next->next = p_counters->next; } static void counters_inc(struct counters* counters) { ++(counters->next); ++(counters->names_next); } /** * Swap counters so that the `next` counter (the one being increased * on iteration and representing the new input in the reduction) * becomes the current counter (the one representing the result so * far of the reduction). */ void counters_shift(struct counters* p_counters) { // Swap the counters data SWAP(struct vctrs_arg, p_counters->curr_counter, p_counters->next_counter); SWAP(r_ssize*, p_counters->curr_counter_arg_data.i, p_counters->next_counter_arg_data.i); SWAP(r_ssize*, p_counters->curr_counter_arg_data.names_i, p_counters->next_counter_arg_data.names_i); // Update the handles to `vctrs_arg` p_counters->curr_arg = (struct vctrs_arg*) &p_counters->curr_counter; p_counters->next_arg = (struct vctrs_arg*) &p_counters->next_counter; // Update the current index p_counters->curr = p_counters->next; } // Reduce `impl` with argument counters r_obj* reduce(r_obj* current, struct vctrs_arg* p_current_arg, struct vctrs_arg* p_parent_arg, r_obj* rest, r_obj* (*impl)(r_obj* current, r_obj* next, struct counters* counters, void* data), void* data) { // Store the box counters here as they might outlive their frame struct counters next_box_counters; struct counters prev_box_counters; struct counters* p_counters = new_counters(r_names(rest), p_current_arg, p_parent_arg, &prev_box_counters, &next_box_counters); KEEP(p_counters->shelter); r_obj* out = reduce_impl(current, rest, p_parent_arg, p_counters, false, impl, data); FREE(1); return out; } static r_obj* reduce_impl(r_obj* current, r_obj* rest, struct vctrs_arg* p_parent_arg, struct counters* counters, bool spliced, r_obj* (*impl)(r_obj* current, r_obj* next, struct counters* counters, void* data), void* data) { r_ssize n = r_length(rest); for (r_ssize i = 0; i < n; ++i, counters_inc(counters)) { KEEP(current); r_obj* next = r_list_get(rest, i); // Don't call `rlang_is_splice_box()` if we're already looking at a // spliced list because it's expensive if (spliced || !rlang_is_splice_box(next)) { current = impl(current, next, counters, data); } else { next = KEEP(rlang_unbox(next)); current = reduce_splice_box(current, next, p_parent_arg, counters, impl, data); FREE(1); } FREE(1); } return current; } static r_obj* reduce_splice_box(r_obj* current, r_obj* rest, struct vctrs_arg* p_parent_arg, struct counters* counters, r_obj* (*impl)(r_obj* current, r_obj* rest, struct counters* counters, void* data), void* data) { init_next_box_counters(p_parent_arg, counters, r_names(rest)); struct counters* box_counters = counters->next_box_counters; current = reduce_impl(current, rest, p_parent_arg, box_counters, true, impl, data); counters->curr_arg = box_counters->curr_arg; counters->next = box_counters->next; return current; } vctrs/src/version.c0000644000176200001440000000166514532374152014051 0ustar liggesusers#define R_NO_REMAP #include const char* vctrs_version = "0.6.5"; /** * This file records the expected package version in the shared * library (or DLL) of the package. This is useful to check that users * have properly installed your package. Installation issues where the * package is updated but the DLL isn't are common on Windows in * particular. To automatically check that the native library of the * package was properly installed: * * - Register the function below as a C callable under the name * "vctrs_linked_version". * * - Call `rlang::check_linked_version(pkg_name)` from your * `.onLoad()` hook. If you don't depend on rlang copy the * compat-linked-version.R file from the rlang repository to your R * folder. Find it at * */ // [[ register() ]] SEXP vctrs_linked_version(void) { return Rf_mkString(vctrs_version); } vctrs/src/subscript.c0000644000176200001440000002234714373202700014372 0ustar liggesusers#include "vctrs.h" #include "decl/subscript-decl.h" r_obj* vec_as_subscript_opts(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { if (vec_dim_n(subscript) != 1) { *err = new_error_subscript_type(subscript, opts, fns_cnd_body_subscript_dim); return r_null; } r_keep_loc subscript_pi; KEEP_HERE(subscript, &subscript_pi); r_obj* orig_names = KEEP(r_names(subscript)); switch (r_typeof(subscript)) { case R_TYPE_null: if (opts->numeric == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = r_globals.empty_int; } break; case R_TYPE_symbol: if (opts->character == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = rlang_sym_as_character(subscript); } break; default: break; } KEEP_AT(subscript, subscript_pi); if (!obj_is_vector(subscript)) { *err = new_error_subscript_type(subscript, opts, r_null); FREE(2); return r_null; } if (r_is_object(subscript)) { subscript = obj_cast_subscript(subscript, opts, err); } else if (r_typeof(subscript) == R_TYPE_double) { subscript = dbl_cast_subscript(subscript, opts, err); } KEEP_AT(subscript, subscript_pi); if (*err) { FREE(2); return r_null; } // Coerce unspecified vectors to integer only if logical indices are // not allowed if (opts->logical == SUBSCRIPT_TYPE_ACTION_ERROR && vec_is_unspecified(subscript)) { struct vctrs_arg* arg = opts->subscript_arg; if (opts->numeric == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = vec_cast(subscript, r_globals.empty_int, arg, NULL, r_lazy_null); } else { subscript = vec_cast(subscript, r_globals.empty_chr, arg, NULL, r_lazy_null); } } KEEP_AT(subscript, subscript_pi); enum subscript_type_action action = SUBSCRIPT_TYPE_ACTION_ERROR; switch (r_typeof(subscript)) { case R_TYPE_logical: action = opts->logical; break; case R_TYPE_integer: action = opts->numeric; break; case R_TYPE_character: action = opts->character; break; default: break; } if (action == SUBSCRIPT_TYPE_ACTION_ERROR) { *err = new_error_subscript_type(subscript, opts, r_null); FREE(2); return r_null; } if (orig_names != r_null) { // FIXME: Handle names in cast methods subscript = r_clone_referenced(subscript); KEEP_AT(subscript, subscript_pi); r_attrib_poke_names(subscript, orig_names); } FREE(2); return subscript; } static r_obj* obj_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { int dir = 0; struct ptype2_opts ptype2_opts = { .x = subscript, .y = r_null, .p_x_arg = opts->subscript_arg }; struct cast_opts cast_opts = { .x = subscript, .to = r_null, .p_x_arg = opts->subscript_arg }; ptype2_opts.y = cast_opts.to = r_globals.empty_lgl; if (vec_is_coercible(&ptype2_opts, &dir)) { return vec_cast_opts(&cast_opts); } ptype2_opts.y = cast_opts.to = r_globals.empty_int; if (vec_is_coercible(&ptype2_opts, &dir)) { return vec_cast_opts(&cast_opts); } ptype2_opts.y = cast_opts.to = r_globals.empty_chr; if (vec_is_coercible(&ptype2_opts, &dir)) { return vec_cast_opts(&cast_opts); } *err = new_error_subscript_type(subscript, opts, r_null); return r_null; } static r_obj* dbl_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { double* p = r_dbl_begin(subscript); r_ssize n = r_length(subscript); r_obj* out = KEEP(r_alloc_integer(n)); int* out_p = r_int_begin(out); for (r_ssize i = 0; i < n; ++i) { double elt = p[i]; // Generally `(int) nan` results in the correct `na_int` value, // but this is not guaranteed, so we have to explicitly check for it. // https://stackoverflow.com/questions/10366485/problems-casting-nan-floats-to-int if (isnan(elt)) { out_p[i] = r_globals.na_int; continue; } if (!isfinite(elt) || elt <= INT_MIN || elt > INT_MAX) { // Once we throw lazy errors from the cast method, we should // throw the error here as well FREE(1); return dbl_cast_subscript_fallback(subscript, opts, err); } int elt_int = (int) elt; if (elt != elt_int) { FREE(1); return dbl_cast_subscript_fallback(subscript, opts, err); } out_p[i] = elt_int; } FREE(1); return out; } static r_obj* dbl_cast_subscript_fallback(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { struct cast_opts cast_opts = { .x = subscript, .to = r_globals.empty_int, opts->subscript_arg }; r_obj* out = KEEP(vec_cast_e(&cast_opts, err)); if (*err) { r_obj* err_obj = KEEP(*err); r_obj* body = KEEP(vctrs_eval_mask1(syms_new_dbl_cast_subscript_body, syms_lossy_err, err_obj)); *err = new_error_subscript_type(subscript, opts, body); FREE(3); return r_null; } FREE(1); return out; } // FFI ----------------------------------------------------------------- // [[ register() ]] r_obj* ffi_as_subscript(r_obj* subscript, r_obj* logical, r_obj* numeric, r_obj* character, r_obj* frame) { struct r_lazy arg_ = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; struct subscript_opts opts = { .logical = parse_subscript_arg_type(logical, "logical"), .numeric = parse_subscript_arg_type(numeric, "numeric"), .character = parse_subscript_arg_type(character, "character"), .subscript_arg = &arg, .call = call }; ERR err = NULL; r_obj* out = vec_as_subscript_opts(subscript, &opts, &err); KEEP2(out, err); out = r_result_get(out, err); FREE(2); return out; } // [[ register() ]] r_obj* ffi_as_subscript_result(r_obj* subscript, r_obj* logical, r_obj* numeric, r_obj* character, r_obj* frame) { struct r_lazy arg_ = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; struct subscript_opts opts = { .logical = parse_subscript_arg_type(logical, "logical"), .numeric = parse_subscript_arg_type(numeric, "numeric"), .character = parse_subscript_arg_type(character, "character"), .subscript_arg = &arg, .call = call }; ERR err = NULL; r_obj* out = vec_as_subscript_opts(subscript, &opts, &err); KEEP2(out, err); out = r_result(out, err); FREE(2); return out; } // Arguments ------------------------------------------------------------------- static void stop_subscript_arg_type(const char* kind) { r_abort("`%s` must be one of \"cast\" or \"error\".", kind); } static enum subscript_type_action parse_subscript_arg_type(r_obj* x, const char* kind) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_subscript_arg_type(kind); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "cast")) return SUBSCRIPT_TYPE_ACTION_CAST; if (!strcmp(str, "error")) return SUBSCRIPT_TYPE_ACTION_ERROR; stop_subscript_arg_type(kind); r_stop_unreachable(); } // Conditions ------------------------------------------------------------------ static r_obj* new_error_subscript_type(r_obj* subscript, const struct subscript_opts* opts, r_obj* body) { r_obj* logical = subscript_type_action_chr(opts->logical); r_obj* numeric = subscript_type_action_chr(opts->numeric); r_obj* character = subscript_type_action_chr(opts->character); subscript = KEEP(expr_protect(subscript)); r_obj* subscript_arg = KEEP(vctrs_arg(opts->subscript_arg)); r_obj* ffi_call = r_lazy_eval_protect(opts->call); r_obj* syms[] = { syms_i, syms_subscript_arg, syms_subscript_action, syms_call, syms_logical, syms_numeric, syms_character, syms_body, NULL }; r_obj* args[] = { subscript, subscript_arg, get_opts_action(opts), ffi_call, logical, numeric, character, body, NULL }; r_obj* call = KEEP(r_call_n(syms_new_error_subscript_type, syms, args)); r_obj* out = r_eval(call, vctrs_ns_env); FREE(3); return out; } // Init ---------------------------------------------------------------- void vctrs_init_subscript(r_obj* ns) { syms_new_error_subscript_type = r_sym("new_error_subscript_type"); syms_new_dbl_cast_subscript_body = r_sym("new_cnd_bullets_subscript_lossy_cast"); syms_lossy_err = r_sym("lossy_err"); fns_cnd_body_subscript_dim = r_eval(r_sym("cnd_body_subscript_dim"), ns); } static r_obj* fns_cnd_body_subscript_dim = NULL; static r_obj* syms_new_dbl_cast_subscript_body = NULL; static r_obj* syms_lossy_err = NULL; static r_obj* syms_new_error_subscript_type = NULL; vctrs/src/c.h0000644000176200001440000000265414362266120012606 0ustar liggesusers#ifndef VCTRS_C_H #define VCTRS_C_H #include "vctrs-core.h" #include "names.h" #include "ptype2.h" r_obj* vec_c(r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call); r_obj* vec_c_opts(r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, const struct fallback_opts* fallback_opts, struct vctrs_arg* p_error_arg, struct r_lazy error_call); r_obj* vec_c_fallback_invoke(r_obj* xs, r_obj* name_spec, struct r_lazy error_call); r_obj* vec_c_fallback(r_obj* ptype, r_obj* xs, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call); bool needs_vec_c_fallback(r_obj* ptype); bool needs_vec_c_homogeneous_fallback(r_obj* xs, r_obj* ptype); void df_c_fallback(r_obj* out, r_obj* ptype, r_obj* xs, r_ssize n_rows, r_obj* name_spec, const struct name_repair_opts* name_repair, struct r_lazy error_call); #endif vctrs/src/dictionary.c0000644000176200001440000004243414362266120014524 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/dictionary-decl.h" // Initialised at load time struct vctrs_arg args_needles; struct vctrs_arg args_haystack; // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 static inline uint32_t u32_safe_ceil2(uint32_t x) { // Return 2^0 when `x` is 0 x += (x == 0); x--; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; x++; if (x == 0) { // INT32_MAX+2 <= x <= UINT32_MAX (i.e. 2^31+1 <= x <= 2^32-1) would attempt // to ceiling to 2^32, which is 1 greater than `UINT32_MAX`, resulting in // overflow wraparound to 0. r_stop_internal("`x` results in an `uint32_t` overflow."); } return x; } // Dictonary object ------------------------------------------------------------ static struct dictionary* new_dictionary_opts(SEXP x, struct dictionary_opts* opts); // Dictionaries must be protected in consistent stack order with // `PROTECT_DICT()` struct dictionary* new_dictionary(SEXP x) { struct dictionary_opts opts = { .partial = false, .na_equal = true }; return new_dictionary_opts(x, &opts); } struct dictionary* new_dictionary_partial(SEXP x) { struct dictionary_opts opts = { .partial = true, .na_equal = true }; return new_dictionary_opts(x, &opts); } static struct dictionary* new_dictionary_params(SEXP x, bool partial, bool na_equal) { struct dictionary_opts opts; opts.partial = partial; opts.na_equal = na_equal; return new_dictionary_opts(x, &opts); } static struct dictionary* new_dictionary_opts(SEXP x, struct dictionary_opts* opts) { int nprot = 0; SEXP out = PROTECT_N(Rf_allocVector(RAWSXP, sizeof(struct dictionary)), &nprot); struct dictionary* d = (struct dictionary*) RAW(out); d->protect = out; enum vctrs_type type = vec_proxy_typeof(x); struct poly_vec* p_poly_vec = new_poly_vec(x, type); KEEP_N(p_poly_vec->shelter, &nprot); d->p_poly_vec = p_poly_vec; d->p_equal_na_equal = poly_p_equal_na_equal(type); d->p_is_incomplete = poly_p_is_incomplete(type); d->used = 0; if (opts->partial) { d->key = NULL; d->size = 0; } else { uint32_t size = dict_key_size(x); d->key = (R_len_t*) R_alloc(size, sizeof(R_len_t)); for (uint32_t i = 0; i < size; ++i) { d->key[i] = DICT_EMPTY; } d->size = size; } R_len_t n = vec_size(x); if (n) { d->hash = (uint32_t*) R_alloc(n, sizeof(uint32_t)); if (!(d->hash)) { Rf_errorcall(R_NilValue, "Can't allocate hash lookup table. Please free memory."); } memset(d->hash, 0, n * sizeof(uint32_t)); hash_fill(d->hash, n, x, opts->na_equal); } else { d->hash = NULL; } UNPROTECT(nprot); return d; } // Use hash from `x` but value from `d`. `x` does not need a full // initialisation of the key vector and can be created with // `new_dictionary_partial()`. uint32_t dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { uint32_t hash = x->hash[i]; const void* p_d_vec = d->p_poly_vec->p_vec; const void* p_x_vec = x->p_poly_vec->p_vec; // Quadratic probing: will try every slot if d->size is power of 2 // http://research.cs.vt.edu/AVresearch/hashing/quadratic.php for (uint32_t k = 0; k < d->size; ++k) { uint32_t probe = (hash + k * (k + 1) / 2) & (d->size - 1); // Rprintf("Probe: %i\n", probe); // If we circled back to start, dictionary is full if (k > 1 && probe == hash) { break; } // Check for unused slot R_len_t idx = d->key[probe]; if (idx == DICT_EMPTY) { return probe; } // Check for same value as there might be a collision if (d->p_equal_na_equal(p_d_vec, idx, p_x_vec, i)) { return probe; } // Collision. next iteration will find another spot using // quadratic probing. } r_stop_internal("Dictionary is full."); } uint32_t dict_hash_scalar(struct dictionary* d, R_len_t i) { return dict_hash_with(d, d, i); } bool dict_is_incomplete(struct dictionary* d, R_len_t i) { return d->hash[i] == HASH_MISSING && d->p_is_incomplete(d->p_poly_vec->p_vec, i); } void dict_put(struct dictionary* d, uint32_t hash, R_len_t i) { d->key[hash] = i; d->used++; } // Assume worst case, that every value is distinct, aiming for a load factor // of at most 50%. We round up to power of 2 to ensure quadratic probing // strategy works. Maximum power of 2 we can store in a uint32_t is 2^31, // as 2^32 is 1 greater than the max uint32_t value, so we clamp sizes that // would result in 2^32 to INT32_MAX to ensure that our maximum ceiling value // is only 2^31. This will increase the max load factor above 50% for `x` with // length greater than 1073741824 (2147483648 * .50), but it ensures that // it can run. See https://github.com/r-lib/vctrs/pull/1760 for further // discussion of why 50% was chosen. static inline uint32_t dict_key_size(SEXP x) { const R_len_t x_size = vec_size(x); if (x_size > R_LEN_T_MAX) { // Ensure we catch the switch to supporting long vectors in `vec_size()` r_stop_internal("Dictionary functions do not support long vectors."); } const double load_adjusted_size = x_size / 0.50; if (load_adjusted_size > UINT32_MAX) { r_stop_internal("Can't safely cast load adjusted size to a `uint32_t`."); } uint32_t size = (uint32_t)load_adjusted_size; // Clamp to `INT32_MAX` to avoid overflow in `u32_safe_ceil2()`, // at the cost of an increased maximum load factor for long input size = size > INT32_MAX ? INT32_MAX : size; size = u32_safe_ceil2(size); size = (size < 16) ? 16 : size; if (x_size > size) { // Should never happen with `R_len_t` sizes. // This is a defensive check that will be useful when we support long vectors. r_stop_internal("Hash table size must be at least as large as input to avoid a load factor of >100%."); } // Rprintf("size: %u\n", size); return size; } // R interface ----------------------------------------------------------------- // TODO: rename to match R function names // TODO: separate out into individual files SEXP vctrs_unique_loc(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); struct growable g = new_growable(INTSXP, 256); PROTECT_GROWABLE(&g, &nprot); for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); growable_push_int(&g, i + 1); } } SEXP out = growable_values(&g); UNPROTECT(nprot); return out; } // [[ include("vctrs.h") ]] SEXP vec_unique(SEXP x) { SEXP index = PROTECT(vctrs_unique_loc(x)); SEXP out = vec_slice(x, index); UNPROTECT(1); return out; } SEXP vctrs_duplicated_any(SEXP x) { bool out = duplicated_any(x); return Rf_ScalarLogical(out); } // [[ include("vctrs.h") ]] bool duplicated_any(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); bool out = false; for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } else { out = true; break; } } UNPROTECT(nprot); return out; } SEXP vctrs_n_distinct(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } } UNPROTECT(nprot); return Rf_ScalarInteger(d->used); } SEXP vctrs_id(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_out = INTEGER(out); for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } p_out[i] = d->key[hash] + 1; } UNPROTECT(nprot); return out; } // [[ register() ]] SEXP vctrs_match(SEXP needles, SEXP haystack, SEXP na_equal, SEXP frame) { struct r_lazy call = { .x = frame, .env = r_null }; struct r_lazy needles_arg_ = { .x = syms.needles_arg, .env = frame }; struct vctrs_arg needles_arg = new_lazy_arg(&needles_arg_); struct r_lazy haystack_arg_ = { .x = syms.haystack_arg, .env = frame }; struct vctrs_arg haystack_arg = new_lazy_arg(&haystack_arg_); return vec_match_params(needles, haystack, r_bool_as_int(na_equal), &needles_arg, &haystack_arg, call); } static inline void vec_match_loop(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle); static inline void vec_match_loop_propagate(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle); SEXP vec_match_params(SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { int nprot = 0; int _; SEXP type = vec_ptype2_params(needles, haystack, needles_arg, haystack_arg, call, &_); PROTECT_N(type, &nprot); needles = vec_cast_params(needles, type, needles_arg, vec_args.empty, call, S3_FALLBACK_false); PROTECT_N(needles, &nprot); haystack = vec_cast_params(haystack, type, haystack_arg, vec_args.empty, call, S3_FALLBACK_false); PROTECT_N(haystack, &nprot); needles = PROTECT_N(vec_proxy_equal(needles), &nprot); needles = PROTECT_N(vec_normalize_encoding(needles), &nprot); haystack = PROTECT_N(vec_proxy_equal(haystack), &nprot); haystack = PROTECT_N(vec_normalize_encoding(haystack), &nprot); R_len_t n_haystack = vec_size(haystack); R_len_t n_needle = vec_size(needles); struct dictionary* d = new_dictionary_params(haystack, false, na_equal); PROTECT_DICT(d, &nprot); // Load dictionary with haystack for (int i = 0; i < n_haystack; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } } struct dictionary* d_needles = new_dictionary_params(needles, true, na_equal); PROTECT_DICT(d_needles, &nprot); // Locate needles SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n_needle), &nprot); int* p_out = INTEGER(out); if (na_equal) { vec_match_loop(p_out, d, d_needles, n_needle); } else { vec_match_loop_propagate(p_out, d, d_needles, n_needle); } UNPROTECT(nprot); return out; } static inline void vec_match_loop(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle) { for (R_len_t i = 0; i < n_needle; ++i) { uint32_t hash = dict_hash_with(d, d_needles, i); if (d->key[hash] == DICT_EMPTY) { // TODO: Return `no_match` instead p_out[i] = NA_INTEGER; } else { p_out[i] = d->key[hash] + 1; } } } static inline void vec_match_loop_propagate(int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle) { for (R_len_t i = 0; i < n_needle; ++i) { if (dict_is_incomplete(d_needles, i)) { p_out[i] = NA_INTEGER; continue; } uint32_t hash = dict_hash_with(d, d_needles, i); if (d->key[hash] == DICT_EMPTY) { // TODO: Return `no_match` instead p_out[i] = NA_INTEGER; } else { p_out[i] = d->key[hash] + 1; } } } // [[ register() ]] SEXP vctrs_in(SEXP needles, SEXP haystack, SEXP na_equal_, SEXP frame) { struct r_lazy call = { .x = frame, .env = r_null }; int nprot = 0; bool na_equal = r_bool_as_int(na_equal_); int _; struct r_lazy needles_arg_ = { .x = syms.needles_arg, .env = frame }; struct vctrs_arg needles_arg = new_lazy_arg(&needles_arg_); struct r_lazy haystack_arg_ = { .x = syms.haystack_arg, .env = frame }; struct vctrs_arg haystack_arg = new_lazy_arg(&haystack_arg_); SEXP type = vec_ptype2_params(needles, haystack, &needles_arg, &haystack_arg, call, &_); PROTECT_N(type, &nprot); needles = vec_cast_params(needles, type, &needles_arg, vec_args.empty, call, S3_FALLBACK_false); PROTECT_N(needles, &nprot); haystack = vec_cast_params(haystack, type, &haystack_arg, vec_args.empty, call, S3_FALLBACK_false); PROTECT_N(haystack, &nprot); needles = PROTECT_N(vec_proxy_equal(needles), &nprot); needles = PROTECT_N(vec_normalize_encoding(needles), &nprot); haystack = PROTECT_N(vec_proxy_equal(haystack), &nprot); haystack = PROTECT_N(vec_normalize_encoding(haystack), &nprot); R_len_t n_haystack = vec_size(haystack); R_len_t n_needle = vec_size(needles); struct dictionary* d = new_dictionary_params(haystack, false, na_equal); PROTECT_DICT(d, &nprot); // Load dictionary with haystack for (int i = 0; i < n_haystack; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } } struct dictionary* d_needles = new_dictionary_params(needles, true, na_equal); PROTECT_DICT(d_needles, &nprot); // Locate needles SEXP out = PROTECT_N(Rf_allocVector(LGLSXP, n_needle), &nprot); int* p_out = LOGICAL(out); bool propagate = !na_equal; for (int i = 0; i < n_needle; ++i) { if (propagate && dict_is_incomplete(d_needles, i)) { p_out[i] = NA_LOGICAL; } else { uint32_t hash = dict_hash_with(d, d_needles, i); p_out[i] = (d->key[hash] != DICT_EMPTY); } } UNPROTECT(nprot); return out; } SEXP vctrs_count(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP count = PROTECT_N(Rf_allocVector(INTSXP, d->size), &nprot); int* p_count = INTEGER(count); for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); p_count[hash] = 0; } p_count[hash]++; } // Create output SEXP out_loc = PROTECT_N(Rf_allocVector(INTSXP, d->used), &nprot); int* p_out_loc = INTEGER(out_loc); // Reuse `count` storage, which will be narrowed SEXP out_count = count; int* p_out_count = p_count; int i = 0; for (uint32_t hash = 0; hash < d->size; ++hash) { if (d->key[hash] == DICT_EMPTY) continue; p_out_loc[i] = d->key[hash] + 1; p_out_count[i] = p_count[hash]; i++; } out_count = PROTECT_N(r_int_resize(out_count, d->used), &nprot); SEXP out = PROTECT_N(Rf_allocVector(VECSXP, 2), &nprot); SET_VECTOR_ELT(out, 0, out_loc); SET_VECTOR_ELT(out, 1, out_count); SEXP names = PROTECT_N(Rf_allocVector(STRSXP, 2), &nprot); SET_STRING_ELT(names, 0, Rf_mkChar("loc")); SET_STRING_ELT(names, 1, Rf_mkChar("count")); Rf_setAttrib(out, R_NamesSymbol, names); init_data_frame(out, d->used); UNPROTECT(nprot); return out; } SEXP vctrs_duplicated(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(LGLSXP, n), &nprot); int* p_out = LOGICAL(out); memset(p_out, 0, n * sizeof(int)); uint32_t* p_hashes = (uint32_t*) R_alloc(n, sizeof(uint32_t)); // Forward pass for (R_len_t i = 0; i < n; ++i) { const uint32_t hash = dict_hash_scalar(d, i); p_hashes[i] = hash; if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } else { p_out[i] = 1; } } for (uint32_t i = 0; i < d->size; ++i) { d->key[i] = DICT_EMPTY; } // Reverse pass for (R_len_t i = n - 1; i >= 0; --i) { const uint32_t hash = p_hashes[i]; if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); } else { p_out[i] = 1; } } UNPROTECT(nprot); return out; } void vctrs_init_dictionary(SEXP ns) { args_needles = new_wrapper_arg(NULL, "needles"); args_haystack = new_wrapper_arg(NULL, "haystack"); } vctrs/src/rlang-dev.c0000644000176200001440000000023414315060310014215 0ustar liggesusers#include "vctrs.h" #include "decl/rlang-dev-decl.h" const char* r_obj_type_friendly_length(r_obj* x) { return r_obj_type_friendly_full(x, true, true); } vctrs/src/strides.h0000644000176200001440000001316214315060310014024 0ustar liggesusers#ifndef VCTRS_STRIDES_H #define VCTRS_STRIDES_H #include "vctrs-core.h" #include "utils.h" #include "dim.h" /* * Array slicing works by treating the array as a 1D structure, and transforming * the `index` passed from R into a series of 1D indices that are used to * extract elements from `x` into the new result. * * Strides represent the offset between elements in the same dimension. For * a (2, 2, 2) array, the strides would be [1, 2, 4]. This means that if you * flattened this 3D array to 1D in a column major order, there is 1 space * between row elements, 2 spaces between column elements and 4 spaces between * elements in the third dimension. In practice, we only need the shape strides * since the first stride is always 1, so `vec_strides()` only returns the shape * strides. Strides are computed as a cumulative product of the `dim`, with an * initial value of `1`, this is what `vec_strides()` does. * * Using the strides, any array index can be converted to a 1D index. * This is what `vec_strided_loc()` does. In a (2, 2, 2) array, to find * the location at the index [1, 0, 1] (C-based index, 2nd row, 1st col, * 2nd elem in 3rd dim) you compute a sum product between the array index * and the strides. So it looks like: * loc = 1 * (1) + 0 * (2) + 1 * (4) = 5 * (loc is a C-based index into `x`) * Since the first stride is always one, we leave it off and just do: * loc = 1 + 0 * (2) + 1 * (4) = 5 * * Example: * x = (3, 3, 2) array * vec_slice(x, 2:3) * * strides = [3, 9] // (shape strides) * * Indices are C-based * * | array index | x index | how? * ------------------------------------------------------- * out[0] | [1, 0, 0] | 1 | 1 + 0 * (3) + 0 * (9) * out[1] | [2, 0, 0] | 2 | * out[2] | [1, 1, 0] | 4 | * ... | ... | ... | * out[9] | [2, 1, 1] | 14 | 2 + 1 * (3) + 1 * (9) * out[10] | [1, 2, 1] | 16 | * out[11] | [2, 2, 1] | 17 | * ^ ^ ^ * | \/ * | |- shape_index * |- size_index */ struct strides_info { SEXP dim; const int* p_dim; SEXP strides; const int* p_strides; SEXP index; const int* p_index; SEXP steps; const int* p_steps; SEXP shape_index; int* p_shape_index; R_len_t dim_n; R_len_t shape_n; R_len_t index_n; R_len_t shape_elem_n; }; #define PROTECT_STRIDES_INFO(info, n) do { \ PROTECT((info)->dim); \ PROTECT((info)->strides); \ PROTECT((info)->index); \ PROTECT((info)->steps); \ PROTECT((info)->shape_index); \ *(n) += 5; \ } while(0) static inline SEXP vec_strides(const int* p_dim, const R_len_t shape_n) { SEXP strides = PROTECT(Rf_allocVector(INTSXP, shape_n)); int* p_strides = INTEGER(strides); int stride = 1; for (int i = 0; i < shape_n; ++i) { stride *= p_dim[i]; p_strides[i] = stride; } UNPROTECT(1); return strides; } static inline SEXP vec_steps(const int* p_index, const R_len_t index_n) { SEXP steps = PROTECT(Rf_allocVector(INTSXP, index_n)); int* p_steps = INTEGER(steps); // Indices come in 1-based int index_previous = 1; for (R_len_t i = 0; i < index_n; ++i) { const int index_current = p_index[i]; if (index_current == NA_INTEGER) { p_steps[i] = NA_INTEGER; continue; } p_steps[i] = index_current - index_previous; index_previous = index_current; } UNPROTECT(1); return steps; } static inline R_len_t vec_strided_loc(const int* p_shape_index, const int* p_strides, const R_len_t shape_n) { R_len_t loc = 0; for (R_len_t i = 0; i < shape_n; ++i) { loc += p_strides[i] * p_shape_index[i]; } return loc; } // Increment the `shape_index` value. This iterates through the array like: // [size, 0, 0] // [size, 1, 0] // [size, 0, 1] // [size, 1, 1] // ... static inline void vec_shape_index_increment(struct strides_info* p_info) { for (int j = 0; j < p_info->shape_n; ++j) { p_info->p_shape_index[j]++; if (p_info->p_shape_index[j] < p_info->p_dim[j + 1]) { break; } p_info->p_shape_index[j] = 0; } } static inline struct strides_info new_strides_info(SEXP x, SEXP index) { SEXP dim = PROTECT(vec_dim(x)); const int* p_dim = INTEGER_RO(dim); R_len_t dim_n = Rf_length(dim); R_len_t shape_n = dim_n - 1; R_len_t index_n = vec_subscript_size(index); SEXP strides = PROTECT(vec_strides(p_dim, shape_n)); const int* p_strides = INTEGER_RO(strides); const int* p_index = INTEGER_RO(index); // If using a compact rep/seq, the `steps` won't be used, but we still // need to put something in the struct SEXP steps; if (is_compact(index)) { steps = r_globals.empty_int; } else { steps = vec_steps(p_index, index_n); } PROTECT(steps); const int* p_steps = INTEGER_RO(steps); // Initialize `shape_index` to the first element SEXP shape_index = PROTECT(Rf_allocVector(INTSXP, shape_n)); int* p_shape_index = INTEGER(shape_index); for (int i = 0; i < shape_n; ++i) { p_shape_index[i] = 0; } R_len_t shape_elem_n = 1; for (int i = 1; i < dim_n; ++i) { shape_elem_n *= p_dim[i]; } struct strides_info info = { .dim = dim, .p_dim = p_dim, .strides = strides, .p_strides = p_strides, .index = index, .p_index = p_index, .steps = steps, .p_steps = p_steps, .shape_index = shape_index, .p_shape_index = p_shape_index, .dim_n = dim_n, .shape_n = shape_n, .index_n = index_n, .shape_elem_n = shape_elem_n }; UNPROTECT(4); return info; } #endif vctrs/src/altrep-lazy-character.c0000644000176200001440000000733414511320527016553 0ustar liggesusers#include "vctrs.h" #include "altrep.h" #if (!HAS_ALTREP) #include void vctrs_init_altrep_lazy_character(DllInfo* dll) { } r_obj* ffi_altrep_lazy_character_is_materialized(r_obj* x) { r_stop_internal("Need R 3.5+ for Altrep support."); return r_null; } r_obj* ffi_altrep_new_lazy_character(r_obj* fn) { r_stop_internal("Need R 3.5+ for Altrep support."); return r_null; } #else // Initialised at load time R_altrep_class_t altrep_lazy_character_class; r_obj* ffi_altrep_lazy_character_is_materialized(r_obj* x) { return r_lgl(R_altrep_data2(x) != r_null); } r_obj* ffi_altrep_new_lazy_character(r_obj* fn) { r_obj* out = R_new_altrep(altrep_lazy_character_class, fn, r_null); r_mark_shared(out); return out; } // ----------------------------------------------------------------------------- // ALTVEC r_obj* altrep_lazy_character_Materialize(r_obj* vec) { r_obj* out = R_altrep_data2(vec); if (out != r_null) { return out; } r_obj* fn = R_altrep_data1(vec); r_obj* call = KEEP(r_new_call(fn, r_null)); // `fn()` evaluated in the global environment out = r_eval(call, r_envs.global); if (r_typeof(out) != R_TYPE_character) { r_stop_internal("`fn` must evaluate to a character vector."); } R_set_altrep_data2(vec, out); UNPROTECT(1); return out; } void* altrep_lazy_character_Dataptr(r_obj* vec, Rboolean writeable) { return STDVEC_DATAPTR(altrep_lazy_character_Materialize(vec)); } const void* altrep_lazy_character_Dataptr_or_null(r_obj* vec) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { return NULL; } else { return STDVEC_DATAPTR(out); } } // ----------------------------------------------------------------------------- // ALTREP R_xlen_t altrep_lazy_character_Length(r_obj* vec) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { out = altrep_lazy_character_Materialize(vec); } return r_length(out); } // What gets printed when .Internal(inspect()) is used Rboolean altrep_lazy_character_Inspect(r_obj* x, int pre, int deep, int pvec, void (*inspect_subtree)(r_obj*, int, int, int)) { Rprintf("vctrs_altrep_lazy_character (materialized=%s)\n", R_altrep_data2(x) != r_null ? "T" : "F"); return TRUE; } // ----------------------------------------------------------------------------- // ALTSTRING r_obj* altrep_lazy_character_Elt(r_obj* vec, R_xlen_t i) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { out = altrep_lazy_character_Materialize(vec); } return STRING_ELT(out, i); } void altrep_lazy_character_Set_elt(r_obj* vec, R_xlen_t i, r_obj* value) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { out = altrep_lazy_character_Materialize(vec); } SET_STRING_ELT(out, i, value); } // ----------------------------------------------------------------------------- void vctrs_init_altrep_lazy_character(DllInfo* dll) { altrep_lazy_character_class = R_make_altstring_class("altrep_lazy_character", "vctrs", dll); // ALTVEC R_set_altvec_Dataptr_method(altrep_lazy_character_class, altrep_lazy_character_Dataptr); R_set_altvec_Dataptr_or_null_method(altrep_lazy_character_class, altrep_lazy_character_Dataptr_or_null); // ALTREP R_set_altrep_Length_method(altrep_lazy_character_class, altrep_lazy_character_Length); R_set_altrep_Inspect_method(altrep_lazy_character_class, altrep_lazy_character_Inspect); // ALTSTRING R_set_altstring_Elt_method(altrep_lazy_character_class, altrep_lazy_character_Elt); R_set_altstring_Set_elt_method(altrep_lazy_character_class, altrep_lazy_character_Set_elt); } #endif // R version >= 3.5.0 vctrs/src/poly-op.h0000644000176200001440000000133214315060310013742 0ustar liggesusers#ifndef VCTRS_POLY_OP #define VCTRS_POLY_OP #include "vctrs-core.h" struct poly_vec { r_obj* shelter; r_obj* vec; const void* p_vec; }; struct poly_vec* new_poly_vec(r_obj* proxy, enum vctrs_type type); struct poly_df_data { enum vctrs_type* v_col_type; const void** v_col_ptr; r_ssize n_col; }; typedef int (poly_binary_int_fn)(const void* x, r_ssize i, const void* y, r_ssize j); poly_binary_int_fn* poly_p_equal_na_equal(enum vctrs_type type); poly_binary_int_fn* poly_p_compare_na_equal(enum vctrs_type type); typedef bool (poly_unary_bool_fn)(const void* x, r_ssize i); poly_unary_bool_fn* poly_p_is_missing(enum vctrs_type type); poly_unary_bool_fn* poly_p_is_incomplete(enum vctrs_type type); #endif vctrs/src/slice.c0000644000176200001440000004505114373205357013463 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "altrep.h" #define SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ const CTYPE* data = CONST_DEREF(x); \ r_ssize n = r_length(subscript); \ int* subscript_data = r_int_begin(subscript); \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ for (r_ssize i = 0; i < n; ++i, ++subscript_data, ++out_data) { \ int j = *subscript_data; \ *out_data = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ } \ \ FREE(1); \ return out #define SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ const CTYPE* data = CONST_DEREF(x); \ \ int* subscript_data = r_int_begin(subscript); \ r_ssize j = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ CTYPE elt = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ \ for (r_ssize i = 0; i < n; ++i, ++out_data) { \ *out_data = elt; \ } \ \ FREE(1); \ return out #define SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF) \ int* subscript_data = r_int_begin(subscript); \ r_ssize start = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ r_ssize step = subscript_data[2]; \ \ const CTYPE* data = CONST_DEREF(x) + start; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ for (int i = 0; i < n; ++i, ++out_data, data += step) { \ *out_data = *data; \ } \ \ FREE(1); \ return out #define SLICE(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ if (!materialize && ALTREP(x)) { \ r_obj* alt_subscript = KEEP(compact_materialize(subscript)); \ r_obj* out = ALTVEC_EXTRACT_SUBSET_PROXY(x, alt_subscript, r_null); \ FREE(1); \ if (out != NULL) { \ return out; \ } \ } \ if (is_compact_rep(subscript)) { \ SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \ } else { \ SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } static r_obj* lgl_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_logical, int, r_lgl_begin, r_lgl_cbegin, r_globals.na_lgl); } static r_obj* int_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_integer, int, r_int_begin, r_int_cbegin, r_globals.na_int); } static r_obj* dbl_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_double, double, r_dbl_begin, r_dbl_cbegin, r_globals.na_dbl); } static r_obj* cpl_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_complex, r_complex, r_cpl_begin, r_cpl_cbegin, r_globals.na_cpl); } static r_obj* raw_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_raw, char, (char*) r_raw_begin, (char*) r_raw_cbegin, 0); } #define SLICE_BARRIER_SUBSCRIPT(RTYPE, CONST_DEREF, SET, NA_VALUE) \ r_obj* const * data = CONST_DEREF(x); \ \ r_ssize n = r_length(subscript); \ int* subscript_data = r_int_begin(subscript); \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ \ for (r_ssize i = 0; i < n; ++i, ++subscript_data) { \ int j = *subscript_data; \ r_obj* elt = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ SET(out, i, elt); \ } \ \ FREE(1); \ return out #define SLICE_BARRIER_COMPACT_REP(RTYPE, CONST_DEREF, SET, NA_VALUE) \ r_obj* const * data = CONST_DEREF(x); \ \ int* subscript_data = r_int_begin(subscript); \ r_ssize j = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ \ r_obj* elt = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ \ for (r_ssize i = 0; i < n; ++i) { \ SET(out, i, elt); \ } \ \ FREE(1); \ return out #define SLICE_BARRIER_COMPACT_SEQ(RTYPE, CONST_DEREF, SET) \ r_obj* const * data = CONST_DEREF(x); \ \ int* subscript_data = r_int_begin(subscript); \ r_ssize start = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ r_ssize step = subscript_data[2]; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ \ for (r_ssize i = 0; i < n; ++i, start += step) { \ SET(out, i, data[start]); \ } \ \ FREE(1); \ return out #define SLICE_BARRIER(RTYPE, CONST_DEREF, SET, NA_VALUE) \ if (is_compact_rep(subscript)) { \ SLICE_BARRIER_COMPACT_REP(RTYPE, CONST_DEREF, SET, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_BARRIER_COMPACT_SEQ(RTYPE, CONST_DEREF, SET); \ } else { \ SLICE_BARRIER_SUBSCRIPT(RTYPE, CONST_DEREF, SET, NA_VALUE); \ } static r_obj* chr_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke, r_globals.na_str); } static r_obj* chr_names_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke, r_strs.empty); } static r_obj* list_slice(r_obj* x, r_obj* subscript) { SLICE_BARRIER(R_TYPE_list, r_list_cbegin, r_list_poke, r_null); } static r_obj* df_slice(r_obj* x, r_obj* subscript) { r_ssize n = r_length(x); r_ssize size = df_size(x); r_obj* out = KEEP(r_alloc_list(n)); // FIXME: Should that be restored? r_obj* nms = r_names(x); r_attrib_poke(out, r_syms.names, nms); for (r_ssize i = 0; i < n; ++i) { r_obj* elt = r_list_get(x, i); if (vec_size(elt) != size) { r_stop_internal("Column `%s` (size %" R_PRI_SSIZE ") must match the data frame (size %" R_PRI_SSIZE ").", r_chr_get_c_string(nms, i), vec_size(elt), size); } r_obj* sliced = vec_slice_unsafe(elt, subscript); r_list_poke(out, i, sliced); } init_data_frame(out, vec_subscript_size(subscript)); r_obj* row_nms = KEEP(df_rownames(x)); if (r_typeof(row_nms) == R_TYPE_character) { row_nms = slice_rownames(row_nms, subscript); r_attrib_poke(out, r_syms.row_names, row_nms); } FREE(2); return out; } r_obj* vec_slice_fallback(r_obj* x, r_obj* subscript) { // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. if (is_integer64(x)) { return vctrs_dispatch2(syms.vec_slice_fallback_integer64, fns.vec_slice_fallback_integer64, syms_x, x, syms_i, subscript); } return vctrs_dispatch2(syms.vec_slice_fallback, fns.vec_slice_fallback, syms_x, x, syms_i, subscript); } static r_obj* vec_slice_dispatch(r_obj* x, r_obj* subscript) { // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. if (is_integer64(x)) { return vctrs_dispatch2(syms.vec_slice_dispatch_integer64, fns.vec_slice_dispatch_integer64, syms_x, x, syms_i, subscript); } return vctrs_dispatch2(syms_bracket, fns_bracket, syms_x, x, syms_i, subscript); } bool vec_requires_fallback(r_obj* x, struct vctrs_proxy_info info) { return r_is_object(x) && info.proxy_method == r_null && info.type != VCTRS_TYPE_dataframe; } r_obj* vec_slice_base(enum vctrs_type type, r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { switch (type) { case VCTRS_TYPE_logical: return lgl_slice(x, subscript, materialize); case VCTRS_TYPE_integer: return int_slice(x, subscript, materialize); case VCTRS_TYPE_double: return dbl_slice(x, subscript, materialize); case VCTRS_TYPE_complex: return cpl_slice(x, subscript, materialize); case VCTRS_TYPE_character: return chr_slice(x, subscript, materialize); case VCTRS_TYPE_raw: return raw_slice(x, subscript, materialize); case VCTRS_TYPE_list: return list_slice(x, subscript); default: stop_unimplemented_vctrs_type("vec_slice_base", type); } } r_obj* slice_names(r_obj* names, r_obj* subscript) { if (names == r_null) { return names; } else { // Ensures `NA_integer_` subscripts utilize `""` as the name return chr_names_slice(names, subscript, VCTRS_MATERIALIZE_false); } } r_obj* slice_rownames(r_obj* names, r_obj* subscript) { if (names == r_null) { return names; } names = KEEP(chr_slice(names, subscript, VCTRS_MATERIALIZE_false)); // Rownames can't contain `NA` or duplicates names = vec_as_unique_names(names, true); FREE(1); return names; } r_obj* vec_slice_unsafe(r_obj* x, r_obj* subscript) { int nprot = 0; struct vctrs_proxy_info info = vec_proxy_info(x); KEEP_N(info.shelter, &nprot); r_obj* data = info.proxy; // Fallback to `[` if the class doesn't implement a proxy. This is // to be maximally compatible with existing classes. if (vec_requires_fallback(x, info)) { if (info.type == VCTRS_TYPE_scalar) { obj_check_vector(x, NULL, r_lazy_null); } if (is_compact(subscript)) { subscript = KEEP_N(compact_materialize(subscript), &nprot); } r_obj* out; if (has_dim(x)) { out = KEEP_N(vec_slice_fallback(x, subscript), &nprot); } else { out = KEEP_N(vec_slice_dispatch(x, subscript), &nprot); } // Take over attribute restoration only if there is no `[` method if (!vec_is_restored(out, x)) { out = vec_restore(out, x, vec_owned(out)); } FREE(nprot); return out; } switch (info.type) { case VCTRS_TYPE_null: r_stop_internal("Unexpected `NULL`."); case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: case VCTRS_TYPE_list: { r_obj* out; if (has_dim(x)) { out = KEEP_N(vec_slice_shaped(info.type, data, subscript), &nprot); r_obj* names = KEEP_N(r_attrib_get(x, r_syms.dim_names), &nprot); if (names != r_null) { names = KEEP_N(r_clone(names), &nprot); r_obj* row_names = r_list_get(names, 0); row_names = KEEP_N(slice_names(row_names, subscript), &nprot); r_list_poke(names, 0, row_names); r_attrib_poke(out, r_syms.dim_names, names); } } else { out = KEEP_N(vec_slice_base(info.type, data, subscript, VCTRS_MATERIALIZE_false), &nprot); r_obj* names = KEEP_N(r_names(x), &nprot); names = KEEP_N(slice_names(names, subscript), &nprot); r_attrib_poke_names(out, names); } out = vec_restore(out, x, vec_owned(out)); FREE(nprot); return out; } case VCTRS_TYPE_dataframe: { r_obj* out = KEEP_N(df_slice(data, subscript), &nprot); out = vec_restore(out, x, vec_owned(out)); FREE(nprot); return out; } default: stop_unimplemented_vctrs_type("vec_slice_impl", info.type); } } bool vec_is_restored(r_obj* x, r_obj* to) { // Don't restore if there is an actual `[` method that ignored // attributes. Some methods like [.ts intentionally strip the class // and attributes. FIXME: This branch is now probably sufficient. if (s3_find_method("[", to, base_method_table) != r_null) { return true; } r_obj* attrib = r_attrib(x); if (attrib == r_null) { return false; } // Class is restored if it contains any other attributes than names. // We might want to add support for data frames later on. r_obj* node = attrib; while (node != r_null) { if (r_node_tag(node) == r_syms.names) { node = r_node_cdr(node); continue; } return true; } return false; } r_obj* ffi_slice(r_obj* x, r_obj* i, r_obj* frame) { struct vec_slice_opts opts = { .x_arg = vec_args.x, .i_arg = vec_args.i, .call = {.x = r_syms.error_call, .env = frame} }; return vec_slice_opts(x, i, &opts); } r_obj* vec_slice_opts(r_obj* x, r_obj* i, const struct vec_slice_opts* opts) { obj_check_vector(x, opts->x_arg, opts->call); r_obj* names = KEEP(vec_names(x)); i = KEEP(vec_as_location_ctxt(i, vec_size(x), names, opts->i_arg, opts->call)); r_obj* out = vec_slice_unsafe(x, i); FREE(2); return out; } r_obj* vec_init(r_obj* x, r_ssize n) { obj_check_vector(x, vec_args.x, lazy_calls.vec_init); if (n < 0) { r_abort_lazy_call(lazy_calls.vec_init, "%s must be a positive integer.", r_c_str_format_error_arg("n")); } r_obj* i = KEEP(compact_rep(r_globals.na_int, n)); r_obj* out = vec_slice_unsafe(x, i); FREE(1); return out; } // [[ register() ]] r_obj* ffi_init(r_obj* x, r_obj* ffi_n, r_obj* ffi_frame) { struct r_lazy call = { .x = ffi_frame, .env = r_null }; r_ssize n = vec_as_short_length(ffi_n, vec_args.n, call); r_obj* out = vec_init(x, n); return out; } // Exported for testing // [[ register() ]] r_obj* ffi_slice_seq(r_obj* x, r_obj* ffi_start, r_obj* ffi_size, r_obj* ffi_increasing) { r_ssize start = r_int_get(ffi_start, 0); r_ssize size = r_int_get(ffi_size, 0); bool increasing = r_lgl_get(ffi_increasing, 0); r_obj* subscript = KEEP(compact_seq(start, size, increasing)); r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } // Exported for testing // [[ register() ]] r_obj* ffi_slice_rep(r_obj* x, r_obj* ffi_i, r_obj* ffi_n) { r_ssize i = r_int_get(ffi_i, 0); r_ssize n = r_int_get(ffi_n, 0); r_obj* subscript = KEEP(compact_rep(i, n)); r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } void vctrs_init_slice(r_obj* ns) { syms.vec_slice_dispatch_integer64 = r_sym("vec_slice_dispatch_integer64"); syms.vec_slice_fallback = r_sym("vec_slice_fallback"); syms.vec_slice_fallback_integer64 = r_sym("vec_slice_fallback_integer64"); fns.vec_slice_dispatch_integer64 = r_eval(syms.vec_slice_dispatch_integer64, ns); fns.vec_slice_fallback = r_eval(syms.vec_slice_fallback, ns); fns.vec_slice_fallback_integer64 = r_eval(syms.vec_slice_fallback_integer64, ns); } vctrs/src/proxy.h0000644000176200001440000000055714315060310013534 0ustar liggesusers#ifndef VCTRS_PROXY_H #define VCTRS_PROXY_H #include "vctrs-core.h" r_obj* vec_proxy(r_obj* x); r_obj* vec_proxy_equal(r_obj* x); r_obj* vec_proxy_compare(r_obj* x); r_obj* vec_proxy_order(r_obj* x); r_obj* vec_proxy_recurse(r_obj* x); r_obj* vec_proxy_method(r_obj* x); r_obj* vec_proxy_invoke(r_obj* x, r_obj* method); r_obj* vec_proxy_unwrap(r_obj* x); #endif vctrs/src/fields.c0000644000176200001440000000702014373202700013611 0ustar liggesusers#include "vctrs.h" // SEXP x and y must be CHARSXP // x_utf* is pointer to const char* which is lazily initialised: // This makes this function also suitable for use when repeated // comparing varying y to constant x bool equal_string(SEXP x, const char** x_utf8, SEXP y) { // Try fast pointer comparison if (x == y) return true; if (*x_utf8 == NULL) *x_utf8 = Rf_translateCharUTF8(x); // Try slower conversion to common encoding const char* y_utf = Rf_translateCharUTF8(y); return (strcmp(y_utf, *x_utf8) == 0); } int find_offset(SEXP x, SEXP index) { if (Rf_length(index) != 1) { Rf_errorcall(R_NilValue, "Invalid index: must have length 1"); } int n = Rf_length(x); if (TYPEOF(index) == INTSXP) { int val = INTEGER(index)[0]; if (val == NA_INTEGER) Rf_errorcall(R_NilValue, "Invalid index: NA_integer_"); val--; if (val < 0 || val >= n) Rf_errorcall(R_NilValue, "Invalid index: out of bounds"); return val; } else if (TYPEOF(index) == REALSXP) { double val = REAL(index)[0]; if (R_IsNA(val)) Rf_errorcall(R_NilValue, "Invalid index: NA_real_"); val--; if (val < 0 || val >= n) Rf_errorcall(R_NilValue, "Invalid index: out of bounds"); if (val > R_LEN_T_MAX) { Rf_errorcall(R_NilValue, "Invalid index: too large"); } return (int) val; } else if (TYPEOF(index) == STRSXP) { SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); if (names == R_NilValue) Rf_errorcall(R_NilValue, "Corrupt x: no names"); SEXP val_0 = STRING_ELT(index, 0); if (val_0 == NA_STRING) Rf_errorcall(R_NilValue, "Invalid index: NA_character_"); const char* val_0_chr = Rf_translateCharUTF8(val_0); if (val_0_chr[0] == '\0') Rf_errorcall(R_NilValue, "Invalid index: empty string"); for (int j = 0; j < Rf_length(names); ++j) { SEXP name_j = STRING_ELT(names, j); if (name_j == NA_STRING) Rf_errorcall(R_NilValue, "Corrupt x: element %i is unnamed", j + 1); if (equal_string(val_0, &val_0_chr, name_j)) { UNPROTECT(1); return j; } } Rf_errorcall(R_NilValue, "Invalid index: field name '%s' not found", val_0_chr); } else { Rf_errorcall(R_NilValue, "Invalid index: must be a character or numeric vector"); } } // Lists ------------------------------------------------------------------- SEXP vctrs_list_get(SEXP x, SEXP index) { int idx = find_offset(x, index); return VECTOR_ELT(x, idx); } SEXP vctrs_list_set(SEXP x, SEXP index, SEXP value) { int idx = find_offset(x, index); SEXP out = PROTECT(Rf_shallow_duplicate(x)); SET_VECTOR_ELT(out, idx, value); UNPROTECT(1); return out; } // Records ------------------------------------------------------------------ void check_rcrd(SEXP x) { if (!Rf_isVectorList(x)) Rf_errorcall(R_NilValue, "Corrupt rcrd: not a list"); if (Rf_length(x) == 0) Rf_errorcall(R_NilValue, "Corrupt rcrd: length 0"); } SEXP vctrs_fields(SEXP x) { check_rcrd(x); return Rf_getAttrib(x, R_NamesSymbol); } SEXP vctrs_n_fields(SEXP x) { check_rcrd(x); return Rf_ScalarInteger(Rf_length(x)); } SEXP vctrs_field_get(SEXP x, SEXP index) { check_rcrd(x); return vctrs_list_get(x, index); } SEXP vctrs_field_set(SEXP x, SEXP index, SEXP value) { check_rcrd(x); if (!obj_is_vector(value)) { Rf_errorcall(R_NilValue, "Invalid value: not a vector."); } if (vec_size(value) != vec_size(x)) { Rf_errorcall(R_NilValue, "Invalid value: incorrect length."); } return vctrs_list_set(x, index, value); } vctrs/src/cast.h0000644000176200001440000000540314362266120013311 0ustar liggesusers#ifndef VCTRS_CAST_H #define VCTRS_CAST_H #include "vctrs-core.h" #include "ptype2.h" struct cast_opts { r_obj* x; r_obj* to; struct vctrs_arg* p_x_arg; struct vctrs_arg* p_to_arg; struct r_lazy call; struct fallback_opts fallback; }; // FIXME: Should we merge these two structs? static inline struct ptype2_opts cast_opts_as_ptype2_opts(const struct cast_opts* p_opts) { return (struct ptype2_opts) { .x = p_opts->x, .y = p_opts->to, .p_x_arg = p_opts->p_x_arg, .p_y_arg = p_opts->p_to_arg, .call = p_opts->call, .fallback = p_opts->fallback, }; } struct cast_common_opts { struct vctrs_arg* p_arg; struct r_lazy call; struct fallback_opts fallback; }; r_obj* vec_cast_opts(const struct cast_opts* opts); static inline r_obj* vec_cast(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call) { struct cast_opts opts = { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg, .call = call }; return vec_cast_opts(&opts); } static inline r_obj* vec_cast_params(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, enum s3_fallback s3_fallback) { const struct cast_opts opts = { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg, .call = call, .fallback = { .s3 = s3_fallback } }; return vec_cast_opts(&opts); } r_obj* vec_cast_common(r_obj* xs, r_obj* to, struct vctrs_arg* p_arg, struct r_lazy call); r_obj* vec_cast_common_opts(r_obj* xs, r_obj* to, const struct cast_common_opts* opts); r_obj* vec_cast_common_params(r_obj* xs, r_obj* to, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call); struct cast_opts new_cast_opts(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, r_obj* opts); r_obj* vec_cast_e(const struct cast_opts* opts, ERR* err); r_obj* vec_cast_default(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, const struct fallback_opts* p_opts); #endif vctrs/src/shape.h0000644000176200001440000000050014315060310013437 0ustar liggesusers#ifndef VCTRS_SHAPE_H #define VCTRS_SHAPE_H #include "vctrs-core.h" #include "cast.h" SEXP vec_shaped_ptype(SEXP ptype, SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); r_obj* vec_shape_broadcast(r_obj* out, const struct cast_opts* p_opts); #endif vctrs/src/order-truelength.c0000644000176200001440000001525414315060310015640 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" /* * See the notes in the character ordering section at the top of `order.c` * for more details on how TRUELENGTH is used to detect unique strings. * * The helpers here are somewhat equivalent to the following from R's `order()` * https://github.com/wch/r-source/blob/91b4507bf6040c0167fc5b6037c202c8cbd98afd/src/main/radixsort.c#L66-L123 */ // ----------------------------------------------------------------------------- /* * Construct a new `truelength_info` * * Pair with `PROTECT_TRUELENGTH_INFO()` in the caller */ struct truelength_info* new_truelength_info(r_ssize n_max) { SEXP self = PROTECT(r_new_raw(sizeof(struct truelength_info))); struct truelength_info* p_truelength_info = (struct truelength_info*) RAW(self); p_truelength_info->self = self; p_truelength_info->strings = r_globals.empty_chr; p_truelength_info->truelengths = r_globals.empty_raw; p_truelength_info->n_strings_alloc = 0; p_truelength_info->n_strings_used = 0; p_truelength_info->uniques = r_globals.empty_chr; p_truelength_info->n_uniques_alloc = 0; p_truelength_info->n_uniques_used = 0; p_truelength_info->sizes = r_globals.empty_int; p_truelength_info->sizes_aux = r_globals.empty_int; p_truelength_info->n_sizes_alloc = 0; p_truelength_info->n_sizes_used = 0; p_truelength_info->max_string_size = 0; p_truelength_info->n_max = n_max; UNPROTECT(1); return p_truelength_info; } // ----------------------------------------------------------------------------- /* * First, reset the TRUELENGTHs of all unique CHARSXPs in `uniques` to 0, which * is the default used by R. * * Then, reset the TRUELENGTHs of all CHARSXPs in `strings` to their original * value contained in `truelengths`. There should be very few of these, * if any, as R doesn't typically use the TRUELENGTH slot of CHARSXPs. One * exception seems to be very simple strings, such as `"a"`, which R probably * adds to the cache at startup, and sets their TRUELENGTH value for some * reason. * * It is important to reset `uniques` first, then `strings`, as the CHARSXPs * in `strings` are, by definition, also in `uniques`. * * This will be called after each character data frame column is processed, and * at the end of `chr_order()` for a single character vector. */ void truelength_reset(struct truelength_info* p_truelength_info) { r_ssize n_uniques_used = p_truelength_info->n_uniques_used; r_ssize n_strings_used = p_truelength_info->n_strings_used; // First reset uniques for (r_ssize i = 0; i < n_uniques_used; ++i) { SEXP unique = p_truelength_info->p_uniques[i]; SET_TRUELENGTH(unique, 0); } // Then reset strings for (r_ssize i = 0; i < n_strings_used; ++i) { SEXP string = p_truelength_info->p_strings[i]; r_ssize truelength = p_truelength_info->p_truelengths[i]; SET_TRUELENGTH(string, truelength); } // Also reset vector specific details p_truelength_info->n_uniques_used = 0; p_truelength_info->n_strings_used = 0; p_truelength_info->n_sizes_used = 0; p_truelength_info->max_string_size = 0; } // ----------------------------------------------------------------------------- static r_ssize truelength_realloc_size(r_ssize n_x, r_ssize n_max); static inline SEXP truelengths_resize(SEXP x, r_ssize x_size, r_ssize size); void truelength_realloc_strings(struct truelength_info* p_truelength_info) { r_ssize size = truelength_realloc_size( p_truelength_info->n_strings_alloc, p_truelength_info->n_max ); p_truelength_info->strings = chr_resize( p_truelength_info->strings, p_truelength_info->n_strings_alloc, size ); REPROTECT(p_truelength_info->strings, p_truelength_info->strings_pi); p_truelength_info->p_strings = STRING_PTR(p_truelength_info->strings); p_truelength_info->truelengths = truelengths_resize( p_truelength_info->truelengths, p_truelength_info->n_strings_alloc, size ); REPROTECT(p_truelength_info->truelengths, p_truelength_info->truelengths_pi); p_truelength_info->p_truelengths = (r_ssize*) RAW(p_truelength_info->truelengths); p_truelength_info->n_strings_alloc = size; } static inline SEXP truelengths_resize(SEXP x, r_ssize x_size, r_ssize size) { return raw_resize( x, x_size * sizeof(r_ssize), size * sizeof(r_ssize) ); } // ----------------------------------------------------------------------------- void truelength_realloc_uniques(struct truelength_info* p_truelength_info) { r_ssize size = truelength_realloc_size( p_truelength_info->n_uniques_alloc, p_truelength_info->n_max ); p_truelength_info->uniques = chr_resize( p_truelength_info->uniques, p_truelength_info->n_uniques_alloc, size ); REPROTECT(p_truelength_info->uniques, p_truelength_info->uniques_pi); p_truelength_info->p_uniques = STRING_PTR(p_truelength_info->uniques); p_truelength_info->n_uniques_alloc = size; } // ----------------------------------------------------------------------------- void truelength_realloc_sizes(struct truelength_info* p_truelength_info) { r_ssize size = truelength_realloc_size( p_truelength_info->n_sizes_alloc, p_truelength_info->n_max ); p_truelength_info->sizes = int_resize( p_truelength_info->sizes, p_truelength_info->n_sizes_alloc, size ); REPROTECT(p_truelength_info->sizes, p_truelength_info->sizes_pi); p_truelength_info->p_sizes = INTEGER(p_truelength_info->sizes); p_truelength_info->sizes_aux = int_resize( p_truelength_info->sizes_aux, p_truelength_info->n_sizes_alloc, size ); REPROTECT(p_truelength_info->sizes_aux, p_truelength_info->sizes_aux_pi); p_truelength_info->p_sizes_aux = INTEGER(p_truelength_info->sizes_aux); p_truelength_info->n_sizes_alloc = size; } // ----------------------------------------------------------------------------- static r_ssize truelength_realloc_size(r_ssize n_x, r_ssize n_max) { // First allocation if (n_x == 0) { if (TRUELENGTH_SIZE_ALLOC_DEFAULT < n_max) { return TRUELENGTH_SIZE_ALLOC_DEFAULT; } else { return n_max; } } // Avoid potential overflow when doubling size uint64_t n_new = ((uint64_t) n_x) * 2; // Clamp maximum allocation size to the size of the input if (n_new > n_max) { return n_max; } // Can now safely cast back to `r_ssize` return (r_ssize) n_new; } vctrs/src/growable.c0000644000176200001440000000051114315060310014136 0ustar liggesusers#include "vctrs.h" struct growable new_growable(SEXPTYPE type, int capacity) { struct growable g; g.x = Rf_allocVector(type, capacity); g.type = type; g.array = r_vec_unwrap(type, g.x); g.n = 0; g.capacity = capacity; return g; } SEXP growable_values(struct growable* g) { return Rf_lengthgets(g->x, g->n); } vctrs/src/dim.c0000644000176200001440000000037014315060310013110 0ustar liggesusers#include "vctrs.h" // [[ register() ]] SEXP vctrs_dim(SEXP x) { return vec_dim(x); } // [[ register() ]] SEXP vctrs_dim_n(SEXP x) { return r_int(vec_dim_n(x)); } // [[ register() ]] SEXP vctrs_has_dim(SEXP x) { return r_lgl(has_dim(x)); } vctrs/src/slice-assign-array.c0000644000176200001440000002622614315060310016044 0ustar liggesusers#include "vctrs.h" #define ASSIGN_SHAPED_INDEX(CTYPE, DEREF, CONST_DEREF) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ CTYPE* p_out = DEREF(out); \ \ const CTYPE* p_value = CONST_DEREF(value); \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++k) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ continue; \ } \ \ loc += step; \ \ p_out[loc] = p_value[k]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out #define ASSIGN_SHAPED_COMPACT(CTYPE, DEREF, CONST_DEREF) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ CTYPE* p_out = DEREF(out); \ \ const R_len_t start = p_info->p_index[0]; \ const R_len_t n = p_info->p_index[1]; \ const R_len_t step = p_info->p_index[2]; \ \ const CTYPE* p_value = CONST_DEREF(value); \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++k, loc += step) { \ p_out[loc] = p_value[k]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out // ----------------------------------------------------------------------------- #define ASSIGN_SHAPED(CTYPE, DEREF, CONST_DEREF) \ if (is_compact_seq(index)) { \ ASSIGN_SHAPED_COMPACT(CTYPE, DEREF, CONST_DEREF); \ } else { \ ASSIGN_SHAPED_INDEX(CTYPE, DEREF, CONST_DEREF); \ } static inline SEXP lgl_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(int, LOGICAL, LOGICAL_RO); } static inline SEXP int_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(int, INTEGER, INTEGER_RO); } static inline SEXP dbl_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(double, REAL, REAL_RO); } static inline SEXP cpl_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(Rcomplex, COMPLEX, COMPLEX_RO); } static inline SEXP chr_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(SEXP, STRING_PTR, STRING_PTR_RO); } static inline SEXP raw_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_SHAPED(Rbyte, RAW, RAW_RO); } #undef ASSIGN_SHAPED #undef ASSIGN_SHAPED_COMPACT #undef ASSIGN_SHAPED_INDEX // ----------------------------------------------------------------------------- #define ASSIGN_BARRIER_SHAPED_INDEX(GET, SET) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++k) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ continue; \ } \ \ loc += step; \ \ SEXP elt = GET(value, k); \ SET(out, loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out #define ASSIGN_BARRIER_SHAPED_COMPACT(GET, SET) \ SEXP out = PROTECT(vec_clone_referenced(proxy, owned)); \ \ const R_len_t start = p_info->p_index[0]; \ const R_len_t n = p_info->p_index[1]; \ const R_len_t step = p_info->p_index[2]; \ \ R_len_t k = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++k, loc += step) { \ SEXP elt = GET(value, k); \ SET(out, loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(1); \ return out // ----------------------------------------------------------------------------- #define ASSIGN_BARRIER_SHAPED(GET, SET) \ if (is_compact_seq(index)) { \ ASSIGN_BARRIER_SHAPED_COMPACT(GET, SET); \ } else { \ ASSIGN_BARRIER_SHAPED_INDEX(GET, SET); \ } static SEXP list_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { ASSIGN_BARRIER_SHAPED(VECTOR_ELT, SET_VECTOR_ELT); } #undef ASSIGN_BARRIER_SHAPED #undef ASSIGN_BARRIER_SHAPED_COMPACT #undef ASSIGN_BARRIER_SHAPED_INDEX // ----------------------------------------------------------------------------- static inline SEXP vec_assign_shaped_switch(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, struct strides_info* p_info) { switch (vec_proxy_typeof(proxy)) { case VCTRS_TYPE_logical: return lgl_assign_shaped(proxy, index, value, owned, p_info); case VCTRS_TYPE_integer: return int_assign_shaped(proxy, index, value, owned, p_info); case VCTRS_TYPE_double: return dbl_assign_shaped(proxy, index, value, owned, p_info); case VCTRS_TYPE_complex: return cpl_assign_shaped(proxy, index, value, owned, p_info); case VCTRS_TYPE_character: return chr_assign_shaped(proxy, index, value, owned, p_info); case VCTRS_TYPE_raw: return raw_assign_shaped(proxy, index, value, owned, p_info); case VCTRS_TYPE_list: return list_assign_shaped(proxy, index, value, owned, p_info); default: stop_unimplemented_vctrs_type("vec_assign_shaped_switch", vec_proxy_typeof(proxy)); } } // ----------------------------------------------------------------------------- // [[ include("vctrs.h") ]] SEXP vec_assign_shaped(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { int n_protect = 0; struct strides_info info = new_strides_info(proxy, index); PROTECT_STRIDES_INFO(&info, &n_protect); SEXP out = vec_assign_shaped_switch(proxy, index, value, owned, &info); UNPROTECT(n_protect); return out; } vctrs/src/ptype2-dispatch.c0000644000176200001440000001701014362266120015367 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "type-factor.h" #include "type-tibble.h" #include "decl/ptype2-dispatch-decl.h" r_obj* vec_ptype2_dispatch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left) { r_obj* x = opts->x; r_obj* y = opts->y; enum vctrs_type2_s3 type2_s3 = vec_typeof2_s3_impl(x, y, x_type, y_type, left); switch (type2_s3) { case VCTRS_TYPE2_S3_character_bare_factor: case VCTRS_TYPE2_S3_character_bare_ordered: return r_globals.empty_chr; case VCTRS_TYPE2_S3_bare_factor_bare_factor: return fct_ptype2(opts); case VCTRS_TYPE2_S3_bare_ordered_bare_ordered: return ord_ptype2(opts); case VCTRS_TYPE2_S3_bare_date_bare_date: return vctrs_shared_empty_date; case VCTRS_TYPE2_S3_bare_date_bare_posixct: case VCTRS_TYPE2_S3_bare_date_bare_posixlt: return date_datetime_ptype2(x, y); case VCTRS_TYPE2_S3_bare_posixct_bare_posixct: case VCTRS_TYPE2_S3_bare_posixct_bare_posixlt: case VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt: return datetime_datetime_ptype2(x, y); case VCTRS_TYPE2_S3_dataframe_bare_tibble: case VCTRS_TYPE2_S3_bare_tibble_bare_tibble: return tib_ptype2(opts); default: return r_null; } } // @param from_dispatch Used to implement special behaviour when // `vec_default_ptype2()` is invoked directly from the dispatch // mechanism as opposed from a method. static inline r_obj* vec_ptype2_default_full(r_obj* x, r_obj* y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call, const struct fallback_opts* opts, bool from_dispatch) { r_obj* ffi_s3_fallback = KEEP(r_int(opts->s3)); r_obj* ffi_x_arg = KEEP(vctrs_arg(x_arg)); r_obj* ffi_y_arg = KEEP(vctrs_arg(y_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(call)); r_obj* out = vctrs_eval_mask7(syms_vec_ptype2_default, syms_x, x, syms_y, y, syms_x_arg, ffi_x_arg, syms_y_arg, ffi_y_arg, syms_call, ffi_call, syms_from_dispatch, r_lgl(from_dispatch), syms_s3_fallback, ffi_s3_fallback); FREE(4); return out; } r_obj* vec_ptype2_default(r_obj* x, r_obj* y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call, const struct fallback_opts* p_opts) { return vec_ptype2_default_full(x, y, x_arg, y_arg, call, p_opts, false); } r_obj* vec_ptype2_dispatch_s3(const struct ptype2_opts* opts) { r_obj* x = KEEP(vec_ptype(opts->x, opts->p_x_arg, opts->call)); r_obj* y = KEEP(vec_ptype(opts->y, opts->p_y_arg, opts->call)); r_obj* method_sym = r_null; r_obj* method = s3_find_method_xy("vec_ptype2", x, y, vctrs_method_table, &method_sym); // Compatibility with legacy double dispatch mechanism if (method == r_null) { r_obj* x_method_sym = r_null; r_obj* x_method = KEEP(s3_find_method2("vec_ptype2", x, vctrs_method_table, &x_method_sym)); if (x_method != r_null) { const char* x_method_str = r_sym_c_string(x_method_sym); r_obj* x_table = s3_get_table(r_fn_env(x_method)); method = s3_find_method2(x_method_str, y, x_table, &method_sym); } FREE(1); } KEEP(method); if (method == r_null) { r_obj* out = vec_ptype2_default_full(x, y, opts->p_x_arg, opts->p_y_arg, opts->call, &(opts->fallback), true); FREE(3); return out; } r_obj* ffi_x_arg = KEEP(vctrs_arg(opts->p_x_arg)); r_obj* ffi_y_arg = KEEP(vctrs_arg(opts->p_y_arg)); r_obj* out = vec_invoke_coerce_method(method_sym, method, syms_x, x, syms_y, y, syms_x_arg, ffi_x_arg, syms_y_arg, ffi_y_arg, opts->call, &(opts->fallback)); FREE(5); return out; } r_obj* vec_invoke_coerce_method(r_obj* method_sym, r_obj* method, r_obj* x_sym, r_obj* x, r_obj* y_sym, r_obj* y, r_obj* x_arg_sym, r_obj* x_arg, r_obj* y_arg_sym, r_obj* y_arg, struct r_lazy lazy_call, const struct fallback_opts* opts) { r_obj* call = KEEP(r_lazy_eval(lazy_call)); if (opts->s3 != S3_FALLBACK_DEFAULT) { r_obj* ffi_s3_fallback = KEEP(r_int(opts->s3)); r_obj* out = vctrs_dispatch6(method_sym, method, x_sym, x, y_sym, y, x_arg_sym, x_arg, y_arg_sym, y_arg, syms_call, call, syms_s3_fallback, ffi_s3_fallback); FREE(2); return out; } else { r_obj* out = vctrs_dispatch5(method_sym, method, x_sym, x, y_sym, y, x_arg_sym, x_arg, y_arg_sym, y_arg, syms_call, call); FREE(1); return out; } } // [[ register() ]] r_obj* ffi_ptype2_dispatch_native(r_obj* x, r_obj* y, r_obj* fallback_opts, r_obj* frame) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; const struct ptype2_opts opts = new_ptype2_opts(x, y, &x_arg, &y_arg, call, fallback_opts); int _left; r_obj* out = vec_ptype2_dispatch_native(&opts, vec_typeof(x), vec_typeof(y), &_left); if (out == r_null) { out = vec_ptype2_default_full(x, y, &x_arg, &y_arg, opts.call, &opts.fallback, true); return out; } else { return out; } } void vctrs_init_ptype2_dispatch(r_obj* ns) { syms_vec_ptype2_default = r_sym("vec_default_ptype2"); } static r_obj* syms_vec_ptype2_default = NULL; vctrs/src/set.h0000644000176200001440000000217114362266120013151 0ustar liggesusers#ifndef VCTRS_SET_H #define VCTRS_SET_H #include "vctrs-core.h" r_obj* vec_set_intersect(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); r_obj* vec_set_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); r_obj* vec_set_union(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); r_obj* vec_set_symmetric_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); #endif vctrs/src/hash.c0000644000176200001440000003465614362266120013311 0ustar liggesusers#include "vctrs.h" // boost::hash_combine from https://stackoverflow.com/questions/35985960 static inline uint32_t hash_combine(uint32_t x, uint32_t y) { return x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); } // 32-bit mixer from murmurhash // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L68 static inline uint32_t hash_uint32(uint32_t x) { x ^= x >> 16; x *= 0x85ebca6b; x ^= x >> 13; x *= 0xc2b2ae35; x ^= x >> 16; return x; } // 64-bit mixer from murmurhash // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L81 static inline uint32_t hash_uint64(uint64_t x) { x ^= x >> 33; x *= UINT64_C(0xff51afd7ed558ccd); x ^= x >> 33; x *= UINT64_C(0xc4ceb9fe1a85ec53); x ^= x >> 33; return x; } // Seems like something designed specificaly for doubles should work better // but I haven't been able to find anything static inline uint32_t hash_double(double x) { // Treat positive/negative 0 as equivalent if (x == 0.0) { x = 0.0; } union { double d; uint64_t i; } value; value.d = x; return hash_uint64(value.i); } static inline uint32_t hash_char(SEXP x) { return hash_uint64((uintptr_t) x); } // Hashing scalars ----------------------------------------------------- static inline uint32_t lgl_hash_scalar(const int* x); static inline uint32_t int_hash_scalar(const int* x); static inline uint32_t dbl_hash_scalar(const double* x); static inline uint32_t cpl_hash_scalar(const Rcomplex* x); static inline uint32_t chr_hash_scalar(const SEXP* x); static inline uint32_t raw_hash_scalar(const Rbyte* x); static inline uint32_t lgl_hash_scalar(const int* x) { return hash_uint32(*x); } static inline uint32_t int_hash_scalar(const int* x) { return hash_uint32(*x); } static inline uint32_t dbl_hash_scalar(const double* x) { double val = *x; // Hash all NAs and NaNs to same value (i.e. ignoring significand) switch (dbl_classify(val)) { case VCTRS_DBL_number: break; case VCTRS_DBL_missing: val = NA_REAL; break; case VCTRS_DBL_nan: val = R_NaN; break; } return hash_double(val); } static inline uint32_t cpl_hash_scalar(const Rcomplex* x) { uint32_t hash = 0; hash = hash_combine(hash, dbl_hash_scalar(&x->r)); hash = hash_combine(hash, dbl_hash_scalar(&x->i)); return hash; } static inline uint32_t chr_hash_scalar(const SEXP* x) { return hash_char(*x); } static inline uint32_t raw_hash_scalar(const Rbyte* x) { return hash_uint32(*x); } static inline uint32_t list_hash_scalar_na_equal(SEXP x, R_len_t i) { return hash_object(VECTOR_ELT(x, i)); } static inline uint32_t list_hash_scalar_na_propagate(SEXP x, R_len_t i) { SEXP elt = VECTOR_ELT(x, i); if (elt == R_NilValue) { return HASH_MISSING; } else { return hash_object(elt); } } // Hashing objects ----------------------------------------------------- static uint32_t lgl_hash(SEXP x); static uint32_t int_hash(SEXP x); static uint32_t dbl_hash(SEXP x); static uint32_t chr_hash(SEXP x); static uint32_t list_hash(SEXP x); static uint32_t node_hash(SEXP x); static uint32_t fn_hash(SEXP x); static uint32_t sexp_hash(SEXP x); uint32_t hash_object(SEXP x) { uint32_t hash = sexp_hash(x); SEXP attrib = ATTRIB(x); if (attrib != R_NilValue) { hash = hash_combine(hash, hash_object(attrib)); } return hash; } // [[ register() ]] SEXP vctrs_hash_object(SEXP x) { SEXP out = PROTECT(Rf_allocVector(RAWSXP, sizeof(uint32_t))); uint32_t hash = 0; hash = hash_combine(hash, hash_object(x)); memcpy(RAW(out), &hash, sizeof(uint32_t)); UNPROTECT(1); return out; } static uint32_t sexp_hash(SEXP x) { switch (TYPEOF(x)) { case NILSXP: return 0; case LGLSXP: return lgl_hash(x); case INTSXP: return int_hash(x); case REALSXP: return dbl_hash(x); case STRSXP: return chr_hash(x); case EXPRSXP: case VECSXP: return list_hash(x); case DOTSXP: case LANGSXP: case LISTSXP: case BCODESXP: return node_hash(x); case CLOSXP: return fn_hash(x); case SYMSXP: case SPECIALSXP: case BUILTINSXP: case ENVSXP: case EXTPTRSXP: return hash_uint64((uintptr_t) x); default: Rf_errorcall(R_NilValue, "Unsupported type %s", Rf_type2char(TYPEOF(x))); } } #define HASH(CTYPE, CONST_DEREF, HASHER) \ uint32_t hash = 0; \ R_len_t n = Rf_length(x); \ const CTYPE* p = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < n; ++i, ++p) { \ hash = hash_combine(hash, HASHER(p)); \ } \ \ return hash static uint32_t lgl_hash(SEXP x) { HASH(int, LOGICAL_RO, lgl_hash_scalar); } static uint32_t int_hash(SEXP x) { HASH(int, INTEGER_RO, int_hash_scalar); } static uint32_t dbl_hash(SEXP x) { HASH(double, REAL_RO, dbl_hash_scalar); } static uint32_t chr_hash(SEXP x) { HASH(SEXP, STRING_PTR_RO, chr_hash_scalar); } #undef HASH #define HASH_BARRIER(GET, HASHER) \ uint32_t hash = 0; \ R_len_t n = Rf_length(x); \ \ for (R_len_t i = 0; i < n; ++i) { \ hash = hash_combine(hash, HASHER(GET(x, i))); \ } \ \ return hash static uint32_t list_hash(SEXP x) { HASH_BARRIER(VECTOR_ELT, hash_object); } #undef HASH_BARRIER static uint32_t node_hash(SEXP x) { uint32_t hash = 0; hash = hash_combine(hash, hash_object(CAR(x))); hash = hash_combine(hash, hash_object(CDR(x))); return hash; } static uint32_t fn_hash(SEXP x) { uint32_t hash = 0; hash = hash_combine(hash, hash_object(BODY(x))); hash = hash_combine(hash, hash_object(CLOENV(x))); hash = hash_combine(hash, hash_object(FORMALS(x))); return hash; } // Fill hash array ----------------------------------------------------- static inline void lgl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void lgl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void int_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void int_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void dbl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void dbl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void cpl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void cpl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void chr_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void chr_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void raw_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void raw_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void list_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x); static inline void list_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x); static inline void df_hash_fill(uint32_t* p, R_len_t size, SEXP x, bool na_equal); // Not compatible with hash_scalar. When `@na_equal` is false, missing // values are propagated and encoded as `1`. // // [[ include("vctrs.h") ]] void hash_fill(uint32_t* p, R_len_t size, SEXP x, bool na_equal) { if (has_dim(x)) { // The conversion to data frame is only a stopgap, in the long // term, we'll hash arrays natively x = PROTECT(r_as_data_frame(x)); hash_fill(p, size, x, na_equal); UNPROTECT(1); return; } if (na_equal) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_hash_fill_na_equal(p, size, x); return; case VCTRS_TYPE_integer: int_hash_fill_na_equal(p, size, x); return; case VCTRS_TYPE_double: dbl_hash_fill_na_equal(p, size, x); return; case VCTRS_TYPE_complex: cpl_hash_fill_na_equal(p, size, x); return; case VCTRS_TYPE_character: chr_hash_fill_na_equal(p, size, x); return; case VCTRS_TYPE_raw: raw_hash_fill_na_equal(p, size, x); return; case VCTRS_TYPE_list: list_hash_fill_na_equal(p, size, x); return; case VCTRS_TYPE_dataframe: df_hash_fill(p, size, x, true); return; default: break; } } else { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_hash_fill_na_propagate(p, size, x); return; case VCTRS_TYPE_integer: int_hash_fill_na_propagate(p, size, x); return; case VCTRS_TYPE_double: dbl_hash_fill_na_propagate(p, size, x); return; case VCTRS_TYPE_complex: cpl_hash_fill_na_propagate(p, size, x); return; case VCTRS_TYPE_character: chr_hash_fill_na_propagate(p, size, x); return; case VCTRS_TYPE_raw: raw_hash_fill_na_propagate(p, size, x); return; case VCTRS_TYPE_list: list_hash_fill_na_propagate(p, size, x); return; case VCTRS_TYPE_dataframe: df_hash_fill(p, size, x, false); return; default: break; } } stop_unimplemented_vctrs_type("hash_fill", vec_proxy_typeof(x)); } #define HASH_FILL(CTYPE, CONST_DEREF, HASHER) \ const CTYPE* xp = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i, ++xp) { \ p[i] = hash_combine(p[i], HASHER(xp)); \ } #define HASH_FILL_NA_PROPAGATE(CTYPE, CONST_DEREF, HASHER, NA_VALUE) \ const CTYPE* xp = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i, ++xp) { \ uint32_t h = p[i]; \ if (h == HASH_MISSING) { \ continue; \ } \ if (*xp == NA_VALUE) { \ p[i] = HASH_MISSING; \ } else { \ p[i] = hash_combine(h, HASHER(xp)); \ } \ } #define HASH_FILL_NA_PROPAGATE_CMP(CTYPE, CONST_DEREF, HASHER, NA_CMP) \ const CTYPE* xp = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i, ++xp) { \ uint32_t h = p[i]; \ if (h == HASH_MISSING) { \ continue; \ } \ if (NA_CMP(*xp)) { \ p[i] = HASH_MISSING; \ } else { \ p[i] = hash_combine(h, HASHER(xp)); \ } \ } static inline void lgl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(int, LOGICAL_RO, lgl_hash_scalar); } static inline void lgl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE(int, LOGICAL_RO, lgl_hash_scalar, NA_LOGICAL); } static inline void int_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(int, INTEGER_RO, int_hash_scalar); } static inline void int_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE(int, INTEGER_RO, int_hash_scalar, NA_INTEGER); } static inline void dbl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(double, REAL_RO, dbl_hash_scalar); } static inline void dbl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE_CMP(double, REAL_RO, dbl_hash_scalar, dbl_is_missing); } static inline void cpl_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(Rcomplex, COMPLEX_RO, cpl_hash_scalar); } static inline void cpl_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE_CMP(Rcomplex, COMPLEX_RO, cpl_hash_scalar, cpl_is_missing); } static inline void chr_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(SEXP, STRING_PTR_RO, chr_hash_scalar); } static inline void chr_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_NA_PROPAGATE(SEXP, STRING_PTR_RO, chr_hash_scalar, NA_STRING); } static inline void raw_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(Rbyte, RAW_RO, raw_hash_scalar); } static inline void raw_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL(Rbyte, RAW_RO, raw_hash_scalar); } #undef HASH_FILL_NA_PROPAGATE_CMP #undef HASH_FILL_NA_PROPAGATE #undef HASH_FILL #define HASH_FILL_BARRIER(HASHER) \ for (R_len_t i = 0; i < size; ++i) { \ p[i] = hash_combine(p[i], HASHER(x, i)); \ } #define HASH_FILL_BARRIER_NA_PROPAGATE(HASHER) \ for (R_len_t i = 0; i < size; ++i) { \ uint32_t h = p[i]; \ if (h == HASH_MISSING) { \ continue; \ } \ uint32_t elt_hash = HASHER(x, i); \ if (elt_hash == HASH_MISSING) { \ p[i] = HASH_MISSING; \ } else { \ p[i] = hash_combine(p[i], elt_hash); \ } \ } static void list_hash_fill_na_equal(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_BARRIER(list_hash_scalar_na_equal); } static void list_hash_fill_na_propagate(uint32_t* p, R_len_t size, SEXP x) { HASH_FILL_BARRIER_NA_PROPAGATE(list_hash_scalar_na_propagate); } #undef HASH_FILL_BARRIER_NA_PROPAGATE #undef HASH_FILL_BARRIER static void df_hash_fill(uint32_t* p, R_len_t size, SEXP x, bool na_equal) { R_len_t ncol = Rf_length(x); for (R_len_t i = 0; i < ncol; ++i) { SEXP col = VECTOR_ELT(x, i); hash_fill(p, size, col, na_equal); } } // [[ register() ]] SEXP vctrs_hash(SEXP x) { x = PROTECT(vec_proxy_equal(x)); R_len_t n = vec_size(x); SEXP out = PROTECT(Rf_allocVector(RAWSXP, n * sizeof(uint32_t))); uint32_t* p = (uint32_t*) RAW(out); memset(p, 0, n * sizeof(uint32_t)); hash_fill(p, n, x, true); UNPROTECT(2); return out; } vctrs/src/rank.c0000644000176200001440000001436014341667017013316 0ustar liggesusers#include "vctrs.h" enum ties { TIES_min, TIES_max, TIES_sequential, TIES_dense }; enum incomplete { INCOMPLETE_rank, INCOMPLETE_na }; #include "decl/rank-decl.h" // [[ register() ]] r_obj* vctrs_rank(r_obj* x, r_obj* ties, r_obj* incomplete, r_obj* direction, r_obj* na_value, r_obj* nan_distinct, r_obj* chr_proxy_collate) { const enum ties c_ties = parse_ties(ties); const enum incomplete c_incomplete = parse_incomplete(incomplete); const bool c_nan_distinct = r_as_bool(nan_distinct); return vec_rank( x, c_ties, c_incomplete, direction, na_value, c_nan_distinct, chr_proxy_collate ); } static r_obj* vec_rank(r_obj* x, enum ties ties_type, enum incomplete incomplete_type, r_obj* direction, r_obj* na_value, bool nan_distinct, r_obj* chr_proxy_collate) { r_ssize size = vec_size(x); r_keep_loc pi_x; KEEP_HERE(x, &pi_x); r_obj* complete = r_null; r_keep_loc pi_complete; KEEP_HERE(complete, &pi_complete); int* v_complete = NULL; r_ssize rank_size = size; bool rank_incomplete_with_na = (incomplete_type == INCOMPLETE_na); if (rank_incomplete_with_na) { // Slice out complete values of `x` to rank. // Retain the logical vector for constructing `out`. complete = vec_detect_complete(x); KEEP_AT(complete, pi_complete); v_complete = r_lgl_begin(complete); bool all_complete = r_lgl_all(complete); if (all_complete) { // No incomplete values to rank rank_incomplete_with_na = false; } else { x = vec_slice(x, complete); KEEP_AT(x, pi_x); rank_size = vec_size(x); } } r_obj* rank = KEEP(r_alloc_integer(rank_size)); int* v_rank = r_int_begin(rank); const bool chr_ordered = true; r_obj* info = KEEP(vec_order_info(x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered)); r_obj* order = r_list_get(info, 0); const int* v_order = r_int_cbegin(order); r_obj* group_sizes = r_list_get(info, 1); const int* v_group_sizes = r_int_cbegin(group_sizes); r_ssize n_groups = r_length(group_sizes); switch (ties_type) { case TIES_min: vec_rank_min(v_order, v_group_sizes, n_groups, v_rank); break; case TIES_max: vec_rank_max(v_order, v_group_sizes, n_groups, v_rank); break; case TIES_sequential: vec_rank_sequential(v_order, v_group_sizes, n_groups, v_rank); break; case TIES_dense: vec_rank_dense(v_order, v_group_sizes, n_groups, v_rank); break; } r_obj* out = r_null; if (rank_incomplete_with_na) { out = KEEP(r_alloc_integer(size)); int* v_out = r_int_begin(out); r_ssize j = 0; for (r_ssize i = 0; i < size; ++i) { v_out[i] = v_complete[i] ? v_rank[j++] : r_globals.na_int; } FREE(1); } else { out = rank; } FREE(4); return out; } // ----------------------------------------------------------------------------- static void vec_rank_min(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 1; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; } rank += group_size; } } static void vec_rank_max(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 0; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; rank += group_size; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; } } } static void vec_rank_sequential(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 1; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; ++rank; } } } static void vec_rank_dense(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 1; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; } ++rank; } } // ----------------------------------------------------------------------------- static inline enum ties parse_ties(r_obj* ties) { if (!r_is_string(ties)) { r_stop_internal("`ties` must be a string."); } const char* c_ties = r_chr_get_c_string(ties, 0); if (!strcmp(c_ties, "min")) return TIES_min; if (!strcmp(c_ties, "max")) return TIES_max; if (!strcmp(c_ties, "sequential")) return TIES_sequential; if (!strcmp(c_ties, "dense")) return TIES_dense; r_stop_internal( "`ties` must be one of: \"min\", \"max\", \"sequential\", or \"dense\"." ); } // ----------------------------------------------------------------------------- static inline enum incomplete parse_incomplete(r_obj* incomplete) { if (!r_is_string(incomplete)) { r_stop_internal("`incomplete` must be a string."); } const char* c_incomplete = r_chr_get_c_string(incomplete, 0); if (!strcmp(c_incomplete, "rank")) return INCOMPLETE_rank; if (!strcmp(c_incomplete, "na")) return INCOMPLETE_na; r_stop_internal( "`incomplete` must be either \"rank\" or \"na\"." ); } // ----------------------------------------------------------------------------- // Treats missing values as `true` static inline bool r_lgl_all(r_obj* x) { if (r_typeof(x) != R_TYPE_logical) { r_stop_internal("`x` must be a logical vector."); } const int* v_x = r_lgl_cbegin(x); r_ssize size = r_length(x); for (r_ssize i = 0; i < size; ++i) { if (!v_x[i]) { return false; } } return true; } vctrs/src/arg-counter.h0000644000176200001440000000360114315060310014572 0ustar liggesusers#ifndef VCTRS_ARG_COUNTER_H #define VCTRS_ARG_COUNTER_H #include "vctrs-core.h" #include "arg.h" struct counters { /* public: */ r_obj* shelter; // Argument tags for the current value of the reduction (the result // so far) and the next value. These handles typically point to the // local counter args, but might also point to external arg objects // like the initial current arg, or a splice box counter arg. struct vctrs_arg* curr_arg; struct vctrs_arg* next_arg; /* private: */ // Global counters r_ssize curr; r_ssize next; r_obj* names; r_ssize names_curr; r_ssize names_next; // Local counters for splice boxes. Since the tags are generated // lazily, we need two counter states to handle the // `vec_c(!!!list(foo = 1), !!!list(bar = 2))` case. struct counters* next_box_counters; struct counters* prev_box_counters; struct vctrs_arg curr_counter; struct vctrs_arg next_counter; struct arg_data_counter curr_counter_arg_data; struct arg_data_counter next_counter_arg_data; void* p_data; }; enum counters_shelter { COUNTERS_SHELTER_data = 0, COUNTERS_SHELTER_names, COUNTERS_SHELTER_next, COUNTERS_SHELTER_prev, COUNTERS_SHELTER_N }; /** * Swap the argument tags of the reduction * * There are two counters used for generating argument tags when an * error occur during a reduction. The first represent the result so * far, and the second the next input. Call `counters_shift()` to set * the counter of the next input as current counter, and start * iterating with a new counter for the next input. */ void counters_shift(struct counters* counters); r_obj* reduce(r_obj* current, struct vctrs_arg* p_current_arg, struct vctrs_arg* p_parent_arg, r_obj* rest, r_obj* (*impl)(r_obj* current, r_obj* next, struct counters* counters, void* data), void* data); #endif vctrs/src/type-date-time.c0000644000176200001440000003221614315060310015173 0ustar liggesusers#include "vctrs.h" static SEXP new_date(SEXP x); static SEXP new_datetime(SEXP x, SEXP tzone); static SEXP new_empty_datetime(SEXP tzone); static SEXP date_validate(SEXP x); static SEXP datetime_validate(SEXP x); static SEXP datetime_validate_tzone(SEXP x); static SEXP datetime_validate_type(SEXP x); static SEXP datetime_rezone(SEXP x, SEXP tzone); static SEXP tzone_get(SEXP x); static SEXP tzone_union(SEXP x_tzone, SEXP y_tzone); static bool tzone_equal(SEXP x_tzone, SEXP y_tzone); static SEXP r_as_date(SEXP x); static SEXP r_as_posixct(SEXP x, SEXP tzone); static SEXP r_as_posixlt(SEXP x, SEXP tzone); static SEXP r_date_as_character(SEXP x); static SEXP r_chr_date_as_posixct(SEXP x, SEXP tzone); static SEXP r_chr_date_as_posixlt(SEXP x, SEXP tzone); static SEXP posixlt_as_posixct_impl(SEXP x, SEXP tzone); static SEXP posixct_as_posixlt_impl(SEXP x, SEXP tzone); // ----------------------------------------------------------------------------- // ptype2 // [[ include("vctrs.h") ]] SEXP date_datetime_ptype2(SEXP x, SEXP y) { SEXP x_class = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP x_first_class = STRING_ELT(x_class, 0); SEXP tzone = (x_first_class == strings_date) ? tzone_get(y) : tzone_get(x); PROTECT(tzone); SEXP out = new_empty_datetime(tzone); UNPROTECT(2); return out; } // [[ include("vctrs.h") ]] SEXP datetime_datetime_ptype2(SEXP x, SEXP y) { SEXP x_tzone = PROTECT(tzone_get(x)); SEXP y_tzone = PROTECT(tzone_get(y)); // Never allocates SEXP tzone = tzone_union(x_tzone, y_tzone); SEXP out = new_empty_datetime(tzone); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- // cast // [[ include("vctrs.h") ]] SEXP date_as_date(SEXP x) { return date_validate(x); } // [[ include("vctrs.h") ]] SEXP date_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); // Date -> character -> POSIXct // This is the only way to retain the same clock time SEXP out = PROTECT(r_date_as_character(x)); out = PROTECT(r_chr_date_as_posixct(out, tzone)); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP date_as_posixlt(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); // Date -> character -> POSIXlt // This is the only way to retain the same clock time SEXP out = PROTECT(r_date_as_character(x)); out = PROTECT(r_chr_date_as_posixlt(out, tzone)); UNPROTECT(3); return out; } static SEXP posixt_as_date(SEXP ct, SEXP lt, bool* lossy); // [[ include("vctrs.h") ]] SEXP posixct_as_date(SEXP x, bool* lossy) { SEXP ct = PROTECT(datetime_validate(x)); SEXP tzone = PROTECT(tzone_get(ct)); SEXP lt = PROTECT(posixct_as_posixlt_impl(ct, tzone)); SEXP out = posixt_as_date(ct, lt, lossy); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP posixlt_as_date(SEXP x, bool* lossy) { SEXP lt = x; SEXP tzone = PROTECT(tzone_get(lt)); SEXP ct = PROTECT(posixlt_as_posixct_impl(lt, tzone)); SEXP out = posixt_as_date(ct, lt, lossy); UNPROTECT(2); return out; } // POSIXct is required for lossy checking. // POSIXlt is required for converting to Date. // `as.Date.POSIXct()` must go through `as.POSIXlt()`, so the POSIXct // time alone is not enough. static SEXP posixt_as_date(SEXP ct, SEXP lt, bool* lossy) { ct = PROTECT(datetime_validate(ct)); const double* p_ct = REAL(ct); SEXP out = PROTECT(r_as_date(lt)); SEXP roundtrip = PROTECT(date_as_posixct(out, ct)); const double* p_roundtrip = REAL(roundtrip); const R_len_t size = Rf_length(out); for (R_len_t i = 0; i < size; ++i) { const double ct_elt = p_ct[i]; // `NaN` and `NA` always convert without issue if (isnan(ct_elt)) { continue; } const double roundtrip_elt = p_roundtrip[i]; if (ct_elt != roundtrip_elt) { *lossy = true; UNPROTECT(3); return R_NilValue; } } UNPROTECT(3); return out; } static SEXP posixct_as_posixct_impl(SEXP x, SEXP tzone); // [[ include("vctrs.h") ]] SEXP posixct_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixct_as_posixct_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixct_as_posixct_impl(SEXP x, SEXP tzone) { x = PROTECT(datetime_validate(x)); SEXP out = datetime_rezone(x, tzone); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP posixlt_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixlt_as_posixct_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixlt_as_posixct_impl(SEXP x, SEXP tzone) { SEXP x_tzone = PROTECT(tzone_get(x)); x = PROTECT(r_as_posixct(x, x_tzone)); SEXP out = posixct_as_posixct_impl(x, tzone); UNPROTECT(2); return out; } // [[ include("vctrs.h") ]] SEXP posixct_as_posixlt(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixct_as_posixlt_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixct_as_posixlt_impl(SEXP x, SEXP tzone) { return r_as_posixlt(x, tzone); } // [[ include("vctrs.h") ]] SEXP posixlt_as_posixlt(SEXP x, SEXP to) { SEXP x_tzone = PROTECT(tzone_get(x)); SEXP to_tzone = PROTECT(tzone_get(to)); if (tzone_equal(x_tzone, to_tzone)) { UNPROTECT(2); return x; } SEXP out = x; // `as.POSIXlt.default()` doesn't respect `tz` so we have to do: // POSIXlt -> POSIXct -> POSIXct -> POSIXlt out = PROTECT(posixlt_as_posixct_impl(out, x_tzone)); out = PROTECT(posixct_as_posixct_impl(out, to_tzone)); out = PROTECT(posixct_as_posixlt_impl(out, to_tzone)); UNPROTECT(5); return out; } // ----------------------------------------------------------------------------- // restore // [[ include("vctrs.h") ]] SEXP vec_date_restore(SEXP x, SEXP to, const enum vctrs_owned owned) { SEXP out = PROTECT(vec_restore_default(x, to, owned)); out = date_validate(out); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP vec_posixct_restore(SEXP x, SEXP to, const enum vctrs_owned owned) { SEXP out = PROTECT(vec_restore_default(x, to, owned)); out = datetime_validate(out); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP vec_posixlt_restore(SEXP x, SEXP to, const enum vctrs_owned owned) { SEXP out = PROTECT(vec_restore_default(x, to, owned)); out = datetime_validate_tzone(out); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_new_date(SEXP x) { return new_date(x); } static SEXP new_date(SEXP x) { if (TYPEOF(x) != REALSXP) { Rf_errorcall(R_NilValue, "`x` must be a double vector."); } SEXP names = PROTECT(r_names(x)); SEXP out = PROTECT(r_clone_referenced(x)); SET_ATTRIB(out, R_NilValue); r_attrib_poke_names(out, names); r_attrib_poke_class(out, classes_date); UNPROTECT(2); return out; } // [[ register() ]] SEXP vctrs_new_datetime(SEXP x, SEXP tzone) { return new_datetime(x, tzone); } static SEXP new_datetime(SEXP x, SEXP tzone) { if (TYPEOF(x) != REALSXP) { Rf_errorcall(R_NilValue, "`x` must be a double vector."); } // Convenience special case where we allow a // null `tzone` to represent local time if (tzone == R_NilValue) { tzone = chrs_empty; } if (TYPEOF(tzone) != STRSXP) { Rf_errorcall(R_NilValue, "`tzone` must be a character vector or `NULL`."); } SEXP names = PROTECT(r_names(x)); SEXP out = PROTECT(r_clone_referenced(x)); SET_ATTRIB(out, R_NilValue); r_attrib_poke_names(out, names); r_attrib_poke_class(out, classes_posixct); Rf_setAttrib(out, syms_tzone, tzone); UNPROTECT(2); return out; } static SEXP new_empty_datetime(SEXP tzone) { return new_datetime(r_globals.empty_dbl, tzone); } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_date_validate(SEXP x) { return date_validate(x); } // Ensure that a `Date` is internally stored as a double vector static SEXP date_validate(SEXP x) { switch (TYPEOF(x)) { case REALSXP: return x; case INTSXP: // Keeps attributes return Rf_coerceVector(x, REALSXP); default: r_stop_internal("Corrupt `Date` with unknown type %s.", Rf_type2char(TYPEOF(x))); } } // [[ register() ]] SEXP vctrs_datetime_validate(SEXP x) { return datetime_validate(x); } // Ensure that a `POSIXct` is internally stored as a double vector. // Also checks that the `tzone` attribute is non-NULL. static SEXP datetime_validate(SEXP x) { x = PROTECT(datetime_validate_tzone(x)); x = PROTECT(datetime_validate_type(x)); UNPROTECT(2); return x; } static SEXP datetime_validate_tzone(SEXP x) { SEXP tzone = Rf_getAttrib(x, syms_tzone); if (tzone != R_NilValue) { return x; } x = PROTECT(r_clone_referenced(x)); Rf_setAttrib(x, syms_tzone, chrs_empty); UNPROTECT(1); return x; } static SEXP datetime_validate_type(SEXP x) { switch (TYPEOF(x)) { case REALSXP: return x; case INTSXP: // Keeps attributes return Rf_coerceVector(x, REALSXP); default: r_stop_internal("Corrupt `POSIXct` with unknown type %s.", Rf_type2char(TYPEOF(x))); } never_reached("datetime_validate_type"); } // ----------------------------------------------------------------------------- // Same underlying numeric representation, different `tzone` static SEXP datetime_rezone(SEXP x, SEXP tzone) { SEXP x_tzone = PROTECT(tzone_get(x)); if (tzone_equal(x_tzone, tzone)) { UNPROTECT(1); return x; } SEXP out = PROTECT(r_clone_referenced(x)); Rf_setAttrib(out, syms_tzone, tzone); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- // Time zone utilities static SEXP tzone_get(SEXP x) { SEXP tzone = PROTECT(Rf_getAttrib(x, syms_tzone)); if (tzone == R_NilValue) { UNPROTECT(1); return chrs_empty; } R_len_t size = Rf_length(tzone); if (size == 1) { UNPROTECT(1); return tzone; } if (size == 0) { Rf_errorcall(R_NilValue, "Corrupt datetime with 0-length `tzone` attribute"); } // If there are multiple, only take the first SEXP out = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(out, 0, STRING_ELT(tzone, 0)); UNPROTECT(2); return out; } // `tzone_get()` is guaranteed to return 1 element static inline bool tzone_is_local(SEXP tzone) { return STRING_ELT(tzone, 0) == strings_empty; } static SEXP tzone_union(SEXP x_tzone, SEXP y_tzone) { if (tzone_is_local(x_tzone)) { return y_tzone; } else { return x_tzone; } } // `tzone_get()` is guaranteed to return 1 element static bool tzone_equal(SEXP x_tzone, SEXP y_tzone) { // Equal objects? if (x_tzone == y_tzone) { return true; } // Equal CHARSXPs? SEXP x_string = STRING_ELT(x_tzone, 0); SEXP y_string = STRING_ELT(y_tzone, 0); if (x_string == y_string) { return true; } // Equal C char? const char* x_tzone_char = CHAR(x_string); const char* y_tzone_char = CHAR(y_string); return !strcmp(x_tzone_char, y_tzone_char); } // ----------------------------------------------------------------------------- static SEXP syms_tz = NULL; static SEXP syms_as_date = NULL; static SEXP fns_as_date = NULL; static SEXP r_as_date(SEXP x) { return vctrs_dispatch1(syms_as_date, fns_as_date, syms_x, x); } static SEXP syms_as_posixct = NULL; static SEXP fns_as_posixct = NULL; static SEXP r_as_posixct(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_as_posixct, fns_as_posixct, syms_x, x, syms_tz, tzone); } static SEXP syms_as_posixlt = NULL; static SEXP fns_as_posixlt = NULL; static SEXP r_as_posixlt(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_as_posixlt, fns_as_posixlt, syms_x, x, syms_tz, tzone); } static SEXP syms_date_as_character = NULL; static SEXP fns_date_as_character = NULL; static SEXP r_date_as_character(SEXP x) { return vctrs_dispatch1(syms_date_as_character, fns_date_as_character, syms_x, x); } static SEXP syms_chr_date_as_posixct = NULL; static SEXP fns_chr_date_as_posixct = NULL; static SEXP r_chr_date_as_posixct(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_chr_date_as_posixct, fns_chr_date_as_posixct, syms_x, x, syms_tzone, tzone); } static SEXP syms_chr_date_as_posixlt = NULL; static SEXP fns_chr_date_as_posixlt = NULL; static SEXP r_chr_date_as_posixlt(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_chr_date_as_posixlt, fns_chr_date_as_posixlt, syms_x, x, syms_tzone, tzone); } // ----------------------------------------------------------------------------- void vctrs_init_type_date_time(SEXP ns) { syms_tz = Rf_install("tz"); syms_as_date = Rf_install("as.Date"); syms_as_posixct = Rf_install("as.POSIXct"); syms_as_posixlt = Rf_install("as.POSIXlt"); syms_date_as_character = Rf_install("date_as_character"); syms_chr_date_as_posixct = Rf_install("chr_date_as_posixct"); syms_chr_date_as_posixlt = Rf_install("chr_date_as_posixlt"); fns_as_date = r_env_get(R_BaseEnv, syms_as_date); fns_as_posixct = r_env_get(R_BaseEnv, syms_as_posixct); fns_as_posixlt = r_env_get(R_BaseEnv, syms_as_posixlt); fns_date_as_character = r_env_get(ns, syms_date_as_character); fns_chr_date_as_posixct = r_env_get(ns, syms_chr_date_as_posixct); fns_chr_date_as_posixlt = r_env_get(ns, syms_chr_date_as_posixlt); } vctrs/src/cast-bare.h0000644000176200001440000000062614315060310014211 0ustar liggesusers#ifndef VCTRS_CAST_BARE_H #define VCTRS_CAST_BARE_H #include "vctrs-core.h" r_obj* int_as_double(r_obj* x, bool* lossy); r_obj* lgl_as_double(r_obj* x, bool* lossy); r_obj* dbl_as_integer(r_obj* x, bool* lossy); r_obj* lgl_as_integer(r_obj* x, bool* lossy); r_obj* chr_as_logical(r_obj* x, bool* lossy); r_obj* dbl_as_logical(r_obj* x, bool* lossy); r_obj* int_as_logical(r_obj* x, bool* lossy); #endif vctrs/src/typeof2-s3.c0000644000176200001440000007525614315060310014271 0ustar liggesusers#include "vctrs.h" #include "decl/typeof2-s3-decl.h" enum vctrs_type2_s3 vec_typeof2_s3_impl(r_obj* x, r_obj* y, enum vctrs_type type_x, enum vctrs_type type_y, int* left) { switch (type_x) { case VCTRS_TYPE_null: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_null_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_null_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_null_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_null_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_null_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_null_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_null_unknown; } } case VCTRS_TYPE_unspecified: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_unspecified_unknown; } } case VCTRS_TYPE_logical: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_logical_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_logical_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_logical_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_logical_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_logical_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_logical_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_logical_unknown; } } case VCTRS_TYPE_integer: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_integer_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_integer_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_integer_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_integer_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_integer_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_integer_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_integer_unknown; } } case VCTRS_TYPE_double: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_double_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_double_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_double_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_double_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_double_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_double_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_double_unknown; } } case VCTRS_TYPE_complex: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_complex_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_complex_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_complex_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_complex_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_complex_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_complex_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_complex_unknown; } } case VCTRS_TYPE_character: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_character_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_character_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_character_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_character_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_character_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_character_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_character_unknown; } } case VCTRS_TYPE_raw: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_raw_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_raw_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_raw_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_raw_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_raw_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_raw_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_raw_unknown; } } case VCTRS_TYPE_list: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_list_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_list_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_list_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_list_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_list_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_list_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_list_unknown; } } case VCTRS_TYPE_dataframe: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_dataframe_unknown; } } case VCTRS_TYPE_scalar: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_scalar_unknown; } } case VCTRS_TYPE_s3: { return vec_typeof2_s3_impl2(x, y, type_y, left); }} r_stop_unreachable(); } static enum vctrs_type2_s3 vec_typeof2_s3_impl2(r_obj* x, r_obj* y, enum vctrs_type type_y, int* left) { switch (class_type(x)) { case VCTRS_CLASS_bare_factor: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_factor; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_factor; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_factor; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_factor; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_factor; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_factor; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_factor; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_factor; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_factor; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_factor; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_factor; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = -1; return VCTRS_TYPE2_S3_bare_factor_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_factor_unknown; } }} } case VCTRS_CLASS_bare_ordered: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_ordered; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_ordered; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_ordered; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_ordered; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_ordered; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_ordered; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_ordered; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_ordered; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_ordered; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_ordered; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_ordered; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_ordered; case VCTRS_CLASS_bare_ordered: *left = -1; return VCTRS_TYPE2_S3_bare_ordered_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_unknown; } }} } case VCTRS_CLASS_bare_date: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_date; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_date; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_date; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_date; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_date; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_date; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_date; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_date; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_date; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_date; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_date; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_date; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_date; case VCTRS_CLASS_bare_date: *left = -1; return VCTRS_TYPE2_S3_bare_date_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_bare_date_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_date_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_date_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_date_unknown; } }} } case VCTRS_CLASS_bare_posixct: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_posixct; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_posixct; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_posixct; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_posixct; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_posixct; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_posixct; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_posixct; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_posixct; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_posixct; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_posixct; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_posixct; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_posixct; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_posixct; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_bare_posixct; case VCTRS_CLASS_bare_posixct: *left = -1; return VCTRS_TYPE2_S3_bare_posixct_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_posixct_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_posixct_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_posixct_unknown; } }} } case VCTRS_CLASS_bare_posixlt: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_posixlt; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_posixlt; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_posixlt; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_posixlt; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_posixlt; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_posixlt; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_posixlt; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_posixlt; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_posixlt; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_posixlt; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_posixlt; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_posixlt; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_posixlt; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_bare_posixlt; case VCTRS_CLASS_bare_posixct: *left = 1; return VCTRS_TYPE2_S3_bare_posixct_bare_posixlt; case VCTRS_CLASS_bare_posixlt: *left = -1; return VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_posixlt_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_posixlt_unknown; } }} } case VCTRS_CLASS_bare_tibble: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_tibble; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_tibble; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_tibble; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_tibble; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_tibble; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_tibble; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_tibble; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_tibble; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_tibble; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_tibble; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_tibble; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_tibble; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_tibble; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_bare_tibble; case VCTRS_CLASS_bare_posixct: *left = 1; return VCTRS_TYPE2_S3_bare_posixct_bare_tibble; case VCTRS_CLASS_bare_posixlt: *left = 1; return VCTRS_TYPE2_S3_bare_posixlt_bare_tibble; case VCTRS_CLASS_bare_tibble: *left = -1; return VCTRS_TYPE2_S3_bare_tibble_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_tibble_unknown; } }} } default: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_unknown; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_unknown; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_unknown; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_unknown; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_unknown; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_unknown; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_unknown; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_unknown; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_unknown; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_unknown; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_unknown; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_unknown; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_unknown; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_unknown; case VCTRS_CLASS_bare_posixct: *left = 1; return VCTRS_TYPE2_S3_bare_posixct_unknown; case VCTRS_CLASS_bare_posixlt: *left = 1; return VCTRS_TYPE2_S3_bare_posixlt_unknown; case VCTRS_CLASS_bare_tibble: *left = 1; return VCTRS_TYPE2_S3_bare_tibble_unknown; default: *left = -1; return VCTRS_TYPE2_S3_unknown_unknown; } }} }} r_stop_unreachable(); } static enum vctrs_type2_s3 vec_typeof2_s3(r_obj* x, r_obj* y) { int _; return vec_typeof2_s3_impl(x, y, vec_typeof(x), vec_typeof(y), &_); } static const char* vctrs_type2_s3_as_str(enum vctrs_type2_s3 type) { switch (type) { case VCTRS_TYPE2_S3_null_bare_factor: return "VCTRS_TYPE2_S3_null_bare_factor"; case VCTRS_TYPE2_S3_null_bare_ordered: return "VCTRS_TYPE2_S3_null_bare_ordered"; case VCTRS_TYPE2_S3_null_bare_date: return "VCTRS_TYPE2_S3_null_bare_date"; case VCTRS_TYPE2_S3_null_bare_posixct: return "VCTRS_TYPE2_S3_null_bare_posixct"; case VCTRS_TYPE2_S3_null_bare_posixlt: return "VCTRS_TYPE2_S3_null_bare_posixlt"; case VCTRS_TYPE2_S3_null_bare_tibble: return "VCTRS_TYPE2_S3_null_bare_tibble"; case VCTRS_TYPE2_S3_null_unknown: return "VCTRS_TYPE2_S3_null_unknown"; case VCTRS_TYPE2_S3_unspecified_bare_factor: return "VCTRS_TYPE2_S3_unspecified_bare_factor"; case VCTRS_TYPE2_S3_unspecified_bare_ordered: return "VCTRS_TYPE2_S3_unspecified_bare_ordered"; case VCTRS_TYPE2_S3_unspecified_bare_date: return "VCTRS_TYPE2_S3_unspecified_bare_date"; case VCTRS_TYPE2_S3_unspecified_bare_posixct: return "VCTRS_TYPE2_S3_unspecified_bare_posixct"; case VCTRS_TYPE2_S3_unspecified_bare_posixlt: return "VCTRS_TYPE2_S3_unspecified_bare_posixlt"; case VCTRS_TYPE2_S3_unspecified_bare_tibble: return "VCTRS_TYPE2_S3_unspecified_bare_tibble"; case VCTRS_TYPE2_S3_unspecified_unknown: return "VCTRS_TYPE2_S3_unspecified_unknown"; case VCTRS_TYPE2_S3_logical_bare_factor: return "VCTRS_TYPE2_S3_logical_bare_factor"; case VCTRS_TYPE2_S3_logical_bare_ordered: return "VCTRS_TYPE2_S3_logical_bare_ordered"; case VCTRS_TYPE2_S3_logical_bare_date: return "VCTRS_TYPE2_S3_logical_bare_date"; case VCTRS_TYPE2_S3_logical_bare_posixct: return "VCTRS_TYPE2_S3_logical_bare_posixct"; case VCTRS_TYPE2_S3_logical_bare_posixlt: return "VCTRS_TYPE2_S3_logical_bare_posixlt"; case VCTRS_TYPE2_S3_logical_bare_tibble: return "VCTRS_TYPE2_S3_logical_bare_tibble"; case VCTRS_TYPE2_S3_logical_unknown: return "VCTRS_TYPE2_S3_logical_unknown"; case VCTRS_TYPE2_S3_integer_bare_factor: return "VCTRS_TYPE2_S3_integer_bare_factor"; case VCTRS_TYPE2_S3_integer_bare_ordered: return "VCTRS_TYPE2_S3_integer_bare_ordered"; case VCTRS_TYPE2_S3_integer_bare_date: return "VCTRS_TYPE2_S3_integer_bare_date"; case VCTRS_TYPE2_S3_integer_bare_posixct: return "VCTRS_TYPE2_S3_integer_bare_posixct"; case VCTRS_TYPE2_S3_integer_bare_posixlt: return "VCTRS_TYPE2_S3_integer_bare_posixlt"; case VCTRS_TYPE2_S3_integer_bare_tibble: return "VCTRS_TYPE2_S3_integer_bare_tibble"; case VCTRS_TYPE2_S3_integer_unknown: return "VCTRS_TYPE2_S3_integer_unknown"; case VCTRS_TYPE2_S3_double_bare_factor: return "VCTRS_TYPE2_S3_double_bare_factor"; case VCTRS_TYPE2_S3_double_bare_ordered: return "VCTRS_TYPE2_S3_double_bare_ordered"; case VCTRS_TYPE2_S3_double_bare_date: return "VCTRS_TYPE2_S3_double_bare_date"; case VCTRS_TYPE2_S3_double_bare_posixct: return "VCTRS_TYPE2_S3_double_bare_posixct"; case VCTRS_TYPE2_S3_double_bare_posixlt: return "VCTRS_TYPE2_S3_double_bare_posixlt"; case VCTRS_TYPE2_S3_double_bare_tibble: return "VCTRS_TYPE2_S3_double_bare_tibble"; case VCTRS_TYPE2_S3_double_unknown: return "VCTRS_TYPE2_S3_double_unknown"; case VCTRS_TYPE2_S3_complex_bare_factor: return "VCTRS_TYPE2_S3_complex_bare_factor"; case VCTRS_TYPE2_S3_complex_bare_ordered: return "VCTRS_TYPE2_S3_complex_bare_ordered"; case VCTRS_TYPE2_S3_complex_bare_date: return "VCTRS_TYPE2_S3_complex_bare_date"; case VCTRS_TYPE2_S3_complex_bare_posixct: return "VCTRS_TYPE2_S3_complex_bare_posixct"; case VCTRS_TYPE2_S3_complex_bare_posixlt: return "VCTRS_TYPE2_S3_complex_bare_posixlt"; case VCTRS_TYPE2_S3_complex_bare_tibble: return "VCTRS_TYPE2_S3_complex_bare_tibble"; case VCTRS_TYPE2_S3_complex_unknown: return "VCTRS_TYPE2_S3_complex_unknown"; case VCTRS_TYPE2_S3_character_bare_factor: return "VCTRS_TYPE2_S3_character_bare_factor"; case VCTRS_TYPE2_S3_character_bare_ordered: return "VCTRS_TYPE2_S3_character_bare_ordered"; case VCTRS_TYPE2_S3_character_bare_date: return "VCTRS_TYPE2_S3_character_bare_date"; case VCTRS_TYPE2_S3_character_bare_posixct: return "VCTRS_TYPE2_S3_character_bare_posixct"; case VCTRS_TYPE2_S3_character_bare_posixlt: return "VCTRS_TYPE2_S3_character_bare_posixlt"; case VCTRS_TYPE2_S3_character_bare_tibble: return "VCTRS_TYPE2_S3_character_bare_tibble"; case VCTRS_TYPE2_S3_character_unknown: return "VCTRS_TYPE2_S3_character_unknown"; case VCTRS_TYPE2_S3_raw_bare_factor: return "VCTRS_TYPE2_S3_raw_bare_factor"; case VCTRS_TYPE2_S3_raw_bare_ordered: return "VCTRS_TYPE2_S3_raw_bare_ordered"; case VCTRS_TYPE2_S3_raw_bare_date: return "VCTRS_TYPE2_S3_raw_bare_date"; case VCTRS_TYPE2_S3_raw_bare_posixct: return "VCTRS_TYPE2_S3_raw_bare_posixct"; case VCTRS_TYPE2_S3_raw_bare_posixlt: return "VCTRS_TYPE2_S3_raw_bare_posixlt"; case VCTRS_TYPE2_S3_raw_bare_tibble: return "VCTRS_TYPE2_S3_raw_bare_tibble"; case VCTRS_TYPE2_S3_raw_unknown: return "VCTRS_TYPE2_S3_raw_unknown"; case VCTRS_TYPE2_S3_list_bare_factor: return "VCTRS_TYPE2_S3_list_bare_factor"; case VCTRS_TYPE2_S3_list_bare_ordered: return "VCTRS_TYPE2_S3_list_bare_ordered"; case VCTRS_TYPE2_S3_list_bare_date: return "VCTRS_TYPE2_S3_list_bare_date"; case VCTRS_TYPE2_S3_list_bare_posixct: return "VCTRS_TYPE2_S3_list_bare_posixct"; case VCTRS_TYPE2_S3_list_bare_posixlt: return "VCTRS_TYPE2_S3_list_bare_posixlt"; case VCTRS_TYPE2_S3_list_bare_tibble: return "VCTRS_TYPE2_S3_list_bare_tibble"; case VCTRS_TYPE2_S3_list_unknown: return "VCTRS_TYPE2_S3_list_unknown"; case VCTRS_TYPE2_S3_dataframe_bare_factor: return "VCTRS_TYPE2_S3_dataframe_bare_factor"; case VCTRS_TYPE2_S3_dataframe_bare_ordered: return "VCTRS_TYPE2_S3_dataframe_bare_ordered"; case VCTRS_TYPE2_S3_dataframe_bare_date: return "VCTRS_TYPE2_S3_dataframe_bare_date"; case VCTRS_TYPE2_S3_dataframe_bare_posixct: return "VCTRS_TYPE2_S3_dataframe_bare_posixct"; case VCTRS_TYPE2_S3_dataframe_bare_posixlt: return "VCTRS_TYPE2_S3_dataframe_bare_posixlt"; case VCTRS_TYPE2_S3_dataframe_bare_tibble: return "VCTRS_TYPE2_S3_dataframe_bare_tibble"; case VCTRS_TYPE2_S3_dataframe_unknown: return "VCTRS_TYPE2_S3_dataframe_unknown"; case VCTRS_TYPE2_S3_scalar_bare_factor: return "VCTRS_TYPE2_S3_scalar_bare_factor"; case VCTRS_TYPE2_S3_scalar_bare_ordered: return "VCTRS_TYPE2_S3_scalar_bare_ordered"; case VCTRS_TYPE2_S3_scalar_bare_date: return "VCTRS_TYPE2_S3_scalar_bare_date"; case VCTRS_TYPE2_S3_scalar_bare_posixct: return "VCTRS_TYPE2_S3_scalar_bare_posixct"; case VCTRS_TYPE2_S3_scalar_bare_posixlt: return "VCTRS_TYPE2_S3_scalar_bare_posixlt"; case VCTRS_TYPE2_S3_scalar_bare_tibble: return "VCTRS_TYPE2_S3_scalar_bare_tibble"; case VCTRS_TYPE2_S3_scalar_unknown: return "VCTRS_TYPE2_S3_scalar_unknown"; case VCTRS_TYPE2_S3_bare_factor_bare_factor: return "VCTRS_TYPE2_S3_bare_factor_bare_factor"; case VCTRS_TYPE2_S3_bare_factor_bare_ordered: return "VCTRS_TYPE2_S3_bare_factor_bare_ordered"; case VCTRS_TYPE2_S3_bare_factor_bare_date: return "VCTRS_TYPE2_S3_bare_factor_bare_date"; case VCTRS_TYPE2_S3_bare_factor_bare_posixct: return "VCTRS_TYPE2_S3_bare_factor_bare_posixct"; case VCTRS_TYPE2_S3_bare_factor_bare_posixlt: return "VCTRS_TYPE2_S3_bare_factor_bare_posixlt"; case VCTRS_TYPE2_S3_bare_factor_bare_tibble: return "VCTRS_TYPE2_S3_bare_factor_bare_tibble"; case VCTRS_TYPE2_S3_bare_factor_unknown: return "VCTRS_TYPE2_S3_bare_factor_unknown"; case VCTRS_TYPE2_S3_bare_ordered_bare_ordered: return "VCTRS_TYPE2_S3_bare_ordered_bare_ordered"; case VCTRS_TYPE2_S3_bare_ordered_bare_date: return "VCTRS_TYPE2_S3_bare_ordered_bare_date"; case VCTRS_TYPE2_S3_bare_ordered_bare_posixct: return "VCTRS_TYPE2_S3_bare_ordered_bare_posixct"; case VCTRS_TYPE2_S3_bare_ordered_bare_posixlt: return "VCTRS_TYPE2_S3_bare_ordered_bare_posixlt"; case VCTRS_TYPE2_S3_bare_ordered_bare_tibble: return "VCTRS_TYPE2_S3_bare_ordered_bare_tibble"; case VCTRS_TYPE2_S3_bare_ordered_unknown: return "VCTRS_TYPE2_S3_bare_ordered_unknown"; case VCTRS_TYPE2_S3_bare_date_bare_date: return "VCTRS_TYPE2_S3_bare_date_bare_date"; case VCTRS_TYPE2_S3_bare_date_bare_posixct: return "VCTRS_TYPE2_S3_bare_date_bare_posixct"; case VCTRS_TYPE2_S3_bare_date_bare_posixlt: return "VCTRS_TYPE2_S3_bare_date_bare_posixlt"; case VCTRS_TYPE2_S3_bare_date_bare_tibble: return "VCTRS_TYPE2_S3_bare_date_bare_tibble"; case VCTRS_TYPE2_S3_bare_date_unknown: return "VCTRS_TYPE2_S3_bare_date_unknown"; case VCTRS_TYPE2_S3_bare_posixct_bare_posixct: return "VCTRS_TYPE2_S3_bare_posixct_bare_posixct"; case VCTRS_TYPE2_S3_bare_posixct_bare_posixlt: return "VCTRS_TYPE2_S3_bare_posixct_bare_posixlt"; case VCTRS_TYPE2_S3_bare_posixct_bare_tibble: return "VCTRS_TYPE2_S3_bare_posixct_bare_tibble"; case VCTRS_TYPE2_S3_bare_posixct_unknown: return "VCTRS_TYPE2_S3_bare_posixct_unknown"; case VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt: return "VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt"; case VCTRS_TYPE2_S3_bare_posixlt_bare_tibble: return "VCTRS_TYPE2_S3_bare_posixlt_bare_tibble"; case VCTRS_TYPE2_S3_bare_posixlt_unknown: return "VCTRS_TYPE2_S3_bare_posixlt_unknown"; case VCTRS_TYPE2_S3_bare_tibble_bare_tibble: return "VCTRS_TYPE2_S3_bare_tibble_bare_tibble"; case VCTRS_TYPE2_S3_bare_tibble_unknown: return "VCTRS_TYPE2_S3_bare_tibble_unknown"; case VCTRS_TYPE2_S3_unknown_unknown: return "VCTRS_TYPE2_S3_unknown_unknown"; } r_stop_unreachable(); } r_obj* ffi_typeof2_s3(r_obj* x, r_obj* y) { enum vctrs_type2_s3 type = vec_typeof2_s3(x, y); return r_chr(vctrs_type2_s3_as_str(type)); } vctrs/src/runs.h0000644000176200001440000000030414363556517013356 0ustar liggesusers#ifndef VCTRS_RUNS_H #define VCTRS_RUNS_H #include "vctrs-core.h" r_obj* vec_identify_runs(r_obj* x, struct r_lazy error_call); r_obj* vec_run_sizes(r_obj* x, struct r_lazy error_call); #endif vctrs/src/hash.h0000644000176200001440000000011214276722575013311 0ustar liggesusers#ifndef VCTRS_HASH_H #define VCTRS_HASH_H #define HASH_MISSING 1 #endif vctrs/src/cast-dispatch.c0000644000176200001440000000632514362266120015105 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "type-factor.h" #include "type-tibble.h" r_obj* vec_cast_dispatch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy) { r_obj* x = opts->x; r_obj* to = opts->to; struct vctrs_arg* x_arg = opts->p_x_arg; struct vctrs_arg* to_arg = opts->p_to_arg; int dir = 0; enum vctrs_type2_s3 type2_s3 = vec_typeof2_s3_impl(x, to, x_type, to_type, &dir); switch (type2_s3) { case VCTRS_TYPE2_S3_character_bare_factor: if (dir == 0) { return chr_as_factor(x, to, lossy, to_arg); } else { return fct_as_character(x, x_arg); } case VCTRS_TYPE2_S3_character_bare_ordered: if (dir == 0) { return chr_as_ordered(x, to, lossy, to_arg); } else { return ord_as_character(x, x_arg); } case VCTRS_TYPE2_S3_bare_factor_bare_factor: return fct_as_factor(x, to, lossy, x_arg, to_arg); case VCTRS_TYPE2_S3_bare_ordered_bare_ordered: return ord_as_ordered(opts); case VCTRS_TYPE2_S3_bare_date_bare_posixct: if (dir == 0) { return date_as_posixct(x, to); } else { return posixct_as_date(x, lossy); } case VCTRS_TYPE2_S3_bare_date_bare_posixlt: if (dir == 0) { return date_as_posixlt(x, to); } else { return posixlt_as_date(x, lossy); } case VCTRS_TYPE2_S3_bare_posixct_bare_posixlt: if (dir == 0) { return posixct_as_posixlt(x, to); } else { return posixlt_as_posixct(x, to); } case VCTRS_TYPE2_S3_bare_date_bare_date: return date_as_date(x); case VCTRS_TYPE2_S3_bare_posixct_bare_posixct: return posixct_as_posixct(x, to); case VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt: return posixlt_as_posixlt(x, to); case VCTRS_TYPE2_S3_dataframe_bare_tibble: if (dir == 0) { return tib_cast(opts); } else { return df_cast_opts(opts); } case VCTRS_TYPE2_S3_bare_tibble_bare_tibble: return tib_cast(opts); default: return r_null; } } // [[ register() ]] r_obj* ffi_cast_dispatch_native(r_obj* x, r_obj* to, r_obj* fallback_opts, r_obj* x_arg, r_obj* to_arg, r_obj* frame) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_to_arg = vec_as_arg(to_arg); struct r_lazy call = { .x = syms_call, .env = frame }; struct cast_opts c_opts = new_cast_opts(x, to, &c_x_arg, &c_to_arg, call, fallback_opts); bool lossy = false; r_obj* out = vec_cast_dispatch_native(&c_opts, vec_typeof(x), vec_typeof(to), &lossy); if (lossy || out == r_null) { return vec_cast_default(x, to, &c_x_arg, &c_to_arg, c_opts.call, &c_opts.fallback); } else { return out; } } vctrs/src/assert.h0000644000176200001440000000072014401377400013653 0ustar liggesusers#ifndef VCTRS_ASSERT_H #define VCTRS_ASSERT_H #include "vctrs-core.h" void obj_check_vector(r_obj* x, struct vctrs_arg* arg, struct r_lazy call); void vec_check_size(r_obj* x, r_ssize size, struct vctrs_arg* arg, struct r_lazy call); void obj_check_list(r_obj* x, struct vctrs_arg* arg, struct r_lazy call); #endif vctrs/src/slice-array.c0000644000176200001440000004641214315060310014561 0ustar liggesusers#include "vctrs.h" #define SLICE_SHAPED_INDEX(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ const CTYPE* x_data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_data) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ *out_data = NA_VALUE; \ continue; \ } \ \ loc += step; \ *out_data = x_data[loc]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ \ int size_index = p_info->p_index[0]; \ if (size_index == NA_INTEGER) { \ R_len_t out_n = p_info->shape_elem_n * p_info->index_n; \ for (R_len_t i = 0; i < out_n; ++i, ++out_data) { \ *out_data = NA_VALUE; \ } \ UNPROTECT(2); \ return(out); \ } \ \ const CTYPE* x_data = CONST_DEREF(x); \ \ /* Convert to C index */ \ size_index = size_index - 1; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += size_index; \ const CTYPE elt_x_data = x_data[loc]; \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_data) { \ *out_data = elt_x_data; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ \ R_len_t start = p_info->p_index[0]; \ R_len_t n = p_info->p_index[1]; \ R_len_t step = p_info->p_index[2]; \ \ const CTYPE* x_data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++out_data, loc += step) { \ *out_data = x_data[loc]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ if (is_compact_rep(index)) { \ SLICE_SHAPED_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } else if (is_compact_seq(index)) { \ SLICE_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \ } else { \ SLICE_SHAPED_INDEX(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } static SEXP lgl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(LGLSXP, int, LOGICAL, LOGICAL_RO, NA_LOGICAL); } static SEXP int_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(INTSXP, int, INTEGER, INTEGER_RO, NA_INTEGER); } static SEXP dbl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(REALSXP, double, REAL, REAL_RO, NA_REAL); } static SEXP cpl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(CPLXSXP, Rcomplex, COMPLEX, COMPLEX_RO, vctrs_shared_na_cpl); } static SEXP chr_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(STRSXP, SEXP, STRING_PTR, STRING_PTR_RO, NA_STRING); } static SEXP raw_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(RAWSXP, Rbyte, RAW, RAW_RO, 0); } #undef SLICE_SHAPED #undef SLICE_SHAPED_COMPACT_REP #undef SLICE_SHAPED_COMPACT_SEQ #undef SLICE_SHAPED_INDEX #define SLICE_BARRIER_SHAPED_INDEX(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE) \ const CTYPE* x_data = CONST_DEREF(x); \ \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ R_len_t out_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_loc) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ SET(out, out_loc, NA_VALUE); \ continue; \ } \ \ loc += step; \ SEXP elt = x_data[loc]; \ SET(out, out_loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED_COMPACT_REP(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE) \ const CTYPE* x_data = CONST_DEREF(x); \ \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ int size_index = p_info->p_index[0]; \ if (size_index == NA_INTEGER) { \ R_len_t out_n = p_info->shape_elem_n * p_info->index_n; \ for (R_len_t i = 0; i < out_n; ++i) { \ SET(out, i, NA_VALUE); \ } \ UNPROTECT(2); \ return(out); \ } \ \ R_len_t out_loc = 0; \ \ /* Convert to C index */ \ size_index = size_index - 1; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += size_index; \ const SEXP elt_x_data = x_data[loc]; \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_loc) { \ SET(out, out_loc, elt_x_data); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, CONST_DEREF, SET) \ const CTYPE* x_data = CONST_DEREF(x); \ \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ R_len_t start = p_info->p_index[0]; \ R_len_t n = p_info->p_index[1]; \ R_len_t step = p_info->p_index[2]; \ \ R_len_t out_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++out_loc, loc += step) { \ SEXP elt = x_data[loc]; \ SET(out, out_loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE) \ if (is_compact_rep(index)) { \ SLICE_BARRIER_SHAPED_COMPACT_REP(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE); \ } else if (is_compact_seq(index)) { \ SLICE_BARRIER_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, CONST_DEREF, SET); \ } else { \ SLICE_BARRIER_SHAPED_INDEX(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE); \ } static SEXP list_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_BARRIER_SHAPED(VECSXP, SEXP, VECTOR_PTR_RO, SET_VECTOR_ELT, R_NilValue); } #undef SLICE_BARRIER_SHAPED #undef SLICE_BARRIER_SHAPED_COMPACT_REP #undef SLICE_BARRIER_SHAPED_COMPACT_SEQ #undef SLICE_BARRIER_SHAPED_INDEX SEXP vec_slice_shaped_base(enum vctrs_type type, SEXP x, SEXP index, struct strides_info* p_info) { switch (type) { case VCTRS_TYPE_logical: return lgl_slice_shaped(x, index, p_info); case VCTRS_TYPE_integer: return int_slice_shaped(x, index, p_info); case VCTRS_TYPE_double: return dbl_slice_shaped(x, index, p_info); case VCTRS_TYPE_complex: return cpl_slice_shaped(x, index, p_info); case VCTRS_TYPE_character: return chr_slice_shaped(x, index, p_info); case VCTRS_TYPE_raw: return raw_slice_shaped(x, index, p_info); case VCTRS_TYPE_list: return list_slice_shaped(x, index, p_info); default: stop_unimplemented_vctrs_type("vec_slice_shaped_base", type); } } SEXP vec_slice_shaped(enum vctrs_type type, SEXP x, SEXP index) { int n_protect = 0; struct strides_info info = new_strides_info(x, index); PROTECT_STRIDES_INFO(&info, &n_protect); SEXP out = vec_slice_shaped_base(type, x, index, &info); UNPROTECT(n_protect); return out; } vctrs/src/c.c0000644000176200001440000002744214520715753012612 0ustar liggesusers#include "vctrs.h" #include "decl/c-decl.h" r_obj* vec_c(r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call) { struct fallback_opts opts = { .s3 = r_is_true(r_peek_option("vctrs:::base_c_in_progress")) ? S3_FALLBACK_false : S3_FALLBACK_true }; return vec_c_opts(xs, ptype, name_spec, name_repair, &opts, p_error_arg, error_call); } r_obj* vec_c_opts(r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, const struct fallback_opts* fallback_opts, struct vctrs_arg* p_error_arg, struct r_lazy error_call) { struct ptype_common_opts ptype_opts = { .p_arg = p_error_arg, .call = error_call, .fallback = *fallback_opts }; r_obj* orig_ptype = ptype; ptype = KEEP(vec_ptype_common_opts(xs, orig_ptype, &ptype_opts)); if (ptype == r_null) { FREE(1); return r_null; } if (vec_is_common_class_fallback(ptype)) { r_obj* out = vec_c_fallback(ptype, xs, name_spec, name_repair, p_error_arg, error_call); FREE(1); return out; } // FIXME: Needed for dplyr::summarise() which passes a non-fallback ptype if (needs_vec_c_homogeneous_fallback(xs, ptype)) { r_obj* out = vec_c_fallback_invoke(xs, name_spec, error_call); FREE(1); return out; } // Find individual input sizes and total size of output r_ssize xs_size = r_length(xs); r_ssize out_size = 0; // Caching the sizes causes an extra allocation but it improves performance r_obj* sizes = KEEP(r_alloc_integer(xs_size)); int* p_sizes = r_int_begin(sizes); for (r_ssize i = 0; i < xs_size; ++i) { r_obj* x = r_list_get(xs, i); r_ssize size = (x == r_null) ? 0 : vec_size(x); out_size += size; p_sizes[i] = size; } r_obj* out = vec_init(ptype, out_size); r_keep_loc out_pi; KEEP_HERE(out, &out_pi); out = vec_proxy_recurse(out); KEEP_AT(out, out_pi); r_obj* loc = KEEP(compact_seq(0, 0, true)); int* p_loc = r_int_begin(loc); bool assign_names = !r_inherits(name_spec, "rlang_zap"); r_obj* xs_names = KEEP(r_names(xs)); bool xs_is_named = xs_names != r_null && !is_data_frame(ptype); r_obj* out_names = r_null; r_keep_loc out_names_pi; KEEP_HERE(r_null, &out_names_pi); // Compact sequences use 0-based counters r_ssize counter = 0; r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( p_error_arg, xs_names, xs_size, &i ); KEEP(p_x_arg->shelter); struct cast_opts c_cast_opts = { .to = ptype, .p_x_arg = p_x_arg, .call = error_call, .fallback = *fallback_opts }; const struct vec_assign_opts c_assign_opts = { .recursive = true, .assign_names = assign_names, .ignore_outer_names = true, .call = error_call }; for (; i < xs_size; ++i) { r_obj* x = r_list_get(xs, i); r_ssize size = p_sizes[i]; init_compact_seq(p_loc, counter, size, true); if (assign_names) { r_obj* outer = xs_is_named ? r_chr_get(xs_names, i) : r_null; r_obj* inner = KEEP(vec_names(x)); r_obj* x_nms = KEEP(apply_name_spec(name_spec, outer, inner, size)); if (x_nms != r_null) { R_LAZY_ALLOC(out_names, out_names_pi, R_TYPE_character, out_size); // If there is no name to assign, skip the assignment since // `out_names` already contains empty strings if (x_nms != chrs_empty) { out_names = chr_assign(out_names, loc, x_nms, VCTRS_OWNED_true); KEEP_AT(out_names, out_names_pi); } } FREE(2); } if (!size) { continue; } c_cast_opts.x = x; x = KEEP(vec_cast_opts(&c_cast_opts)); // Total ownership of `out` because it was freshly created with `vec_init()` out = vec_proxy_assign_opts(out, loc, x, VCTRS_OWNED_true, &c_assign_opts); KEEP_AT(out, out_pi); counter += size; FREE(1); } if (is_data_frame(out) && fallback_opts->s3) { df_c_fallback(out, ptype, xs, out_size, name_spec, name_repair, error_call); } out = KEEP(vec_restore_recurse(out, ptype, VCTRS_OWNED_true)); if (out_names != r_null) { out_names = KEEP(vec_as_names(out_names, name_repair)); out = vec_set_names(out, out_names); FREE(1); } else if (!assign_names) { // FIXME: `vec_ptype2()` doesn't consistently zaps names, so `out` // might have been initialised with names. This branch can be // removed once #1020 is resolved. out = vec_set_names(out, r_null); } FREE(8); return out; } r_obj* ffi_vec_c(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* frame) { args = r_node_cdr(args); r_obj* xs = KEEP(rlang_env_dots_list(frame)); r_obj* ptype = KEEP(r_eval(r_node_car(args), frame)); args = r_node_cdr(args); r_obj* name_spec = KEEP(r_eval(r_node_car(args), frame)); args = r_node_cdr(args); r_obj* name_repair = KEEP(r_eval(r_node_car(args), frame)); struct r_lazy error_arg_lazy = { .x = syms.dot_error_arg, .env = frame }; struct vctrs_arg error_arg = new_lazy_arg(&error_arg_lazy); struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, r_lazy_null, false, error_call); KEEP(name_repair_opts.shelter); r_obj* out = vec_c(xs, ptype, name_spec, &name_repair_opts, &error_arg, error_call); FREE(5); return out; } bool needs_vec_c_fallback(r_obj* ptype) { if (!vec_is_common_class_fallback(ptype)) { return false; } // Suboptimal: Prevent infinite recursion through `vctrs_vctr` method r_obj* cls = r_attrib_get(ptype, syms_fallback_class); cls = r_chr_get(cls, r_length(cls) - 1); return cls != strings_vctrs_vctr; } bool needs_vec_c_homogeneous_fallback(r_obj* xs, r_obj* ptype) { if (!r_length(xs)) { return false; } r_obj* x = list_first_non_null(xs, NULL); if (!obj_is_vector(x)) { return false; } // Never fall back for `vctrs_vctr` classes to avoid infinite // recursion through `c.vctrs_vctr()` if (r_inherits(x, "vctrs_vctr")) { return false; } if (ptype != r_null) { r_obj* x_class = KEEP(r_class(x)); r_obj* ptype_class = KEEP(r_class(ptype)); bool equal = equal_object(x_class, ptype_class); FREE(2); if (!equal) { return false; } } return !vec_implements_ptype2(x) && list_is_homogeneously_classed(xs) && vec_implements_base_c(x); } static inline bool vec_implements_base_c(r_obj* x) { if (!r_is_object(x)) { return false; } if (IS_S4_OBJECT(x)) { return s4_find_method(x, s4_c_method_table) != r_null; } else { return s3_find_method("c", x, base_method_table) != r_null; } } static inline bool class_implements_base_c(r_obj* cls) { if (s3_class_find_method("c", cls, base_method_table) != r_null) { return true; } if (s4_class_find_method(cls, s4_c_method_table) != r_null) { return true; } return false; } r_obj* vec_c_fallback(r_obj* ptype, r_obj* xs, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call) { r_obj* cls = KEEP(r_attrib_get(ptype, syms_fallback_class)); bool implements_c = class_implements_base_c(cls); FREE(1); if (implements_c) { return vec_c_fallback_invoke(xs, name_spec, error_call); } else { struct ptype_common_opts ptype_opts = { .p_arg = p_error_arg, .call = error_call, .fallback = { .s3 = S3_FALLBACK_false } }; // Should cause a common type error, unless another fallback // kicks in (for instance, homogeneous class with homogeneous // attributes) vec_ptype_common_opts(xs, r_null, &ptype_opts); // Suboptimal: Call `vec_c()` again to combine vector with // homogeneous class fallback return vec_c_opts( xs, r_null, name_spec, name_repair, &ptype_opts.fallback, p_error_arg, error_call ); } } r_obj* vec_c_fallback_invoke(r_obj* xs, r_obj* name_spec, struct r_lazy error_call) { r_obj* x = list_first_non_null(xs, NULL); if (vctrs_debug_verbose) { r_printf("Falling back to `base::c()` for class `%s`.\n", r_chr_get_c_string(r_class(x), 0)); } int err_type = vec_c_fallback_validate_args(x, name_spec); if (err_type) { stop_vec_c_fallback(xs, err_type, error_call); } r_obj* ffi_call = KEEP(r_call2(r_sym("base_c_invoke"), xs)); r_obj* out = r_eval(ffi_call, vctrs_ns_env); FREE(1); return out; } static inline int vec_c_fallback_validate_args(r_obj* x, r_obj* name_spec) { if (name_spec != r_null) { return 2; } return 0; } static void stop_vec_c_fallback(r_obj* xs, int err_type, struct r_lazy call) { r_obj* common_class = KEEP(r_class(list_first_non_null(xs, NULL))); const char* class_str = r_chr_get_c_string(common_class, 0); const char* msg = NULL; switch (err_type) { case 2: msg = "Can't use a name specification with non-vctrs types."; break; case 3: msg = "Can't find vctrs or base methods for concatenation."; break; default: msg = "Internal error: Unexpected error type."; break; } r_abort_lazy_call( call, "%s\n" "vctrs methods must be implemented for class `%s`.\n" "See .", msg, class_str ); } // `ptype` contains fallback information void df_c_fallback(r_obj* out, r_obj* ptype, r_obj* xs, r_ssize n_rows, r_obj* name_spec, const struct name_repair_opts* name_repair, struct r_lazy error_call) { int n_prot = 0; r_ssize n_cols = r_length(out); r_obj* ptype_orig = ptype; if (!is_data_frame(ptype)) { ptype = KEEP_N(vec_proxy(ptype), &n_prot); if (!is_data_frame(ptype)) { r_stop_internal("Expected c fallback target to have a df proxy."); } } if (r_length(ptype) != n_cols || r_typeof(out) != R_TYPE_list || r_typeof(ptype) != R_TYPE_list) { r_stop_internal("`ptype` and `out` must be lists of the same length."); } for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = r_list_get(out, i); r_obj* ptype_col = r_list_get(ptype, i); // Recurse into df-cols if (is_data_frame(col) && df_needs_fallback(ptype_col)) { r_obj* xs_col = KEEP(list_pluck(xs, i)); r_obj* out_col = r_list_get(out, i); df_c_fallback(out_col, ptype_col, xs_col, n_rows, name_spec, name_repair, error_call); FREE(1); } else if (vec_is_common_class_fallback(ptype_col)) { r_obj* xs_col = KEEP(list_pluck(xs, i)); r_obj* out_col = vec_c_fallback( ptype_col, xs_col, name_spec, name_repair, vec_args.empty, error_call ); r_list_poke(out, i, out_col); if (vec_size(out_col) != n_rows) { r_stop_internal("`c()` method returned a vector of unexpected size %d instead of %d.", vec_size(out_col), n_rows); } // Remove fallback vector from the ptype so it doesn't get in // the way of restoration later on r_list_poke(ptype_orig, i, vec_ptype_final(out_col)); FREE(1); } } FREE(n_prot); } static bool df_needs_fallback(r_obj* x) { r_ssize n_cols = r_length(x); r_obj* const * v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = v_x[i]; if (vec_is_common_class_fallback(col)) { return true; } if (is_data_frame(col) && df_needs_fallback(col)) { return true; } } return false; } vctrs/src/type-info.c0000644000176200001440000001312614404336165014271 0ustar liggesusers#include "vctrs.h" #include "decl/type-info-decl.h" struct vctrs_type_info vec_type_info(r_obj* x) { struct vctrs_type_info info = { .type = vec_typeof(x) }; switch (info.type) { case VCTRS_TYPE_s3: info.proxy_method = vec_proxy_method(x); break; default: info.proxy_method = r_null; } info.shelter = info.proxy_method; return info; } struct vctrs_proxy_info vec_proxy_info(r_obj* x) { struct vctrs_proxy_info info; info.shelter = KEEP(r_alloc_list(2)); info.proxy_method = r_is_object(x) ? vec_proxy_method(x) : r_null; r_list_poke(info.shelter, 0, info.proxy_method); if (info.proxy_method == r_null) { info.type = vec_base_typeof(x, false); info.proxy = x; } else { r_obj* proxy = KEEP(vec_proxy_invoke(x, info.proxy_method)); info.type = vec_base_typeof(proxy, true); info.proxy = proxy; FREE(1); } r_list_poke(info.shelter, 1, info.proxy); FREE(1); return info; } // [[ register() ]] r_obj* ffi_type_info(r_obj* x) { struct vctrs_type_info info = vec_type_info(x); r_obj* out = KEEP(Rf_mkNamed(R_TYPE_list, (const char*[]) { "type", "proxy_method", "" })); r_list_poke(out, 0, r_chr(vec_type_as_str(info.type))); r_list_poke(out, 1, info.proxy_method); FREE(1); return out; } // [[ register() ]] r_obj* ffi_proxy_info(r_obj* x) { struct vctrs_proxy_info info = vec_proxy_info(x); r_obj* out = KEEP(Rf_mkNamed(R_TYPE_list, (const char*[]) { "type", "proxy_method", "proxy", "" })); r_list_poke(out, 0, r_chr(vec_type_as_str(info.type))); r_list_poke(out, 1, info.proxy_method); r_list_poke(out, 2, info.proxy); FREE(1); return out; } static enum vctrs_type vec_base_typeof(r_obj* x, bool proxied) { switch (r_typeof(x)) { // Atomic types are always vectors case R_TYPE_null: return VCTRS_TYPE_null; case R_TYPE_logical: return VCTRS_TYPE_logical; case R_TYPE_integer: return VCTRS_TYPE_integer; case R_TYPE_double: return VCTRS_TYPE_double; case R_TYPE_complex: return VCTRS_TYPE_complex; case R_TYPE_character: return VCTRS_TYPE_character; case R_TYPE_raw: return VCTRS_TYPE_raw; case R_TYPE_list: // Bare lists and data frames are vectors if (!r_is_object(x)) return VCTRS_TYPE_list; if (is_data_frame(x)) return VCTRS_TYPE_dataframe; // S3 lists are only vectors if they are proxied if (proxied || r_inherits(x, "list")) return VCTRS_TYPE_list; // fallthrough default: return VCTRS_TYPE_scalar; } } enum vctrs_type vec_proxy_typeof(r_obj* x) { return vec_base_typeof(x, true); } // [[ register() ]] r_obj* ffi_obj_is_list(r_obj* x) { return r_lgl(obj_is_list(x)); } bool obj_is_list(r_obj* x) { // Require `x` to be a list internally if (r_typeof(x) != R_TYPE_list) { return false; } // Unclassed R_TYPE_list are lists if (!r_is_object(x)) { return true; } const enum vctrs_class_type type = class_type(x); // Classed R_TYPE_list are only lists if the last class is explicitly `"list"` // or if it is a bare "AsIs" type return (type == VCTRS_CLASS_list) || (type == VCTRS_CLASS_bare_asis); } r_obj* ffi_obj_is_vector(r_obj* x) { return r_lgl(obj_is_vector(x)); } bool obj_is_vector(r_obj* x) { if (x == r_null) { return false; } struct vctrs_proxy_info info = vec_proxy_info(x); return info.type != VCTRS_TYPE_scalar; } // [[ register() ]] r_obj* ffi_list_all_vectors(r_obj* x, r_obj* frame) { obj_check_list(x, vec_args.x, (struct r_lazy) { frame, r_null }); return r_lgl(list_all_vectors(x)); } bool list_all_vectors(r_obj* x) { if (r_typeof(x) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(x)); } // TODO: Use `r_list_all_of(x, &obj_is_vector)` when we add it back in const r_ssize size = r_length(x); r_obj* const* v_x = r_list_cbegin(x); for (r_ssize i = 0; i < size; ++i) { r_obj* elt = v_x[i]; if (!obj_is_vector(elt)) { return false; } } return true; } // [[ register() ]] r_obj* vctrs_typeof(r_obj* x, r_obj* dispatch) { enum vctrs_type type; if (r_lgl_get(dispatch, 0)) { type = vec_proxy_info(x).type; } else { type = vec_typeof(x); } return r_chr(vec_type_as_str(type)); } enum vctrs_type vec_typeof(r_obj* x) { // Check for unspecified vectors before `vec_base_typeof()` which // allows vectors of `NA` to pass through as `VCTRS_TYPE_logical` if (vec_is_unspecified(x)) { return VCTRS_TYPE_unspecified; } if (!r_is_object(x) || r_class(x) == r_null) { return vec_base_typeof(x, false); } // Bare data frames are treated as a base atomic type. Subclasses of // data frames are treated as S3 to give them a chance to be proxied // or implement their own methods for cast, type2, etc. if (is_bare_data_frame(x)) { return VCTRS_TYPE_dataframe; } return VCTRS_TYPE_s3; } r_no_return void stop_unimplemented_vctrs_type(const char* fn, enum vctrs_type type) { r_stop_internal("Unsupported vctrs type `%s`.", vec_type_as_str(type)); } const char* vec_type_as_str(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return "null"; case VCTRS_TYPE_unspecified: return "unspecified"; case VCTRS_TYPE_logical: return "logical"; case VCTRS_TYPE_integer: return "integer"; case VCTRS_TYPE_double: return "double"; case VCTRS_TYPE_complex: return "complex"; case VCTRS_TYPE_character: return "character"; case VCTRS_TYPE_raw: return "raw"; case VCTRS_TYPE_list: return "list"; case VCTRS_TYPE_dataframe: return "dataframe"; case VCTRS_TYPE_s3: return "s3"; case VCTRS_TYPE_scalar: return "scalar"; } never_reached("vec_type_as_str"); } void vctrs_init_type_info(r_obj* ns) { } vctrs/src/rep.c0000644000176200001440000002400414363556517013153 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/rep-decl.h" r_obj* vec_rep(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg) { check_rep_times(times, error_call, p_times_arg); if (times == 1) { return x; } const r_ssize times_ = (r_ssize) times; const r_ssize x_size = vec_size(x); if (x_size == 1) { return vec_check_recycle(x, times_, p_x_arg, error_call); } if (multiply_would_overflow(x_size, times_)) { stop_rep_size_oob(error_call); }; const r_ssize size = x_size * times_; r_obj* subscript = KEEP(r_alloc_integer(size)); int* v_subscript = r_int_begin(subscript); r_ssize k = 0; for (r_ssize i = 0; i < times_; ++i) { for (r_ssize j = 1; j <= x_size; ++j, ++k) { v_subscript[k] = j; } } r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } r_obj* ffi_vec_rep(r_obj* x, r_obj* ffi_times, r_obj* frame) { struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy times_arg_lazy = { .x = syms.times_arg, .env = frame }; struct vctrs_arg times_arg = new_lazy_arg(×_arg_lazy); ffi_times = KEEP(vec_cast(ffi_times, r_globals.empty_int, ×_arg, vec_args.empty, error_call)); if (vec_size(ffi_times) != 1) { stop_rep_times_size(error_call, ×_arg); } const int times = r_int_get(ffi_times, 0); r_obj* out = vec_rep(x, times, error_call, &x_arg, ×_arg); FREE(1); return out; } // ----------------------------------------------------------------------------- r_obj* vec_rep_each(r_obj* x, r_obj* times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg) { times = KEEP(vec_cast(times, r_globals.empty_int, p_times_arg, vec_args.empty, error_call)); const r_ssize times_size = vec_size(times); r_obj* out; if (times_size == 1) { const int times_ = r_int_get(times, 0); if (times_ == 1) { out = x; } else if (times_ == 0) { out = vec_slice_unsafe(x, r_globals.empty_int); } else { out = vec_rep_each_uniform(x, times_, error_call, p_times_arg); } } else { out = vec_rep_each_impl(x, times, times_size, error_call, p_times_arg); } FREE(1); return out; } r_obj* ffi_vec_rep_each(r_obj* x, r_obj* times, r_obj* frame) { struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.times_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy times_arg_lazy = { .x = syms.times_arg, .env = frame }; struct vctrs_arg times_arg = new_lazy_arg(×_arg_lazy); return vec_rep_each(x, times, error_call, &x_arg, ×_arg); } // ----------------------------------------------------------------------------- static r_obj* vec_rep_each_uniform(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_times_arg) { check_rep_each_times(times, 1, error_call, p_times_arg); const r_ssize times_ = (r_ssize) times; const r_ssize x_size = vec_size(x); if (multiply_would_overflow(x_size, times_)) { stop_rep_size_oob(error_call); }; const r_ssize size = x_size * times_; r_obj* subscript = KEEP(r_alloc_integer(size)); int* v_subscript = r_int_begin(subscript); r_ssize k = 0; for (r_ssize i = 1; i <= x_size; ++i) { for (r_ssize j = 0; j < times_; ++j, ++k) { v_subscript[k] = i; } } r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } static r_obj* vec_rep_each_impl(r_obj* x, r_obj* times, const r_ssize times_size, struct r_lazy error_call, struct vctrs_arg* p_times_arg) { const r_ssize x_size = vec_size(x); if (x_size != times_size) { stop_recycle_incompatible_size(times_size, x_size, p_times_arg, error_call); } const int* v_times = r_int_cbegin(times); r_ssize size = 0; for (r_ssize i = 0; i < times_size; ++i) { const int elt_times = v_times[i]; check_rep_each_times(elt_times, i + 1, error_call, p_times_arg); const r_ssize elt_times_ = (r_ssize) elt_times; if (plus_would_overflow(size, elt_times_)) { stop_rep_size_oob(error_call); } size += elt_times_; } r_obj* subscript = KEEP(r_alloc_integer(size)); int* v_subscript = r_int_begin(subscript); r_ssize k = 0; for (r_ssize i = 1; i <= x_size; ++i) { const r_ssize elt_times = (r_ssize) v_times[i - 1]; for (r_ssize j = 0; j < elt_times; ++j, ++k) { v_subscript[k] = i; } } r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } // ----------------------------------------------------------------------------- // TODO: Modify for long vectors with `R_XLEN_T_MAX` and `R_xlen_t`. static inline bool times_is_oob(int times) { return times > R_LEN_T_MAX; } // Only useful for positive or zero inputs static inline bool multiply_would_overflow(r_ssize x, r_ssize y) { return (double) x * y > R_LEN_T_MAX; } // Only useful for positive or zero inputs static inline bool plus_would_overflow(r_ssize x, r_ssize y) { return x > R_LEN_T_MAX - y; } // ----------------------------------------------------------------------------- static inline void check_rep_times(int times, struct r_lazy call, struct vctrs_arg* p_times_arg) { if (times < 0) { if (times == r_globals.na_int) { stop_rep_times_missing(call, p_times_arg); } else { stop_rep_times_negative(call, p_times_arg); } } else if (times_is_oob(times)) { stop_rep_times_oob(times, call, p_times_arg); } } static inline void stop_rep_times_negative(struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s must be a positive number.", vec_arg_format(p_times_arg)); } static inline void stop_rep_times_missing(struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s can't be missing.", vec_arg_format(p_times_arg)); } // Not currently thrown since `r_ssize == int`, but might be once // long vectors are supported static inline void stop_rep_times_oob(int times, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call( call, "%s must be less than %i, not %i.", vec_arg_format(p_times_arg), R_LEN_T_MAX, times ); } // ----------------------------------------------------------------------------- static inline void check_rep_each_times(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { if (times < 0) { if (times == r_globals.na_int) { stop_rep_each_times_missing(i, call, p_times_arg); } else { stop_rep_each_times_negative(i, call, p_times_arg); } } else if (times_is_oob(times)) { stop_rep_each_times_oob(times, i, call, p_times_arg); } } static inline void stop_rep_each_times_negative(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s must be a vector of positive numbers. Location %i is negative.", vec_arg_format(p_times_arg), i); } static inline void stop_rep_each_times_missing(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s can't be missing. Location %i is missing.", vec_arg_format(p_times_arg), i); } // Not currently thrown since `r_ssize == int`, but might be once // long vectors are supported static inline void stop_rep_each_times_oob(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call( call, "%s must be less than %i, not %i. ", "Location %i is too large.", vec_arg_format(p_times_arg), R_LEN_T_MAX, times, i ); } static inline void stop_rep_size_oob(struct r_lazy call) { r_abort_lazy_call( call, "Long vectors are not yet supported. " "Requested output size must be less than %i.", R_LEN_T_MAX ); } static inline void stop_rep_times_size(struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s must be a single number.", vec_arg_format(p_times_arg)); } // ----------------------------------------------------------------------------- static r_obj* vec_unrep(r_obj* x, struct r_lazy error_call) { r_obj* times = KEEP(vec_run_sizes(x, error_call)); const int* v_times = r_int_cbegin(times); const r_ssize size = r_length(times); r_obj* loc = KEEP(r_alloc_integer(size)); int* v_loc = r_int_begin(loc); r_ssize current = 1; for (r_ssize i = 0; i < size; ++i) { v_loc[i] = current; current += v_times[i]; } r_obj* out = KEEP(r_new_list(2)); r_list_poke(out, 0, vec_slice_unsafe(x, loc)); r_list_poke(out, 1, times); r_obj* names = r_new_character(2); r_attrib_poke_names(out, names); r_chr_poke(names, 0, strings_key); r_chr_poke(names, 1, strings_times); init_data_frame(out, size); FREE(3); return out; } r_obj* ffi_vec_unrep(r_obj* x, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; return vec_unrep(x, error_call); } // ----------------------------------------------------------------------------- void vctrs_init_rep(r_obj* ns) { } vctrs/src/callables.c0000644000176200001440000000202714402367170014275 0ustar liggesusers#include "vctrs.h" // ----------------------------------------------------------------------------- // Maturing R_len_t short_vec_size(SEXP x) { return vec_size(x); } SEXP short_vec_recycle(SEXP x, R_len_t size) { return vec_recycle(x, size); } // ----------------------------------------------------------------------------- // Experimental SEXP exp_vec_cast(SEXP x, SEXP to) { return vec_cast(x, to, vec_args.empty, vec_args.empty, r_lazy_null); } SEXP exp_vec_chop(SEXP x, SEXP indices) { return vec_chop_unsafe(x, indices, r_null); } SEXP exp_vec_slice_impl(SEXP x, SEXP subscript) { return vec_slice_unsafe(x, subscript); } SEXP exp_vec_names(SEXP x) { return vec_names(x); } SEXP exp_vec_set_names(SEXP x, SEXP names) { return vec_set_names(x, names); } SEXP exp_short_compact_seq(R_len_t start, R_len_t size, bool increasing) { return compact_seq(start, size, increasing); } void exp_short_init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing) { init_compact_seq(p, start, size, increasing); } vctrs/src/init.c0000644000176200001440000006340414511320527013320 0ustar liggesusers#include "vctrs.h" #include "altrep-rle.h" #include #include // for NULL #include // Compile with `-fvisibility=hidden -DHAVE_VISIBILITY_ATTRIBUTE` if you link to this library #include #define export attribute_visible extern extern SEXP vctrs_list_get(SEXP, SEXP); extern SEXP vctrs_list_set(SEXP, SEXP, SEXP); extern SEXP vctrs_field_get(SEXP, SEXP); extern SEXP vctrs_field_set(SEXP, SEXP, SEXP); extern SEXP vctrs_fields(SEXP); extern SEXP vctrs_n_fields(SEXP); extern SEXP vctrs_hash(SEXP); extern SEXP vctrs_hash_object(SEXP); extern SEXP vctrs_equal_object(SEXP, SEXP); extern SEXP vctrs_duplicated(SEXP); extern SEXP vctrs_unique_loc(SEXP); extern SEXP vctrs_count(SEXP); extern SEXP vctrs_id(SEXP); extern SEXP vctrs_n_distinct(SEXP); extern SEXP vec_split(SEXP, SEXP); extern SEXP vctrs_group_id(SEXP); extern SEXP vctrs_group_rle(SEXP); extern SEXP vec_group_loc(SEXP); extern SEXP vctrs_equal(SEXP, SEXP, SEXP); extern r_obj* ffi_vec_detect_missing(r_obj*); extern r_obj* ffi_vec_any_missing(r_obj* x); extern r_obj* ffi_vec_compare(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_match(SEXP, SEXP, SEXP, SEXP); extern r_obj* vctrs_in(r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_duplicated_any(SEXP); extern r_obj* ffi_size(r_obj*, r_obj*); extern r_obj* ffi_list_sizes(r_obj*, r_obj*); extern SEXP vctrs_dim(SEXP); extern SEXP vctrs_dim_n(SEXP); extern SEXP vctrs_is_unspecified(SEXP); extern SEXP vctrs_typeof(SEXP, SEXP); extern r_obj* ffi_obj_is_vector(r_obj*); extern r_obj* ffi_obj_check_vector(r_obj*, r_obj*); extern r_obj* ffi_vec_check_size(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_ptype2(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_typeof2(r_obj*, r_obj*); extern r_obj* ffi_typeof2_s3(r_obj*, r_obj*); extern r_obj* ffi_cast(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_location(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_slice(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_init(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_chop(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_unchop(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_chop_seq(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_slice_seq(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_slice_rep(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_restore(r_obj*, r_obj*); extern r_obj* ffi_vec_restore_recurse(r_obj*, r_obj*); extern r_obj* ffi_vec_restore_default(r_obj*, r_obj*); extern SEXP vec_proxy_equal(SEXP); extern SEXP vec_proxy_compare(SEXP); extern SEXP vec_proxy_order(SEXP); extern r_obj* ffi_df_proxy(r_obj*, r_obj*); extern SEXP vctrs_unspecified(SEXP); extern r_obj* ffi_ptype(r_obj*, r_obj*, r_obj*); extern SEXP vec_ptype_finalise(SEXP); extern r_obj* ffi_minimal_names(r_obj*); extern r_obj* ffi_unique_names(r_obj*, r_obj*); extern SEXP ffi_as_minimal_names(SEXP); extern SEXP vec_names(SEXP); extern SEXP vctrs_is_unique_names(SEXP); extern SEXP vctrs_as_unique_names(SEXP, SEXP); extern SEXP vec_set_names(SEXP, SEXP); extern r_obj* ffi_df_cast_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_df_ptype2_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_type_info(r_obj*); extern SEXP ffi_proxy_info(SEXP); extern r_obj* ffi_class_type(r_obj*); extern r_obj* ffi_vec_bare_df_restore(r_obj*, r_obj*); extern r_obj* ffi_recycle(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_assign(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_assign_seq(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_set_attributes(SEXP, SEXP); extern r_obj* ffi_as_df_row(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_outer_names(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_df_size(SEXP); extern r_obj* ffi_as_df_col(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_apply_name_spec(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_unset_s4(r_obj*); extern SEXP vctrs_validate_name_repair_arg(SEXP); extern SEXP vctrs_validate_minimal_names(SEXP, SEXP); extern r_obj* ffi_vec_as_names(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_is_partial(r_obj*); extern r_obj* ffi_obj_is_list(r_obj*); extern SEXP vctrs_try_catch_callback(SEXP, SEXP); extern r_obj* ffi_is_coercible(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_subscript(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_subscript_result(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_df_flatten_info(r_obj*); extern r_obj* df_flatten(r_obj*); extern SEXP vctrs_linked_version(void); extern r_obj* ffi_tib_ptype2(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_tib_cast(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_assign_params(r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_has_dim(SEXP); extern r_obj* ffi_vec_rep(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_rep_each(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_maybe_shared_col(SEXP, SEXP); extern SEXP vctrs_new_df_unshared_col(void); extern r_obj* ffi_vec_shaped_ptype(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_shape2(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_new_date(SEXP); extern SEXP vctrs_date_validate(SEXP); extern SEXP vctrs_new_datetime(SEXP, SEXP); extern SEXP vctrs_datetime_validate(SEXP); extern r_obj* ffi_ptype2_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_s3_find_method(SEXP, SEXP, SEXP); extern SEXP vctrs_implements_ptype2(SEXP); extern r_obj* ffi_ptype2_dispatch_native(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cast_dispatch_native(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_fast_c(SEXP, SEXP); extern r_obj* ffi_data_frame(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_df_list(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_detect_run_bounds(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_locate_run_bounds(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_identify_runs(r_obj*, r_obj*); extern r_obj* ffi_vec_run_sizes(r_obj*, r_obj*); extern SEXP vctrs_slice_complete(SEXP); extern SEXP vctrs_locate_complete(SEXP); extern SEXP vctrs_detect_complete(SEXP); extern SEXP vctrs_normalize_encoding(SEXP); extern SEXP vctrs_order(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_locate_sorted_groups(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_order_info(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern r_obj* ffi_vec_unrep(r_obj*, r_obj*); extern SEXP vctrs_fill_missing(SEXP, SEXP, SEXP); extern r_obj* ffi_chr_paste_prefix(r_obj*, r_obj*, r_obj*); extern r_obj* vctrs_rank(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* vctrs_integer64_proxy(r_obj*); extern r_obj* vctrs_integer64_restore(r_obj*); extern r_obj* vctrs_list_drop_empty(r_obj*); extern r_obj* vctrs_is_altrep(r_obj* x); extern r_obj* ffi_interleave_indices(r_obj*, r_obj*); extern r_obj* ffi_compute_nesting_container_info(r_obj*, r_obj*); extern r_obj* ffi_locate_matches(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_groups(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_locate_groups(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_complement(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_locate_containers(r_obj*, r_obj*); extern r_obj* ffi_check_list(r_obj*, r_obj*); extern r_obj* ffi_list_all_vectors(r_obj*, r_obj*); extern r_obj* ffi_list_check_all_vectors(r_obj*, r_obj*); extern r_obj* ffi_as_short_length(r_obj*, r_obj*); extern r_obj* ffi_s3_get_method(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_all_size(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_check_all_size(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_intersect(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_difference(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_union(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_symmetric_difference(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_expand_grid(r_obj*, r_obj*, r_obj*, r_obj*); // Maturing // In the public header extern bool obj_is_vector(SEXP); extern R_len_t short_vec_size(SEXP); extern SEXP short_vec_recycle(SEXP, R_len_t); // Experimental // Exported but not available in the public header extern SEXP exp_vec_cast(SEXP, SEXP); extern SEXP exp_vec_chop(SEXP, SEXP); extern SEXP exp_vec_slice_impl(SEXP, SEXP); extern SEXP exp_vec_names(SEXP); extern SEXP exp_vec_set_names(SEXP, SEXP); extern SEXP exp_short_compact_seq(R_len_t, R_len_t, bool); extern SEXP exp_short_init_compact_seq(int*, R_len_t, R_len_t, bool); // Defined below SEXP vctrs_init_library(SEXP); // Defined in altrep-rle.h extern SEXP altrep_rle_Make(SEXP); extern SEXP altrep_rle_is_materialized(SEXP); void vctrs_init_altrep_rle(DllInfo*); // Defined in altrep-lazy-character.c extern r_obj* ffi_altrep_new_lazy_character(r_obj*); extern r_obj* ffi_altrep_lazy_character_is_materialized(r_obj*); extern void vctrs_init_altrep_lazy_character(DllInfo*); static const R_CallMethodDef CallEntries[] = { {"vctrs_list_get", (DL_FUNC) &vctrs_list_get, 2}, {"vctrs_list_set", (DL_FUNC) &vctrs_list_set, 3}, {"vctrs_field_get", (DL_FUNC) &vctrs_field_get, 2}, {"vctrs_field_set", (DL_FUNC) &vctrs_field_set, 3}, {"vctrs_fields", (DL_FUNC) &vctrs_fields, 1}, {"vctrs_n_fields", (DL_FUNC) &vctrs_n_fields, 1}, {"vctrs_hash", (DL_FUNC) &vctrs_hash, 1}, {"vctrs_hash_object", (DL_FUNC) &vctrs_hash_object, 1}, {"vctrs_equal_object", (DL_FUNC) &vctrs_equal_object, 2}, {"vctrs_unique_loc", (DL_FUNC) &vctrs_unique_loc, 1}, {"vctrs_duplicated", (DL_FUNC) &vctrs_duplicated, 1}, {"vctrs_duplicated_any", (DL_FUNC) &vctrs_duplicated_any, 1}, {"vctrs_count", (DL_FUNC) &vctrs_count, 1}, {"vctrs_id", (DL_FUNC) &vctrs_id, 1}, {"vctrs_n_distinct", (DL_FUNC) &vctrs_n_distinct, 1}, {"vctrs_split", (DL_FUNC) &vec_split, 2}, {"vctrs_group_id", (DL_FUNC) &vctrs_group_id, 1}, {"vctrs_group_rle", (DL_FUNC) &vctrs_group_rle, 1}, {"vctrs_group_loc", (DL_FUNC) &vec_group_loc, 1}, {"ffi_size", (DL_FUNC) &ffi_size, 2}, {"ffi_list_sizes", (DL_FUNC) &ffi_list_sizes, 2}, {"vctrs_dim", (DL_FUNC) &vctrs_dim, 1}, {"vctrs_dim_n", (DL_FUNC) &vctrs_dim_n, 1}, {"vctrs_is_unspecified", (DL_FUNC) &vctrs_is_unspecified, 1}, {"vctrs_equal", (DL_FUNC) &vctrs_equal, 3}, {"ffi_vec_detect_missing", (DL_FUNC) &ffi_vec_detect_missing, 1}, {"ffi_vec_any_missing", (DL_FUNC) &ffi_vec_any_missing, 1}, {"ffi_vec_compare", (DL_FUNC) &ffi_vec_compare, 3}, {"vctrs_match", (DL_FUNC) &vctrs_match, 4}, {"vctrs_in", (DL_FUNC) &vctrs_in, 4}, {"vctrs_typeof", (DL_FUNC) &vctrs_typeof, 2}, {"vctrs_init_library", (DL_FUNC) &vctrs_init_library, 1}, {"ffi_obj_is_vector", (DL_FUNC) &ffi_obj_is_vector, 1}, {"ffi_obj_check_vector", (DL_FUNC) &ffi_obj_check_vector, 2}, {"ffi_vec_check_size", (DL_FUNC) &ffi_vec_check_size, 3}, {"ffi_ptype2", (DL_FUNC) &ffi_ptype2, 3}, {"ffi_typeof2", (DL_FUNC) &ffi_typeof2, 2}, {"ffi_typeof2_s3", (DL_FUNC) &ffi_typeof2_s3, 2}, {"ffi_cast", (DL_FUNC) &ffi_cast, 3}, {"ffi_as_location", (DL_FUNC) &ffi_as_location, 8}, {"ffi_slice", (DL_FUNC) &ffi_slice, 3}, {"ffi_init", (DL_FUNC) &ffi_init, 3}, {"ffi_vec_chop", (DL_FUNC) &ffi_vec_chop, 3}, {"ffi_list_unchop", (DL_FUNC) &ffi_list_unchop, 6}, {"ffi_vec_chop_seq", (DL_FUNC) &ffi_vec_chop_seq, 4}, {"ffi_slice_seq", (DL_FUNC) &ffi_slice_seq, 4}, {"ffi_slice_rep", (DL_FUNC) &ffi_slice_rep, 3}, {"ffi_vec_restore", (DL_FUNC) &ffi_vec_restore, 2}, {"ffi_vec_restore_recurse", (DL_FUNC) &ffi_vec_restore_recurse, 2}, {"ffi_vec_restore_default", (DL_FUNC) &ffi_vec_restore_default, 2}, {"ffi_vec_proxy", (DL_FUNC) &vec_proxy, 1}, {"ffi_vec_proxy_recurse", (DL_FUNC) &vec_proxy_recurse, 1}, {"vctrs_proxy_equal", (DL_FUNC) &vec_proxy_equal, 1}, {"vctrs_proxy_compare", (DL_FUNC) &vec_proxy_compare, 1}, {"vctrs_proxy_order", (DL_FUNC) &vec_proxy_order, 1}, {"ffi_df_proxy", (DL_FUNC) &ffi_df_proxy, 2}, {"vctrs_unspecified", (DL_FUNC) &vctrs_unspecified, 1}, {"ffi_ptype", (DL_FUNC) &ffi_ptype, 3}, {"vctrs_ptype_finalise", (DL_FUNC) &vec_ptype_finalise, 1}, {"ffi_minimal_names", (DL_FUNC) &ffi_minimal_names, 1}, {"ffi_unique_names", (DL_FUNC) &ffi_unique_names, 2}, {"ffi_as_minimal_names", (DL_FUNC) &ffi_as_minimal_names, 1}, {"vctrs_names", (DL_FUNC) &vec_names, 1}, {"vctrs_is_unique_names", (DL_FUNC) &vctrs_is_unique_names, 1}, {"vctrs_as_unique_names", (DL_FUNC) &vctrs_as_unique_names, 2}, {"vctrs_set_names", (DL_FUNC) &vec_set_names, 2}, {"ffi_df_cast_opts", (DL_FUNC) &ffi_df_cast_opts, 4}, {"ffi_df_ptype2_opts", (DL_FUNC) &ffi_df_ptype2_opts, 4}, {"ffi_type_info", (DL_FUNC) &ffi_type_info, 1}, {"ffi_proxy_info", (DL_FUNC) &ffi_proxy_info, 1}, {"ffi_class_type", (DL_FUNC) &ffi_class_type, 1}, {"ffi_vec_bare_df_restore", (DL_FUNC) &ffi_vec_bare_df_restore, 2}, {"ffi_recycle", (DL_FUNC) &ffi_recycle, 3}, {"ffi_assign", (DL_FUNC) &ffi_assign, 4}, {"ffi_assign_seq", (DL_FUNC) &ffi_assign_seq, 5}, {"vctrs_set_attributes", (DL_FUNC) &vctrs_set_attributes, 2}, {"ffi_as_df_row", (DL_FUNC) &ffi_as_df_row, 3}, {"ffi_outer_names", (DL_FUNC) &ffi_outer_names, 3}, {"vctrs_df_size", (DL_FUNC) &vctrs_df_size, 1}, {"ffi_as_df_col", (DL_FUNC) &ffi_as_df_col, 3}, {"ffi_apply_name_spec", (DL_FUNC) &ffi_apply_name_spec, 4}, {"ffi_unset_s4", (DL_FUNC) &ffi_unset_s4, 1}, {"vctrs_altrep_rle_Make", (DL_FUNC) &altrep_rle_Make, 1}, {"vctrs_altrep_rle_is_materialized", (DL_FUNC) &altrep_rle_is_materialized, 1}, {"ffi_altrep_new_lazy_character", (DL_FUNC) &ffi_altrep_new_lazy_character, 1}, {"ffi_altrep_lazy_character_is_materialized", (DL_FUNC) &ffi_altrep_lazy_character_is_materialized, 1}, {"vctrs_validate_name_repair_arg", (DL_FUNC) &vctrs_validate_name_repair_arg, 1}, {"vctrs_validate_minimal_names", (DL_FUNC) &vctrs_validate_minimal_names, 2}, {"ffi_vec_as_names", (DL_FUNC) &ffi_vec_as_names, 4}, {"ffi_is_partial", (DL_FUNC) &ffi_is_partial, 1}, {"ffi_obj_is_list", (DL_FUNC) &ffi_obj_is_list, 1}, {"vctrs_try_catch_callback", (DL_FUNC) &vctrs_try_catch_callback, 2}, {"ffi_is_coercible", (DL_FUNC) &ffi_is_coercible, 4}, {"ffi_as_subscript", (DL_FUNC) &ffi_as_subscript, 5}, {"ffi_as_subscript_result", (DL_FUNC) &ffi_as_subscript_result, 5}, {"ffi_df_flatten_info", (DL_FUNC) &ffi_df_flatten_info, 1}, {"ffi_df_flatten", (DL_FUNC) &df_flatten, 1}, {"vctrs_linked_version", (DL_FUNC) &vctrs_linked_version, 0}, {"ffi_tib_ptype2", (DL_FUNC) &ffi_tib_ptype2, 5}, {"ffi_tib_cast", (DL_FUNC) &ffi_tib_cast, 5}, {"ffi_assign_params", (DL_FUNC) &ffi_assign_params, 4}, {"vctrs_has_dim", (DL_FUNC) &vctrs_has_dim, 1}, {"ffi_vec_rep", (DL_FUNC) &ffi_vec_rep, 3}, {"ffi_vec_rep_each", (DL_FUNC) &ffi_vec_rep_each, 3}, {"vctrs_maybe_shared_col", (DL_FUNC) &vctrs_maybe_shared_col, 2}, {"vctrs_new_df_unshared_col", (DL_FUNC) &vctrs_new_df_unshared_col, 0}, {"ffi_vec_shaped_ptype", (DL_FUNC) &ffi_vec_shaped_ptype, 4}, {"ffi_vec_shape2", (DL_FUNC) &ffi_vec_shape2, 3}, {"vctrs_new_date", (DL_FUNC) &vctrs_new_date, 1}, {"vctrs_date_validate", (DL_FUNC) &vctrs_date_validate, 1}, {"vctrs_new_datetime", (DL_FUNC) &vctrs_new_datetime, 2}, {"vctrs_datetime_validate", (DL_FUNC) &vctrs_datetime_validate, 1}, {"ffi_ptype2_opts", (DL_FUNC) &ffi_ptype2_opts, 4}, {"vctrs_s3_find_method", (DL_FUNC) &vctrs_s3_find_method, 3}, {"vctrs_implements_ptype2", (DL_FUNC) &vctrs_implements_ptype2, 1}, {"ffi_ptype2_dispatch_native", (DL_FUNC) &ffi_ptype2_dispatch_native, 4}, {"ffi_cast_dispatch_native", (DL_FUNC) &ffi_cast_dispatch_native, 6}, {"vctrs_fast_c", (DL_FUNC) &vctrs_fast_c, 2}, {"ffi_data_frame", (DL_FUNC) &ffi_data_frame, 4}, {"ffi_df_list", (DL_FUNC) &ffi_df_list, 5}, {"ffi_vec_detect_run_bounds", (DL_FUNC) &ffi_vec_detect_run_bounds, 3}, {"ffi_vec_locate_run_bounds", (DL_FUNC) &ffi_vec_locate_run_bounds, 3}, {"ffi_vec_identify_runs", (DL_FUNC) &ffi_vec_identify_runs, 2}, {"ffi_vec_run_sizes", (DL_FUNC) &ffi_vec_run_sizes, 2}, {"vctrs_slice_complete", (DL_FUNC) &vctrs_slice_complete, 1}, {"vctrs_locate_complete", (DL_FUNC) &vctrs_locate_complete, 1}, {"vctrs_detect_complete", (DL_FUNC) &vctrs_detect_complete, 1}, {"vctrs_normalize_encoding", (DL_FUNC) &vctrs_normalize_encoding, 1}, {"vctrs_order", (DL_FUNC) &vctrs_order, 5}, {"vctrs_locate_sorted_groups", (DL_FUNC) &vctrs_locate_sorted_groups, 5}, {"vctrs_order_info", (DL_FUNC) &vctrs_order_info, 6}, {"ffi_vec_unrep", (DL_FUNC) &ffi_vec_unrep, 2}, {"vctrs_fill_missing", (DL_FUNC) &vctrs_fill_missing, 3}, {"ffi_chr_paste_prefix", (DL_FUNC) &ffi_chr_paste_prefix, 3}, {"vctrs_rank", (DL_FUNC) &vctrs_rank, 7}, {"vctrs_integer64_proxy", (DL_FUNC) &vctrs_integer64_proxy, 1}, {"vctrs_integer64_restore", (DL_FUNC) &vctrs_integer64_restore, 1}, {"vctrs_list_drop_empty", (DL_FUNC) &vctrs_list_drop_empty, 1}, {"vctrs_is_altrep", (DL_FUNC) &vctrs_is_altrep, 1}, {"ffi_interleave_indices", (DL_FUNC) &ffi_interleave_indices, 2}, {"ffi_compute_nesting_container_info", (DL_FUNC) &ffi_compute_nesting_container_info, 2}, {"ffi_locate_matches", (DL_FUNC) &ffi_locate_matches, 14}, {"ffi_interval_groups", (DL_FUNC) &ffi_interval_groups, 4}, {"ffi_interval_locate_groups", (DL_FUNC) &ffi_interval_locate_groups, 4}, {"ffi_interval_complement", (DL_FUNC) &ffi_interval_complement, 4}, {"ffi_interval_locate_containers", (DL_FUNC) &ffi_interval_locate_containers, 2}, {"ffi_check_list", (DL_FUNC) &ffi_check_list, 2}, {"ffi_list_all_vectors", (DL_FUNC) &ffi_list_all_vectors, 2}, {"ffi_list_check_all_vectors", (DL_FUNC) &ffi_list_check_all_vectors, 2}, {"ffi_as_short_length", (DL_FUNC) &ffi_as_short_length, 2}, {"ffi_s3_get_method", (DL_FUNC) &ffi_s3_get_method, 3}, {"ffi_list_all_size", (DL_FUNC) &ffi_list_all_size, 3}, {"ffi_list_check_all_size", (DL_FUNC) &ffi_list_check_all_size, 3}, {"ffi_vec_set_intersect", (DL_FUNC) &ffi_vec_set_intersect, 4}, {"ffi_vec_set_difference", (DL_FUNC) &ffi_vec_set_difference, 4}, {"ffi_vec_set_union", (DL_FUNC) &ffi_vec_set_union, 4}, {"ffi_vec_set_symmetric_difference", (DL_FUNC) &ffi_vec_set_symmetric_difference, 4}, {"ffi_vec_expand_grid", (DL_FUNC) &ffi_vec_expand_grid, 4}, {"ffi_exp_vec_cast", (DL_FUNC) &exp_vec_cast, 2}, {NULL, NULL, 0} }; extern r_obj* ffi_ptype_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_ptype_common_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_size_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_recycle_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cast_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cast_common_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_rbind(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cbind(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_c(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_new_data_frame(r_obj*); static const R_ExternalMethodDef ExtEntries[] = { {"ffi_ptype_common", (DL_FUNC) &ffi_ptype_common, 1}, {"ffi_ptype_common_opts", (DL_FUNC) &ffi_ptype_common_opts, 2}, {"ffi_size_common", (DL_FUNC) &ffi_size_common, 2}, {"ffi_recycle_common", (DL_FUNC) &ffi_recycle_common, 1}, {"ffi_cast_common", (DL_FUNC) &ffi_cast_common, 1}, {"ffi_cast_common_opts", (DL_FUNC) &ffi_cast_common_opts, 2}, {"ffi_rbind", (DL_FUNC) &ffi_rbind, 4}, {"ffi_cbind", (DL_FUNC) &ffi_cbind, 3}, {"ffi_vec_c", (DL_FUNC) &ffi_vec_c, 3}, {"ffi_new_data_frame", (DL_FUNC) &ffi_new_data_frame, -1}, {NULL, NULL, 0} }; export void R_init_vctrs(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, ExtEntries); R_useDynamicSymbols(dll, FALSE); // Maturing // In the public header R_RegisterCCallable("vctrs", "obj_is_vector", (DL_FUNC) &obj_is_vector); R_RegisterCCallable("vctrs", "short_vec_size", (DL_FUNC) &short_vec_size); R_RegisterCCallable("vctrs", "short_vec_recycle", (DL_FUNC) &short_vec_recycle); // Deprecated // In the public header // See `inst/include/vctrs.h` for details R_RegisterCCallable("vctrs", "vec_is_vector", (DL_FUNC) &obj_is_vector); // Experimental // Exported but not available in the public header R_RegisterCCallable("vctrs", "exp_vec_cast", (DL_FUNC) &exp_vec_cast); R_RegisterCCallable("vctrs", "exp_vec_chop", (DL_FUNC) &exp_vec_chop); R_RegisterCCallable("vctrs", "exp_vec_slice_impl", (DL_FUNC) &exp_vec_slice_impl); R_RegisterCCallable("vctrs", "exp_vec_names", (DL_FUNC) &exp_vec_names); R_RegisterCCallable("vctrs", "exp_vec_set_names", (DL_FUNC) &exp_vec_set_names); R_RegisterCCallable("vctrs", "exp_short_compact_seq", (DL_FUNC) &exp_short_compact_seq); R_RegisterCCallable("vctrs", "exp_short_init_compact_seq", (DL_FUNC) &exp_short_init_compact_seq); // Altrep classes vctrs_init_altrep_rle(dll); vctrs_init_altrep_lazy_character(dll); } void vctrs_init_bind(SEXP ns); void vctrs_init_cast(SEXP ns); void vctrs_init_data(SEXP ns); void vctrs_init_dictionary(SEXP ns); void vctrs_init_interval(r_obj* ns); void vctrs_init_match(r_obj* ns); void vctrs_init_names(SEXP ns); void vctrs_init_proxy_restore(SEXP ns); void vctrs_init_slice(SEXP ns); void vctrs_init_slice_assign(SEXP ns); void vctrs_init_subscript(SEXP ns); void vctrs_init_subscript_loc(SEXP ns); void vctrs_init_ptype(r_obj* ns); void vctrs_init_ptype2(SEXP ns); void vctrs_init_ptype2_dispatch(SEXP ns); void vctrs_init_rep(SEXP ns); void vctrs_init_type_data_frame(SEXP ns); void vctrs_init_type_date_time(SEXP ns); void vctrs_init_type_info(SEXP ns); void vctrs_init_unspecified(SEXP ns); void vctrs_init_utils(SEXP ns); void vctrs_init_globals(r_obj* ns); r_obj* vctrs_init_library(r_obj* ns) { r_init_library(ns); vctrs_init_bind(ns); vctrs_init_cast(ns); vctrs_init_data(ns); vctrs_init_dictionary(ns); vctrs_init_interval(ns); vctrs_init_match(ns); vctrs_init_names(ns); vctrs_init_proxy_restore(ns); vctrs_init_slice(ns); vctrs_init_slice_assign(ns); vctrs_init_subscript(ns); vctrs_init_subscript_loc(ns); vctrs_init_ptype(ns); vctrs_init_ptype2(ns); vctrs_init_ptype2_dispatch(ns); vctrs_init_rep(ns); vctrs_init_type_data_frame(ns); vctrs_init_type_date_time(ns); vctrs_init_type_info(ns); vctrs_init_unspecified(ns); vctrs_init_utils(ns); vctrs_init_globals(ns); return r_null; } vctrs/src/size.h0000644000176200001440000000222314362266120013326 0ustar liggesusers#ifndef VCTRS_SIZE_H #define VCTRS_SIZE_H #include "vctrs-core.h" #include "globals.h" r_ssize vec_size(r_obj* x); r_ssize vec_size_3(r_obj* x, struct vctrs_arg* p_arg, struct r_lazy call); r_obj* vec_check_recycle(r_obj* x, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call); static inline r_obj* vec_recycle(r_obj* x, r_ssize size) { return vec_check_recycle(x, size, vec_args.x, lazy_calls.vec_recycle); } r_obj* vec_recycle_fallback(r_obj* x, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call); r_obj* list_sizes(r_obj* x, const struct vec_error_opts* opts); r_ssize df_size(r_obj* x); r_ssize df_raw_size(r_obj* x); r_ssize df_rownames_size(r_obj* x); r_ssize df_raw_size_from_list(r_obj* x); r_ssize vec_as_short_length(r_obj* size, struct vctrs_arg* p_arg, struct r_lazy call); r_ssize vec_as_ssize(r_obj* n, struct vctrs_arg* arg, struct r_lazy call); #endif vctrs/src/interval.c0000644000176200001440000006746414362266120014215 0ustar liggesusers#include "vctrs.h" enum vctrs_interval_missing { VCTRS_INTERVAL_MISSING_group = 0, VCTRS_INTERVAL_MISSING_drop = 1 }; #include "decl/interval-decl.h" // ----------------------------------------------------------------------------- r_obj* ffi_interval_groups(r_obj* start, r_obj* end, r_obj* ffi_abutting, r_obj* ffi_missing) { const bool abutting = r_arg_as_bool(ffi_abutting, "abutting"); const enum vctrs_interval_missing missing = parse_missing(ffi_missing); const bool locations = false; r_obj* out = KEEP(vec_interval_group_info(start, end, abutting, missing, locations)); r_obj* loc_start = r_list_get(out, 0); r_obj* loc_end = r_list_get(out, 1); r_list_poke(out, 0, vec_slice_unsafe(start, loc_start)); r_list_poke(out, 1, vec_slice_unsafe(end, loc_end)); FREE(1); return out; } r_obj* ffi_interval_locate_groups(r_obj* start, r_obj* end, r_obj* ffi_abutting, r_obj* ffi_missing) { const bool abutting = r_arg_as_bool(ffi_abutting, "abutting"); const enum vctrs_interval_missing missing = parse_missing(ffi_missing); const bool locations = true; r_obj* out = KEEP(vec_interval_group_info(start, end, abutting, missing, locations)); r_obj* key = r_list_get(out, 0); r_obj* loc_start = r_list_get(key, 0); r_obj* loc_end = r_list_get(key, 1); r_list_poke(key, 0, vec_slice_unsafe(start, loc_start)); r_list_poke(key, 1, vec_slice_unsafe(end, loc_end)); FREE(1); return out; } /* * If `locations = false`, returns a two column data frame containing a * `$start` column with locations to slice `start` with and an `$end` column * containing locations to slice `end` with. After slicing, the newly * generated intervals represent the "groups". * * If `locations = true`, returns a two column data frame containing a * `$key` column that holds the data frame generated by `locations = false` * and a `$loc` column that is a list-column of integer vectors that map each * interval defined by `[start, end)` to its corresponding group. * * We don't slice `start` and `end` here because it is often useful to just * know the locations, for example in `vec_interval_complement()`. */ static r_obj* vec_interval_group_info(r_obj* start, r_obj* end, bool abutting, enum vctrs_interval_missing missing, bool locations) { int n_prot = 0; int _; r_obj* ptype = vec_ptype2_params( start, end, args_start, args_end, r_lazy_null, &_ ); KEEP_N(ptype, &n_prot); start = vec_cast_params( start, ptype, args_start, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(start, &n_prot); end = vec_cast_params( end, ptype, args_end, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(end, &n_prot); r_obj* start_proxy = KEEP_N(vec_proxy_compare(start), &n_prot); start_proxy = KEEP_N(vec_normalize_encoding(start_proxy), &n_prot); r_obj* end_proxy = KEEP_N(vec_proxy_compare(end), &n_prot); end_proxy = KEEP_N(vec_normalize_encoding(end_proxy), &n_prot); const enum vctrs_type type_proxy = vec_proxy_typeof(start_proxy); struct poly_vec* p_poly_start = new_poly_vec(start_proxy, type_proxy); KEEP_N(p_poly_start->shelter, &n_prot); const void* p_start = p_poly_start->p_vec; struct poly_vec* p_poly_end = new_poly_vec(end_proxy, type_proxy); KEEP_N(p_poly_end->shelter, &n_prot); const void* p_end = p_poly_end->p_vec; poly_binary_int_fn* const fn_compare = poly_p_compare_na_equal(type_proxy); poly_unary_bool_fn* const fn_is_missing = poly_p_is_missing(type_proxy); const r_ssize size = vec_size(start_proxy); if (size != vec_size(end_proxy)) { r_abort("`start` and `end` must have the same size."); } // Order is computed as ascending order, placing missing intervals up front // as the "smallest" values. We document that we assume that if `start` is // missing, then `end` is missing too. r_obj* order = interval_order( start_proxy, end_proxy, chrs_asc, chrs_smallest, size ); KEEP_N(order, &n_prot); const int* v_order = r_int_cbegin(order); // Assume the intervals can be merged into half their original size. // Apply a minimum size to avoid a size of zero. const r_ssize initial_size = r_ssize_max(size / 2, 1); struct r_dyn_array* p_loc_start = r_new_dyn_vector(R_TYPE_integer, initial_size); KEEP_N(p_loc_start->shelter, &n_prot); struct r_dyn_array* p_loc_end = r_new_dyn_vector(R_TYPE_integer, initial_size); KEEP_N(p_loc_end->shelter, &n_prot); struct r_dyn_array* p_loc = NULL; if (locations) { p_loc = r_new_dyn_vector(R_TYPE_list, initial_size); KEEP_N(p_loc->shelter, &n_prot); } r_ssize i = 0; r_ssize loc_order_missing_start = 0; r_ssize loc_order_missing_end = -1; // Move `i` past any missing intervals (they are at the front), // recording last missing interval location for later. Only need to check // missingness of `start`, because we document that we assume that `end` // is missing if `start` is missing. for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; if (!fn_is_missing(p_start, loc)) { break; } loc_order_missing_end = i; } r_ssize loc_order_start = 0; r_ssize loc_order_end = -1; r_ssize loc_group_start = 0; r_ssize loc_group_end = -1; if (i < size) { // Set information about first usable interval const r_ssize loc = v_order[i] - 1; loc_order_start = i; loc_order_end = i; loc_group_start = loc; loc_group_end = loc; ++i; } const int merge_limit = abutting ? -1 : 0; for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; // If `abutting`, this says: if group end < new start, finish out the group // If `!abutting`, this says: if group end <= new start, finish out the group if (fn_compare(p_end, loc_group_end, p_start, loc) <= merge_limit) { r_dyn_int_push_back(p_loc_start, loc_group_start + 1); r_dyn_int_push_back(p_loc_end, loc_group_end + 1); if (locations) { const r_ssize loc_size = loc_order_end - loc_order_start + 1; r_obj* loc = r_new_integer(loc_size); r_dyn_list_push_back(p_loc, loc); int* v_loc = r_int_begin(loc); const int* v_order_start = v_order + loc_order_start; memcpy(v_loc, v_order_start, loc_size * sizeof(*v_loc)); } loc_order_start = loc_order_end + 1; loc_group_start = loc; loc_group_end = loc; } else if (fn_compare(p_end, loc_group_end, p_end, loc) == -1) { loc_group_end = loc; } loc_order_end = i; } if (loc_order_end >= loc_order_start) { // Log last interval r_dyn_int_push_back(p_loc_start, loc_group_start + 1); r_dyn_int_push_back(p_loc_end, loc_group_end + 1); if (locations) { const r_ssize loc_size = loc_order_end - loc_order_start + 1; r_obj* loc = r_new_integer(loc_size); r_dyn_list_push_back(p_loc, loc); int* v_loc = r_int_begin(loc); const int* v_order_start = v_order + loc_order_start; memcpy(v_loc, v_order_start, loc_size * sizeof(*v_loc)); } } if (missing == VCTRS_INTERVAL_MISSING_group && loc_order_missing_end >= loc_order_missing_start) { // Log missing interval at the end const r_ssize loc_group_missing_start = v_order[loc_order_missing_start] - 1; const r_ssize loc_group_missing_end = v_order[loc_order_missing_end] - 1; r_dyn_int_push_back(p_loc_start, loc_group_missing_start + 1); r_dyn_int_push_back(p_loc_end, loc_group_missing_end + 1); if (locations) { const r_ssize loc_size = loc_order_missing_end - loc_order_missing_start + 1; r_obj* loc = r_new_integer(loc_size); r_dyn_list_push_back(p_loc, loc); int* v_loc = r_int_begin(loc); const int* v_order_start = v_order + loc_order_missing_start; memcpy(v_loc, v_order_start, loc_size * sizeof(*v_loc)); } } r_obj* key = KEEP_N(r_new_list(2), &n_prot); r_list_poke(key, 0, r_dyn_unwrap(p_loc_start)); r_list_poke(key, 1, r_dyn_unwrap(p_loc_end)); r_obj* key_names = r_new_character(2); r_attrib_poke_names(key, key_names); r_chr_poke(key_names, 0, r_str("start")); r_chr_poke(key_names, 1, r_str("end")); r_init_data_frame(key, p_loc_start->count); r_obj* out = r_null; r_keep_loc out_shelter; KEEP_HERE(out, &out_shelter); ++n_prot; if (locations) { out = r_new_list(2); KEEP_AT(out, out_shelter); r_list_poke(out, 0, key); r_list_poke(out, 1, r_dyn_unwrap(p_loc)); r_obj* out_names = r_new_character(2); r_attrib_poke_names(out, out_names); r_chr_poke(out_names, 0, r_str("key")); r_chr_poke(out_names, 1, r_str("loc")); r_init_data_frame(out, p_loc_start->count); } else { out = key; } FREE(n_prot); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_interval_complement(r_obj* start, r_obj* end, r_obj* lower, r_obj* upper) { return vec_interval_complement(start, end, lower, upper); } static r_obj* vec_interval_complement(r_obj* start, r_obj* end, r_obj* lower, r_obj* upper) { int n_prot = 0; int _; r_obj* ptype = vec_ptype2_params( start, end, args_start, args_end, r_lazy_null, &_ ); KEEP_N(ptype, &n_prot); start = vec_cast_params( start, ptype, args_start, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(start, &n_prot); end = vec_cast_params( end, ptype, args_end, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(end, &n_prot); r_obj* start_proxy = KEEP_N(vec_proxy_compare(start), &n_prot); start_proxy = KEEP_N(vec_normalize_encoding(start_proxy), &n_prot); r_obj* end_proxy = KEEP_N(vec_proxy_compare(end), &n_prot); end_proxy = KEEP_N(vec_normalize_encoding(end_proxy), &n_prot); const enum vctrs_type type_proxy = vec_proxy_typeof(start_proxy); struct poly_vec* p_poly_start = new_poly_vec(start_proxy, type_proxy); KEEP_N(p_poly_start->shelter, &n_prot); const void* p_start = p_poly_start->p_vec; struct poly_vec* p_poly_end = new_poly_vec(end_proxy, type_proxy); KEEP_N(p_poly_end->shelter, &n_prot); const void* p_end = p_poly_end->p_vec; poly_binary_int_fn* const fn_compare = poly_p_compare_na_equal(type_proxy); bool use_lower = (lower != r_null); bool use_upper = (upper != r_null); bool append_lower = false; bool append_upper = false; const void* p_lower = NULL; if (use_lower) { if (vec_size(lower) != 1) { r_abort("`lower` must be size 1."); } lower = vec_cast_params( lower, ptype, args_lower, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(lower, &n_prot); r_obj* lower_proxy = KEEP_N(vec_proxy_compare(lower), &n_prot); lower_proxy = KEEP_N(vec_normalize_encoding(lower_proxy), &n_prot); r_obj* lower_complete = KEEP_N(vec_detect_complete(lower_proxy), &n_prot); if (!r_lgl_get(lower_complete, 0)) { r_abort("`lower` can't contain missing values."); } struct poly_vec* p_poly_lower = new_poly_vec(lower_proxy, type_proxy); KEEP_N(p_poly_lower->shelter, &n_prot); p_lower = p_poly_lower->p_vec; } const void* p_upper = NULL; if (use_upper) { if (vec_size(upper) != 1) { r_abort("`upper` must be size 1."); } upper = vec_cast_params( upper, ptype, args_upper, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(upper, &n_prot); r_obj* upper_proxy = KEEP_N(vec_proxy_compare(upper), &n_prot); upper_proxy = KEEP_N(vec_normalize_encoding(upper_proxy), &n_prot); r_obj* upper_complete = KEEP_N(vec_detect_complete(upper_proxy), &n_prot); if (!r_lgl_get(upper_complete, 0)) { r_abort("`upper` can't contain missing values."); } struct poly_vec* p_poly_upper = new_poly_vec(upper_proxy, type_proxy); KEEP_N(p_poly_upper->shelter, &n_prot); p_upper = p_poly_upper->p_vec; } if (use_lower && use_upper && fn_compare(p_lower, 0, p_upper, 0) >= 0) { // Handle the special case of `lower >= upper` up front. // This could also be an error, but we try to be a little flexible. // These can't follow the standard code path because it assumes // `lower < upper`, like the rest of the intervals. // - `lower > upper` is an invalid interval. // - `lower = upper` will always result in an empty complement. r_obj* out = KEEP_N(r_new_list(2), &n_prot); r_list_poke(out, 0, vec_slice_unsafe(start, r_globals.empty_int)); r_list_poke(out, 1, vec_slice_unsafe(end, r_globals.empty_int)); r_obj* out_names = r_new_character(2); r_attrib_poke_names(out, out_names); r_chr_poke(out_names, 0, r_str("start")); r_chr_poke(out_names, 1, r_str("end")); r_init_data_frame(out, 0); FREE(n_prot); return out; } // Merge to sort, remove all missings, and merge all abutting intervals const bool abutting = true; const bool locations = false; r_obj* minimal = KEEP_N(vec_interval_group_info( start, end, abutting, VCTRS_INTERVAL_MISSING_drop, locations ), &n_prot); const int* v_loc_minimal_start = r_int_cbegin(r_list_get(minimal, 0)); const int* v_loc_minimal_end = r_int_cbegin(r_list_get(minimal, 1)); r_ssize size = vec_size(minimal); // Because we have the minimal interval information (i.e. no intervals overlap // or abut!), we know that the complement takes exactly `size - 1` space if // `lower` and `upper` aren't used. // // If `lower` is used, it can at most add one more interval, and // requires one more `loc_end` location. No `loc_start` location is needed // because we just append `lower` to the front if needed. // // If `upper` is used, it can at most add one more interval, and // requires one more `loc_start` location. No `loc_end` location is needed // because we just append `upper` to the end if needed. const r_ssize max_size_start = r_ssize_max(size - 1 + use_upper, 0); const r_ssize max_size_end = r_ssize_max(size - 1 + use_lower, 0); r_obj* loc_start = KEEP_N(r_alloc_integer(max_size_start), &n_prot); int* v_loc_start = r_int_begin(loc_start); r_ssize i_start = 0; r_obj* loc_end = KEEP_N(r_alloc_integer(max_size_end), &n_prot); int* v_loc_end = r_int_begin(loc_end); r_ssize i_end = 0; r_ssize i = 0; r_ssize loc_lower_is_after_start_of = -1; r_ssize loc_lower_is_before_end_of = 0; if (use_lower) { // Shift `i` forward to the first interval completely past `lower`. // Track information about where `lower` is in relation to the intervals. for (; i < size; ++i) { const r_ssize loc_start = v_loc_minimal_start[i] - 1; const r_ssize loc_end = v_loc_minimal_end[i] - 1; if (fn_compare(p_lower, 0, p_end, loc_end) == 1) { ++loc_lower_is_before_end_of; ++loc_lower_is_after_start_of; } else if (fn_compare(p_lower, 0, p_start, loc_start) >= 0) { ++loc_lower_is_after_start_of; } else { break; } } } r_ssize loc_upper_is_after_start_of = size - 1; r_ssize loc_upper_is_before_end_of = size; if (use_upper) { // Shift `size` backwards to the first interval that is completely before `upper`. // Track information about where `upper` is in relation to the intervals. for (; size - 1 >= 0; --size) { const r_ssize loc_start = v_loc_minimal_start[size - 1] - 1; const r_ssize loc_end = v_loc_minimal_end[size - 1] - 1; if (fn_compare(p_upper, 0, p_start, loc_start) == -1) { --loc_upper_is_before_end_of; --loc_upper_is_after_start_of; } else if (fn_compare(p_upper, 0, p_end, loc_end) <= 0) { --loc_upper_is_before_end_of; } else { break; } } } const bool has_intervals_between = i < size; if (use_lower && has_intervals_between) { r_ssize loc_gap_start = -1; if (loc_lower_is_before_end_of == loc_lower_is_after_start_of) { // `lower` is in the middle of an interval, use the end of that interval loc_gap_start = v_loc_minimal_end[loc_lower_is_before_end_of] - 1; } else { // `lower` is not within an interval, use `lower` append_lower = true; } // The next start location is the end of the interval that `loc_gap_start` // lines up with. We know this start location exists because of // `has_intervals_between`. const r_ssize loc_gap_end = v_loc_minimal_start[loc_lower_is_after_start_of + 1] - 1; if (!append_lower) { v_loc_start[i_start] = loc_gap_start + 1; ++i_start; } v_loc_end[i_end] = loc_gap_end + 1; ++i_end; } r_ssize loc_previous_end = -1; if (i < size) { // Set information about first usable interval loc_previous_end = v_loc_minimal_end[i] - 1; ++i; } for (; i < size; ++i) { const r_ssize loc_elt_start = v_loc_minimal_start[i] - 1; const r_ssize loc_elt_end = v_loc_minimal_end[i] - 1; const r_ssize loc_gap_start = loc_previous_end; const r_ssize loc_gap_end = loc_elt_start; v_loc_start[i_start] = loc_gap_start + 1; ++i_start; v_loc_end[i_end] = loc_gap_end + 1; ++i_end; loc_previous_end = loc_elt_end; } if (use_upper && has_intervals_between) { // The previous end location is the start of the interval that `loc_gap_end` // lines up with. We know this end location exists because of // `has_intervals_between`. const r_ssize loc_gap_start = v_loc_minimal_end[loc_upper_is_before_end_of - 1] - 1; r_ssize loc_gap_end = -1; if (loc_upper_is_before_end_of == loc_upper_is_after_start_of) { // `upper` is in the middle of an interval, use the start of that interval loc_gap_end = v_loc_minimal_start[loc_upper_is_before_end_of] - 1; } else { // `upper` is not within an interval, use `upper` append_upper = true; } v_loc_start[i_start] = loc_gap_start + 1; ++i_start; if (!append_upper) { v_loc_end[i_end] = loc_gap_end + 1; ++i_end; } } if (use_lower && use_upper && !has_intervals_between) { /* * This branch handles the case when `lower` and `upper` have no full * intervals between them. They can be in any of these states. In * particular, if they are in the same interval together, then there is * no complement. * * | [ ) [ ) | append_lower = append_upper = true. complement: -> * | [ ) [ ) | append_upper = true. complement: ) -> * | [ ) [ ) | append_lower = true. complement: -> [ * | [ ) [ ) | both in separate intervals. complement: ) -> [ * | [ ) [ ) | both in same interval! complement: none * | [ ) [ ) | both in same interval! complement: none */ bool lower_in_interval = false; bool upper_in_interval = false; r_ssize loc_gap_start = -1; if (loc_lower_is_before_end_of == loc_lower_is_after_start_of) { lower_in_interval = true; loc_gap_start = v_loc_minimal_end[loc_lower_is_before_end_of] - 1; } else { append_lower = true; } r_ssize loc_gap_end = -1; if (loc_upper_is_before_end_of == loc_upper_is_after_start_of) { upper_in_interval = true; loc_gap_end = v_loc_minimal_start[loc_upper_is_before_end_of] - 1; } else { append_upper = true; } const bool lower_and_upper_in_same_interval = lower_in_interval && upper_in_interval && (loc_lower_is_before_end_of == loc_upper_is_before_end_of); if (!append_lower && !lower_and_upper_in_same_interval) { v_loc_start[i_start] = loc_gap_start + 1; ++i_start; } if (!append_upper && !lower_and_upper_in_same_interval) { v_loc_end[i_end] = loc_gap_end + 1; ++i_end; } } // This should essentially be free. // It will only ever shrink `loc_start` and `loc_end`. loc_start = KEEP_N(r_int_resize(loc_start, i_start), &n_prot); loc_end = KEEP_N(r_int_resize(loc_end, i_end), &n_prot); // Slice `end` to get new starts and `start` to get new ends! r_obj* out_start = KEEP_N(vec_slice_unsafe(end, loc_start), &n_prot); r_obj* out_end = KEEP_N(vec_slice_unsafe(start, loc_end), &n_prot); if (append_lower || append_upper) { r_obj* args = KEEP_N(r_new_list(2), &n_prot); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = R_NilValue }; if (append_lower) { // Push `lower` to the start of the new starts r_list_poke(args, 0, lower); r_list_poke(args, 1, out_start); out_start = KEEP_N(vec_c( args, ptype, R_NilValue, &name_repair_opts, vec_args.empty, r_lazy_null ), &n_prot); } if (append_upper) { // Push `upper` to the end of the new ends r_list_poke(args, 0, out_end); r_list_poke(args, 1, upper); out_end = KEEP_N(vec_c( args, ptype, R_NilValue, &name_repair_opts, vec_args.empty, r_lazy_null ), &n_prot); } } r_obj* out = KEEP_N(r_new_list(2), &n_prot); r_list_poke(out, 0, out_start); r_list_poke(out, 1, out_end); r_obj* out_names = r_new_character(2); r_attrib_poke_names(out, out_names); r_chr_poke(out_names, 0, r_str("start")); r_chr_poke(out_names, 1, r_str("end")); r_init_data_frame(out, vec_size(out_start)); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_interval_locate_containers(r_obj* start, r_obj* end) { return vec_interval_locate_containers(start, end); } static r_obj* vec_interval_locate_containers(r_obj* start, r_obj* end) { int n_prot = 0; int _; r_obj* ptype = vec_ptype2_params( start, end, args_start, args_end, r_lazy_null, &_ ); KEEP_N(ptype, &n_prot); start = vec_cast_params( start, ptype, args_start, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(start, &n_prot); end = vec_cast_params( end, ptype, args_end, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(end, &n_prot); r_obj* start_proxy = KEEP_N(vec_proxy_compare(start), &n_prot); start_proxy = KEEP_N(vec_normalize_encoding(start_proxy), &n_prot); r_obj* end_proxy = KEEP_N(vec_proxy_compare(end), &n_prot); end_proxy = KEEP_N(vec_normalize_encoding(end_proxy), &n_prot); const enum vctrs_type type_proxy = vec_proxy_typeof(start_proxy); struct poly_vec* p_poly_start = new_poly_vec(start_proxy, type_proxy); KEEP_N(p_poly_start->shelter, &n_prot); const void* p_start = p_poly_start->p_vec; struct poly_vec* p_poly_end = new_poly_vec(end_proxy, type_proxy); KEEP_N(p_poly_end->shelter, &n_prot); const void* p_end = p_poly_end->p_vec; poly_binary_int_fn* const fn_compare = poly_p_compare_na_equal(type_proxy); poly_unary_bool_fn* const fn_is_missing = poly_p_is_missing(type_proxy); const r_ssize size = vec_size(start_proxy); if (size != vec_size(end_proxy)) { r_abort("`start` and `end` must have the same size."); } // Order is computed with the first column in ascending order, and the // second column in descending order. This makes it easy to find the // containers, as any time we detect something that isn't contained in the // current container, it must be a new container. Missing intervals are up // front for easy detection. We document that we assume that if `start` is // missing, then `end` is missing too. r_obj* direction = KEEP_N(r_new_character(2), &n_prot); r_chr_poke(direction, 0, r_str("asc")); r_chr_poke(direction, 1, r_str("desc")); r_obj* na_value = KEEP_N(r_new_character(2), &n_prot); r_chr_poke(na_value, 0, r_str("smallest")); r_chr_poke(na_value, 1, r_str("largest")); r_obj* order = interval_order( start_proxy, end_proxy, direction, na_value, size ); KEEP_N(order, &n_prot); const int* v_order = r_int_cbegin(order); // Assume that half the intervals are containers. // This is probably a little high. // Apply a minimum size to avoid a size of zero. const r_ssize initial_size = r_ssize_max(size / 2, 1); struct r_dyn_array* p_loc = r_new_dyn_vector(R_TYPE_integer, initial_size); KEEP_N(p_loc->shelter, &n_prot); r_ssize i = 0; bool any_missing = false; // Move `i` past any missing intervals (they are at the front), // recording if there are any missing intervals for later. Only need to check // missingness of `start`, because we document that we assume that `end` // is missing if `start` is missing. for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; if (!fn_is_missing(p_start, loc)) { break; } any_missing = true; } r_ssize loc_container = -1; if (i < size) { // Set information about first usable container const r_ssize loc = v_order[i] - 1; loc_container = loc; r_dyn_int_push_back(p_loc, loc_container + 1); ++i; } for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; if ((fn_compare(p_start, loc_container, p_start, loc) != 1) && (fn_compare(p_end, loc_container, p_end, loc) != -1)) { // Still in current container continue; } // New container loc_container = loc; r_dyn_int_push_back(p_loc, loc_container + 1); } if (any_missing) { // Push missing container as the last container. // We know missings are at the front, so just use the first order value // as the location. This matches ascending ordering with missing values // at the end, and breaking ties with the first missing location we saw. r_dyn_int_push_back(p_loc, v_order[0]); } r_obj* out = r_dyn_unwrap(p_loc); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- /* * `interval_order()` orders the `start` and `end` values of a vector of * intervals. We document that we make the assumption that if `start` is * missing, then `end` is also missing. We also document the assumption that * partially missing (i.e. incomplete but not missing) observations are not * allowed in either bound. */ static inline r_obj* interval_order(r_obj* start, r_obj* end, r_obj* direction, r_obj* na_value, r_ssize size) { // Put them in a data frame to compute joint ordering r_obj* df = KEEP(r_new_list(2)); r_list_poke(df, 0, start); r_list_poke(df, 1, end); r_obj* df_names = r_new_character(2); r_attrib_poke_names(df, df_names); r_chr_poke(df_names, 0, r_str("start")); r_chr_poke(df_names, 1, r_str("end")); r_init_data_frame(df, size); const bool nan_distinct = false; r_obj* chr_proxy_collate = r_null; r_obj* out = vec_order( df, direction, na_value, nan_distinct, chr_proxy_collate ); FREE(1); return out; } // ----------------------------------------------------------------------------- static inline enum vctrs_interval_missing parse_missing(r_obj* missing) { if (!r_is_string(missing)) { r_abort("`missing` must be a string."); } const char* c_missing = r_chr_get_c_string(missing, 0); if (!strcmp(c_missing, "group")) return VCTRS_INTERVAL_MISSING_group; if (!strcmp(c_missing, "drop")) return VCTRS_INTERVAL_MISSING_drop; r_abort("`missing` must be either \"group\" or \"drop\"."); } // ----------------------------------------------------------------------------- void vctrs_init_interval(r_obj* ns) { args_start_ = new_wrapper_arg(NULL, "start"); args_end_ = new_wrapper_arg(NULL, "end"); args_lower_ = new_wrapper_arg(NULL, "lower"); args_upper_ = new_wrapper_arg(NULL, "upper"); } vctrs/src/slice-chop.c0000644000176200001440000003707514404336165014416 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" /* * Index manager/generator for chopping purposes * * There are 3 types of possible indices: * - If `indices = NULL, sizes = NULL`, then we use a sequential size 1 index * that just increments by 1 from `0` to `vec_size(x) - 1`. * - If `indices` is supplied, then each element of `indices` is an integer * vector of locations to chop with. * - If `sizes` is supplied, then each element of `sizes` is the size of the * current slice to chop. The sizes are accumulated in order to get the * start location of the next slice. * * - Generate the next index with `indices_next()`. * - Generate the output size with `indices_out_size()`. * * @member shelter The shelter to protect the entire chop indices manager. * @member indices, v_indices * - If `NULL`, then `indices` aren't being used. `v_indices` is set to * `NULL`. * - Otherwise, a list of integer vector indices to chop with. `v_indices` is * set to `r_list_cbegin(indices)`. * @member sizes, v_sizes * - If `NULL`, then `sizes` aren't being used. `v_sizes` is set to `NULL`. * - Otherwise, an integer vector of sequential sizes to chop with. `v_sizes` * is set to `r_int_cbegin(sizes)`. * @member index, p_index * - If neither `indices` nor `sizes` are provided, `index` is a scalar * integer vector that starts at 0 and is incremented by 1 at every * iteration. `p_index` points to `r_int_begin(index)` and is used to * perform the increment. * - If `indices` is provided, this is set to the i-th element of `indices` * at each iteration, and `p_index` is set to `NULL`. * - If `sizes` is provided, this is a compact-seq representing the i-th * slice. `p_index` points to `r_int_begin(index)` and is used to updated * the compact-seq at each iteration. * @member has_indices Whether or not `indices` was provided. * @member has_sizes Whether or not `sizes` was provided. * @member loc The current iteration value. */ struct vctrs_chop_indices { r_obj* shelter; r_obj* indices; r_obj* const* v_indices; r_obj* sizes; const int* v_sizes; r_obj* index; int* p_index; bool has_indices; bool has_sizes; r_ssize loc; }; #include "decl/slice-chop-decl.h" // ----------------------------------------------------------------------------- static struct vctrs_chop_indices* new_chop_indices(r_obj* x, r_obj* indices, r_obj* sizes) { r_obj* shelter = KEEP(r_alloc_list(4)); r_obj* self = r_alloc_raw(sizeof(struct vctrs_chop_indices)); r_list_poke(shelter, 0, self); struct vctrs_chop_indices* p_indices = r_raw_begin(self); p_indices->shelter = shelter; p_indices->indices = indices; r_list_poke(p_indices->shelter, 1, p_indices->indices); p_indices->has_indices = p_indices->indices != r_null; p_indices->sizes = sizes; r_list_poke(p_indices->shelter, 2, p_indices->sizes); p_indices->has_sizes = p_indices->sizes != r_null; if (p_indices->has_indices) { p_indices->v_indices = r_list_cbegin(p_indices->indices); p_indices->v_sizes = NULL; p_indices->index = r_null; r_list_poke(p_indices->shelter, 3, p_indices->index); p_indices->p_index = NULL; } else if (p_indices->has_sizes) { p_indices->v_indices = NULL; p_indices->v_sizes = r_int_cbegin(p_indices->sizes); p_indices->index = compact_seq(0, 0, true); r_list_poke(p_indices->shelter, 3, p_indices->index); p_indices->p_index = r_int_begin(p_indices->index); } else { p_indices->v_indices = NULL; p_indices->v_sizes = NULL; p_indices->index = r_int(0); r_list_poke(p_indices->shelter, 3, p_indices->index); p_indices->p_index = r_int_begin(p_indices->index); } p_indices->loc = 0; FREE(1); return p_indices; } /* * Generate the next `index` * * You can assume that the returned `index` is always protected by `p_indices`, * so the caller doesn't need to protect it. */ static inline r_obj* indices_next(struct vctrs_chop_indices* p_indices) { const r_ssize loc = p_indices->loc; ++(p_indices->loc); if (p_indices->has_indices) { return p_indices->v_indices[loc]; } else if (p_indices->has_sizes) { const r_ssize start = p_indices->p_index[0] + p_indices->p_index[1]; const r_ssize size = p_indices->v_sizes[loc]; const bool increasing = true; init_compact_seq(p_indices->p_index, start, size, increasing); return p_indices->index; } else { *p_indices->p_index = loc + 1; return p_indices->index; } } static inline r_ssize indices_out_size(struct vctrs_chop_indices* p_indices, r_obj* x) { if (p_indices->has_indices) { return r_length(p_indices->indices); } else if (p_indices->has_sizes) { return r_length(p_indices->sizes); } else { return vec_size(x); } } // ----------------------------------------------------------------------------- r_obj* ffi_vec_chop_seq(r_obj* x, r_obj* starts, r_obj* sizes, r_obj* increasings) { int* v_starts = r_int_begin(starts); int* v_sizes = r_int_begin(sizes); int* v_increasings = r_lgl_begin(increasings); const r_ssize n = r_length(starts); r_obj* indices = KEEP(r_alloc_list(n)); for (r_ssize i = 0; i < n; ++i) { r_obj* index = compact_seq(v_starts[i], v_sizes[i], v_increasings[i]); r_list_poke(indices, i, index); } r_obj* out = KEEP(vec_chop_unsafe(x, indices, r_null)); FREE(2); return out; } r_obj* ffi_vec_chop(r_obj* x, r_obj* indices, r_obj* sizes) { return vec_chop(x, indices, sizes); } r_obj* vec_chop(r_obj* x, r_obj* indices, r_obj* sizes) { const r_ssize n = vec_size(x); r_obj* names = KEEP(vec_names(x)); if (indices != r_null && sizes != r_null) { r_abort_lazy_call(r_lazy_null, "Can't supply both `indices` and `sizes`."); } if (indices != r_null) { indices = list_as_locations(indices, n, names); } KEEP(indices); if (sizes != r_null) { sizes = vec_as_chop_sizes(sizes, n); } KEEP(sizes); r_obj* out = vec_chop_unsafe(x, indices, sizes); FREE(3); return out; } // Performance variant that doesn't check the types or values of `indices` / `sizes` r_obj* vec_chop_unsafe(r_obj* x, r_obj* indices, r_obj* sizes) { struct vctrs_proxy_info info = vec_proxy_info(x); KEEP(info.shelter); struct vctrs_chop_indices* p_indices = new_chop_indices(x, indices, sizes); KEEP(p_indices->shelter); r_obj* out = vec_chop_base(x, info, p_indices); FREE(2); return out; } static r_obj* vec_chop_base(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { if (vec_requires_fallback(x, info)) { // Fallback to `[` if the class doesn't implement a proxy. This is // to be maximally compatible with existing classes. if (info.type == VCTRS_TYPE_scalar) { r_abort_lazy_call(r_lazy_null, "Can't slice a scalar"); } if (has_dim(x)) { return chop_fallback_shaped(x, p_indices); } else { return chop_fallback(x, p_indices); } } switch (info.type) { case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: case VCTRS_TYPE_list: { if (has_dim(x)) { return chop_shaped(x, info, p_indices); } else { return chop(x, info, p_indices); } } case VCTRS_TYPE_dataframe: { return chop_df(x, info, p_indices); } default: obj_check_vector(x, vec_args.empty, r_lazy_null); stop_unimplemented_vctrs_type("vec_chop_base", info.type); } } static r_obj* chop(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { r_obj* proxy = info.proxy; r_obj* names = KEEP(r_names(proxy)); const enum vctrs_type type = info.type; const r_ssize out_size = indices_out_size(p_indices, proxy); r_obj* out = KEEP(r_alloc_list(out_size)); for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); // Always materialize ALTREP vectors when chopping to avoid inefficiently // creating a large amount of small ALTREP objects that are used downstream. // This is a heuristic and we should also be on the lookout for cases where // we chop to create a small amount of large ALTREP objects that are // quickly discarded (#1450). r_obj* elt = KEEP(vec_slice_base( type, proxy, index, VCTRS_MATERIALIZE_true )); if (names != r_null) { r_obj* elt_names = slice_names(names, index); r_attrib_poke_names(elt, elt_names); } elt = vec_restore(elt, x, vec_owned(elt)); r_list_poke(out, i, elt); FREE(1); } FREE(2); return out; } static r_obj* chop_df(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { r_obj* proxy = info.proxy; r_obj* const* v_proxy = r_list_cbegin(proxy); const r_ssize n_cols = r_length(proxy); r_obj* col_names = KEEP(r_names(proxy)); r_obj* row_names = KEEP(df_rownames(proxy)); const bool has_row_names = r_typeof(row_names) == R_TYPE_character; const r_ssize out_size = indices_out_size(p_indices, proxy); r_obj* out = KEEP(r_alloc_list(out_size)); r_obj* const* v_out = r_list_cbegin(out); // Pre-load the `out` container with empty bare data frames for (r_ssize i = 0; i < out_size; ++i) { r_obj* elt = r_alloc_list(n_cols); r_list_poke(out, i, elt); r_attrib_poke_names(elt, col_names); r_obj* index = indices_next(p_indices); const r_ssize size = vec_subscript_size(index); init_data_frame(elt, size); if (has_row_names) { r_obj* elt_row_names = slice_rownames(row_names, index); r_attrib_poke(elt, r_syms.row_names, elt_row_names); } } r_obj* indices = p_indices->indices; r_obj* sizes = p_indices->sizes; // Chop each column according to the indices, and then assign the results // into the appropriate data frame column in the `out` list for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = v_proxy[i]; r_obj* col_chopped = KEEP(vec_chop_unsafe(col, indices, sizes)); r_obj* const* v_col_chopped = r_list_cbegin(col_chopped); for (r_ssize j = 0; j < out_size; ++j) { r_obj* elt = v_out[j]; r_list_poke(elt, i, v_col_chopped[j]); } FREE(1); } // Restore each data frame for (r_ssize i = 0; i < out_size; ++i) { r_obj* elt = v_out[i]; elt = vec_restore(elt, x, vec_owned(elt)); r_list_poke(out, i, elt); } FREE(3); return out; } static r_obj* chop_shaped(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { r_obj* proxy = info.proxy; const enum vctrs_type type = info.type; r_obj* dim_names = KEEP(r_dim_names(proxy)); r_obj* row_names = r_null; if (dim_names != r_null) { row_names = r_list_get(dim_names, 0); } const r_ssize out_size = indices_out_size(p_indices, proxy); r_obj* out = KEEP(r_alloc_list(out_size)); for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); r_obj* elt = KEEP(vec_slice_shaped(type, proxy, index)); if (dim_names != r_null) { if (row_names != r_null) { // Required to slice row names to the right size before poking to avoid // erroring on the dimnames length check in `Rf_setAttrib()` r_obj* new_dim_names = KEEP(r_clone(dim_names)); r_obj* new_row_names = slice_names(row_names, index); r_list_poke(new_dim_names, 0, new_row_names); r_attrib_poke_dim_names(elt, new_dim_names); FREE(1); } else { r_attrib_poke_dim_names(elt, dim_names); } } elt = vec_restore(elt, x, vec_owned(elt)); r_list_poke(out, i, elt); FREE(1); } FREE(2); return out; } static r_obj* chop_fallback(r_obj* x, struct vctrs_chop_indices* p_indices) { // Evaluate in a child of the global environment to allow dispatch // to custom functions. We define `[` to point to its base // definition to ensure consistent look-up. This is the same logic // as in `vctrs_dispatch_n()`, reimplemented here to allow repeated // evaluations in a loop. r_obj* env = KEEP(r_alloc_empty_environment(r_envs.global)); r_env_poke(env, syms_x, x); // Construct call with symbols, not values, for performance. // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. r_obj* call; if (is_integer64(x)) { call = KEEP(r_call3(syms.vec_slice_dispatch_integer64, syms_x, syms_i)); r_env_poke(env, syms.vec_slice_dispatch_integer64, fns.vec_slice_dispatch_integer64); } else { call = KEEP(r_call3(syms_bracket, syms_x, syms_i)); r_env_poke(env, syms_bracket, fns_bracket); } const r_ssize out_size = indices_out_size(p_indices, x); r_obj* out = KEEP(r_alloc_list(out_size)); for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); if (is_compact(index)) { index = compact_materialize(index); } KEEP(index); // Update `i` binding with the new index value r_env_poke(env, syms_i, index); r_obj* elt = KEEP(r_eval(call, env)); if (!vec_is_restored(elt, x)) { elt = vec_restore(elt, x, vec_owned(elt)); } r_list_poke(out, i, elt); FREE(2); } FREE(3); return out; } static r_obj* chop_fallback_shaped(r_obj* x, struct vctrs_chop_indices* p_indices) { const r_ssize out_size = indices_out_size(p_indices, x); r_obj* out = KEEP(r_alloc_list(out_size)); for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); if (is_compact(index)) { index = compact_materialize(index); } KEEP(index); // `vec_slice_fallback()` will also `vec_restore()` for us r_obj* elt = vec_slice_fallback(x, index); r_list_poke(out, i, elt); FREE(1); } FREE(1); return out; } // ----------------------------------------------------------------------------- r_obj* list_as_locations(r_obj* indices, r_ssize n, r_obj* names) { if (r_typeof(indices) != R_TYPE_list) { r_abort_lazy_call(r_lazy_null, "`indices` must be a list of index values, or `NULL`."); } indices = KEEP(r_clone_referenced(indices)); const r_ssize size = r_length(indices); r_obj* const* v_indices = r_list_cbegin(indices); // Restrict index values to positive integer locations const struct location_opts opts = { .subscript_opts = { .logical = SUBSCRIPT_TYPE_ACTION_ERROR, .numeric = SUBSCRIPT_TYPE_ACTION_CAST, .character = SUBSCRIPT_TYPE_ACTION_ERROR }, .missing = SUBSCRIPT_MISSING_PROPAGATE, .loc_negative = LOC_NEGATIVE_ERROR, .loc_oob = LOC_OOB_ERROR, .loc_zero = LOC_ZERO_ERROR }; for (r_ssize i = 0; i < size; ++i) { r_obj* index = v_indices[i]; index = vec_as_location_opts(index, n, names, &opts); r_list_poke(indices, i, index); } FREE(1); return indices; } static r_obj* vec_as_chop_sizes(r_obj* sizes, r_ssize size) { sizes = KEEP(vec_cast( sizes, r_globals.empty_int, vec_args.sizes, vec_args.empty, r_lazy_null )); const r_ssize n_sizes = r_length(sizes); const int* v_sizes = r_int_cbegin(sizes); r_ssize total = 0; for (r_ssize i = 0; i < n_sizes; ++i) { const int elt = v_sizes[i]; if (elt == r_globals.na_int) { r_abort_lazy_call(r_lazy_null, "`sizes` can't contain missing values."); } else if (elt < 0) { r_abort_lazy_call(r_lazy_null, "`sizes` can't contain negative sizes."); } else if (elt > size) { r_abort_lazy_call(r_lazy_null, "`sizes` can't contain sizes larger than %i.", size); } total += elt; } if (total != size) { r_abort_lazy_call(r_lazy_null, "`sizes` must sum to size %i, not size %i.", size, total); } FREE(1); return sizes; } vctrs/src/slice-assign.c0000644000176200001440000004555314511320527014743 0ustar liggesusers#include "vctrs.h" #include "decl/slice-assign-decl.h" // [[ include("slice-assign.h") ]] r_obj* vec_assign_opts(r_obj* x, r_obj* index, r_obj* value, const struct vec_assign_opts* c_opts) { if (x == r_null) { return r_null; } struct vec_assign_opts opts = *c_opts; if (r_lazy_is_null(opts.call)) { opts.call = lazy_calls.vec_assign; opts.x_arg = vec_args.x; opts.value_arg = vec_args.value; } obj_check_vector(x, opts.x_arg, opts.call); obj_check_vector(value, opts.value_arg, opts.call); const struct location_opts location_opts = new_location_opts_assign(); index = KEEP(vec_as_location_opts(index, vec_size(x), KEEP(vec_names(x)), &location_opts)); // Cast and recycle `value` value = KEEP(vec_cast(value, x, opts.value_arg, opts.x_arg, opts.call)); value = KEEP(vec_check_recycle(value, vec_size(index), opts.value_arg, opts.call)); r_obj* proxy = KEEP(vec_proxy(x)); const enum vctrs_owned owned = vec_owned(proxy); proxy = KEEP(vec_proxy_assign_opts(proxy, index, value, owned, &opts)); r_obj* out = vec_restore(proxy, x, owned); FREE(6); return out; } // [[ register() ]] r_obj* ffi_assign(r_obj* x, r_obj* index, r_obj* value, r_obj* frame) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy value_arg_lazy = { .x = syms.value_arg, .env = frame }; struct vctrs_arg value_arg = new_lazy_arg(&value_arg_lazy); struct r_lazy call = { .x = frame, .env = r_null }; const struct vec_assign_opts opts = { .assign_names = false, .x_arg = &x_arg, .value_arg = &value_arg, .call = call }; return vec_assign_opts(x, index, value, &opts); } // [[ register() ]] r_obj* ffi_assign_params(r_obj* x, r_obj* index, r_obj* value, r_obj* assign_names) { const struct vec_assign_opts opts = { .assign_names = r_bool_as_int(assign_names), .call = lazy_calls.vec_assign_params }; return vec_assign_opts(x, index, value, &opts); } static r_obj* vec_assign_switch(r_obj* proxy, r_obj* index, r_obj* value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { switch (vec_proxy_typeof(proxy)) { case VCTRS_TYPE_logical: return lgl_assign(proxy, index, value, owned); case VCTRS_TYPE_integer: return int_assign(proxy, index, value, owned); case VCTRS_TYPE_double: return dbl_assign(proxy, index, value, owned); case VCTRS_TYPE_complex: return cpl_assign(proxy, index, value, owned); case VCTRS_TYPE_character: return chr_assign(proxy, index, value, owned); case VCTRS_TYPE_raw: return raw_assign(proxy, index, value, owned); case VCTRS_TYPE_list: return list_assign(proxy, index, value, owned); case VCTRS_TYPE_dataframe: return df_assign(proxy, index, value, owned, opts); case VCTRS_TYPE_scalar: stop_scalar_type(proxy, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_assign_switch", vec_typeof(proxy)); } r_stop_unreachable(); } // `vec_proxy_assign_opts()` conditionally duplicates the `proxy` depending // on a number of factors. // // - If a fallback is required, the `proxy` is duplicated at the R level. // - If `owned` is `VCTRS_OWNED_true`, the `proxy` is not duplicated. If the // `proxy` happens to be an ALTREP object, materialization will be forced when // we do the actual assignment, but this should really only happen with // cheap-to-materialize ALTREP "wrapper" objects since we've claimed that we // "own" the `proxy`. // - If `owned` is `VCTRS_OWNED_false`, the `proxy` is only // duplicated if it is referenced, i.e. `MAYBE_REFERENCED()` returns `true`. // // In `vec_proxy_assign()`, which is part of the experimental public API, // ownership is determined with a call to `NO_REFERENCES()`. If there are no // references, then `VCTRS_OWNED_true` is used, else // `VCTRS_OWNED_false` is used. // // Ownership of the `proxy` must be recursive. For data frames, the `owned` // argument is passed along to each column. // // Practically, we only set `VCTRS_OWNED_true` when we create a fresh data // structure at the C level and then assign into it to fill it. This happens // in `vec_c()` and `vec_rbind()`. For data frames, this `owned` parameter // is particularly important for R 4.0.0 where references are tracked more // precisely. In R 4.0.0, a freshly created data frame's columns all have a // refcount of 1 because of the `r_list_poke()` call that set them in the // data frame. This makes them referenced, but not shared. If // `VCTRS_OWNED_false` was set and `df_assign()` was used in a loop // (as it is in `vec_rbind()`), then a copy of each column would be made at // each iteration of the loop (any time a new set of rows is assigned // into the output object). // // Even though it can directly assign, the safe // way to call `vec_proxy_assign()` and `vec_proxy_assign_opts()` is to catch // and protect their output rather than relying on them to assign directly. /* * @param proxy The proxy of the output container * @param index The locations to assign `value` to * @param value The value to assign into the proxy. Must already be * cast to the type of the true output container, and have been * recycled to the correct size. Should not be proxied, in case * we have to fallback. */ r_obj* vec_proxy_assign_opts(r_obj* proxy, r_obj* index, r_obj* value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { int n_protect = 0; // Ignore vectors marked as fallback because the caller will apply // a fallback method instead if (vec_is_common_class_fallback(proxy)) { return proxy; } struct vec_assign_opts mut_opts = *opts; bool ignore_outer_names = mut_opts.ignore_outer_names; mut_opts.ignore_outer_names = false; struct vctrs_proxy_info value_info = vec_proxy_info(value); KEEP_N(value_info.shelter, &n_protect); if (r_typeof(proxy) != r_typeof(value_info.proxy)) { r_stop_internal("`proxy` of type `%s` incompatible with `value` proxy of type `%s`.", r_type_as_c_string(r_typeof(proxy)), r_type_as_c_string(r_typeof(value_info.proxy))); } // If a fallback is required, the `proxy` is identical to the output container // because no proxy method was called r_obj* out = r_null; if (vec_requires_fallback(value, value_info)) { index = KEEP_N(compact_materialize(index), &n_protect); out = KEEP_N(vec_assign_fallback(proxy, index, value), &n_protect); } else if (has_dim(proxy)) { out = KEEP_N(vec_assign_shaped(proxy, index, value_info.proxy, owned, &mut_opts), &n_protect); } else { out = KEEP_N(vec_assign_switch(proxy, index, value_info.proxy, owned, &mut_opts), &n_protect); } if (!ignore_outer_names && opts->assign_names) { out = vec_proxy_assign_names(out, index, value_info.proxy, owned); } FREE(n_protect); return out; } #define ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF) \ r_ssize n = r_length(index); \ int* index_data = r_int_begin(index); \ \ if (n != r_length(value)) { \ r_stop_internal("`value` should have been recycled to fit `x`."); \ } \ \ const CTYPE* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, owned)); \ CTYPE* out_data = DEREF(out); \ \ for (r_ssize i = 0; i < n; ++i) { \ int j = index_data[i]; \ if (j != r_globals.na_int) { \ out_data[j - 1] = value_data[i]; \ } \ } \ \ FREE(1); \ return out #define ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF) \ int* index_data = r_int_begin(index); \ r_ssize start = index_data[0]; \ r_ssize n = index_data[1]; \ r_ssize step = index_data[2]; \ \ if (n != r_length(value)) { \ r_stop_internal("`value` should have been recycled to fit `x`."); \ } \ \ const CTYPE* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, owned)); \ CTYPE* out_data = DEREF(out) + start; \ \ for (r_ssize i = 0; i < n; ++i, out_data += step, ++value_data) { \ *out_data = *value_data; \ } \ \ FREE(1); \ return out #define ASSIGN(CTYPE, DEREF, CONST_DEREF) \ if (is_compact_seq(index)) { \ ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF); \ } else { \ ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF); \ } static r_obj* lgl_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned) { ASSIGN(int, LOGICAL, LOGICAL_RO); } static r_obj* int_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned) { ASSIGN(int, r_int_begin, INTEGER_RO); } static r_obj* dbl_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned) { ASSIGN(double, REAL, REAL_RO); } static r_obj* cpl_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned) { ASSIGN(Rcomplex, COMPLEX, COMPLEX_RO); } static r_obj* raw_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned) { ASSIGN(Rbyte, RAW, RAW_RO); } #define ASSIGN_BARRIER_INDEX(GET, SET) \ r_ssize n = r_length(index); \ int* index_data = r_int_begin(index); \ \ if (n != r_length(value)) { \ r_stop_internal("`value` (size %d) doesn't match `x` (size %d).", \ r_length(value), \ n); \ } \ \ r_obj* out = KEEP(vec_clone_referenced(x, owned)); \ \ for (r_ssize i = 0; i < n; ++i) { \ int j = index_data[i]; \ if (j != r_globals.na_int) { \ SET(out, j - 1, GET(value, i)); \ } \ } \ \ FREE(1); \ return out #define ASSIGN_BARRIER_COMPACT(GET, SET) \ int* index_data = r_int_begin(index); \ r_ssize start = index_data[0]; \ r_ssize n = index_data[1]; \ r_ssize step = index_data[2]; \ \ if (n != r_length(value)) { \ r_stop_internal("`value` (size %d) doesn't match `x` (size %d).", \ r_length(value), \ n); \ } \ \ r_obj* out = KEEP(vec_clone_referenced(x, owned)); \ \ for (r_ssize i = 0; i < n; ++i, start += step) { \ SET(out, start, GET(value, i)); \ } \ \ FREE(1); \ return out #define ASSIGN_BARRIER(GET, SET) \ if (is_compact_seq(index)) { \ ASSIGN_BARRIER_COMPACT(GET, SET); \ } else { \ ASSIGN_BARRIER_INDEX(GET, SET); \ } r_obj* chr_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned) { ASSIGN_BARRIER(r_chr_get, r_chr_poke); } r_obj* list_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned) { ASSIGN_BARRIER(r_list_get, r_list_poke); } /** * - `out` and `value` must be rectangular lists. * - `value` must have the same size as `index`. * * Performance and safety notes: * If `x` is a fresh data frame (which would be the case in `vec_c()` and * `vec_rbind()`) then `r_clone_referenced()` will return it untouched. Each * column will also be fresh, so if `vec_proxy()` just returns its input then * `vec_proxy_assign_opts()` will directly assign to that column in `x`. This * makes it extremely fast to assign to a data frame. * * If `x` is referenced already, then `r_clone_referenced()` will call * `Rf_shallow_duplicate()`. For lists, this loops over the list and marks * each list element with max namedness. This is helpful for us, because * it is possible to have a data frame that is itself referenced, with columns * that are not (mtcars is an example). If each list element wasn't marked, then * `vec_proxy_assign_opts()` would see an unreferenced column and modify it * directly, resulting in improper mutable semantics. See #986 for full details. * * [[ include("vctrs.h") ]] */ r_obj* df_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned, const struct vec_assign_opts* opts) { r_obj* out = KEEP(vec_clone_referenced(x, owned)); r_ssize n = r_length(out); if (r_length(value) != n) { r_stop_internal("Can't assign %d columns to df of length %d.", r_length(value), n); } for (r_ssize i = 0; i < n; ++i) { r_obj* out_elt = r_list_get(out, i); r_obj* value_elt = r_list_get(value, i); // No need to cast or recycle because those operations are // recursive and have already been performed. However, proxy and // restore are not necessarily recursive and we might need to // proxy each element we recurse into. // // NOTE: `vec_proxy_assign()` proxies `value_elt`. r_obj* proxy_elt = KEEP(opts->recursive ? out_elt : vec_proxy(out_elt)); r_obj* assigned = KEEP(vec_proxy_assign_opts(proxy_elt, index, value_elt, owned, opts)); if (!opts->recursive) { assigned = vec_restore(assigned, out_elt, owned); } r_list_poke(out, i, assigned); FREE(2); } FREE(1); return out; } static r_obj* vec_assign_fallback(r_obj* x, r_obj* index, r_obj* value) { return vctrs_dispatch3(syms_vec_assign_fallback, fns_vec_assign_fallback, syms_x, x, syms_i, index, syms_value, value); } static r_obj* vec_proxy_assign_names(r_obj* proxy, r_obj* index, r_obj* value, const enum vctrs_owned owned) { r_obj* value_nms = KEEP(vec_names(value)); if (value_nms == r_null) { FREE(1); return proxy; } r_obj* proxy_nms = KEEP(vec_proxy_names(proxy)); if (proxy_nms == r_null) { proxy_nms = KEEP(r_alloc_character(vec_size(proxy))); } else { proxy_nms = KEEP(vec_clone_referenced(proxy_nms, owned)); } proxy_nms = KEEP(chr_assign(proxy_nms, index, value_nms, owned)); proxy = KEEP(vec_clone_referenced(proxy, owned)); proxy = vec_proxy_set_names(proxy, proxy_nms, owned); FREE(5); return proxy; } // Exported for testing // [[ register() ]] r_obj* ffi_assign_seq(r_obj* x, r_obj* value, r_obj* ffi_start, r_obj* ffi_size, r_obj* ffi_increasing) { r_ssize start = r_int_get(ffi_start, 0); r_ssize size = r_int_get(ffi_size, 0); bool increasing = r_lgl_get(ffi_increasing, 0); r_obj* index = KEEP(compact_seq(start, size, increasing)); struct r_lazy call = lazy_calls.vec_assign_seq; // Cast and recycle `value` value = KEEP(vec_cast(value, x, vec_args.value, vec_args.x, call)); value = KEEP(vec_check_recycle(value, vec_subscript_size(index), vec_args.value, call)); r_obj* proxy = KEEP(vec_proxy(x)); const enum vctrs_owned owned = vec_owned(proxy); proxy = KEEP(vec_proxy_check_assign(proxy, index, value, vec_args.x, vec_args.value, call)); r_obj* out = vec_restore(proxy, x, owned); FREE(5); return out; } void vctrs_init_slice_assign(r_obj* ns) { syms_vec_assign_fallback = r_sym("vec_assign_fallback"); fns_vec_assign_fallback = r_eval(syms_vec_assign_fallback, ns); } static r_obj* syms_vec_assign_fallback = NULL; static r_obj* fns_vec_assign_fallback = NULL; vctrs/src/vctrs-core.h0000644000176200001440000000571614532373444014464 0ustar liggesusers#ifndef VCTRS_CORE_H #define VCTRS_CORE_H #include #include "globals.h" #include "rlang-dev.h" #include "type-info.h" #include #include #include extern bool vctrs_debug_verbose; #define VCTRS_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) // An ERR indicates either a C NULL in case of no error, or a // condition object otherwise #define ERR SEXP // Ownership is recursive enum vctrs_owned { VCTRS_OWNED_false = 0, VCTRS_OWNED_true }; enum vctrs_recurse { VCTRS_RECURSE_false = 0, VCTRS_RECURSE_true }; /** * Structure for argument tags * * Argument tags are used in error messages to provide information * about which elements of nested data structures (such as tibbles) * fail to match a given type. They are generated lazily by the `fill` * method in order to avoid any cost when there is no error. * * @member parent The previously active argument tag. * @member fill Takes a pointer to data, and a buffer to fill. If the * buffer is too small according to the `remaining` argument, * `fill()` must return a negative error value. */ struct vctrs_arg { r_obj* shelter; struct vctrs_arg* parent; r_ssize (*fill)(void* data, char* buf, r_ssize remaining); void* data; }; struct vec_error_opts { struct vctrs_arg* p_arg; struct r_lazy call; }; // Annex F of C99 specifies that `double` should conform to the IEEE 754 // type `binary64`, which is defined as: // * 1 bit : sign // * 11 bits: exponent // * 52 bits: significand // // R stores the value "1954" in the last 32 bits: this payload marks // the value as a NA, not a regular NaN. // // On big endian systems, this corresponds to the second element of an // integer array of size 2. On little endian systems, this is flipped // and the NA marker is in the first element. // // The type assumptions made here are asserted in `vctrs_init_utils()` #ifdef WORDS_BIGENDIAN static const int vctrs_indicator_pos = 1; #else static const int vctrs_indicator_pos = 0; #endif union vctrs_dbl_indicator { double value; // 8 bytes unsigned int key[2]; // 4 * 2 bytes }; enum vctrs_dbl { VCTRS_DBL_number, VCTRS_DBL_missing, VCTRS_DBL_nan }; enum vctrs_dbl dbl_classify(double x); // Compatibility ------------------------------------------------ #if (R_VERSION < R_Version(3, 5, 0)) # define LOGICAL_RO(x) ((const int*) LOGICAL(x)) # define INTEGER_RO(x) ((const int*) INTEGER(x)) # define REAL_RO(x) ((const double*) REAL(x)) # define COMPLEX_RO(x) ((const Rcomplex*) COMPLEX(x)) # define STRING_PTR_RO(x) ((const SEXP*) STRING_PTR(x)) # define RAW_RO(x) ((const Rbyte*) RAW(x)) # define DATAPTR_RO(x) ((const void*) STRING_PTR(x)) #endif #define VECTOR_PTR_RO(x) ((const SEXP*) DATAPTR_RO(x)) // Likely supplied in R 4.4.0 // https://github.com/wch/r-source/commit/38403c9c347dd5426da6009573b087188ec6be04 #ifndef R_PRIdXLEN_T # ifdef LONG_VECTOR_SUPPORT # define R_PRIdXLEN_T "td" # else # define R_PRIdXLEN_T "d" # endif #endif #endif vctrs/src/utils.c0000644000176200001440000015116314511320527013515 0ustar liggesusers#include "vctrs-core.h" #include "vctrs.h" #include "type-data-frame.h" #include // Initialised at load time bool (*rlang_is_splice_box)(SEXP) = NULL; SEXP (*rlang_unbox)(SEXP) = NULL; SEXP (*rlang_env_dots_values)(SEXP) = NULL; SEXP (*rlang_env_dots_list)(SEXP) = NULL; SEXP vctrs_method_table = NULL; SEXP base_method_table = NULL; SEXP s4_c_method_table = NULL; SEXP strings_tbl = NULL; SEXP strings_tbl_df = NULL; SEXP strings_data_frame = NULL; SEXP strings_date = NULL; SEXP strings_posixct = NULL; SEXP strings_posixlt = NULL; SEXP strings_posixt = NULL; SEXP strings_factor = NULL; SEXP strings_ordered = NULL; SEXP strings_list = NULL; SEXP classes_data_frame = NULL; SEXP classes_factor = NULL; SEXP classes_ordered = NULL; SEXP classes_date = NULL; SEXP classes_posixct = NULL; SEXP classes_tibble = NULL; SEXP classes_vctrs_group_rle = NULL; static SEXP syms_as_data_frame2 = NULL; static SEXP fns_as_data_frame2 = NULL; static SEXP vctrs_eval_mask_n_impl(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args, SEXP env); /** * Evaluate with masked arguments * * This takes two arrays of argument (`args`) and argument names * `syms`). The names should correspond to formal arguments of `fn`. * Elements of `args` are assigned to their corresponding name in * `syms` directly in the current environment, i.e. the environment of * the closure wrapping the `.Call()` invokation. Since masked * evaluation causes side effects and variable assignments in that * frame environment, the native code invokation must be tailing: no * further R code (including `on.exit()` expressions) should be * evaluated in that closure wrapper. * * A call to `fn` is constructed with the * CARs and TAGs assigned symmetrically to the elements of * `syms`. This way the arguments are masked by symbols corresponding * to the formal parameters. * * @param fn The function to call. * @param syms A null-terminated array of symbols. The arguments * `args` are assigned to these symbols. The assignment occurs in a * child of `env` and the dispatch call refers to these symbols. * @param args A null-terminated array of arguments passed to the method. * @param env The environment in which to evaluate. */ SEXP vctrs_eval_mask_n(SEXP fn, SEXP* syms, SEXP* args) { return vctrs_eval_mask_n_impl(R_NilValue, fn, syms, args, vctrs_ns_env); } SEXP vctrs_eval_mask1(SEXP fn, SEXP x_sym, SEXP x) { SEXP syms[2] = { x_sym, NULL }; SEXP args[2] = { x, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask2(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y) { SEXP syms[3] = { x_sym, y_sym, NULL }; SEXP args[3] = { x, y, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask3(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[4] = { x_sym, y_sym, z_sym, NULL }; SEXP args[4] = { x, y, z, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask4(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4) { SEXP syms[5] = { x1_sym, x2_sym, x3_sym, x4_sym, NULL }; SEXP args[5] = { x1, x2, x3, x4, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask5(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5) { SEXP syms[6] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, NULL }; SEXP args[6] = { x1, x2, x3, x4, x5, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask6(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6) { SEXP syms[7] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, NULL }; SEXP args[7] = { x1, x2, x3, x4, x5, x6, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask7(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6, SEXP x7_sym, SEXP x7) { SEXP syms[8] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, x7_sym, NULL }; SEXP args[8] = { x1, x2, x3, x4, x5, x6, x7, NULL }; return vctrs_eval_mask_n(fn, syms, args); } r_obj* vctrs_eval_mask8(r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7, r_obj* x8_sym, r_obj* x8) { r_obj* syms[9] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, x7_sym, x8_sym, NULL }; r_obj* args[9] = { x1, x2, x3, x4, x5, x6, x7, x8, NULL }; return vctrs_eval_mask_n(fn, syms, args); } /** * Dispatch in the current environment * * Like `vctrs_eval_mask_n()`, the arguments `args` are are assigned * to the symbols `syms`. In addition, the function `fn` is assigned * to `fn_sym`. The mask is the current environment which has hygiene * implications regarding the closure wrapping `.Call()`, as * documented in `vctrs_eval_mask_n()`. * * @param fn_sym A symbol to which `fn` is assigned. * @inheritParams vctrs_eval_mask_n */ SEXP vctrs_dispatch_n(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args) { SEXP mask = PROTECT(r_peek_frame()); SEXP out = vctrs_eval_mask_n_impl(fn_sym, fn, syms, args, mask); UNPROTECT(1); return out; } SEXP vctrs_dispatch1(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x) { SEXP syms[2] = { x_sym, NULL }; SEXP args[2] = { x, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch2(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y) { SEXP syms[3] = { x_sym, y_sym, NULL }; SEXP args[3] = { x, y, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch3(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[4] = { x_sym, y_sym, z_sym, NULL }; SEXP args[4] = { x, y, z, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch4(SEXP fn_sym, SEXP fn, SEXP w_sym, SEXP w, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[5] = { w_sym, x_sym, y_sym, z_sym, NULL }; SEXP args[5] = { w, x, y, z, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch6(SEXP fn_sym, SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6) { SEXP syms[7] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, NULL }; SEXP args[7] = { x1, x2, x3, x4, x5, x6, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } static SEXP vctrs_eval_mask_n_impl(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args, SEXP env) { SEXP mask = PROTECT(r_alloc_empty_environment(env)); if (fn_sym != R_NilValue) { Rf_defineVar(fn_sym, fn, mask); fn = fn_sym; } SEXP body = PROTECT(r_call_n(fn, syms, syms)); SEXP call_fn = PROTECT(r_new_function(R_NilValue, body, mask)); SEXP call = PROTECT(Rf_lang1(call_fn)); while (*syms) { Rf_defineVar(*syms, *args, mask); ++syms; ++args; } SEXP out = Rf_eval(call, env); UNPROTECT(4); return out; } // [[ register() ]] SEXP vctrs_maybe_shared_col(SEXP x, SEXP i) { int i_ = r_int_get(i, 0) - 1; SEXP col = VECTOR_ELT(x, i_); bool out = MAYBE_SHARED(col); return Rf_ScalarLogical(out); } // [[ register() ]] SEXP vctrs_new_df_unshared_col(void) { SEXP col = PROTECT(Rf_allocVector(INTSXP, 1)); INTEGER(col)[0] = 1; SEXP out = PROTECT(Rf_allocVector(VECSXP, 1)); // In R 4.0.0, `SET_VECTOR_ELT()` bumps the REFCNT of // `col`. Because of this, `col` is now referenced (refcnt > 0), // but it isn't shared (refcnt > 1). SET_VECTOR_ELT(out, 0, col); SEXP names = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(names, 0, Rf_mkChar("x")); Rf_setAttrib(out, R_NamesSymbol, names); init_data_frame(out, 1); UNPROTECT(3); return out; } // An alternative to `attributes(x) <- attrib`, which makes // two copies on R < 3.6.0 // [[ register() ]] SEXP vctrs_set_attributes(SEXP x, SEXP attrib) { R_len_t n_attrib = Rf_length(attrib); int n_protect = 0; x = PROTECT(r_clone_referenced(x)); ++n_protect; // Remove existing attributes, and unset the object bit SET_ATTRIB(x, R_NilValue); SET_OBJECT(x, 0); // Possible early exit after removing attributes if (n_attrib == 0) { UNPROTECT(n_protect); return x; } SEXP names = Rf_getAttrib(attrib, R_NamesSymbol); if (Rf_isNull(names)) { Rf_errorcall(R_NilValue, "Attributes must be named."); } // Check that each element of `names` is named. for (R_len_t i = 0; i < n_attrib; ++i) { SEXP name = STRING_ELT(names, i); if (name == NA_STRING || name == R_BlankString) { const char* msg = "All attributes must have names. Attribute %i does not."; Rf_errorcall(R_NilValue, msg, i + 1); } } // Always set `dim` first, if it exists. This way it is set before `dimnames`. int dim_pos = -1; for (R_len_t i = 0; i < n_attrib; ++i) { if (!strcmp(CHAR(STRING_ELT(names, i)), "dim")) { dim_pos = i; break; } } if (dim_pos != -1) { Rf_setAttrib(x, R_DimSymbol, VECTOR_ELT(attrib, dim_pos)); } for (R_len_t i = 0; i < n_attrib; ++i) { if (i == dim_pos) { continue; } Rf_setAttrib(x, Rf_installChar(STRING_ELT(names, i)), VECTOR_ELT(attrib, i)); } UNPROTECT(n_protect); return x; } // [[ include("utils.h") ]] SEXP map(SEXP x, SEXP (*fn)(SEXP)) { R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); for (R_len_t i = 0; i < n; ++i) { SET_VECTOR_ELT(out, i, fn(VECTOR_ELT(x, i))); } SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, nms); UNPROTECT(2); return out; } // [[ include("utils.h") ]] SEXP map_with_data(SEXP x, SEXP (*fn)(SEXP, void*), void* data) { R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); for (R_len_t i = 0; i < n; ++i) { SET_VECTOR_ELT(out, i, fn(VECTOR_ELT(x, i), data)); } SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, nms); UNPROTECT(2); return out; } // [[ include("utils.h") ]] SEXP bare_df_map(SEXP df, SEXP (*fn)(SEXP)) { SEXP out = PROTECT(map(df, fn)); // Total ownership because `map()` generates a fresh list out = vec_bare_df_restore(out, df, VCTRS_OWNED_true, VCTRS_RECURSE_false); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP df_map(SEXP df, SEXP (*fn)(SEXP)) { SEXP out = PROTECT(map(df, fn)); // Total ownership because `map()` generates a fresh list out = vec_df_restore(out, df, VCTRS_OWNED_true, VCTRS_RECURSE_false); UNPROTECT(1); return out; } #define RESIZE(CONST_DEREF, DEREF, CTYPE, SEXPTYPE) do { \ if (x_size == size) { \ return x; \ } \ \ const CTYPE* p_x = CONST_DEREF(x); \ \ SEXP out = PROTECT(Rf_allocVector(SEXPTYPE, size)); \ CTYPE* p_out = DEREF(out); \ \ r_ssize copy_size = (size > x_size) ? x_size : size; \ \ memcpy(p_out, p_x, copy_size * sizeof(CTYPE)); \ \ UNPROTECT(1); \ return out; \ } while (0) #define RESIZE_BARRIER(CONST_DEREF, SEXPTYPE, SET) do { \ if (x_size == size) { \ return x; \ } \ \ const SEXP* p_x = CONST_DEREF(x); \ \ SEXP out = PROTECT(Rf_allocVector(SEXPTYPE, size)); \ \ r_ssize copy_size = (size > x_size) ? x_size : size; \ \ for (r_ssize i = 0; i < copy_size; ++i) { \ SET(out, i, p_x[i]); \ } \ \ UNPROTECT(1); \ return out; \ } while (0) // Faster than `Rf_xlengthgets()` because that fills the new extended // locations with `NA`, which we don't need. // [[ include("utils.h") ]] SEXP int_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE(INTEGER_RO, INTEGER, int, INTSXP); } // [[ include("utils.h") ]] SEXP raw_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE(RAW_RO, RAW, Rbyte, RAWSXP); } // [[ include("utils.h") ]] SEXP chr_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE_BARRIER(STRING_PTR_RO, STRSXP, SET_STRING_ELT); } #undef RESIZE #undef RESIZE_BARRIER inline void never_reached(const char* fn) { Rf_error("Internal error in `%s()`: Reached the unreachable.", fn); } static char s3_buf[200]; SEXP s3_paste_method_sym(const char* generic, const char* cls) { int gen_len = strlen(generic); int cls_len = strlen(cls); int dot_len = 1; if (gen_len + cls_len + dot_len >= sizeof(s3_buf)) { r_stop_internal("Generic or class name is too long."); } char* buf = s3_buf; memcpy(buf, generic, gen_len); buf += gen_len; *buf = '.'; ++buf; memcpy(buf, cls, cls_len); buf += cls_len; *buf = '\0'; return Rf_install(s3_buf); } // First check in global env, then in method table SEXP s3_get_method(const char* generic, const char* cls, SEXP table) { SEXP sym = s3_paste_method_sym(generic, cls); return s3_sym_get_method(sym, table); } SEXP s3_sym_get_method(SEXP sym, SEXP table) { SEXP method = r_env_get(R_GlobalEnv, sym); if (r_is_function(method)) { return method; } method = r_env_get(table, sym); if (r_is_function(method)) { return method; } return R_NilValue; } // [[ register() ]] SEXP vctrs_s3_find_method(SEXP generic, SEXP x, SEXP table) { return s3_find_method(r_chr_get_c_string(generic, 0), x, table); } // [[ register() ]] r_obj* ffi_s3_get_method(r_obj* generic, r_obj* cls, r_obj* table) { if (!r_is_string(generic)) { r_stop_internal("`generic` must be a string"); } if (!r_is_string(cls)) { r_stop_internal("`cls` must be a string"); } return s3_get_method(r_chr_get_c_string(generic, 0), r_chr_get_c_string(cls, 0), table); } // [[ include("utils.h") ]] SEXP s3_find_method(const char* generic, SEXP x, SEXP table) { if (!OBJECT(x)) { return R_NilValue; } SEXP cls = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP method = s3_class_find_method(generic, cls, table); UNPROTECT(1); return method; } // [[ include("utils.h") ]] SEXP s3_class_find_method(const char* generic, SEXP cls, SEXP table) { // Avoid corrupt objects where `x` is an OBJECT(), but the class is NULL if (cls == R_NilValue) { return R_NilValue; } SEXP const* p_cls = STRING_PTR_RO(cls); int n_cls = Rf_length(cls); for (int i = 0; i < n_cls; ++i) { SEXP method = s3_get_method(generic, CHAR(p_cls[i]), table); if (method != R_NilValue) { return method; } } return R_NilValue; } // [[ include("utils.h") ]] SEXP s3_get_class(SEXP x) { SEXP cls = R_NilValue; if (OBJECT(x)) { cls = Rf_getAttrib(x, R_ClassSymbol); } // This handles unclassed objects as well as gremlins objects where // `x` is an OBJECT(), but the class is NULL if (cls == R_NilValue) { cls = s3_bare_class(x); } if (!Rf_length(cls)) { r_stop_internal("Class must have length."); } return cls; } SEXP s3_get_class0(SEXP x) { SEXP cls = PROTECT(s3_get_class(x)); SEXP out = STRING_ELT(cls, 0); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP s3_find_method_xy(const char* generic, SEXP x, SEXP y, SEXP table, SEXP* method_sym_out) { SEXP x_class = PROTECT(s3_get_class0(x)); SEXP y_class = PROTECT(s3_get_class0(y)); SEXP method_sym = R_NilValue; method_sym = s3_paste_method_sym(generic, CHAR(x_class)); method_sym = s3_paste_method_sym(CHAR(PRINTNAME(method_sym)), CHAR(y_class)); SEXP method = s3_sym_get_method(method_sym, table); if (method == R_NilValue) { *method_sym_out = R_NilValue; } else { *method_sym_out = method_sym; } UNPROTECT(2); return method; } // [[ include("utils.h") ]] SEXP s3_find_method2(const char* generic, SEXP x, SEXP table, SEXP* method_sym_out) { SEXP cls = PROTECT(s3_get_class0(x)); SEXP method_sym = s3_paste_method_sym(generic, CHAR(cls)); SEXP method = s3_sym_get_method(method_sym, table); if (method == R_NilValue) { *method_sym_out = R_NilValue; } else { *method_sym_out = method_sym; } UNPROTECT(1); return method; } // [[ include("utils.h") ]] SEXP s3_bare_class(SEXP x) { switch (TYPEOF(x)) { case NILSXP: return chrs_null; case LGLSXP: return chrs_logical; case INTSXP: return chrs_integer; case REALSXP: return chrs_double; case CPLXSXP: return chrs_complex; case STRSXP: return chrs_character; case RAWSXP: return chrs_raw; case VECSXP: return chrs_list; case EXPRSXP: return chrs_expression; case CLOSXP: case SPECIALSXP: case BUILTINSXP: return chrs_function; default: stop_unimplemented_vctrs_type("base_dispatch_class_str", vec_typeof(x)); } } static SEXP s4_get_method(const char* cls, SEXP table) { SEXP sym = Rf_install(cls); SEXP method = r_env_get(table, sym); if (r_is_function(method)) { return method; } return R_NilValue; } // For S4 objects, the `table` is specific to the generic SEXP s4_find_method(SEXP x, SEXP table) { if (!IS_S4_OBJECT(x)) { return R_NilValue; } SEXP cls = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP out = s4_class_find_method(cls, table); UNPROTECT(1); return out; } SEXP s4_class_find_method(SEXP cls, SEXP table) { // Avoid corrupt objects where `x` is an OBJECT(), but the class is NULL if (cls == R_NilValue) { return R_NilValue; } SEXP const* p_class = STRING_PTR_RO(cls); int n_class = Rf_length(cls); for (int i = 0; i < n_class; ++i) { SEXP method = s4_get_method(CHAR(p_class[i]), table); if (method != R_NilValue) { return method; } } return R_NilValue; } // [[ include("utils.h") ]] bool vec_implements_ptype2(SEXP x) { switch (vec_typeof(x)) { case VCTRS_TYPE_scalar: return false; case VCTRS_TYPE_s3: { SEXP method_sym = R_NilValue; SEXP method = s3_find_method_xy("vec_ptype2", x, x, vctrs_method_table, &method_sym); if (method != R_NilValue) { return true; } method = s3_find_method2("vec_ptype2", x, vctrs_method_table, &method_sym); return method != R_NilValue; } default: return true; } } // [[ register() ]] SEXP vctrs_implements_ptype2(SEXP x) { return r_lgl(vec_implements_ptype2(x)); } // [[ include("utils.h") ]] SEXP list_first_non_null(SEXP xs, R_len_t* non_null_i) { SEXP x = R_NilValue; R_len_t n = Rf_length(xs); R_len_t i = 0; for (; i < n; ++i) { x = VECTOR_ELT(xs, i); if (x != R_NilValue) { break; } } if (non_null_i) { *non_null_i = i; } return x; } // [[ include("utils.h") ]] bool list_is_homogeneously_classed(SEXP xs) { R_len_t n = Rf_length(xs); if (n == 0 || n == 1) { return true; } R_len_t i = -1; SEXP first = list_first_non_null(xs, &i); SEXP first_class = PROTECT(r_class(first)); for (; i < n; ++i) { SEXP this = VECTOR_ELT(xs, i); if (this == R_NilValue) { continue; } SEXP this_class = PROTECT(r_class(this)); if (!equal_object(first_class, this_class)) { UNPROTECT(2); return false; } UNPROTECT(1); } UNPROTECT(1); return true; } // [[ include("utils.h") ]] SEXP node_compact_d(SEXP node) { SEXP handle = PROTECT(Rf_cons(R_NilValue, node)); SEXP prev = handle; while (node != R_NilValue) { if (CAR(node) == R_NilValue) { SETCDR(prev, CDR(node)); } else { prev = node; } node = CDR(node); } UNPROTECT(1); return CDR(handle); } // [[ include("utils.h") ]] SEXP new_empty_factor(SEXP levels) { if (TYPEOF(levels) != STRSXP) { r_stop_internal("`level` must be a character vector."); } SEXP out = PROTECT(Rf_allocVector(INTSXP, 0)); Rf_setAttrib(out, R_LevelsSymbol, levels); Rf_setAttrib(out, R_ClassSymbol, classes_factor); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP new_empty_ordered(SEXP levels) { SEXP out = PROTECT(Rf_allocVector(INTSXP, 0)); Rf_setAttrib(out, R_LevelsSymbol, levels); Rf_setAttrib(out, R_ClassSymbol, classes_ordered); UNPROTECT(1); return out; } // [[ include("utils.h") ]] bool list_has_inner_vec_names(SEXP x, R_len_t size) { for (R_len_t i = 0; i < size; ++i) { SEXP elt = VECTOR_ELT(x, i); if (vec_names(elt) != R_NilValue) { return true; } } return false; } /** * Pluck elements `i` from a list of lists. * @return A list of the same length as `xs`. */ // [[ include("utils.h") ]] r_obj* list_pluck(r_obj* xs, r_ssize i) { r_ssize n = r_length(xs); r_obj* const * v_xs = r_list_cbegin(xs); r_obj* out = KEEP(r_new_list(n)); for (r_ssize j = 0; j < n; ++j) { r_obj* x = v_xs[j]; if (x != r_null) { r_list_poke(out, j, r_list_get(x, i)); } } FREE(1); return out; } // Initialised at load time SEXP compact_seq_attrib = NULL; // p[0] = Start value // p[1] = Sequence size. Always >= 0. // p[2] = Step size to increment/decrement `start` with void init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing) { int step = increasing ? 1 : -1; p[0] = start; p[1] = size; p[2] = step; } // Returns a compact sequence that `vec_slice()` understands // The sequence is generally generated as `[start, start +/- size)` // If `size == 0` a 0-length sequence is generated // `start` is 0-based SEXP compact_seq(R_len_t start, R_len_t size, bool increasing) { if (start < 0) { r_stop_internal("`start` must not be negative."); } if (size < 0) { r_stop_internal("`size` must not be negative."); } if (!increasing && size > start + 1) { r_stop_internal("`size` must not be larger than `start` for decreasing sequences."); } SEXP info = PROTECT(Rf_allocVector(INTSXP, 3)); int* p = INTEGER(info); init_compact_seq(p, start, size, increasing); SET_ATTRIB(info, compact_seq_attrib); UNPROTECT(1); return info; } bool is_compact_seq(SEXP x) { return ATTRIB(x) == compact_seq_attrib; } // Materialize a 1-based sequence SEXP compact_seq_materialize(SEXP x) { int* p = INTEGER(x); R_len_t start = p[0] + 1; R_len_t size = p[1]; R_len_t step = p[2]; SEXP out = PROTECT(Rf_allocVector(INTSXP, size)); int* out_data = INTEGER(out); for (R_len_t i = 0; i < size; ++i, ++out_data, start += step) { *out_data = start; } UNPROTECT(1); return out; } // Initialised at load time SEXP compact_rep_attrib = NULL; void init_compact_rep(int* p, R_len_t i, R_len_t n) { p[0] = i; p[1] = n; } // Returns a compact repetition that `vec_slice()` understands // `i` should be an R-based index SEXP compact_rep(R_len_t i, R_len_t n) { if (n < 0) { r_stop_internal("Negative `n` in `compact_rep()`."); } SEXP rep = PROTECT(Rf_allocVector(INTSXP, 2)); int* p = INTEGER(rep); init_compact_rep(p, i, n); SET_ATTRIB(rep, compact_rep_attrib); UNPROTECT(1); return rep; } bool is_compact_rep(SEXP x) { return ATTRIB(x) == compact_rep_attrib; } SEXP compact_rep_materialize(SEXP x) { int i = r_int_get(x, 0); int n = r_int_get(x, 1); SEXP out = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill(out, i, n); UNPROTECT(1); return out; } bool is_compact(SEXP x) { return is_compact_rep(x) || is_compact_seq(x); } SEXP compact_materialize(SEXP x) { if (is_compact_rep(x)) { return compact_rep_materialize(x); } else if (is_compact_seq(x)) { return compact_seq_materialize(x); } else { return x; } } R_len_t vec_subscript_size(SEXP x) { if (is_compact_rep(x)) { return r_int_get(x, 1); } else if (is_compact_seq(x)) { return r_int_get(x, 1); } else { return vec_size(x); } } static SEXP syms_colnames = NULL; static SEXP fns_colnames = NULL; // [[ include("utils.h") ]] SEXP colnames(SEXP x) { return vctrs_dispatch1(syms_colnames, fns_colnames, syms_x, x); } r_obj* colnames2(r_obj* x) { r_obj* names = colnames(x); if (names == r_null) { return r_alloc_character(Rf_ncols(x)); } else { return names; } } // [[ include("utils.h") ]] bool is_integer64(SEXP x) { return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); } // [[ include("utils.h") ]] bool lgl_any_na(SEXP x) { R_xlen_t size = Rf_xlength(x); const int* p_x = LOGICAL_RO(x); for (R_xlen_t i = 0; i < size; ++i) { if (p_x[i] == NA_LOGICAL) { return true; } } return false; } void* r_vec_deref_barrier(SEXP x) { switch (TYPEOF(x)) { case STRSXP: case VECSXP: return (void*) x; default: return r_vec_begin(x); } } const void* r_vec_deref_barrier_const(SEXP x) { switch (TYPEOF(x)) { case STRSXP: case VECSXP: return (const void*) x; default: return r_vec_cbegin(x); } } #define FILL(CTYPE, DEST, DEST_I, SRC, SRC_I, N) \ do { \ CTYPE* p_dest = (CTYPE*) DEST; \ p_dest += DEST_I; \ CTYPE* end = p_dest + N; \ CTYPE value = ((const CTYPE*) SRC)[SRC_I]; \ \ while (p_dest != end) { \ *p_dest++ = value; \ } \ } while (false) #define FILL_BARRIER(GET, SET, DEST, DEST_I, SRC, SRC_I, N) \ do { \ SEXP out = (SEXP) DEST; \ SEXP value = GET((SEXP) SRC, SRC_I); \ \ for (r_ssize i = 0; i < N; ++i) { \ SET(out, DEST_I + i, value); \ } \ } while (false) void r_vec_fill(SEXPTYPE type, void* dest, r_ssize dest_i, const void* src, r_ssize src_i, r_ssize n) { switch (type) { case INTSXP: FILL(int, dest, dest_i, src, src_i, n); return; case STRSXP: FILL_BARRIER(STRING_ELT, SET_STRING_ELT, dest, dest_i, src, src_i, n); return; default: stop_unimplemented_type("r_vec_fill", type); } } #undef FILL_BARRIER #undef FILL #define FILL() { \ for (R_len_t i = 0; i < n; ++i) { \ p_x[i] = value; \ } \ } void r_p_lgl_fill(int* p_x, int value, R_len_t n) { FILL(); } void r_p_int_fill(int* p_x, int value, R_len_t n) { FILL(); } void r_p_chr_fill(SEXP* p_x, SEXP value, R_len_t n) { FILL(); } #undef FILL void r_lgl_fill(SEXP x, int value, R_len_t n) { r_p_lgl_fill(LOGICAL(x), value, n); } void r_int_fill(SEXP x, int value, R_len_t n) { r_p_int_fill(INTEGER(x), value, n); } // void r_chr_fill(SEXP x, SEXP value, R_len_t n) { // r_p_chr_fill(STRING_PTR(x), value, n); // } void r_int_fill_seq(SEXP x, int start, R_len_t n) { int* data = INTEGER(x); for (R_len_t i = 0; i < n; ++i, ++data, ++start) { *data = start; } } SEXP r_seq(R_len_t from, R_len_t to) { R_len_t n = to - from; if (n < 0) { r_stop_internal("Negative length."); } SEXP seq = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill_seq(seq, from, n); UNPROTECT(1); return seq; } #define FIND(CTYPE, CONST_DEREF) \ R_len_t n = Rf_length(x); \ const CTYPE* data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < n; ++i) { \ if (data[i] == value) { \ return i; \ } \ } \ return -1 R_len_t r_chr_find(SEXP x, SEXP value) { FIND(SEXP, STRING_PTR_RO); } #undef FIND bool r_int_any_na(SEXP x) { int* data = INTEGER(x); R_len_t n = Rf_length(x); for (R_len_t i = 0; i < n; ++i, ++data) { if (*data == NA_INTEGER) { return true; } } return false; } int r_chr_max_len(SEXP x) { R_len_t n = Rf_length(x); SEXP const* p = STRING_PTR_RO(x); int max = 0; for (R_len_t i = 0; i < n; ++i, ++p) { int len = strlen(CHAR(*p)); max = len > max ? len : max; } return max; } /** * Create a character vector of sequential integers * * @param n The sequence is from 1 to `n`. * @param buf,len A memory buffer of size `len`. * @param prefix A null-terminated string that is prefixed to the * sequence. */ SEXP r_chr_iota(R_len_t n, char* buf, int len, const char* prefix) { int prefix_len = strlen(prefix); if (len - 1 < prefix_len) { r_stop_internal("Prefix is larger than iota buffer."); } memcpy(buf, prefix, prefix_len); len -= prefix_len; char* beg = buf + prefix_len; SEXP out = PROTECT(Rf_allocVector(STRSXP, n)); for (R_len_t i = 0; i < n; ++i) { int written = snprintf(beg, len, "%d", i + 1); if (written >= len) { UNPROTECT(1); return R_NilValue; } SET_STRING_ELT(out, i, Rf_mkChar(buf)); } UNPROTECT(1); return out; } static SEXP new_env_call = NULL; static SEXP new_env__parent_node = NULL; static SEXP new_env__size_node = NULL; // [[ include("utils.h") ]] SEXP r_protect(SEXP x) { return Rf_lang2(fns_quote, x); } // [[ include("utils.h") ]] int r_bool_as_int(SEXP x) { if (!r_is_bool(x)) { Rf_errorcall(R_NilValue, "Input must be a single `TRUE` or `FALSE`."); } return LOGICAL(x)[0]; } bool r_is_number(SEXP x) { return TYPEOF(x) == INTSXP && Rf_length(x) == 1 && INTEGER(x)[0] != NA_INTEGER; } bool r_is_positive_number(SEXP x) { return r_is_number(x) && INTEGER(x)[0] > 0; } /** * Create a call or pairlist * * @param tags Optional. If not `NULL`, a null-terminated array of symbols. * @param cars Mandatory. A null-terminated array of CAR values. * @param fn The first CAR value of the language list. * * [[ include("utils.h") ]] */ SEXP _r_pairlist(SEXP* tags, SEXP* cars) { if (!cars) { r_stop_internal("NULL `cars`."); } SEXP list = PROTECT(Rf_cons(R_NilValue, R_NilValue)); SEXP node = list; while (*cars) { SEXP next_node = Rf_cons(*cars, R_NilValue); SETCDR(node, next_node); node = next_node; if (tags) { SET_TAG(next_node, *tags); ++tags; } ++cars; } UNPROTECT(1); return CDR(list); } SEXP r_call_n(SEXP fn, SEXP* tags, SEXP* cars) { return Rf_lcons(fn, _r_pairlist(tags, cars)); } bool r_has_name_at(SEXP names, R_len_t i) { if (TYPEOF(names) != STRSXP) { return false; } R_len_t n = Rf_length(names); if (n <= i) { r_stop_internal("Names shorter than expected: (%d/%d).", i + 1, n); } SEXP elt = STRING_ELT(names, i); return elt != NA_STRING && elt != strings_empty; } bool r_is_minimal_names(SEXP x) { if (TYPEOF(x) != STRSXP) { return false; } R_len_t n = Rf_length(x); const SEXP* p = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP elt = *p; if (elt == NA_STRING || elt == strings_empty) { return false; } } return true; } bool r_is_empty_names(SEXP x) { if (TYPEOF(x) != STRSXP) { if (x == R_NilValue) { return true; } else { return false; } } R_len_t n = Rf_length(x); const SEXP* p = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP elt = *p; if (elt != NA_STRING && elt != strings_empty) { return false; } } return true; } SEXP r_env_get(SEXP env, SEXP sym) { SEXP obj = PROTECT(Rf_findVarInFrame3(env, sym, FALSE)); // Force lazy loaded bindings if (TYPEOF(obj) == PROMSXP) { obj = Rf_eval(obj, R_BaseEnv); } UNPROTECT(1); return obj; } SEXP r_clone_referenced(SEXP x) { if (MAYBE_REFERENCED(x)) { return Rf_shallow_duplicate(x); } else { return x; } } bool r_is_names(SEXP names) { if (names == R_NilValue) { return false; } R_len_t n = Rf_length(names); const SEXP* p = STRING_PTR_RO(names); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP nm = *p; if (nm == strings_empty || nm == NA_STRING) { return false; } } return true; } bool r_chr_has_string(SEXP x, SEXP str) { R_len_t n = Rf_length(x); const SEXP* xp = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++xp) { if (*xp == str) { return true; } } return false; } SEXP r_as_data_frame(SEXP x) { if (is_bare_data_frame(x)) { return x; } else { return vctrs_dispatch1(syms_as_data_frame2, fns_as_data_frame2, syms_x, x); } } static SEXP syms_try_catch_hnd = NULL; static inline SEXP try_catch_hnd(SEXP ptr) { SEXP call = PROTECT(Rf_lang2(syms_try_catch_hnd, ptr)); SEXP out = Rf_eval(call, vctrs_ns_env); UNPROTECT(1); return out; } struct r_try_catch_data { void (*fn)(void*); void* fn_data; SEXP cnd_sym; void (*hnd)(void*); void* hnd_data; ERR err; }; // [[ register() ]] SEXP vctrs_try_catch_callback(SEXP ptr, SEXP cnd) { struct r_try_catch_data* data = (struct r_try_catch_data*) R_ExternalPtrAddr(ptr); if (cnd == R_NilValue) { if (data->fn) { data->fn(data->fn_data); } } else { data->err = cnd; if (data->hnd) { data->hnd(data->hnd_data); } } return R_NilValue; } static SEXP syms_try_catch_impl = NULL; // [[ include("utils.h") ]] ERR r_try_catch(void (*fn)(void*), void* fn_data, SEXP cnd_sym, void (*hnd)(void*), void* hnd_data) { struct r_try_catch_data data = { .fn = fn, .fn_data = fn_data, .cnd_sym = cnd_sym, .hnd = hnd, .hnd_data = hnd_data, .err = NULL }; SEXP xptr = PROTECT(R_MakeExternalPtr(&data, R_NilValue, R_NilValue)); SEXP hnd_fn = PROTECT(try_catch_hnd(xptr)); SEXP syms[3] = { syms_data, cnd_sym, NULL }; SEXP args[3] = { xptr, hnd_fn, NULL }; SEXP call = PROTECT(r_call_n(syms_try_catch_impl, syms, args)); Rf_eval(call, vctrs_ns_env); UNPROTECT(3); return data.err; } SEXP (*rlang_sym_as_character)(SEXP x); // [[ include("utils.h") ]] SEXP chr_c(SEXP x, SEXP y) { r_ssize x_n = r_length(x); r_ssize y_n = r_length(y); if (x_n == 0) { return y; } if (y_n == 0) { return x; } r_ssize out_n = r_ssize_add(x_n, y_n); SEXP out = PROTECT(r_alloc_vector(STRSXP, out_n)); const SEXP* p_x = STRING_PTR_RO(x); const SEXP* p_y = STRING_PTR_RO(y); for (r_ssize i = 0; i < x_n; ++i) { SET_STRING_ELT(out, i, p_x[i]); } for (r_ssize i = 0, j = x_n; i < y_n; ++i, ++j) { SET_STRING_ELT(out, j, p_y[i]); } UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_fast_c(SEXP x, SEXP y) { SEXPTYPE x_type = TYPEOF(x); if (x_type != TYPEOF(y)) { Rf_error("`x` and `y` must have the same types."); } switch (x_type) { case STRSXP: return chr_c(x, y); default: stop_unimplemented_type("vctrs_fast_c", x_type); } } bool vctrs_debug_verbose = false; SEXP vctrs_ns_env = NULL; SEXP vctrs_shared_empty_str = NULL; SEXP vctrs_shared_empty_date = NULL; Rcomplex vctrs_shared_na_cpl; SEXP vctrs_shared_na_lgl = NULL; SEXP vctrs_shared_na_list = NULL; SEXP vctrs_shared_zero_int = NULL; SEXP strings2 = NULL; SEXP strings_empty = NULL; SEXP strings_dots = NULL; SEXP strings_none = NULL; SEXP strings_minimal = NULL; SEXP strings_unique = NULL; SEXP strings_universal = NULL; SEXP strings_check_unique = NULL; SEXP strings_unique_quiet = NULL; SEXP strings_universal_quiet = NULL; SEXP strings_key = NULL; SEXP strings_loc = NULL; SEXP strings_val = NULL; SEXP strings_group = NULL; SEXP strings_length = NULL; SEXP strings_vctrs_vctr = NULL; SEXP strings_times = NULL; SEXP strings_needles = NULL; SEXP strings_haystack = NULL; SEXP chrs_subset = NULL; SEXP chrs_extract = NULL; SEXP chrs_assign = NULL; SEXP chrs_rename = NULL; SEXP chrs_remove = NULL; SEXP chrs_negate = NULL; SEXP chrs_null = NULL; SEXP chrs_logical = NULL; SEXP chrs_integer = NULL; SEXP chrs_double = NULL; SEXP chrs_complex = NULL; SEXP chrs_character = NULL; SEXP chrs_raw = NULL; SEXP chrs_list = NULL; SEXP chrs_expression = NULL; SEXP chrs_numeric = NULL; SEXP chrs_function = NULL; SEXP chrs_empty = NULL; SEXP chrs_cast = NULL; SEXP chrs_error = NULL; SEXP chrs_combine = NULL; SEXP chrs_convert = NULL; SEXP chrs_asc = NULL; SEXP chrs_desc = NULL; SEXP chrs_largest = NULL; SEXP chrs_smallest = NULL; SEXP chrs_which = NULL; SEXP syms_i = NULL; SEXP syms_j = NULL; SEXP syms_n = NULL; SEXP syms_x = NULL; SEXP syms_y = NULL; SEXP syms_x_size = NULL; SEXP syms_y_size = NULL; SEXP syms_to = NULL; SEXP syms_dots = NULL; SEXP syms_bracket = NULL; SEXP syms_x_arg = NULL; SEXP syms_y_arg = NULL; SEXP syms_to_arg = NULL; SEXP syms_times_arg = NULL; SEXP syms_subscript_arg = NULL; SEXP syms_needles_arg = NULL; SEXP syms_haystack_arg = NULL; SEXP syms_out = NULL; SEXP syms_value = NULL; SEXP syms_quiet = NULL; SEXP syms_dot_name_spec = NULL; SEXP syms_outer = NULL; SEXP syms_inner = NULL; SEXP syms_tilde = NULL; SEXP syms_dot_environment = NULL; SEXP syms_ptype = NULL; SEXP syms_missing = NULL; SEXP syms_size = NULL; SEXP syms_subscript_action = NULL; SEXP syms_subscript_type = NULL; SEXP syms_repair = NULL; SEXP syms_tzone = NULL; SEXP syms_data = NULL; SEXP syms_vctrs_error_incompatible_type = NULL; SEXP syms_vctrs_error_cast_lossy = NULL; SEXP syms_cnd_signal = NULL; SEXP syms_logical = NULL; SEXP syms_numeric = NULL; SEXP syms_character = NULL; SEXP syms_body = NULL; SEXP syms_parent = NULL; SEXP syms_s3_methods_table = NULL; SEXP syms_from_dispatch = NULL; SEXP syms_df_fallback = NULL; SEXP syms_s3_fallback = NULL; SEXP syms_stop_incompatible_type = NULL; SEXP syms_stop_incompatible_size = NULL; SEXP syms_stop_assert_size = NULL; SEXP syms_stop_matches_overflow = NULL; SEXP syms_stop_matches_nothing = NULL; SEXP syms_stop_matches_remaining = NULL; SEXP syms_stop_matches_incomplete = NULL; SEXP syms_stop_matches_multiple = NULL; SEXP syms_warn_matches_multiple = NULL; SEXP syms_stop_matches_relationship_one_to_one = NULL; SEXP syms_stop_matches_relationship_one_to_many = NULL; SEXP syms_stop_matches_relationship_many_to_one = NULL; SEXP syms_warn_matches_relationship_many_to_many = NULL; SEXP syms_action = NULL; SEXP syms_vctrs_common_class_fallback = NULL; SEXP syms_fallback_class = NULL; SEXP syms_abort = NULL; SEXP syms_message = NULL; SEXP syms_chr_proxy_collate = NULL; SEXP syms_actual = NULL; SEXP syms_required = NULL; SEXP syms_call = NULL; SEXP syms_dot_call = NULL; SEXP syms_which = NULL; SEXP fns_bracket = NULL; SEXP fns_quote = NULL; SEXP fns_names = NULL; SEXP result_attrib = NULL; SEXP r_new_shared_vector(SEXPTYPE type, R_len_t n) { SEXP out = Rf_allocVector(type, n); R_PreserveObject(out); MARK_NOT_MUTABLE(out); return out; } SEXP r_new_shared_character(const char* name) { SEXP out = Rf_mkString(name); R_PreserveObject(out); MARK_NOT_MUTABLE(out); return out; } void c_print_backtrace(void) { #if defined(RLIB_DEBUG) #include #include void *buffer[500]; int nptrs = backtrace(buffer, 100); char **strings = backtrace_symbols(buffer, nptrs); for (int j = 0; j < nptrs; ++j) { Rprintf("%s\n", strings[j]); } free(strings); #else Rprintf("vctrs must be compliled with -DRLIB_DEBUG."); #endif } void vctrs_init_utils(SEXP ns) { vctrs_ns_env = ns; vctrs_debug_verbose = r_is_true(Rf_GetOption1(Rf_install("vctrs:::debug"))); vctrs_method_table = r_env_get(ns, Rf_install(".__S3MethodsTable__.")); base_method_table = r_env_get(R_BaseNamespace, Rf_install(".__S3MethodsTable__.")); s4_c_method_table = r_parse_eval("environment(methods::getGeneric('c'))$.MTable", R_GlobalEnv); R_PreserveObject(s4_c_method_table); vctrs_shared_empty_str = Rf_mkString(""); R_PreserveObject(vctrs_shared_empty_str); // Holds the CHARSXP objects because unlike symbols they can be // garbage collected strings2 = r_new_shared_vector(STRSXP, 25); strings_dots = Rf_mkChar("..."); SET_STRING_ELT(strings2, 0, strings_dots); strings_empty = Rf_mkChar(""); SET_STRING_ELT(strings2, 1, strings_empty); strings_date = Rf_mkChar("Date"); SET_STRING_ELT(strings2, 2, strings_date); strings_posixct = Rf_mkChar("POSIXct"); SET_STRING_ELT(strings2, 3, strings_posixct); strings_posixlt = Rf_mkChar("POSIXlt"); SET_STRING_ELT(strings2, 4, strings_posixlt); strings_posixt = Rf_mkChar("POSIXt"); SET_STRING_ELT(strings2, 5, strings_posixt); strings_none = Rf_mkChar("none"); SET_STRING_ELT(strings2, 6, strings_none); strings_minimal = Rf_mkChar("minimal"); SET_STRING_ELT(strings2, 7, strings_minimal); strings_unique = Rf_mkChar("unique"); SET_STRING_ELT(strings2, 8, strings_unique); strings_universal = Rf_mkChar("universal"); SET_STRING_ELT(strings2, 9, strings_universal); strings_check_unique = Rf_mkChar("check_unique"); SET_STRING_ELT(strings2, 10, strings_check_unique); strings_unique_quiet = Rf_mkChar("unique_quiet"); SET_STRING_ELT(strings2, 23, strings_unique_quiet); strings_universal_quiet = Rf_mkChar("universal_quiet"); SET_STRING_ELT(strings2, 24, strings_universal_quiet); strings_key = Rf_mkChar("key"); SET_STRING_ELT(strings2, 11, strings_key); strings_loc = Rf_mkChar("loc"); SET_STRING_ELT(strings2, 12, strings_loc); strings_val = Rf_mkChar("val"); SET_STRING_ELT(strings2, 13, strings_val); strings_group = Rf_mkChar("group"); SET_STRING_ELT(strings2, 14, strings_group); strings_length = Rf_mkChar("length"); SET_STRING_ELT(strings2, 15, strings_length); strings_factor = Rf_mkChar("factor"); SET_STRING_ELT(strings2, 16, strings_factor); strings_ordered = Rf_mkChar("ordered"); SET_STRING_ELT(strings2, 17, strings_ordered); strings_list = Rf_mkChar("list"); SET_STRING_ELT(strings2, 18, strings_list); strings_vctrs_vctr = Rf_mkChar("vctrs_vctr"); SET_STRING_ELT(strings2, 19, strings_vctrs_vctr); strings_times = Rf_mkChar("times"); SET_STRING_ELT(strings2, 20, strings_times); strings_needles = Rf_mkChar("needles"); SET_STRING_ELT(strings2, 21, strings_needles); strings_haystack = Rf_mkChar("haystack"); SET_STRING_ELT(strings2, 22, strings_haystack); classes_data_frame = r_new_shared_vector(STRSXP, 1); strings_data_frame = Rf_mkChar("data.frame"); SET_STRING_ELT(classes_data_frame, 0, strings_data_frame); classes_factor = r_new_shared_vector(STRSXP, 1); SET_STRING_ELT(classes_factor, 0, strings_factor); classes_ordered = r_new_shared_vector(STRSXP, 2); SET_STRING_ELT(classes_ordered, 0, strings_ordered); SET_STRING_ELT(classes_ordered, 1, strings_factor); classes_date = r_new_shared_vector(STRSXP, 1); SET_STRING_ELT(classes_date, 0, strings_date); classes_posixct = r_new_shared_vector(STRSXP, 2); SET_STRING_ELT(classes_posixct, 0, strings_posixct); SET_STRING_ELT(classes_posixct, 1, strings_posixt); chrs_subset = r_new_shared_character("subset"); chrs_extract = r_new_shared_character("extract"); chrs_assign = r_new_shared_character("assign"); chrs_rename = r_new_shared_character("rename"); chrs_remove = r_new_shared_character("remove"); chrs_negate = r_new_shared_character("negate"); chrs_null = r_new_shared_character("NULL"); chrs_logical = r_new_shared_character("logical"); chrs_integer = r_new_shared_character("integer"); chrs_double = r_new_shared_character("double"); chrs_complex = r_new_shared_character("complex"); chrs_character = r_new_shared_character("character"); chrs_raw = r_new_shared_character("raw"); chrs_list = r_new_shared_character("list"); chrs_expression = r_new_shared_character("expression"); chrs_numeric = r_new_shared_character("numeric"); chrs_function = r_new_shared_character("function"); chrs_empty = r_new_shared_character(""); chrs_cast = r_new_shared_character("cast"); chrs_error = r_new_shared_character("error"); chrs_combine = r_new_shared_character("combine"); chrs_convert = r_new_shared_character("convert"); chrs_asc = r_new_shared_character("asc"); chrs_desc = r_new_shared_character("desc"); chrs_largest = r_new_shared_character("largest"); chrs_smallest = r_new_shared_character("smallest"); chrs_which = r_new_shared_character("which"); classes_tibble = r_new_shared_vector(STRSXP, 3); strings_tbl_df = Rf_mkChar("tbl_df"); SET_STRING_ELT(classes_tibble, 0, strings_tbl_df); strings_tbl = Rf_mkChar("tbl"); SET_STRING_ELT(classes_tibble, 1, strings_tbl); SET_STRING_ELT(classes_tibble, 2, strings_data_frame); classes_vctrs_group_rle = r_new_shared_vector(STRSXP, 3); SET_STRING_ELT(classes_vctrs_group_rle, 0, Rf_mkChar("vctrs_group_rle")); SET_STRING_ELT(classes_vctrs_group_rle, 1, Rf_mkChar("vctrs_rcrd")); SET_STRING_ELT(classes_vctrs_group_rle, 2, Rf_mkChar("vctrs_vctr")); vctrs_shared_empty_date = r_new_shared_vector(REALSXP, 0); Rf_setAttrib(vctrs_shared_empty_date, R_ClassSymbol, classes_date); vctrs_shared_na_cpl.i = NA_REAL; vctrs_shared_na_cpl.r = NA_REAL; vctrs_shared_na_lgl = r_new_shared_vector(LGLSXP, 1); LOGICAL(vctrs_shared_na_lgl)[0] = NA_LOGICAL; vctrs_shared_na_list = r_new_shared_vector(VECSXP, 1); SET_VECTOR_ELT(vctrs_shared_na_list, 0, R_NilValue); vctrs_shared_zero_int = r_new_shared_vector(INTSXP, 1); INTEGER(vctrs_shared_zero_int)[0] = 0; syms_i = Rf_install("i"); syms_j = Rf_install("j"); syms_n = Rf_install("n"); syms_x = Rf_install("x"); syms_y = Rf_install("y"); syms_x_size = Rf_install("x_size"); syms_y_size = Rf_install("y_size"); syms_to = Rf_install("to"); syms_dots = Rf_install("..."); syms_bracket = Rf_install("["); syms_x_arg = Rf_install("x_arg"); syms_y_arg = Rf_install("y_arg"); syms_to_arg = Rf_install("to_arg"); syms_times_arg = Rf_install("times_arg"); syms_subscript_arg = Rf_install("subscript_arg"); syms_needles_arg = Rf_install("needles_arg"); syms_haystack_arg = Rf_install("haystack_arg"); syms_out = Rf_install("out"); syms_value = Rf_install("value"); syms_quiet = Rf_install("quiet"); syms_dot_name_spec = Rf_install(".name_spec"); syms_outer = Rf_install("outer"); syms_inner = Rf_install("inner"); syms_tilde = Rf_install("~"); syms_dot_environment = Rf_install(".Environment"); syms_ptype = Rf_install("ptype"); syms_missing = R_MissingArg; syms_size = Rf_install("size"); syms_subscript_action = Rf_install("subscript_action"); syms_subscript_type = Rf_install("subscript_type"); syms_repair = Rf_install("repair"); syms_tzone = Rf_install("tzone"); syms_data = Rf_install("data"); syms_try_catch_impl = Rf_install("try_catch_impl"); syms_try_catch_hnd = Rf_install("try_catch_hnd"); syms_vctrs_error_incompatible_type = Rf_install("vctrs_error_incompatible_type"); syms_vctrs_error_cast_lossy = Rf_install("vctrs_error_cast_lossy"); syms_cnd_signal = Rf_install("cnd_signal"); syms_logical = Rf_install("logical"); syms_numeric = Rf_install("numeric"); syms_character = Rf_install("character"); syms_body = Rf_install("body"); syms_parent = Rf_install("parent"); syms_s3_methods_table = Rf_install(".__S3MethodsTable__."); syms_from_dispatch = Rf_install("vctrs:::from_dispatch"); syms_s3_fallback = Rf_install("vctrs:::s3_fallback"); syms_stop_incompatible_type = Rf_install("stop_incompatible_type"); syms_stop_incompatible_size = Rf_install("stop_incompatible_size"); syms_stop_assert_size = Rf_install("stop_assert_size"); syms_stop_matches_overflow = Rf_install("stop_matches_overflow"); syms_stop_matches_nothing = Rf_install("stop_matches_nothing"); syms_stop_matches_remaining = Rf_install("stop_matches_remaining"); syms_stop_matches_incomplete = Rf_install("stop_matches_incomplete"); syms_stop_matches_multiple = Rf_install("stop_matches_multiple"); syms_warn_matches_multiple = Rf_install("warn_matches_multiple"); syms_stop_matches_relationship_one_to_one = Rf_install("stop_matches_relationship_one_to_one"); syms_stop_matches_relationship_one_to_many = Rf_install("stop_matches_relationship_one_to_many"); syms_stop_matches_relationship_many_to_one = Rf_install("stop_matches_relationship_many_to_one"); syms_warn_matches_relationship_many_to_many = Rf_install("warn_matches_relationship_many_to_many"); syms_action = Rf_install("action"); syms_vctrs_common_class_fallback = Rf_install(c_strs_vctrs_common_class_fallback); syms_fallback_class = Rf_install("fallback_class"); syms_abort = Rf_install("abort"); syms_message = Rf_install("message"); syms_chr_proxy_collate = Rf_install("chr_proxy_collate"); syms_actual = Rf_install("actual"); syms_required = Rf_install("required"); syms_call = Rf_install("call"); syms_dot_call = Rf_install(".call"); syms_which = Rf_install("which"); fns_bracket = Rf_findVar(syms_bracket, R_BaseEnv); fns_quote = Rf_findVar(Rf_install("quote"), R_BaseEnv); fns_names = Rf_findVar(Rf_install("names"), R_BaseEnv); new_env_call = r_parse_eval("as.call(list(new.env, TRUE, NULL, NULL))", R_BaseEnv); R_PreserveObject(new_env_call); new_env__parent_node = CDDR(new_env_call); new_env__size_node = CDR(new_env__parent_node); rlang_is_splice_box = (bool (*)(SEXP)) R_GetCCallable("rlang", "rlang_is_splice_box"); rlang_unbox = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_unbox"); rlang_env_dots_values = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_env_dots_values"); rlang_env_dots_list = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_env_dots_list"); rlang_sym_as_character = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_sym_as_character"); syms_as_data_frame2 = Rf_install("as.data.frame2"); syms_colnames = Rf_install("colnames"); fns_as_data_frame2 = r_env_get(ns, syms_as_data_frame2); fns_colnames = r_env_get(R_BaseEnv, syms_colnames); compact_seq_attrib = Rf_cons(R_NilValue, R_NilValue); R_PreserveObject(compact_seq_attrib); SET_TAG(compact_seq_attrib, Rf_install("vctrs_compact_seq")); compact_rep_attrib = Rf_cons(R_NilValue, R_NilValue); R_PreserveObject(compact_rep_attrib); SET_TAG(compact_rep_attrib, Rf_install("vctrs_compact_rep")); { SEXP result_names = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(result_names, 0, Rf_mkChar("ok")); SET_STRING_ELT(result_names, 1, Rf_mkChar("err")); result_attrib = PROTECT(Rf_cons(result_names, R_NilValue)); SET_TAG(result_attrib, R_NamesSymbol); SEXP result_class = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(result_class, 0, Rf_mkChar("rlang_result")); result_attrib = PROTECT(Rf_cons(result_class, result_attrib)); SET_TAG(result_attrib, R_ClassSymbol); R_PreserveObject(result_attrib); MARK_NOT_MUTABLE(result_attrib); UNPROTECT(4); } // We assume the following in `union vctrs_dbl_indicator` VCTRS_ASSERT(sizeof(double) == sizeof(int64_t)); VCTRS_ASSERT(sizeof(double) == 2 * sizeof(int)); // We assume the following in `vec_order()` VCTRS_ASSERT(sizeof(int) == sizeof(int32_t)); VCTRS_ASSERT(sizeof(double) == sizeof(int64_t)); } vctrs/src/dim.h0000644000176200001440000000135314315060310013117 0ustar liggesusers#ifndef VCTRS_DIM_H #define VCTRS_DIM_H #include "vctrs-core.h" #include "utils.h" // These versions return NULL and 0 for bare vectors. // This is useful to distinguish them from 1D arrays. static inline SEXP vec_bare_dim(SEXP x) { return r_dim(x); } static inline R_len_t vec_bare_dim_n(SEXP x) { return Rf_length(vec_bare_dim(x)); } static inline SEXP vec_dim(SEXP x) { SEXP dim = vec_bare_dim(x); if (dim == R_NilValue) { dim = r_int(Rf_length(x)); } return dim; } static inline R_len_t vec_dim_n(SEXP x) { SEXP dim = vec_bare_dim(x); if (dim == R_NilValue) { return 1; } return Rf_length(dim); } static inline bool has_dim(SEXP x) { return ATTRIB(x) != R_NilValue && r_dim(x) != R_NilValue; } #endif vctrs/src/order-groups.c0000644000176200001440000001335714422506663015017 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" // ----------------------------------------------------------------------------- // Pair with `PROTECT_GROUP_INFO()` in the caller struct group_info* new_group_info(void) { SEXP self = PROTECT(r_new_raw(sizeof(struct group_info))); struct group_info* p_group_info = (struct group_info*) RAW(self); p_group_info->self = self; p_group_info->data_size = 0; p_group_info->data = r_globals.empty_int; p_group_info->n_groups = 0; p_group_info->max_group_size = 0; UNPROTECT(1); return p_group_info; } // ----------------------------------------------------------------------------- struct group_infos* new_group_infos(struct group_info* p_group_info0, struct group_info* p_group_info1, r_ssize max_data_size, bool force_groups, bool ignore_groups) { SEXP self = PROTECT(r_new_raw(sizeof(struct group_infos))); struct group_infos* p_group_infos = (struct group_infos*) RAW(self); SEXP p_p_group_info_data = PROTECT(r_new_raw(2 * sizeof(struct group_info*))); struct group_info** p_p_group_info = (struct group_info**) RAW(p_p_group_info_data); p_p_group_info[0] = p_group_info0; p_p_group_info[1] = p_group_info1; p_group_infos->self = self; p_group_infos->p_p_group_info_data = p_p_group_info_data; p_group_infos->p_p_group_info = p_p_group_info; p_group_infos->max_data_size = max_data_size; p_group_infos->current = 0; p_group_infos->force_groups = force_groups; p_group_infos->ignore_groups = ignore_groups; UNPROTECT(2); return p_group_infos; } // ----------------------------------------------------------------------------- static void group_realloc(r_ssize size, struct group_info* p_group_info); static r_ssize groups_realloc_size(r_ssize data_size, r_ssize max_data_size); /* * Push a group size onto the current `group_info*` * - Reallocates as needed * - Updates number of groups / max group size as well * * Should only be called through `groups_size_maybe_push()` to ensure * that we only push groups if we are tracking them. */ void groups_size_push(r_ssize size, struct group_infos* p_group_infos) { if (size == 0) { Rf_errorcall(R_NilValue, "Internal error: Group `size` to push should never be zero."); } struct group_info* p_group_info = groups_current(p_group_infos); // Extend `data` as required - reprotects itself if (p_group_info->data_size == p_group_info->n_groups) { r_ssize new_data_size = groups_realloc_size( p_group_info->data_size, p_group_infos->max_data_size ); group_realloc(new_data_size, p_group_info); } // Push group size p_group_info->p_data[p_group_info->n_groups] = size; // Bump number of groups ++p_group_info->n_groups; // Update max group size if (p_group_info->max_group_size < size) { p_group_info->max_group_size = size; } } // ----------------------------------------------------------------------------- /* * Reallocate `data` to be as long as `size`. */ static void group_realloc(r_ssize size, struct group_info* p_group_info) { // Reallocate p_group_info->data = int_resize( p_group_info->data, p_group_info->data_size, size ); // Reprotect REPROTECT(p_group_info->data, p_group_info->data_pi); // Update pointer p_group_info->p_data = INTEGER(p_group_info->data); // Update size p_group_info->data_size = size; } // ----------------------------------------------------------------------------- static r_ssize groups_realloc_size(r_ssize data_size, r_ssize max_data_size) { uint64_t new_data_size; if (data_size == 0) { // First allocation new_data_size = GROUP_DATA_SIZE_DEFAULT; } else { // Avoid potential overflow when doubling size new_data_size = ((uint64_t) data_size) * 2; } // Clamp maximum allocation size to the size of the input if (new_data_size > max_data_size) { return max_data_size; } // Can now safely cast back to `r_ssize` return (r_ssize) new_data_size; } // ----------------------------------------------------------------------------- /* * `groups_swap()` is called after each data frame column is processed. * It handles switching the `current` group info that we are working on, * and ensures that the information that might have been there before has * been zeroed out. It also ensures that the new current group info has at * least as much space as the previous one, which is especially important for * the first column swap where the 2nd group info array starts as a size 0 * integer vector (because we don't know if it will get used or not). */ void groups_swap(struct group_infos* p_group_infos) { if (p_group_infos->ignore_groups) { return; } struct group_info* p_group_info_pre = groups_current(p_group_infos); // Make the swap p_group_infos->current = 1 - p_group_infos->current; struct group_info* p_group_info_post = groups_current(p_group_infos); // Clear the info from last time the swap was made p_group_info_post->max_group_size = 0; p_group_info_post->n_groups = 0; // Ensure the new group info is at least as big as the old group info if (p_group_info_post->data_size < p_group_info_pre->data_size) { r_ssize new_data_size = p_group_info_pre->data_size; group_realloc(new_data_size, p_group_info_post); } } vctrs/src/vec-bool.h0000644000176200001440000000166314364250244014073 0ustar liggesusers#ifndef VCTRS_VEC_BOOL_H #define VCTRS_VEC_BOOL_H #include struct r_vector_bool { r_obj* shelter; r_obj* data; bool* v_data; r_ssize n; }; static inline struct r_vector_bool* r_new_vector_bool(r_ssize n) { r_obj* shelter = KEEP(r_alloc_list(2)); r_obj* vec = r_alloc_raw(sizeof(struct r_vector_bool)); r_list_poke(shelter, 0, vec); r_obj* data = r_alloc_raw(n * sizeof(bool)); r_list_poke(shelter, 1, data); struct r_vector_bool* p_vec = r_raw_begin(vec); p_vec->shelter = shelter; p_vec->data = data; p_vec->v_data = r_raw_begin(data); p_vec->n = n; FREE(1); return p_vec; } static inline bool* r_vector_bool_begin(struct r_vector_bool* p_vec) { return p_vec->v_data; } static inline const bool* r_vector_bool_cbegin(struct r_vector_bool* p_vec) { return (const bool*) p_vec->v_data; } static inline r_ssize r_vector_bool_length(struct r_vector_bool* p_vec) { return p_vec->n; } #endif vctrs/src/type-integer64.c0000644000176200001440000001010514315060310015122 0ustar liggesusers#include "vctrs.h" #include "decl/type-integer64-decl.h" #define r_na_llong LLONG_MIN static const char* v_integer64_proxy_df_names_c_strings[] = { "left", "right" }; static const enum r_type v_integer64_proxy_df_types[] = { R_TYPE_double, R_TYPE_double }; enum integer64_proxy_df_locs { INTEGER64_PROXY_DF_LOCS_left, INTEGER64_PROXY_DF_LOCS_right }; #define INTEGER64_PROXY_DF_SIZE R_ARR_SIZEOF(v_integer64_proxy_df_types) // [[ register() ]] r_obj* vctrs_integer64_proxy(r_obj* x) { if (r_typeof(x) != R_TYPE_double) { r_stop_internal("`x` must be a double."); } if (r_attrib_get(x, R_DimSymbol) != r_null) { r_stop_internal("`x` should not have a `dim` attribute."); } r_ssize size = r_length(x); // Casting `const double*` to `const long long*` is UB, but we are mimicking // what bit64 is doing, so if this ever breaks it means that bit64 is broken. const long long* v_x = (const long long*) r_dbl_cbegin(x); r_obj* nms = KEEP(r_chr_n( v_integer64_proxy_df_names_c_strings, INTEGER64_PROXY_DF_SIZE )); r_obj* out = KEEP(r_alloc_df_list( size, nms, v_integer64_proxy_df_types, INTEGER64_PROXY_DF_SIZE )); r_init_data_frame(out, size); r_obj* left = r_list_get(out, INTEGER64_PROXY_DF_LOCS_left); r_obj* right = r_list_get(out, INTEGER64_PROXY_DF_LOCS_right); double* v_left = r_dbl_begin(left); double* v_right = r_dbl_begin(right); for (r_ssize i = 0; i < size; ++i) { const long long elt = v_x[i]; if (elt == r_na_llong) { v_left[i] = r_globals.na_dbl; v_right[i] = r_globals.na_dbl; continue; } const int64_t elt_i64 = (int64_t) elt; int64_unpack(elt_i64, i, v_left, v_right); } FREE(2); return out; } // [[ register() ]] r_obj* vctrs_integer64_restore(r_obj* x) { if (!is_data_frame(x)) { r_stop_internal("`x` must be a data frame."); } if (r_length(x) != 2) { r_stop_internal("`x` must have two columns."); } r_obj* left = r_list_get(x, INTEGER64_PROXY_DF_LOCS_left); r_obj* right = r_list_get(x, INTEGER64_PROXY_DF_LOCS_right); const double* v_left = r_dbl_cbegin(left); const double* v_right = r_dbl_cbegin(right); r_ssize size = r_length(left); r_obj* out = KEEP(r_alloc_double(size)); // See above comment about UB in this cast long long* v_out = (long long*) r_dbl_begin(out); r_attrib_poke_class(out, r_chr("integer64")); for (r_ssize i = 0; i < size; ++i) { const double left = v_left[i]; const double right = v_right[i]; if (isnan(left)) { v_out[i] = r_na_llong; continue; } v_out[i] = (long long) int64_pack(left, right); } FREE(1); return out; } // ----------------------------------------------------------------------------- /* * This pair of functions facilitates: * - Splitting an `int64_t` into two `uint32_t` values, maintaining order * - Combining those two `uint32_t` values back into the original `int32_t` * * The two `uint32_t` values are stored in two doubles. This allows us to store * it in a two column data frame that vctrs knows how to work with, and we can * use the standard `NA_real_` as the missing value without fear of conflicting * with any other valid `int64_t` value. * * Unsigned 32-bit integers are used because bit shifting is undefined on signed * types. * * An arithmetic shift of `- INT64_MIN` is done to remap the int64_t value * into uint64_t space, while maintaining order. This relies on unsigned * arithmetic overflow behavior, which is well-defined. */ static inline void int64_unpack(int64_t x, r_ssize i, double* v_left, double* v_right) { const uint64_t x_u64 = ((uint64_t) x) - INT64_MIN; const uint32_t left_u32 = (uint32_t) (x_u64 >> 32); const uint32_t right_u32 = (uint32_t) x_u64; v_left[i] = (double) left_u32; v_right[i] = (double) right_u32; } static inline int64_t int64_pack(double left, double right) { const uint32_t left_u32 = (uint32_t) left; const uint32_t right_u32 = (uint32_t) right; const uint64_t out_u64 = ((uint64_t) left_u32) << 32 | right_u32; const int64_t out = (int64_t) (out_u64 + INT64_MIN); return out; } vctrs/src/bind.c0000644000176200001440000004303114426175222013270 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/bind-decl.h" // [[ register(external = TRUE) ]] r_obj* ffi_rbind(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* frame) { args = r_node_cdr(args); struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; r_obj* xs = KEEP(rlang_env_dots_list(frame)); r_obj* ptype = r_node_car(args); args = r_node_cdr(args); r_obj* names_to = r_node_car(args); args = r_node_cdr(args); r_obj* name_repair = r_node_car(args); args = r_node_cdr(args); r_obj* name_spec = r_node_car(args); if (names_to != r_null) { if (r_inherits(names_to, "rlang_zap")) { r_attrib_poke_names(xs, r_null); names_to = r_null; } else if (r_is_string(names_to)) { names_to = r_chr_get(names_to, 0); } else { r_abort_lazy_call(error_call, "%s must be `NULL`, a string, or an `rlang::zap()` object.", r_c_str_format_error_arg(".names_to")); } } struct name_repair_opts name_repair_opts = validate_bind_name_repair(name_repair, false); KEEP(name_repair_opts.shelter); name_repair_opts.call = error_call; r_obj* out = vec_rbind(xs, ptype, names_to, &name_repair_opts, name_spec, error_call); FREE(2); return out; } static r_obj* vec_rbind(r_obj* xs, r_obj* ptype, r_obj* names_to, struct name_repair_opts* name_repair, r_obj* name_spec, struct r_lazy error_call) { // In case `.arg` is added later on struct vctrs_arg* p_arg = vec_args.empty; int n_prot = 0; r_ssize n_inputs = r_length(xs); for (r_ssize i = 0; i < n_inputs; ++i) { r_list_poke(xs, i, as_df_row(r_list_get(xs, i), name_repair, error_call)); } // The common type holds information about common column names, // types, etc. Each element of `xs` needs to be cast to that type // before assignment. ptype = vec_ptype_common_params(xs, ptype, S3_FALLBACK_true, p_arg, error_call); KEEP_N(ptype, &n_prot); r_ssize n_cols = r_length(ptype); if (ptype == r_null) { FREE(n_prot); return new_data_frame(r_globals.empty_list, 0); } if (r_typeof(ptype) == R_TYPE_logical && !n_cols) { ptype = as_df_row_impl(vctrs_shared_na_lgl, name_repair, error_call); KEEP_N(ptype, &n_prot); } if (!is_data_frame(ptype)) { r_abort_lazy_call(error_call, "Can't bind objects that are not coercible to a data frame."); } bool assign_names = !r_inherits(name_spec, "rlang_zap"); bool has_names_to = names_to != r_null; r_ssize names_to_loc = 0; if (has_names_to) { if (!assign_names) { r_abort_lazy_call(error_call, "Can't zap outer names when %s is supplied.", r_c_str_format_error_arg(".names_to")); } r_obj* ptype_nms = KEEP(r_names(ptype)); names_to_loc = r_chr_find(ptype_nms, names_to); FREE(1); if (names_to_loc < 0) { ptype = cbind_names_to(r_names(xs) != r_null, names_to, ptype, error_call); KEEP_N(ptype, &n_prot); names_to_loc = 0; } } // Must happen after the `names_to` column has been added to `ptype` xs = vec_cast_common_params(xs, ptype, S3_FALLBACK_true, vec_args.empty, error_call); KEEP_N(xs, &n_prot); // Find individual input sizes and total size of output r_ssize n_rows = 0; r_obj* ns_placeholder = KEEP_N(r_alloc_integer(n_inputs), &n_prot); int* ns = r_int_begin(ns_placeholder); for (r_ssize i = 0; i < n_inputs; ++i) { r_obj* elt = r_list_get(xs, i); r_ssize size = (elt == r_null) ? 0 : vec_size(elt); n_rows += size; ns[i] = size; } r_obj* proxy = KEEP_N(vec_proxy_recurse(ptype), &n_prot); if (!is_data_frame(proxy)) { r_abort_lazy_call(error_call, "Can't fill a data frame that doesn't have a data frame proxy."); } r_keep_loc out_pi; r_obj* out = vec_init(proxy, n_rows); KEEP_HERE(out, &out_pi); ++n_prot; r_obj* loc = KEEP_N(compact_seq(0, 0, true), &n_prot); int* p_loc = r_int_begin(loc); r_obj* row_names = r_null; r_keep_loc rownames_pi; KEEP_HERE(row_names, &rownames_pi); ++n_prot; r_obj* names_to_col = r_null; enum r_type names_to_type = 99; void* p_names_to_col = NULL; const void* p_index = NULL; r_obj* xs_names = KEEP_N(r_names(xs), &n_prot); bool xs_is_named = xs_names != r_null; if (has_names_to) { r_obj* index = r_null; if (xs_is_named) { index = xs_names; } else { index = KEEP_N(r_alloc_integer(n_inputs), &n_prot); r_int_fill_seq(index, 1, n_inputs); } names_to_type = r_typeof(index); names_to_col = KEEP_N(r_alloc_vector(names_to_type, n_rows), &n_prot); p_index = r_vec_deref_barrier_const(index); p_names_to_col = r_vec_deref_barrier(names_to_col); xs_names = r_null; xs_is_named = false; } r_obj* const * p_xs_names = NULL; if (xs_is_named) { p_xs_names = r_chr_cbegin(xs_names); } // Compact sequences use 0-based counters r_ssize counter = 0; const struct vec_assign_opts bind_assign_opts = { .recursive = true, .assign_names = assign_names, // Unlike in `vec_c()` we don't need to ignore outer names because // `df_assign()` doesn't deal with those .ignore_outer_names = false }; for (r_ssize i = 0; i < n_inputs; ++i) { r_ssize size = ns[i]; if (!size) { continue; } r_obj* x = r_list_get(xs, i); // Update `loc` to assign within `out[counter:counter + size, ]` init_compact_seq(p_loc, counter, size, true); // Total ownership of `out` because it was freshly created with `vec_init()` out = df_assign(out, loc, x, VCTRS_OWNED_true, &bind_assign_opts); KEEP_AT(out, out_pi); if (assign_names) { r_obj* outer = xs_is_named ? p_xs_names[i] : r_null; r_obj* inner = KEEP(vec_names(x)); r_obj* x_nms = KEEP(apply_name_spec(name_spec, outer, inner, size)); if (x_nms != r_null) { R_LAZY_ALLOC(row_names, rownames_pi, R_TYPE_character, n_rows); // If there is no name to assign, skip the assignment since // `out_names` already contains empty strings if (inner != chrs_empty) { row_names = chr_assign(row_names, loc, x_nms, VCTRS_OWNED_true); KEEP_AT(row_names, rownames_pi); } } FREE(2); } // Assign current name to group vector, if supplied if (has_names_to) { r_vec_fill(names_to_type, p_names_to_col, counter, p_index, i, size); } counter += size; } if (row_names != r_null) { r_attrib_poke(out, r_syms.row_names, row_names); } df_c_fallback(out, ptype, xs, n_rows, name_spec, name_repair, error_call); out = vec_restore_recurse(out, ptype, VCTRS_OWNED_true); KEEP_AT(out, out_pi); if (has_names_to) { out = df_poke(out, names_to_loc, names_to_col); KEEP_AT(out, out_pi); } FREE(n_prot); return out; } static r_obj* as_df_row(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call) { if (vec_is_unspecified(x) && r_names(x) == r_null) { return x; } else { return as_df_row_impl(x, name_repair, error_call); } } static r_obj* as_df_row_impl(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call) { if (x == r_null) { return x; } if (is_data_frame(x)) { return df_repair_names(x, name_repair); } int nprot = 0; r_obj* dim = vec_bare_dim(x); r_ssize ndim = (dim == r_null) ? 1 : r_length(dim); if (ndim > 2) { r_abort_lazy_call(error_call, "Can't bind arrays."); } if (ndim == 2) { r_obj* out = KEEP(r_as_data_frame(x)); r_attrib_poke_names(out, vec_as_names(KEEP(colnames2(x)), name_repair)); FREE(2); FREE(nprot); return out; } // Take names before removing dimensions so we get colnames if needed r_obj* nms = KEEP(vec_names2(x)); nms = KEEP(vec_as_names(nms, name_repair)); if (dim != r_null) { x = KEEP_N(r_clone_referenced(x), &nprot); r_attrib_poke(x, r_syms.dim, r_null); r_attrib_poke(x, r_syms.dim_names, r_null); } // Remove names first as they are promoted to data frame column names x = KEEP(vec_set_names(x, r_null)); x = KEEP(vec_chop_unsafe(x, r_null, r_null)); r_attrib_poke_names(x, nms); x = new_data_frame(x, 1); FREE(4); FREE(nprot); return x; } // [[ register() ]] r_obj* ffi_as_df_row(r_obj* x, r_obj* quiet, r_obj* frame) { struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_unique, .fn = r_null, .quiet = r_lgl_get(quiet, 0) }; struct r_lazy error_call = { .x = frame, .env = r_null }; return as_df_row(x, &name_repair_opts, error_call); } static r_obj* cbind_names_to(bool has_names, r_obj* names_to, r_obj* ptype, struct r_lazy error_call) { r_obj* index_ptype = has_names ? r_globals.empty_chr : r_globals.empty_int; r_obj* tmp = KEEP(r_alloc_list(2)); r_list_poke(tmp, 0, index_ptype); r_list_poke(tmp, 1, ptype); r_obj* tmp_nms = KEEP(r_alloc_character(2)); r_chr_poke(tmp_nms, 0, names_to); r_chr_poke(tmp_nms, 1, strings_empty); r_attrib_poke_names(tmp, tmp_nms); r_obj* out = vec_cbind(tmp, r_null, r_null, NULL, error_call); FREE(2); return out; } // [[ register(external = TRUE) ]] r_obj* ffi_cbind(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* frame) { args = r_node_cdr(args); struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; r_obj* xs = KEEP(rlang_env_dots_list(frame)); r_obj* ptype = r_node_car(args); args = r_node_cdr(args); r_obj* size = r_node_car(args); args = r_node_cdr(args); r_obj* name_repair = r_node_car(args); struct name_repair_opts name_repair_opts = validate_bind_name_repair(name_repair, true); KEEP(name_repair_opts.shelter); name_repair_opts.call = error_call; r_obj* out = vec_cbind(xs, ptype, size, &name_repair_opts, error_call); FREE(2); return out; } static r_obj* vec_cbind(r_obj* xs, r_obj* ptype, r_obj* size, struct name_repair_opts* name_repair, struct r_lazy error_call) { // In case `.arg` is added later on struct vctrs_arg* p_arg = vec_args.empty; r_ssize n = r_length(xs); // Find the common container type of inputs r_obj* rownames = r_null; r_obj* containers = KEEP(map_with_data(xs, &cbind_container_type, &rownames)); ptype = KEEP(cbind_container_type(ptype, &rownames)); r_obj* type = KEEP(vec_ptype_common_params(containers, ptype, S3_FALLBACK_false, p_arg, error_call)); if (type == r_null) { type = new_data_frame(r_globals.empty_list, 0); } else if (!is_data_frame(type)) { type = r_as_data_frame(type); } FREE(1); KEEP(type); r_ssize nrow; if (size == r_null) { nrow = vec_check_size_common(xs, 0, p_arg, error_call); } else { nrow = vec_as_short_length(size, vec_args.dot_size, error_call); } if (rownames != r_null && r_length(rownames) != nrow) { rownames = KEEP(vec_check_recycle(rownames, nrow, vec_args.empty, error_call)); rownames = vec_as_unique_names(rownames, false); FREE(1); } KEEP(rownames); // Convert inputs to data frames, validate, and collect total number of columns r_obj* xs_names = KEEP(r_names(xs)); bool has_names = xs_names != r_null; r_obj* const* xs_names_p = has_names ? r_chr_cbegin(xs_names) : NULL; r_ssize ncol = 0; for (r_ssize i = 0; i < n; ++i) { r_obj* x = r_list_get(xs, i); if (x == r_null) { continue; } x = KEEP(vec_check_recycle(x, nrow, vec_args.empty, r_lazy_null)); r_obj* outer_name = has_names ? xs_names_p[i] : strings_empty; bool allow_packing; x = KEEP(as_df_col(x, outer_name, &allow_packing, error_call)); // Remove outer name of column vectors because they shouldn't be repacked if (has_names && !allow_packing) { r_chr_poke(xs_names, i, strings_empty); } r_list_poke(xs, i, x); FREE(2); // Named inputs are packed in a single column r_ssize x_ncol = outer_name == strings_empty ? r_length(x) : 1; ncol += x_ncol; } // Fill in columns r_keep_loc out_pi; r_obj* out = r_alloc_list(ncol); KEEP_HERE(out, &out_pi); init_data_frame(out, nrow); r_keep_loc names_pi; r_obj* names = r_alloc_character(ncol); KEEP_HERE(names, &names_pi); r_obj* idx = KEEP(compact_seq(0, 0, true)); int* idx_ptr = r_int_begin(idx); r_ssize counter = 0; for (r_ssize i = 0; i < n; ++i) { r_obj* x = r_list_get(xs, i); if (x == r_null) { continue; } r_obj* outer_name = has_names ? xs_names_p[i] : strings_empty; if (outer_name != strings_empty) { r_list_poke(out, counter, x); r_chr_poke(names, counter, outer_name); ++counter; continue; } r_ssize xn = r_length(x); init_compact_seq(idx_ptr, counter, xn, true); // Total ownership of `out` because it was freshly created with `r_alloc_vector()` out = list_assign(out, idx, x, VCTRS_OWNED_true); KEEP_AT(out, out_pi); r_obj* xnms = KEEP(r_names(x)); if (xnms != r_null) { names = chr_assign(names, idx, xnms, VCTRS_OWNED_true); KEEP_AT(names, names_pi); } FREE(1); counter += xn; } names = KEEP(vec_as_names(names, name_repair)); r_attrib_poke(out, r_syms.names, names); if (rownames != r_null) { r_attrib_poke(out, r_syms.row_names, rownames); } out = vec_restore(out, type, VCTRS_OWNED_true); FREE(9); return out; } r_obj* vec_cbind_frame_ptype(r_obj* x) { return vctrs_dispatch1(syms_vec_cbind_frame_ptype, fns_vec_cbind_frame_ptype, syms_x, x); } static r_obj* cbind_container_type(r_obj* x, void* data) { if (is_data_frame(x)) { r_obj* rn = df_rownames(x); if (rownames_type(rn) == ROWNAMES_TYPE_identifiers) { r_obj** learned_rn_p = (r_obj**) data; r_obj* learned_rn = *learned_rn_p; if (learned_rn == r_null) { *learned_rn_p = rn; } } return vec_cbind_frame_ptype(x); } else { return r_null; } } // [[ register() ]] r_obj* ffi_as_df_col(r_obj* x, r_obj* outer, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; bool allow_pack; return as_df_col(x, r_chr_get(outer, 0), &allow_pack, error_call); } static r_obj* as_df_col(r_obj* x, r_obj* outer, bool* allow_pack, struct r_lazy error_call) { if (is_data_frame(x)) { *allow_pack = true; return r_clone(x); } r_ssize ndim = vec_bare_dim_n(x); if (ndim > 2) { r_abort_lazy_call(error_call, "Can't bind arrays."); } if (ndim > 0) { *allow_pack = true; return shaped_as_df_col(x, outer); } *allow_pack = false; return vec_as_df_col(x, outer); } static r_obj* shaped_as_df_col(r_obj* x, r_obj* outer) { // If packed, store array as a column if (outer != strings_empty) { return x; } // If unpacked, transform to data frame first. We repair names // after unpacking and concatenation. r_obj* out = KEEP(r_as_data_frame(x)); // Remove names if they were repaired by `as.data.frame()` if (colnames(x) == r_null) { r_attrib_poke_names(out, r_null); } FREE(1); return out; } static r_obj* vec_as_df_col(r_obj* x, r_obj* outer) { r_obj* out = KEEP(r_alloc_list(1)); r_list_poke(out, 0, x); if (outer != strings_empty) { r_obj* names = KEEP(r_str_as_character(outer)); r_attrib_poke_names(out, names); FREE(1); } init_data_frame(out, r_length(x)); FREE(1); return out; } static struct name_repair_opts validate_bind_name_repair(r_obj* name_repair, bool allow_minimal) { struct name_repair_opts opts = new_name_repair_opts(name_repair, r_lazy_null, false, r_lazy_null); switch (opts.type) { case NAME_REPAIR_custom: case NAME_REPAIR_unique: case NAME_REPAIR_universal: case NAME_REPAIR_check_unique: break; case NAME_REPAIR_minimal: if (allow_minimal) break; // else fallthrough default: if (allow_minimal) { r_abort_call(r_null, "`.name_repair` can't be `\"%s\"`.\n" "It must be one of `\"unique\"`, `\"universal\"`, `\"check_unique\"`, or `\"minimal\"`.", name_repair_arg_as_c_string(opts.type)); } else { r_abort_call(r_null, "`.name_repair` can't be `\"%s\"`.\n" "It must be one of `\"unique\"`, `\"universal\"`, or `\"check_unique\"`.", name_repair_arg_as_c_string(opts.type)); } } return opts; } void vctrs_init_bind(r_obj* ns) { syms_vec_cbind_frame_ptype = r_sym("vec_cbind_frame_ptype"); fns_vec_cbind_frame_ptype = r_env_get(ns, syms_vec_cbind_frame_ptype); } static r_obj* syms_vec_cbind_frame_ptype = NULL; static r_obj* fns_vec_cbind_frame_ptype = NULL; vctrs/src/type-data-frame.h0000644000176200001440000000306414362266120015340 0ustar liggesusers#ifndef VCTRS_TYPE_DATA_FRAME_H #define VCTRS_TYPE_DATA_FRAME_H #include "vctrs-core.h" #include "cast.h" #include "names.h" #include "ptype2.h" r_obj* new_data_frame(r_obj* x, r_ssize n); void init_data_frame(r_obj* x, r_ssize n); void init_tibble(r_obj* x, r_ssize n); void init_compact_rownames(r_obj* x, r_ssize n); static inline r_obj* df_rownames(r_obj* x) { return r_attrib_get(x, R_RowNamesSymbol); } bool is_native_df(r_obj* x); r_obj* df_poke(r_obj* x, r_ssize i, r_obj* value); r_obj* df_poke_at(r_obj* x, r_obj* name, r_obj* value); r_obj* df_flatten(r_obj* x); r_obj* df_repair_names(r_obj* x, struct name_repair_opts* name_repair); r_obj* df_cast_opts(const struct cast_opts* opts); static inline r_obj* df_cast(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg) { const struct cast_opts opts = { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg }; return df_cast_opts(&opts); } enum rownames_type { ROWNAMES_TYPE_automatic, ROWNAMES_TYPE_automatic_compact, ROWNAMES_TYPE_identifiers }; enum rownames_type rownames_type(r_obj* rn); r_ssize rownames_size(r_obj* rn); r_obj* df_ptype2(const struct ptype2_opts* opts); static inline r_obj* df_ptype2_params(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { const struct ptype2_opts opts = { .x = x, .y = y, .p_x_arg = p_x_arg, .p_y_arg = p_y_arg }; return df_ptype2(&opts); } #endif vctrs/src/compare.c0000644000176200001440000001700114315060310013764 0ustar liggesusers#include "vctrs.h" #include #include "decl/compare-decl.h" static void stop_not_comparable(r_obj* x, r_obj* y, const char* message) { r_abort("`x` and `y` are not comparable: %s", message); } // ----------------------------------------------------------------------------- #define COMPARE(CTYPE, CBEGIN, SCALAR_COMPARE) \ do { \ r_obj* out = KEEP(r_alloc_integer(size)); \ int* v_out = r_int_begin(out); \ \ CTYPE const* v_x = CBEGIN(x); \ CTYPE const* v_y = CBEGIN(y); \ \ for (r_ssize i = 0; i < size; ++i) { \ v_out[i] = SCALAR_COMPARE(v_x[i], v_y[i]); \ } \ \ FREE(3); \ return out; \ } \ while (0) r_obj* vec_compare(r_obj* x, r_obj* y, bool na_equal) { r_ssize size = vec_size(x); enum vctrs_type type = vec_proxy_typeof(x); if (type != vec_proxy_typeof(y) || size != vec_size(y)) { stop_not_comparable(x, y, "must have the same types and lengths"); } x = KEEP(vec_normalize_encoding(x)); y = KEEP(vec_normalize_encoding(y)); if (type == VCTRS_TYPE_dataframe) { r_obj* out = df_compare(x, y, na_equal, size); FREE(2); return out; } if (na_equal) { switch (type) { case VCTRS_TYPE_logical: COMPARE(int, r_lgl_cbegin, lgl_compare_na_equal); case VCTRS_TYPE_integer: COMPARE(int, r_int_cbegin, int_compare_na_equal); case VCTRS_TYPE_double: COMPARE(double, r_dbl_cbegin, dbl_compare_na_equal); case VCTRS_TYPE_character: COMPARE(r_obj*, r_chr_cbegin, chr_compare_na_equal); case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare", type); } } else { switch (type) { case VCTRS_TYPE_logical: COMPARE(int, r_lgl_cbegin, lgl_compare_na_propagate); case VCTRS_TYPE_integer: COMPARE(int, r_int_cbegin, int_compare_na_propagate); case VCTRS_TYPE_double: COMPARE(double, r_dbl_cbegin, dbl_compare_na_propagate); case VCTRS_TYPE_character: COMPARE(r_obj*, r_chr_cbegin, chr_compare_na_propagate); case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare", type); } } } #undef COMPARE r_obj* ffi_vec_compare(r_obj* x, r_obj* y, r_obj* ffi_na_equal) { const bool na_equal = r_bool_as_int(ffi_na_equal); return vec_compare(x, y, na_equal); } // ----------------------------------------------------------------------------- static r_obj* df_compare(r_obj* x, r_obj* y, bool na_equal, r_ssize size) { int nprot = 0; r_obj* out = KEEP_N(r_alloc_integer(size), &nprot); int* v_out = r_int_begin(out); // Initialize to "equality" value and only change if we learn that it differs. // This also determines the zero column result. memset(v_out, 0, size * sizeof(int)); struct df_short_circuit_info info = new_df_short_circuit_info(size, false); struct df_short_circuit_info* p_info = &info; PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, &nprot); df_compare_impl(v_out, p_info, x, y, na_equal); FREE(nprot); return out; } static void df_compare_impl(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal) { r_ssize n_col = r_length(x); if (n_col != r_length(y)) { stop_not_comparable(x, y, "must have the same number of columns"); } for (r_ssize i = 0; i < n_col; ++i) { r_obj* x_col = r_list_get(x, i); r_obj* y_col = r_list_get(y, i); vec_compare_col(v_out, p_info, x_col, y_col, na_equal); // If we know all comparison values, break if (p_info->remaining == 0) { break; } } } // ----------------------------------------------------------------------------- #define COMPARE_COL(CTYPE, CBEGIN, SCALAR_COMPARE) \ do { \ CTYPE const* v_x = CBEGIN(x); \ CTYPE const* v_y = CBEGIN(y); \ \ for (r_ssize i = 0; i < p_info->size; ++i) { \ if (p_info->p_row_known[i]) { \ continue; \ } \ \ int cmp = SCALAR_COMPARE(v_x[i], v_y[i]); \ \ if (cmp != 0) { \ v_out[i] = cmp; \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ } \ } \ while (0) static void vec_compare_col(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal) { enum vctrs_type type = vec_proxy_typeof(x); if (type == VCTRS_TYPE_dataframe) { df_compare_impl(v_out, p_info, x, y, na_equal); return; } if (na_equal) { switch (type) { case VCTRS_TYPE_logical: COMPARE_COL(int, r_lgl_cbegin, lgl_compare_na_equal); break; case VCTRS_TYPE_integer: COMPARE_COL(int, r_int_cbegin, int_compare_na_equal); break; case VCTRS_TYPE_double: COMPARE_COL(double, r_dbl_cbegin, dbl_compare_na_equal); break; case VCTRS_TYPE_character: COMPARE_COL(r_obj*, r_chr_cbegin, chr_compare_na_equal); break; case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare_col", type); } } else { switch (type) { case VCTRS_TYPE_logical: COMPARE_COL(int, r_lgl_cbegin, lgl_compare_na_propagate); break; case VCTRS_TYPE_integer: COMPARE_COL(int, r_int_cbegin, int_compare_na_propagate); break; case VCTRS_TYPE_double: COMPARE_COL(double, r_dbl_cbegin, dbl_compare_na_propagate); break; case VCTRS_TYPE_character: COMPARE_COL(r_obj*, r_chr_cbegin, chr_compare_na_propagate); break; case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare_col", type); } } } #undef COMPARE_COL vctrs/src/unspecified.c0000644000176200001440000000404414315060310014637 0ustar liggesusers#include "vctrs.h" // Initialised at load time static SEXP unspecified_attrib = NULL; SEXP vctrs_shared_empty_uns = NULL; // [[ include("vctrs.h") ]] SEXP vec_unspecified(R_len_t n) { SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); r_lgl_fill(out, NA_LOGICAL, n); SET_ATTRIB(out, unspecified_attrib); SET_OBJECT(out, 1); UNPROTECT(1); return out; } // [[ register ]] SEXP vctrs_unspecified(SEXP n) { if (Rf_length(n) != 1) { Rf_errorcall(R_NilValue, "`n` must be a single number"); } if (TYPEOF(n) != INTSXP) { n = vec_cast(n, r_globals.empty_int, vec_args.empty, vec_args.empty, r_lazy_null); } int len = INTEGER(n)[0]; return vec_unspecified(len); } // [[ include("vctrs.h") ]] bool vec_is_unspecified(SEXP x) { if (TYPEOF(x) != LGLSXP) { return false; } SEXP attrib = ATTRIB(x); if (attrib == unspecified_attrib) { return true; } if (attrib != R_NilValue) { // The unspecified vector might have been created outside the // session (e.g. serialisation) if (Rf_inherits(x, "vctrs_unspecified")) { return true; } if (OBJECT(x)) { return false; } if (has_dim(x)) { return false; } } R_len_t n = Rf_length(x); if (n == 0) { return false; } R_len_t* p_x = LOGICAL(x); for (R_len_t i = 0; i < n; ++i) { if (p_x[i] != NA_LOGICAL) { return false; } } return true; } // [[ register ]] SEXP vctrs_is_unspecified(SEXP x) { return Rf_ScalarLogical(vec_is_unspecified(x)); } void vctrs_init_unspecified(SEXP ns) { { SEXP unspecified_class = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(unspecified_class, 0, Rf_mkChar("vctrs_unspecified")); unspecified_attrib = Rf_cons(unspecified_class, R_NilValue); R_PreserveObject(unspecified_attrib); SET_TAG(unspecified_attrib, R_ClassSymbol); UNPROTECT(1); } vctrs_shared_empty_uns = vec_unspecified(0); R_PreserveObject(vctrs_shared_empty_uns); MARK_NOT_MUTABLE(vctrs_shared_empty_uns); } vctrs/src/slice.h0000644000176200001440000000174414315060310013451 0ustar liggesusers#ifndef VCTRS_SLICE_H #define VCTRS_SLICE_H #include "vctrs-core.h" struct vec_slice_opts { struct vctrs_arg* x_arg; struct vctrs_arg* i_arg; struct r_lazy call; }; enum vctrs_materialize { VCTRS_MATERIALIZE_false = 0, VCTRS_MATERIALIZE_true }; r_obj* vec_slice_opts(r_obj* x, r_obj* i, const struct vec_slice_opts* opts); static inline r_obj* vec_slice(r_obj* x, r_obj* i) { const struct vec_slice_opts opts = { 0 }; return vec_slice_opts(x, i, &opts); } r_obj* vec_init(r_obj* x, r_ssize n); r_obj* vec_slice_unsafe(r_obj* x, r_obj* i); r_obj* vec_slice_base(enum vctrs_type type, r_obj* x, r_obj* subscript, enum vctrs_materialize materialize); r_obj* slice_names(r_obj* names, r_obj* subscript); r_obj* slice_rownames(r_obj* names, r_obj* subscript); r_obj* vec_slice_fallback(r_obj* x, r_obj* subscript); bool vec_is_restored(r_obj* x, r_obj* to); #endif vctrs/src/cast.c0000644000176200001440000002427714373202700013312 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/cast-decl.h" // [[ register() ]] r_obj* ffi_cast(r_obj* x, r_obj* to, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy to_arg_ = { .x = syms.to_arg, .env = frame }; struct vctrs_arg to_arg = new_lazy_arg(&to_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; return vec_cast(x, to, &x_arg, &to_arg, call); } r_obj* vec_cast_opts(const struct cast_opts* opts) { r_obj* x = opts->x; r_obj* to = opts->to; struct vctrs_arg* x_arg = opts->p_x_arg; struct vctrs_arg* to_arg = opts->p_to_arg; if (x == r_null) { if (!vec_is_partial(to)) { obj_check_vector(to, to_arg, opts->call); } return x; } if (to == r_null) { if (!vec_is_partial(x)) { obj_check_vector(x, x_arg, opts->call); } return x; } enum vctrs_type x_type = vec_typeof(x); enum vctrs_type to_type = vec_typeof(to); if (x_type == VCTRS_TYPE_unspecified) { return vec_init(to, vec_size(x)); } if (x_type == VCTRS_TYPE_scalar) { stop_scalar_type(x, x_arg, opts->call); } if (to_type == VCTRS_TYPE_scalar) { stop_scalar_type(to, to_arg, opts->call); } r_obj* out = r_null; bool lossy = false; if (to_type == VCTRS_TYPE_s3 || x_type == VCTRS_TYPE_s3) { out = KEEP(vec_cast_dispatch_native(opts, x_type, to_type, &lossy)); } else { out = KEEP(vec_cast_switch_native(opts, x_type, to_type, &lossy)); } if (lossy || out == r_null) { // This broadcasts dimensions too FREE(1); return vec_cast_dispatch_s3(opts); } if (has_dim(x) || has_dim(to)) { out = vec_shape_broadcast(out, opts); } FREE(1); return out; } static r_obj* vec_cast_switch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy) { r_obj* x = opts->x; int dir = 0; enum vctrs_type2 type2 = vec_typeof2_impl(x_type, to_type, &dir); switch (type2) { case VCTRS_TYPE2_logical_logical: case VCTRS_TYPE2_integer_integer: case VCTRS_TYPE2_double_double: case VCTRS_TYPE2_complex_complex: case VCTRS_TYPE2_raw_raw: case VCTRS_TYPE2_character_character: case VCTRS_TYPE2_list_list: return x; case VCTRS_TYPE2_logical_integer: if (dir == 0) { return lgl_as_integer(x, lossy); } else { return int_as_logical(x, lossy); } case VCTRS_TYPE2_logical_double: if (dir == 0) { return lgl_as_double(x, lossy); } else { return dbl_as_logical(x, lossy); } case VCTRS_TYPE2_integer_double: if (dir == 0) { return int_as_double(x, lossy); } else { return dbl_as_integer(x, lossy); } case VCTRS_TYPE2_dataframe_dataframe: return df_cast_opts(opts); default: break; } return r_null; } static inline r_obj* vec_cast_default_full(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, const struct fallback_opts* opts, bool from_dispatch) { r_obj* s3_fallback = KEEP(r_int(opts->s3)); r_obj* ffi_x_arg = KEEP(vctrs_arg(p_x_arg)); r_obj* ffi_to_arg = KEEP(vctrs_arg(p_to_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(call)); r_obj* out = vctrs_eval_mask7(syms.vec_default_cast, syms_x, x, syms_to, to, syms_x_arg, ffi_x_arg, syms_to_arg, ffi_to_arg, syms_call, ffi_call, syms_from_dispatch, r_lgl(from_dispatch), syms_s3_fallback, s3_fallback); FREE(4); return out; } r_obj* vec_cast_default(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, const struct fallback_opts* p_opts) { return vec_cast_default_full(x, to, p_x_arg, p_to_arg, call, p_opts, false); } static r_obj* vec_cast_dispatch_s3(const struct cast_opts* opts) { r_obj* x = opts->x; r_obj* to = opts->to; r_obj* method_sym = r_null; r_obj* method = s3_find_method_xy("vec_cast", to, x, vctrs_method_table, &method_sym); // Compatibility with legacy double dispatch mechanism if (method == r_null) { r_obj* to_method_sym = r_null; r_obj* to_method = KEEP(s3_find_method2("vec_cast", to, vctrs_method_table, &to_method_sym)); if (to_method != r_null) { const char* to_method_str = CHAR(PRINTNAME(to_method_sym)); r_obj* to_table = s3_get_table(CLOENV(to_method)); method = s3_find_method2(to_method_str, x, to_table, &method_sym); } FREE(1); } KEEP(method); if (method == r_null) { r_obj* out = vec_cast_default_full(x, to, opts->p_x_arg, opts->p_to_arg, opts->call, &(opts->fallback), true); FREE(1); return out; } r_obj* r_x_arg = KEEP(vctrs_arg(opts->p_x_arg)); r_obj* r_to_arg = KEEP(vctrs_arg(opts->p_to_arg)); r_obj* out = vec_invoke_coerce_method(method_sym, method, syms_x, x, syms_to, to, syms_x_arg, r_x_arg, syms_to_arg, r_to_arg, opts->call, &(opts->fallback)); FREE(3); return out; } struct cast_err_data { const struct cast_opts* opts; r_obj* out; }; static void vec_cast_e_cb(void* data_) { struct cast_err_data* data = (struct cast_err_data*) data_; data->out = vec_cast_opts(data->opts); } r_obj* vec_cast_e(const struct cast_opts* opts, ERR* err) { struct cast_err_data data = { .opts = opts, .out = r_null }; *err = r_try_catch(&vec_cast_e_cb, &data, syms_vctrs_error_incompatible_type, NULL, NULL); return data.out; } r_obj* vec_cast_common_opts(r_obj* xs, r_obj* to, const struct cast_common_opts* opts) { struct ptype_common_opts ptype_opts = { .p_arg = opts->p_arg, .call = opts->call, .fallback = opts->fallback }; r_obj* type = KEEP(vec_ptype_common_opts(xs, to, &ptype_opts)); r_ssize n = r_length(xs); r_obj* out = KEEP(r_alloc_list(n)); r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg(opts->p_arg, r_names(xs), n, &i); KEEP(p_x_arg->shelter); for (; i < n; ++i) { r_obj* elt = r_list_get(xs, i); struct cast_opts cast_opts = { .x = elt, .to = type, .p_x_arg = p_x_arg, .call = opts->call, .fallback = opts->fallback }; r_list_poke(out, i, vec_cast_opts(&cast_opts)); } r_attrib_poke_names(out, r_names(xs)); FREE(3); return out; } r_obj* vec_cast_common_params(r_obj* xs, r_obj* to, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call) { struct cast_common_opts opts = { .p_arg = p_arg, .call = call, .fallback = { .s3 = s3_fallback } }; return vec_cast_common_opts(xs, to, &opts); } r_obj* vec_cast_common(r_obj* xs, r_obj* to, struct vctrs_arg* p_arg, struct r_lazy call) { return vec_cast_common_params(xs, to, S3_FALLBACK_DEFAULT, p_arg, call); } // [[ register(external = TRUE) ]] r_obj* ffi_cast_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* dots = KEEP(rlang_env_dots_list(env)); r_obj* to = KEEP(r_eval(r_node_car(args), env)); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); r_obj* out = vec_cast_common(dots, to, &arg, call); FREE(2); return out; } // [[ register(external = TRUE) ]] r_obj* ffi_cast_common_opts(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* dots = KEEP(rlang_env_dots_list(env)); r_obj* to = KEEP(r_eval(r_node_car(args), env)); args = r_node_cdr(args); r_obj* ffi_fallback_opts = KEEP(r_eval(r_node_car(args), env)); struct r_lazy arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); struct cast_common_opts opts = { .p_arg = &arg, .call = { .x = syms.dot_call, .env = env }, .fallback = new_fallback_opts(ffi_fallback_opts) }; r_obj* out = vec_cast_common_opts(dots, to, &opts); FREE(3); return out; } struct cast_opts new_cast_opts(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, r_obj* opts) { return (struct cast_opts) { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg, .call = call, .fallback = { .s3 = r_int_get(r_list_get(opts, 0), 0) } }; } void vctrs_init_cast(r_obj* ns) { syms.vec_default_cast = r_sym("vec_default_cast"); } vctrs/src/ptype2-dispatch.h0000644000176200001440000000214114362266120015373 0ustar liggesusers#ifndef VCTRS_PTYPE2_DISPATCH_H #define VCTRS_PTYPE2_DISPATCH_H #include "vctrs-core.h" #include "ptype2.h" r_obj* vec_ptype2_dispatch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left); r_obj* vec_ptype2_dispatch_s3(const struct ptype2_opts* opts); r_obj* vec_invoke_coerce_method(r_obj* method_sym, r_obj* method, r_obj* x_sym, r_obj* x, r_obj* y_sym, r_obj* y, r_obj* x_arg_sym, r_obj* x_arg, r_obj* y_arg_sym, r_obj* y_arg, struct r_lazy call, const struct fallback_opts* opts); r_obj* vec_ptype2_default(r_obj* x, r_obj* y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call, const struct fallback_opts* p_opts); #endif vctrs/src/owned.h0000644000176200001440000000102014511320527013460 0ustar liggesusers#ifndef VCTRS_OWNED_H #define VCTRS_OWNED_H #include "vctrs-core.h" #include "altrep.h" #include "utils.h" static inline enum vctrs_owned vec_owned(SEXP x) { return NO_REFERENCES(x) ? VCTRS_OWNED_true : VCTRS_OWNED_false; } // Wrapper around `r_clone_referenced()` that only attempts to clone if // we indicate that we don't own `x` static inline SEXP vec_clone_referenced(SEXP x, const enum vctrs_owned owned) { if (owned == VCTRS_OWNED_false) { return r_clone_referenced(x); } else { return x; } } #endif vctrs/src/utils.h0000644000176200001440000003652014511320527013521 0ustar liggesusers#ifndef VCTRS_UTILS_H #define VCTRS_UTILS_H #include "vctrs-core.h" #include "arg-counter.h" #include "rlang-dev.h" #define SWAP(T, x, y) do { \ T tmp = x; \ x = y; \ y = tmp; \ } while (0) #define PROTECT_N(x, n) (++*n, PROTECT(x)) #define PROTECT2(x, y) (PROTECT(x), PROTECT(y)) int r_bool_as_int(SEXP x); SEXP vctrs_eval_mask_n(SEXP fn, SEXP* syms, SEXP* args); SEXP vctrs_eval_mask1(SEXP fn, SEXP x_sym, SEXP x); SEXP vctrs_eval_mask2(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y); SEXP vctrs_eval_mask3(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); SEXP vctrs_eval_mask4(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4); SEXP vctrs_eval_mask5(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5); SEXP vctrs_eval_mask6(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6); SEXP vctrs_eval_mask7(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6, SEXP x7_sym, SEXP x7); r_obj* vctrs_eval_mask8(r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7, r_obj* x8_sym, r_obj* x8); SEXP vctrs_dispatch_n(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args); SEXP vctrs_dispatch1(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x); SEXP vctrs_dispatch2(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y); SEXP vctrs_dispatch3(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); SEXP vctrs_dispatch4(SEXP fn_sym, SEXP fn, SEXP w_sym, SEXP w, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); static inline r_obj* vctrs_dispatch5(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5) { r_obj* syms[6] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, NULL }; r_obj* args[6] = { x1, x2, x3, x4, x5, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch6(SEXP fn_sym, SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6); static inline r_obj* vctrs_dispatch7(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7) { r_obj* syms[8] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, x7_sym, NULL }; r_obj* args[8] = { x1, x2, x3, x4, x5, x6, x7, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } static inline __attribute__((noreturn)) void stop_unimplemented_type(const char* fn, SEXPTYPE type) { r_stop_internal("Unimplemented type `%s`.", Rf_type2char(type)); } SEXP map(SEXP x, SEXP (*fn)(SEXP)); SEXP map_with_data(SEXP x, SEXP (*fn)(SEXP, void*), void* data); SEXP df_map(SEXP df, SEXP (*fn)(SEXP)); SEXP bare_df_map(SEXP df, SEXP (*fn)(SEXP)); bool is_data_frame(SEXP x); bool is_bare_data_frame(SEXP x); bool is_bare_tibble(SEXP x); SEXP int_resize(SEXP x, r_ssize x_size, r_ssize size); SEXP raw_resize(SEXP x, r_ssize x_size, r_ssize size); SEXP chr_resize(SEXP x, r_ssize x_size, r_ssize size); // Returns S3 / S4 method for `generic` suitable for the class of `x`. The // inheritance hierarchy is explored except for the default method. SEXP s3_get_method(const char* generic, const char* cls, SEXP table); SEXP s3_sym_get_method(SEXP sym, SEXP table); SEXP s3_find_method(const char* generic, SEXP x, SEXP table); SEXP s3_class_find_method(const char* generic, SEXP cls, SEXP table); SEXP s3_get_class(SEXP x); SEXP s3_find_method_xy(const char* generic, SEXP x, SEXP y, SEXP table, SEXP* method_sym_out); SEXP s3_find_method2(const char* generic, SEXP x, SEXP table, SEXP* method_sym_out); SEXP s3_paste_method_sym(const char* generic, const char* cls); SEXP s3_bare_class(SEXP x); SEXP s4_find_method(SEXP x, SEXP table); SEXP s4_class_find_method(SEXP cls, SEXP table); bool vec_implements_ptype2(SEXP x); SEXP r_env_get(SEXP env, SEXP sym); extern SEXP syms_s3_methods_table; static inline SEXP s3_get_table(SEXP env) { return r_env_get(env, syms_s3_methods_table); } SEXP list_first_non_null(SEXP xs, R_len_t* non_null_i); bool list_is_homogeneously_classed(SEXP xs); // Destructive compacting SEXP node_compact_d(SEXP node); void never_reached(const char* fn) __attribute__((noreturn)); SEXP new_empty_factor(SEXP levels); SEXP new_empty_ordered(SEXP levels); bool list_has_inner_vec_names(SEXP x, R_len_t size); r_obj* list_pluck(r_obj* xs, r_ssize i); void init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing); SEXP compact_seq(R_len_t start, R_len_t size, bool increasing); bool is_compact_seq(SEXP x); void init_compact_rep(int* p, R_len_t i, R_len_t n); SEXP compact_rep(R_len_t i, R_len_t n); bool is_compact_rep(SEXP x); bool is_compact(SEXP x); SEXP compact_materialize(SEXP x); R_len_t vec_subscript_size(SEXP x); bool is_integer64(SEXP x); bool lgl_any_na(SEXP x); SEXP colnames(SEXP x); r_obj* colnames2(r_obj* x); extern bool (*rlang_is_splice_box)(SEXP); extern SEXP (*rlang_unbox)(SEXP); extern SEXP (*rlang_env_dots_values)(SEXP); extern SEXP (*rlang_env_dots_list)(SEXP); void* r_vec_deref_barrier(SEXP x); const void* r_vec_deref_barrier_const(SEXP x); void r_vec_fill(SEXPTYPE type, void* p_dest, r_ssize dest_i, const void* p_src, r_ssize src_i, r_ssize n); void r_lgl_fill(SEXP x, int value, R_len_t n); void r_int_fill(SEXP x, int value, R_len_t n); void r_p_lgl_fill(int* p_x, int value, R_len_t n); void r_p_int_fill(int* p_x, int value, R_len_t n); void r_p_chr_fill(SEXP* p_x, SEXP value, R_len_t n); void r_int_fill_seq(SEXP x, int start, R_len_t n); SEXP r_seq(R_len_t from, R_len_t to); bool r_int_any_na(SEXP x); R_len_t r_chr_find(SEXP x, SEXP value); #define r_resize Rf_xlengthgets int r_chr_max_len(SEXP x); SEXP r_chr_iota(R_len_t n, char* buf, int len, const char* prefix); #define R_LAZY_ALLOC(SYM, PI, R_TYPE, SIZE) do { \ if (SYM == R_NilValue) { \ SYM = Rf_allocVector(R_TYPE, SIZE); \ REPROTECT(SYM, PI); \ } \ } while (0); static inline SEXP r_new_logical(R_len_t n) { return Rf_allocVector(LGLSXP, n); } static inline SEXP r_new_integer(R_len_t n) { return Rf_allocVector(INTSXP, n); } static inline SEXP r_new_character(R_len_t n) { return Rf_allocVector(STRSXP, n); } static inline SEXP r_new_raw(R_len_t n) { return Rf_allocVector(RAWSXP, n); } static inline SEXP r_new_list(R_len_t n) { return Rf_allocVector(VECSXP, n); } SEXP r_protect(SEXP x); bool r_is_number(SEXP x); bool r_is_positive_number(SEXP x); SEXP r_clone_referenced(SEXP x); SEXP r_call_n(SEXP fn, SEXP* tags, SEXP* cars); static inline SEXP r_mark_s4(SEXP x) { SET_S4_OBJECT(x); return(x); } static inline SEXP r_unmark_s4(SEXP x) { UNSET_S4_OBJECT(x); return(x); } bool r_has_name_at(SEXP names, R_len_t i); bool r_is_names(SEXP names); bool r_is_minimal_names(SEXP x); bool r_is_empty_names(SEXP x); bool r_chr_has_string(SEXP x, SEXP str); static inline void* r_vec_unwrap(SEXPTYPE type, SEXP x) { switch (type) { case INTSXP: return (void*) INTEGER(x); default: stop_unimplemented_type("r_vec_unwrap", type); } } #define r_lgl Rf_ScalarLogical #define r_int Rf_ScalarInteger #define r_str Rf_mkChar #define r_chr Rf_mkString #define r_sym Rf_install // This unserialises ASCII Unicode tags of the form `` extern SEXP (*rlang_sym_as_character)(SEXP x); SEXP r_as_data_frame(SEXP x); static inline void r_dbg_save(SEXP x, const char* name) { Rf_defineVar(Rf_install(name), x, R_GlobalEnv); } ERR r_try_catch(void (*fn)(void*), void* fn_data, SEXP cnd_sym, void (*hnd)(void*), void* hnd_data); extern SEXP result_attrib; static inline SEXP r_result(SEXP x, ERR err) { if (!err) { err = R_NilValue; } SEXP result = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(result, 0, x); SET_VECTOR_ELT(result, 1, err); SET_ATTRIB(result, result_attrib); SET_OBJECT(result, 1); UNPROTECT(1); return result; } static inline SEXP r_result_get(SEXP x, ERR err) { if (err) { r_cnd_signal(err); } return x; } static inline struct vctrs_arg vec_as_arg(SEXP x) { if (x == R_NilValue) { return *vec_args.empty; } if (!r_is_string(x)) { Rf_errorcall(R_NilValue, "Argument tag must be a string."); } return new_wrapper_arg(NULL, r_chr_get_c_string(x, 0)); } extern SEXP fns_quote; static inline SEXP expr_protect(SEXP x) { switch (TYPEOF(x)) { case SYMSXP: case LANGSXP: return Rf_lang2(fns_quote, x); default: return x; } } static inline const void* vec_type_missing_value(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return &NA_LOGICAL; case VCTRS_TYPE_integer: return &NA_INTEGER; case VCTRS_TYPE_double: return &NA_REAL; case VCTRS_TYPE_complex: return &vctrs_shared_na_cpl; case VCTRS_TYPE_character: return &NA_STRING; case VCTRS_TYPE_list: return &R_NilValue; default: stop_unimplemented_vctrs_type("vec_type_missing_value", type); } } void c_print_backtrace(void); SEXP chr_c(SEXP x, SEXP y); extern SEXP vctrs_ns_env; extern SEXP vctrs_shared_empty_str; extern SEXP vctrs_shared_zero_int; extern SEXP classes_data_frame; extern SEXP classes_factor; extern SEXP classes_ordered; extern SEXP classes_date; extern SEXP classes_posixct; extern SEXP classes_tibble; extern SEXP classes_vctrs_group_rle; extern SEXP strings_dots; extern SEXP strings_empty; extern SEXP strings_tbl; extern SEXP strings_tbl_df; extern SEXP strings_data_frame; extern SEXP strings_date; extern SEXP strings_posixct; extern SEXP strings_posixlt; extern SEXP strings_posixt; extern SEXP strings_factor; extern SEXP strings_ordered; extern SEXP strings_list; extern SEXP strings_none; extern SEXP strings_minimal; extern SEXP strings_unique; extern SEXP strings_universal; extern SEXP strings_check_unique; extern SEXP strings_unique_quiet; extern SEXP strings_universal_quiet; extern SEXP strings_key; extern SEXP strings_loc; extern SEXP strings_val; extern SEXP strings_group; extern SEXP strings_length; extern SEXP strings_vctrs_vctr; extern SEXP strings_times; extern SEXP strings_needles; extern SEXP strings_haystack; extern SEXP chrs_subset; extern SEXP chrs_extract; extern SEXP chrs_assign; extern SEXP chrs_rename; extern SEXP chrs_remove; extern SEXP chrs_negate; extern SEXP chrs_null; extern SEXP chrs_logical; extern SEXP chrs_integer; extern SEXP chrs_double; extern SEXP chrs_complex; extern SEXP chrs_character; extern SEXP chrs_raw; extern SEXP chrs_list; extern SEXP chrs_expression; extern SEXP chrs_numeric; extern SEXP chrs_function; extern SEXP chrs_empty; extern SEXP chrs_cast; extern SEXP chrs_error; extern SEXP chrs_combine; extern SEXP chrs_convert; extern SEXP chrs_asc; extern SEXP chrs_desc; extern SEXP chrs_largest; extern SEXP chrs_smallest; extern SEXP chrs_which; extern SEXP syms_i; extern SEXP syms_j; extern SEXP syms_n; extern SEXP syms_x; extern SEXP syms_y; extern SEXP syms_x_size; extern SEXP syms_y_size; extern SEXP syms_to; extern SEXP syms_dots; extern SEXP syms_bracket; extern SEXP syms_x_arg; extern SEXP syms_y_arg; extern SEXP syms_to_arg; extern SEXP syms_times_arg; extern SEXP syms_subscript_arg; extern SEXP syms_needles_arg; extern SEXP syms_haystack_arg; extern SEXP syms_out; extern SEXP syms_value; extern SEXP syms_quiet; extern SEXP syms_dot_name_spec; extern SEXP syms_outer; extern SEXP syms_inner; extern SEXP syms_tilde; extern SEXP syms_dot_environment; extern SEXP syms_ptype; extern SEXP syms_missing; extern SEXP syms_size; extern SEXP syms_subscript_action; extern SEXP syms_subscript_type; extern SEXP syms_repair; extern SEXP syms_tzone; extern SEXP syms_data; extern SEXP syms_vctrs_error_incompatible_type; extern SEXP syms_vctrs_error_cast_lossy; extern SEXP syms_cnd_signal; extern SEXP syms_logical; extern SEXP syms_numeric; extern SEXP syms_character; extern SEXP syms_body; extern SEXP syms_parent; extern SEXP syms_from_dispatch; extern SEXP syms_s3_fallback; extern SEXP syms_stop_incompatible_type; extern SEXP syms_stop_incompatible_size; extern SEXP syms_stop_assert_size; extern SEXP syms_stop_matches_overflow; extern SEXP syms_stop_matches_nothing; extern SEXP syms_stop_matches_remaining; extern SEXP syms_stop_matches_incomplete; extern SEXP syms_stop_matches_multiple; extern SEXP syms_warn_matches_multiple; extern SEXP syms_stop_matches_relationship_one_to_one; extern SEXP syms_stop_matches_relationship_one_to_many; extern SEXP syms_stop_matches_relationship_many_to_one; extern SEXP syms_warn_matches_relationship_many_to_many; extern SEXP syms_action; extern SEXP syms_vctrs_common_class_fallback; extern SEXP syms_fallback_class; extern SEXP syms_abort; extern SEXP syms_message; extern SEXP syms_chr_proxy_collate; extern SEXP syms_actual; extern SEXP syms_required; extern SEXP syms_call; extern SEXP syms_dot_call; extern SEXP syms_which; static const char * const c_strs_vctrs_common_class_fallback = "vctrs:::common_class_fallback"; #define syms_names R_NamesSymbol extern SEXP fns_bracket; extern SEXP fns_quote; extern SEXP fns_names; extern SEXP vctrs_method_table; extern SEXP base_method_table; extern SEXP s4_c_method_table; #if defined(RLIB_DEBUG) SEXP R_inspect(SEXP x); SEXP R_inspect3(SEXP x, int deep, int pvec); #endif #endif vctrs/src/set.c0000644000176200001440000003522214426171036013151 0ustar liggesusers#include "vctrs.h" #include "decl/set-decl.h" r_obj* ffi_vec_set_intersect(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_intersect(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_intersect(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2_params( x, y, x_arg, y_arg, call, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(vec_normalize_encoding(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(vec_normalize_encoding(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); // Load dictionary with `x`. // Key values point to first time we saw that `x` value. for (r_ssize i = 0; i < x_size; ++i) { const uint32_t hash = dict_hash_scalar(x_dict, i); if (x_dict->key[hash] == DICT_EMPTY) { dict_put(x_dict, hash, i); } } struct dictionary* y_dict = new_dictionary_partial(y_proxy); PROTECT_DICT(y_dict, &n_prot); r_obj* marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_marked = (bool*) r_raw_begin(marked_shelter); memset(v_marked, 0, x_size * sizeof(bool)); // Mark unique elements of `x` that are also in `y` for (r_ssize i = 0; i < y_size; ++i) { const uint32_t hash = dict_hash_with(x_dict, y_dict, i); const r_ssize loc = x_dict->key[hash]; if (loc != DICT_EMPTY) { v_marked[loc] = true; } } r_ssize n_marked = 0; for (r_ssize i = 0; i < x_size; ++i) { n_marked += v_marked[i]; } r_obj* loc = KEEP_N(r_alloc_integer(n_marked), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } r_obj* out = vec_slice_unsafe(x, loc); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_set_difference(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_difference(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2_params( x, y, x_arg, y_arg, call, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(vec_normalize_encoding(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(vec_normalize_encoding(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); r_obj* marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_marked = (bool*) r_raw_begin(marked_shelter); // Load dictionary with `x`. // Key values point to first time we saw that `x` value. // Mark those first seen locations as potential results. for (r_ssize i = 0; i < x_size; ++i) { const uint32_t hash = dict_hash_scalar(x_dict, i); const bool first_time = x_dict->key[hash] == DICT_EMPTY; if (first_time) { dict_put(x_dict, hash, i); } v_marked[i] = first_time; } struct dictionary* y_dict = new_dictionary_partial(y_proxy); PROTECT_DICT(y_dict, &n_prot); // If we've seen the `y` element in `x`, unmark it for (r_ssize i = 0; i < y_size; ++i) { const uint32_t hash = dict_hash_with(x_dict, y_dict, i); const r_ssize loc = x_dict->key[hash]; if (loc != DICT_EMPTY) { v_marked[loc] = false; } } r_ssize n_marked = 0; for (r_ssize i = 0; i < x_size; ++i) { n_marked += v_marked[i]; } r_obj* loc = KEEP_N(r_alloc_integer(n_marked), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } r_obj* out = vec_slice_unsafe(x, loc); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_set_union(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_union(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_union(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2_params( x, y, x_arg, y_arg, call, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(vec_normalize_encoding(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(vec_normalize_encoding(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); r_obj* marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_marked = (bool*) r_raw_begin(marked_shelter); // Load dictionary with `x`. // Key values point to first time we saw that `x` value. // Mark those first seen locations as definite results. for (r_ssize i = 0; i < x_size; ++i) { const uint32_t hash = dict_hash_scalar(x_dict, i); const bool first_time = x_dict->key[hash] == DICT_EMPTY; if (first_time) { dict_put(x_dict, hash, i); } v_marked[i] = first_time; } r_obj* loc = KEEP_N(r_alloc_integer(x_dict->used), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } // Go ahead and slice out `x` x = KEEP_N(vec_slice_unsafe(x, loc), &n_prot); // Resize `v_marked` for use with `y` marked_shelter = KEEP_N(r_raw_resize(marked_shelter, y_size * sizeof(bool)), &n_prot); v_marked = (bool*) r_raw_begin(marked_shelter); struct dictionary* y_dict = new_dictionary(y_proxy); PROTECT_DICT(y_dict, &n_prot); // Load dictionary with `y`. // Key values point to first time we saw that `y` value. // Mark those first seen locations as possible results. for (r_ssize i = 0; i < y_size; ++i) { const uint32_t hash = dict_hash_scalar(y_dict, i); const bool first_time = y_dict->key[hash] == DICT_EMPTY; if (first_time) { dict_put(y_dict, hash, i); } v_marked[i] = first_time; } r_ssize n_marked = y_dict->used; // Check if unique elements of `y` are in `x`. If they are, unmark them. for (r_ssize i = 0; i < y_size; ++i) { if (!v_marked[i]) { continue; } const uint32_t hash = dict_hash_with(x_dict, y_dict, i); const bool in_x = x_dict->key[hash] != DICT_EMPTY; v_marked[i] = !in_x; n_marked -= in_x; } loc = KEEP_N(r_int_resize(loc, n_marked), &n_prot); v_loc = r_int_begin(loc); j = 0; for (r_ssize i = 0; i < y_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } y = KEEP_N(vec_slice_unsafe(y, loc), &n_prot); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = r_null }; r_obj* args = KEEP_N(r_alloc_list(2), &n_prot); r_list_poke(args, 0, x); r_list_poke(args, 1, y); r_obj* out = vec_c( args, ptype, r_null, &name_repair_opts, vec_args.empty, r_lazy_null ); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_set_symmetric_difference(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_symmetric_difference(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_symmetric_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2_params( x, y, x_arg, y_arg, call, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(vec_normalize_encoding(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(vec_normalize_encoding(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); struct dictionary* y_dict = new_dictionary(y_proxy); PROTECT_DICT(y_dict, &n_prot); r_obj* x_marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_x_marked = (bool*) r_raw_begin(x_marked_shelter); r_obj* y_marked_shelter = KEEP_N(r_alloc_raw(y_size * sizeof(bool)), &n_prot); bool* v_y_marked = (bool*) r_raw_begin(y_marked_shelter); // Load dictionary with `x`. // Key values point to first time we saw that `x` value. // Mark those first seen locations as possible results. for (r_ssize i = 0; i < x_size; ++i) { const uint32_t hash = dict_hash_scalar(x_dict, i); const bool first_time = x_dict->key[hash] == DICT_EMPTY; if (first_time) { dict_put(x_dict, hash, i); } v_x_marked[i] = first_time; } // Load dictionary with `y`. // Key values point to first time we saw that `y` value. // Mark those first seen locations as possible results. for (r_ssize i = 0; i < y_size; ++i) { const uint32_t hash = dict_hash_scalar(y_dict, i); const bool first_time = y_dict->key[hash] == DICT_EMPTY; if (first_time) { dict_put(y_dict, hash, i); } v_y_marked[i] = first_time; } r_ssize n_x_marked = x_dict->used; r_ssize n_y_marked = y_dict->used; // Check if unique elements of `y` are in `x`. // If they are, unmark them from both `x` and `y`. for (r_ssize i = 0; i < y_size; ++i) { if (!v_y_marked[i]) { continue; } const uint32_t hash = dict_hash_with(x_dict, y_dict, i); const r_ssize loc = x_dict->key[hash]; const bool in_x = loc != DICT_EMPTY; if (in_x) { v_x_marked[loc] = false; v_y_marked[i] = false; --n_x_marked; --n_y_marked; } } r_obj* loc = KEEP_N(r_alloc_integer(n_x_marked), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_x_marked[i]) { v_loc[j] = i + 1; ++j; } } // Slice out `x`, then reuse `loc` for slicing `y` x = KEEP_N(vec_slice_unsafe(x, loc), &n_prot); loc = KEEP_N(r_int_resize(loc, n_y_marked), &n_prot); v_loc = r_int_begin(loc); j = 0; for (r_ssize i = 0; i < y_size; ++i) { if (v_y_marked[i]) { v_loc[j] = i + 1; ++j; } } y = KEEP_N(vec_slice_unsafe(y, loc), &n_prot); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = r_null }; r_obj* args = KEEP_N(r_alloc_list(2), &n_prot); r_list_poke(args, 0, x); r_list_poke(args, 1, y); r_obj* out = vec_c( args, ptype, r_null, &name_repair_opts, vec_args.empty, r_lazy_null ); FREE(n_prot); return out; } vctrs/src/slice-assign.h0000644000176200001440000000626314315060310014734 0ustar liggesusers#ifndef VCTRS_SLICE_ASSIGN_H #define VCTRS_SLICE_ASSIGN_H #include "vctrs-core.h" #include "owned.h" struct vec_assign_opts { bool assign_names; bool ignore_outer_names; bool recursive; struct vctrs_arg* x_arg; struct vctrs_arg* value_arg; struct r_lazy call; }; r_obj* vec_assign_opts(r_obj* x, r_obj* index, r_obj* value, const struct vec_assign_opts* opts); static inline r_obj* vec_assign(r_obj* x, r_obj* index, r_obj* value) { struct vec_assign_opts opts = { 0 }; return vec_assign_opts(x, index, value, &opts); } static inline r_obj* vec_check_assign(r_obj* x, r_obj* index, r_obj* value, struct vctrs_arg* x_arg, struct vctrs_arg* value_arg, struct r_lazy call) { struct vec_assign_opts opts = { .x_arg = x_arg, .value_arg = value_arg, .call = call }; return vec_assign_opts(x, index, value, &opts); } static inline r_obj* vec_assign_n(r_obj* x, r_obj* index, r_obj* value, bool assign_names, bool ignore_outer_names, struct vctrs_arg* x_arg, struct vctrs_arg* value_arg, struct r_lazy call) { struct vec_assign_opts opts = { .assign_names = assign_names, .ignore_outer_names = ignore_outer_names, .x_arg = x_arg, .value_arg = value_arg, .call = call }; return vec_assign_opts(x, index, value, &opts); } r_obj* vec_proxy_assign_opts(r_obj* proxy, r_obj* index, r_obj* value, const enum vctrs_owned owned, const struct vec_assign_opts* opts); static inline r_obj* vec_proxy_assign(r_obj* proxy, r_obj* index, r_obj* value) { struct vec_assign_opts args = { 0 }; return vec_proxy_assign_opts(proxy, index, value, vec_owned(proxy), &args); } static inline r_obj* vec_proxy_check_assign(r_obj* proxy, r_obj* index, r_obj* value, struct vctrs_arg* x_arg, struct vctrs_arg* value_arg, struct r_lazy call) { struct vec_assign_opts opts = { .x_arg = x_arg, .value_arg = value_arg, .call = call }; return vec_proxy_assign_opts(proxy, index, value, vec_owned(proxy), &opts); } r_obj* chr_assign(r_obj* out, r_obj* index, r_obj* value, const enum vctrs_owned owned); r_obj* list_assign(r_obj* out, r_obj* index, r_obj* value, const enum vctrs_owned owned); r_obj* df_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned, const struct vec_assign_opts* opts); r_obj* vec_assign_shaped(r_obj* proxy, r_obj* index, r_obj* value, const enum vctrs_owned owned, const struct vec_assign_opts* opts); #endif vctrs/src/slice-chop.h0000644000176200001440000000042414402367170014405 0ustar liggesusers#ifndef VCTRS_SLICE_CHOP_H #define VCTRS_SLICE_CHOP_H #include "vctrs-core.h" r_obj* vec_chop(r_obj* x, r_obj* indices, r_obj* sizes); r_obj* vec_chop_unsafe(r_obj*, r_obj* indices, r_obj* sizes); r_obj* list_as_locations(r_obj* indices, r_ssize n, r_obj* names); #endif vctrs/src/proxy-restore.h0000644000176200001440000000121614315060310015206 0ustar liggesusers#ifndef VCTRS_PROXY_RESTORE_H #define VCTRS_PROXY_RESTORE_H #include "vctrs-core.h" r_obj* vec_restore(r_obj* x, r_obj* to, enum vctrs_owned owned); r_obj* vec_restore_default(r_obj* x, r_obj* to, enum vctrs_owned owned); r_obj* vec_restore_recurse(r_obj* x, r_obj* to, enum vctrs_owned owned); r_obj* vec_df_restore(r_obj* x, r_obj* to, enum vctrs_owned owned, enum vctrs_recurse recurse); r_obj* vec_bare_df_restore(r_obj* x, r_obj* to, enum vctrs_owned owned, enum vctrs_recurse recurse); #endif vctrs/src/order-sortedness.c0000644000176200001440000003456414315060310015655 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" // ----------------------------------------------------------------------------- static inline int dbl_cmp(double x, double y, enum vctrs_dbl x_type, enum vctrs_dbl y_type, int direction, int na_order, int na_nan_order); /* * Check if a double vector is ordered, handling `decreasing`, `na_last`, and * `nan_distinct`. * * If the double vector is in the expected ordering, no sorting needs to * occur. In these cases, if `p_x` is in exactly the expected ordering. * If `p_x` is in exactly the opposite ordering, the the ordering will later * be reversed (this only happens if it is strictly opposite of expected * ordering, ties would prevent the reversal from being stable). Group * information is also pushed in these cases for use in the next columns. */ enum vctrs_sortedness dbl_sortedness(const double* p_x, r_ssize size, bool decreasing, bool na_last, bool nan_distinct, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; const int na_nan_order = nan_distinct ? na_order : 0; double previous = p_x[0]; enum vctrs_dbl previous_type = dbl_classify(previous); r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { double current = p_x[i]; enum vctrs_dbl current_type = dbl_classify(current); int cmp = dbl_cmp( current, previous, current_type, previous_type, direction, na_order, na_nan_order ); if (cmp >= 0) { break; } previous = current; previous_type = current_type; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { double current = p_x[i]; enum vctrs_dbl current_type = dbl_classify(current); int cmp = dbl_cmp( current, previous, current_type, previous_type, direction, na_order, na_nan_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; previous_type = current_type; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } static inline int dbl_cmp_numbers(double x, double y, int direction); /* * Compare two doubles, handling `na_order`, `direction`, and `na_nan_order` */ static inline int dbl_cmp(double x, double y, enum vctrs_dbl x_type, enum vctrs_dbl y_type, int direction, int na_order, int na_nan_order) { switch (x_type) { case VCTRS_DBL_number: switch (y_type) { case VCTRS_DBL_number: return dbl_cmp_numbers(x, y, direction); case VCTRS_DBL_missing: return -na_order; case VCTRS_DBL_nan: return -na_order; } case VCTRS_DBL_missing: switch (y_type) { case VCTRS_DBL_number: return na_order; case VCTRS_DBL_missing: return 0; case VCTRS_DBL_nan: return na_nan_order; } case VCTRS_DBL_nan: switch (y_type) { case VCTRS_DBL_number: return na_order; case VCTRS_DBL_missing: return -na_nan_order; case VCTRS_DBL_nan: return 0; } } never_reached("dbl_cmp"); } static inline int dbl_cmp_numbers(double x, double y, int direction) { const int cmp = (x > y) - (x < y); return cmp * direction; } // ----------------------------------------------------------------------------- static inline int int_cmp(int x, int y, const int direction, const int na_order); // Very similar to `dbl_sortedness()` enum vctrs_sortedness int_sortedness(const int* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; int previous = p_x[0]; r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { int current = p_x[i]; int cmp = int_cmp( current, previous, direction, na_order ); if (cmp >= 0) { break; } previous = current; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { int current = p_x[i]; int cmp = int_cmp( current, previous, direction, na_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } // Very similar to `dbl_cmp()` static inline int int_cmp(int x, int y, const int direction, const int na_order) { if (x == NA_INTEGER) { if (y == NA_INTEGER) { return 0; } else { return na_order; } } if (y == NA_INTEGER) { return -na_order; } int cmp = (x > y) - (x < y); return cmp * direction; } // ----------------------------------------------------------------------------- static inline int chr_cmp(SEXP x, SEXP y, const char* c_x, const char* c_y, const int direction, const int na_order); /* * Check if `p_x` is in the "expected" ordering as defined by `decreasing` and * `na_last`. If `p_x` is in the expected ordering, or if it is in the strictly * opposite of the expected ordering (with no ties), then groups are pushed, * and a `vctrs_sortedness` value is returned indicating how to finalize the * order. */ enum vctrs_sortedness chr_sortedness(const SEXP* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; SEXP previous = p_x[0]; const char* c_previous = CHAR(previous); r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { SEXP current = p_x[i]; const char* c_current = CHAR(current); int cmp = chr_cmp( current, previous, c_current, c_previous, direction, na_order ); if (cmp >= 0) { break; } previous = current; c_previous = c_current; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { SEXP current = p_x[i]; const char* c_current = CHAR(current); int cmp = chr_cmp( current, previous, c_current, c_previous, direction, na_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; c_previous = c_current; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } /* * `direction` is `1` for ascending and `-1` for descending. * `na_order` is `1` if `na_last = true` and `-1` if `na_last = false`. */ static inline int chr_cmp(SEXP x, SEXP y, const char* c_x, const char* c_y, const int direction, const int na_order) { // Same pointer - including `NA`s if (x == y) { return 0; } if (x == NA_STRING) { return na_order; } if (y == NA_STRING) { return -na_order; } return direction * strcmp(c_x, c_y); } // ----------------------------------------------------------------------------- static inline void int_incr(r_ssize size, int* p_x); static inline void ord_reverse(r_ssize size, int* p_o); /* * Resolve ordering based on the sortedness and whether or not `p_o` has * been initialized. For a vector / first column, this function has to * initialize the ordering (for reversed ordering this is faster than * initializing the order sequentially then reversing it). * * `size` will correspond to the size of `x` for the first column, but will * correspond to the size of the current group for subsequent columns. */ void ord_resolve_sortedness(enum vctrs_sortedness sortedness, r_ssize size, int* p_o) { switch (sortedness) { case VCTRS_SORTEDNESS_sorted: int_incr(size, p_o); return; case VCTRS_SORTEDNESS_reversed: ord_reverse(size, p_o); return; case VCTRS_SORTEDNESS_unsorted: Rf_errorcall(R_NilValue, "Internal error: Unsorted case should be handled elsewhere."); } never_reached("ord_resolve_sortedness"); } // Initialize with sequential 1-based ordering static inline void int_incr(r_ssize size, int* p_x) { for (r_ssize i = 0; i < size; ++i) { p_x[i] = i + 1; } } // Used when in strictly opposite of expected order and uninitialized. static inline void ord_reverse(r_ssize size, int* p_o) { const r_ssize half = size / 2; for (r_ssize i = 0; i < half; ++i) { r_ssize swap = size - 1 - i; p_o[i] = swap + 1; p_o[swap] = i + 1; } // Initialize center value if odd number if (size % 2 != 0) { p_o[half] = half + 1; } } static inline void ord_reverse_chunk(r_ssize size, int* p_o); void ord_resolve_sortedness_chunk(enum vctrs_sortedness sortedness, r_ssize size, int* p_o) { switch (sortedness) { case VCTRS_SORTEDNESS_sorted: return; case VCTRS_SORTEDNESS_reversed: ord_reverse_chunk(size, p_o); return; case VCTRS_SORTEDNESS_unsorted: Rf_errorcall(R_NilValue, "Internal error: Unsorted case should be handled elsewhere."); } never_reached("ord_resolve_sortedness_chunk"); } // Used when in strictly opposite of expected order and initialized. // No need to alter "center" value here, it will be initialized to a value // already and it won't be swapped. static inline void ord_reverse_chunk(r_ssize size, int* p_o) { const r_ssize half = size / 2; for (r_ssize i = 0; i < half; ++i) { r_ssize swap = size - 1 - i; const int temp = p_o[i]; p_o[i] = p_o[swap]; p_o[swap] = temp; } } vctrs/src/names.c0000644000176200001440000006262414404336165013471 0ustar liggesusers#include #include "vctrs.h" #include "type-data-frame.h" #include "decl/names-decl.h" // 3 leading '.' + 1 trailing '\0' + 24 characters #define MAX_IOTA_SIZE 28 r_obj* vec_as_names(r_obj* names, const struct name_repair_opts* opts) { if (!opts) { return names; } switch (opts->type) { case NAME_REPAIR_none: return names; case NAME_REPAIR_minimal: return ffi_as_minimal_names(names); case NAME_REPAIR_unique: return vec_as_unique_names(names, opts->quiet); case NAME_REPAIR_universal: return vec_as_universal_names(names, opts->quiet); case NAME_REPAIR_check_unique: return check_unique_names(names, opts); case NAME_REPAIR_custom: return vec_as_custom_names(names, opts); } r_stop_unreachable(); } r_obj* ffi_vec_as_names(r_obj* names, r_obj* repair, r_obj* ffi_quiet, r_obj* frame) { if (!r_is_bool(ffi_quiet)) { r_abort("`quiet` must a boolean value."); } bool quiet = r_lgl_get(ffi_quiet, 0); struct r_lazy call = (struct r_lazy) { .x = r_syms.call, .env = frame }; struct r_lazy repair_arg = { .x = syms.repair_arg, .env = frame }; struct name_repair_opts repair_opts = new_name_repair_opts(repair, repair_arg, quiet, call); KEEP(repair_opts.shelter); r_obj* out = vec_as_names(names, &repair_opts); FREE(1); return out; } struct repair_error_info { r_obj* shelter; r_obj* repair_arg; r_obj* call; r_obj* input_error_repair_arg; r_obj* input_error_call; }; struct repair_error_info new_repair_error_info(struct name_repair_opts* p_opts) { struct repair_error_info out; out.shelter = r_new_list(4); KEEP(out.shelter); out.repair_arg = r_lazy_eval(p_opts->name_repair_arg); r_list_poke(out.shelter, 0, out.repair_arg); out.call = r_lazy_eval(p_opts->call); r_list_poke(out.shelter, 1, out.call); // If this is NULL, the `repair` value has been hard-coded by the // frontend. Input errors are internal, and we provide no // recommendation to fix user errors by providing a different value // for `repair`. if (out.repair_arg == r_null) { out.input_error_repair_arg = chrs.repair; r_list_poke(out.shelter, 2, out.input_error_repair_arg); out.input_error_call = r_call(r_sym("vec_as_names")); r_list_poke(out.shelter, 3, out.input_error_call); } else { out.input_error_repair_arg = r_lazy_eval(p_opts->name_repair_arg); r_list_poke(out.shelter, 2, out.input_error_repair_arg); out.input_error_call = r_lazy_eval(p_opts->call); r_list_poke(out.shelter, 3, out.input_error_call); } FREE(1); return out; } r_obj* vec_as_universal_names(r_obj* names, bool quiet) { r_obj* quiet_obj = KEEP(r_lgl(quiet)); r_obj* out = vctrs_dispatch2(syms_as_universal_names, fns_as_universal_names, syms_names, names, syms_quiet, quiet_obj); FREE(1); return out; } static r_obj* check_unique_names(r_obj* names, const struct name_repair_opts* opts) { r_obj* ffi_arg = KEEP(r_lazy_eval(opts->name_repair_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(opts->call)); r_obj* out = KEEP(vctrs_dispatch3(syms_check_unique_names, fns_check_unique_names, syms_names, names, r_syms.arg, ffi_arg, syms_call, ffi_call)); // Restore visibility r_eval(r_null, r_envs.empty); FREE(3); return out; } r_obj* vec_as_custom_names(r_obj* names, const struct name_repair_opts* opts) { names = KEEP(ffi_as_minimal_names(names)); // Don't use vctrs dispatch utils because we match argument positionally r_obj* call = KEEP(r_call2(syms_repair, syms_names)); r_obj* mask = KEEP(r_alloc_empty_environment(R_GlobalEnv)); r_env_poke(mask, syms_repair, opts->fn); r_env_poke(mask, syms_names, names); r_obj* out = KEEP(r_eval(call, mask)); vec_validate_minimal_names(out, r_length(names), opts->call); FREE(4); return out; } static r_obj* vec_names_impl(r_obj* x, bool proxy) { bool has_class = r_is_object(x); if (has_class && r_inherits(x, "data.frame")) { // Only return row names if they are character. Data frames with // automatic row names are treated as unnamed. r_obj* rn = df_rownames(x); if (rownames_type(rn) == ROWNAMES_TYPE_identifiers) { return rn; } else { return r_null; } } if (vec_bare_dim(x) == r_null) { if (!proxy && has_class) { return vctrs_dispatch1(syms_names, fns_names, syms_x, x); } else { return r_names(x); } } r_obj* dimnames = KEEP(r_attrib_get(x, r_syms.dim_names)); if (dimnames == r_null || r_length(dimnames) < 1) { FREE(1); return r_null; } r_obj* out = r_list_get(dimnames, 0); FREE(1); return out; } // [[ register() ]] r_obj* vec_names(r_obj* x) { return vec_names_impl(x, false); } r_obj* vec_proxy_names(r_obj* x) { return vec_names_impl(x, true); } r_obj* vec_names2(r_obj* x) { r_obj* names = vec_names(x); if (names == r_null) { return r_alloc_character(vec_size(x)); } else { return names; } } r_obj* ffi_as_minimal_names(r_obj* names) { if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector"); } r_ssize i = 0; r_ssize n = r_length(names); r_obj* const * v_names = r_chr_cbegin(names); for (; i < n; ++i) { if (v_names[i] == r_globals.na_str) { break; } } if (i == n) { return names; } names = KEEP(r_clone(names)); for (; i < n; ++i) { if (v_names[i] == r_globals.na_str) { r_chr_poke(names, i, strings_empty); } } FREE(1); return names; } r_obj* ffi_minimal_names(r_obj* x) { r_obj* names = KEEP(vec_names(x)); if (names == r_null) { names = r_alloc_character(vec_size(x)); } else { names = ffi_as_minimal_names(names); } FREE(1); return names; } // From dictionary.c r_obj* vctrs_duplicated(r_obj* x); // [[ include("vctrs.h") ]] r_obj* vec_as_unique_names(r_obj* names, bool quiet) { if (is_unique_names(names) && !any_has_suffix(names)) { return names; } else { return(as_unique_names_impl(names, quiet)); } } // [[ include("vctrs.h") ]] bool is_unique_names(r_obj* names) { if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector"); } r_ssize n = r_length(names); r_obj* const * v_names = r_chr_cbegin(names); if (duplicated_any(names)) { return false; } for (r_ssize i = 0; i < n; ++i) { if (needs_suffix(v_names[i])) { return false; } } return true; } bool any_has_suffix(r_obj* names) { r_ssize n = r_length(names); r_obj* const * v_names = r_chr_cbegin(names); for (r_ssize i = 0; i < n; ++i) { if (suffix_pos(r_str_c_string(v_names[i])) >= 0) { return true; } } return false; } r_obj* as_unique_names_impl(r_obj* names, bool quiet) { r_ssize n = r_length(names); r_obj* new_names = KEEP(r_clone(names)); r_obj* const * v_new_names = r_chr_cbegin(new_names); for (r_ssize i = 0; i < n; ++i) { r_obj* elt = v_new_names[i]; // Set `NA` and dots values to "" so they get replaced by `...n` // later on if (needs_suffix(elt)) { elt = strings_empty; r_chr_poke(new_names, i, elt); continue; } // Strip `...n` suffixes const char* nm = r_str_c_string(elt); int pos = suffix_pos(nm); if (pos >= 0) { elt = Rf_mkCharLenCE(nm, pos, Rf_getCharCE(elt)); r_chr_poke(new_names, i, elt); continue; } } // Append all duplicates with a suffix r_obj* dups = KEEP(vctrs_duplicated(new_names)); const int* dups_ptr = r_lgl_cbegin(dups); for (r_ssize i = 0; i < n; ++i) { r_obj* elt = v_new_names[i]; if (elt != strings_empty && !dups_ptr[i]) { continue; } const char* name = r_str_c_string(elt); int size = strlen(name); int buf_size = size + MAX_IOTA_SIZE; R_CheckStack2(buf_size); char buf[buf_size]; buf[0] = '\0'; memcpy(buf, name, size); int remaining = buf_size - size; int needed = snprintf(buf + size, remaining, "...%d", (int) i + 1); if (needed >= remaining) { stop_large_name(); } r_chr_poke(new_names, i, Rf_mkCharLenCE(buf, size + needed, Rf_getCharCE(elt))); } if (!quiet) { describe_repair(names, new_names); } FREE(2); return new_names; } r_obj* vctrs_as_unique_names(r_obj* names, r_obj* quiet) { r_obj* out = KEEP(vec_as_unique_names(names, r_lgl_get(quiet, 0))); FREE(1); return out; } r_obj* vctrs_is_unique_names(r_obj* names) { bool out = is_unique_names(names); return r_lgl(out); } static bool is_dotdotint(const char* name) { int n = strlen(name); if (n < 3) { return false; } if (name[0] != '.' || name[1] != '.') { return false; } if (name[2] == '.') { name += 3; } else { name += 2; } return (bool) strtol(name, NULL, 10); } static ptrdiff_t suffix_pos(const char* name) { int n = strlen(name); const char* suffix_end = NULL; int in_dots = 0; bool in_digits = false; for (const char* ptr = name + n - 1; ptr >= name; --ptr) { char c = *ptr; if (in_digits) { if (c == '.') { in_digits = false; in_dots = 1; continue; } if (isdigit(c)) { continue; } goto done; } switch (in_dots) { case 0: if (isdigit(c)) { in_digits = true; continue; } goto done; case 1: case 2: if (c == '.') { ++in_dots; continue; } goto done; case 3: suffix_end = ptr + 1; if (isdigit(c)) { in_dots = 0; in_digits = true; continue; } goto done; default: r_stop_internal("Unexpected state."); }} done: if (suffix_end) { return suffix_end - name; } else { return -1; } } static void stop_large_name(void) { r_abort("Can't tidy up name because it is too large."); } static bool needs_suffix(r_obj* str) { return str == r_globals.na_str || str == strings_dots || str == strings_empty || is_dotdotint(r_str_c_string(str)); } r_obj* ffi_unique_names(r_obj* x, r_obj* quiet) { return vec_unique_names(x, LOGICAL(quiet)[0]); } r_obj* vec_unique_names(r_obj* x, bool quiet) { r_obj* names = KEEP(vec_names(x)); r_obj* out = vec_unique_names_impl(names, vec_size(x), quiet); FREE(1); return out; } r_obj* vec_unique_colnames(r_obj* x, bool quiet) { r_obj* names = KEEP(colnames(x)); r_obj* out = vec_unique_names_impl(names, Rf_ncols(x), quiet); FREE(1); return out; } static r_obj* vec_unique_names_impl(r_obj* names, r_ssize n, bool quiet) { r_obj* out; if (names == r_null) { out = KEEP(names_iota(n)); if (!quiet) { describe_repair(names, out); } } else { out = KEEP(vec_as_unique_names(names, quiet)); } FREE(1); return(out); } static r_obj* names_iota(r_ssize n) { char buf[MAX_IOTA_SIZE]; r_obj* nms = r_chr_iota(n, buf, MAX_IOTA_SIZE, "..."); if (nms == r_null) { r_abort("Too many names to repair."); } return nms; } static void describe_repair(r_obj* old_names, r_obj* new_names) { r_obj* call = KEEP(r_call3(r_sym("describe_repair"), old_names, new_names)); r_eval(call, vctrs_ns_env); // To reset visibility when called from a `.External2()` r_eval(r_null, r_envs.empty); FREE(1); } r_obj* ffi_outer_names(r_obj* names, r_obj* outer, r_obj* n) { if (names != r_null && r_typeof(names) != R_TYPE_character) { r_stop_internal("`names` must be `NULL` or a string."); } if (!r_is_number(n)) { r_stop_internal("`n` must be a single integer."); } if (outer != r_null) { outer = r_chr_get(outer, 0); } return outer_names(names, outer, r_int_get(n, 0)); } r_obj* outer_names(r_obj* names, r_obj* outer, r_ssize n) { if (outer == r_null) { return names; } if (r_typeof(outer) != R_TYPE_string) { r_stop_internal("`outer` must be a scalar string."); } if (outer == strings_empty || outer == r_globals.na_str) { return names; } if (r_is_empty_names(names)) { if (n == 1) { return r_str_as_character(outer); } else { return r_seq_chr(r_str_c_string(outer), n); } } else { return r_chr_paste_prefix(names, r_str_c_string(outer), ".."); } } r_obj* ffi_apply_name_spec(r_obj* name_spec, r_obj* outer, r_obj* inner, r_obj* n) { return apply_name_spec(name_spec, r_chr_get(outer, 0), inner, r_int_get(n, 0)); } r_obj* apply_name_spec(r_obj* name_spec, r_obj* outer, r_obj* inner, r_ssize n) { if (r_inherits(name_spec, "rlang_zap")) { return r_null; } if (outer == r_null) { return inner; } if (r_typeof(outer) != R_TYPE_string) { r_stop_internal("`outer` must be a scalar string."); } if (outer == strings_empty || outer == r_globals.na_str) { if (inner == r_null) { return chrs_empty; } else { return inner; } } if (r_is_empty_names(inner)) { if (n == 0) { return r_globals.empty_chr; } if (n == 1) { return r_str_as_character(outer); } inner = KEEP(r_seq(1, n + 1)); } else { inner = KEEP(inner); } switch (r_typeof(name_spec)) { case R_TYPE_closure: break; case R_TYPE_character: name_spec = glue_as_name_spec(name_spec); break; default: name_spec = r_as_function(name_spec, ".name_spec"); break; case R_TYPE_null: { const char* reason; if (n > 1) { reason = "a vector of length > 1"; } else { reason = "a named vector"; } r_abort("Can't merge the outer name `%s` with %s.\n" "Please supply a `.name_spec` specification.", r_str_c_string(outer), reason); }} KEEP(name_spec); r_obj* outer_chr = KEEP(r_str_as_character(outer)); r_obj* out = KEEP(vctrs_dispatch2(syms_dot_name_spec, name_spec, syms_outer, outer_chr, syms_inner, inner)); out = vec_recycle(out, n); if (out != r_null) { if (r_typeof(out) != R_TYPE_character) { r_abort("`.name_spec` must return a character vector."); } if (r_length(out) != n) { r_abort("`.name_spec` must return a character vector as long as `inner`."); } } FREE(4); return out; } static r_obj* glue_as_name_spec(r_obj* spec) { if (!r_is_string(spec)) { r_abort("Glue specification in `.name_spec` must be a single string."); } return vctrs_dispatch1(syms_glue_as_name_spec, fns_glue_as_name_spec, syms_internal_spec, spec); } #define VCTRS_PASTE_BUFFER_MAX_SIZE 4096 char vctrs_paste_buffer[VCTRS_PASTE_BUFFER_MAX_SIZE]; r_obj* r_chr_paste_prefix(r_obj* names, const char* prefix, const char* sep) { int n_prot = 0; names = KEEP_N(r_clone(names), &n_prot); r_ssize n = r_length(names); int outer_len = strlen(prefix); int names_len = r_chr_max_len(names); int sep_len = strlen(sep); int total_len = outer_len + names_len + sep_len + 1; char* buf = vctrs_paste_buffer; if (total_len > VCTRS_PASTE_BUFFER_MAX_SIZE) { r_obj* buf_box = KEEP_N( r_alloc_raw(total_len * sizeof(char)), &n_prot ); buf = (char*) RAW(buf_box); } buf[total_len - 1] = '\0'; char* bufp = buf; memcpy(bufp, prefix, outer_len); bufp += outer_len; for (int i = 0; i < sep_len; ++i) { *bufp++ = sep[i]; } r_obj* const* p_names = r_chr_cbegin(names); for (r_ssize i = 0; i < n; ++i) { const char* inner = r_str_c_string(p_names[i]); int inner_n = strlen(inner); memcpy(bufp, inner, inner_n); bufp[inner_n] = '\0'; r_chr_poke(names, i, r_str(buf)); } FREE(n_prot); return names; } r_obj* ffi_chr_paste_prefix(r_obj* names, r_obj* prefix, r_obj* sep) { return r_chr_paste_prefix(names, r_chr_get_c_string(prefix, 0), r_chr_get_c_string(sep, 0)); } r_obj* r_seq_chr(const char* prefix, r_ssize n) { int total_len = 24 + strlen(prefix) + 1; R_CheckStack2(total_len); char buf[total_len]; return r_chr_iota(n, buf, total_len, prefix); } static r_obj* set_rownames_dispatch(r_obj* x, r_obj* names) { return vctrs_dispatch2(syms_set_rownames_dispatch, fns_set_rownames_dispatch, syms_x, x, syms_names, names); } static r_obj* set_names_dispatch(r_obj* x, r_obj* names) { return vctrs_dispatch2(syms_set_names_dispatch, fns_set_names_dispatch, syms_x, x, syms_names, names); } static void check_names(r_obj* x, r_obj* names) { if (names == r_null) { return; } if (r_typeof(names) != R_TYPE_character) { r_abort( "`names` must be a character vector, not a %s.", r_type_as_c_string(r_typeof(names)) ); } r_ssize x_size = vec_size(x); r_ssize names_size = vec_size(names); if (x_size != names_size) { r_abort( "The size of `names`, %i, must be the same as the size of `x`, %i.", names_size, x_size ); } } r_obj* vec_set_rownames(r_obj* x, r_obj* names, bool proxy, const enum vctrs_owned owned) { if (!proxy && r_is_object(x)) { return set_rownames_dispatch(x, names); } int nprot = 0; r_obj* dim_names = r_attrib_get(x, r_syms.dim_names); // Early exit when no new row names and no existing row names if (names == r_null) { if (dim_names == r_null || r_list_get(dim_names, 0) == r_null) { return x; } } x = KEEP_N(vec_clone_referenced(x, owned), &nprot); if (dim_names == r_null) { dim_names = KEEP_N(r_alloc_list(vec_dim_n(x)), &nprot); } else { // Also clone attribute dim_names = KEEP_N(r_clone(dim_names), &nprot); } r_list_poke(dim_names, 0, names); r_attrib_poke(x, r_syms.dim_names, dim_names); FREE(nprot); return x; } r_obj* vec_set_df_rownames(r_obj* x, r_obj* names, bool proxy, const enum vctrs_owned owned) { if (names == r_null) { if (rownames_type(df_rownames(x)) != ROWNAMES_TYPE_identifiers) { return(x); } x = KEEP(vec_clone_referenced(x, owned)); init_compact_rownames(x, vec_size(x)); FREE(1); return x; } // Repair row names silently if (!proxy) { names = vec_as_names(names, p_unique_repair_silent_opts); } KEEP(names); x = KEEP(vec_clone_referenced(x, owned)); r_attrib_poke(x, r_syms.row_names, names); FREE(2); return x; } // FIXME: Do we need to get the vec_proxy() and only fall back if it doesn't // exist? See #526 and #531 for discussion and the related issue. r_obj* vec_set_names_impl(r_obj* x, r_obj* names, bool proxy, const enum vctrs_owned owned) { check_names(x, names); if (is_data_frame(x)) { return vec_set_df_rownames(x, names, proxy, owned); } if (has_dim(x)) { return vec_set_rownames(x, names, proxy, owned); } if (!proxy && r_is_object(x)) { return set_names_dispatch(x, names); } // Early exit if no new names and no existing names if (names == r_null && r_attrib_get(x, r_syms.names) == r_null) { return x; } if (owned) { // Possibly skip the cloning altogether x = KEEP(vec_clone_referenced(x, owned)); r_attrib_poke(x, r_syms.names, names); } else { // We need to clone, but to do this we will use `names<-` // which can perform a cheaper ALTREP shallow duplication x = KEEP(set_names_dispatch(x, names)); } FREE(1); return x; } // [[ register() ]] r_obj* vec_set_names(r_obj* x, r_obj* names) { return vec_set_names_impl(x, names, false, VCTRS_OWNED_false); } r_obj* vec_proxy_set_names(r_obj* x, r_obj* names, const enum vctrs_owned owned) { return vec_set_names_impl(x, names, true, owned); } r_obj* vctrs_validate_name_repair_arg(r_obj* arg) { struct name_repair_opts opts = new_name_repair_opts(arg, r_lazy_null, true, r_lazy_null); if (opts.type == NAME_REPAIR_custom) { return opts.fn; } else if (r_length(arg) != 1) { return r_str_as_character(r_str(name_repair_arg_as_c_string(opts.type))); } else { return arg; } } void stop_name_repair(struct name_repair_opts* p_opts) { struct repair_error_info info = new_repair_error_info(p_opts); KEEP(info.shelter); r_abort_call(info.input_error_call, "%s must be a string or a function. See `?vctrs::vec_as_names`.", r_format_error_arg(info.input_error_repair_arg)); } struct name_repair_opts new_name_repair_opts(r_obj* name_repair, struct r_lazy name_repair_arg, bool quiet, struct r_lazy call) { struct name_repair_opts opts = { .shelter = r_null, .type = 0, .fn = r_null, .name_repair_arg = name_repair_arg, .quiet = quiet, .call = call }; switch (r_typeof(name_repair)) { case R_TYPE_character: { if (!r_length(name_repair)) { stop_name_repair(&opts); } r_obj* c = r_chr_get(name_repair, 0); if (c == strings_none) { opts.type = NAME_REPAIR_none; } else if (c == strings_minimal) { opts.type = NAME_REPAIR_minimal; } else if (c == strings_unique) { opts.type = NAME_REPAIR_unique; } else if (c == strings_universal) { opts.type = NAME_REPAIR_universal; } else if (c == strings_check_unique) { opts.type = NAME_REPAIR_check_unique; } else if (c == strings_unique_quiet) { opts.type = NAME_REPAIR_unique; opts.quiet = true; } else if (c == strings_universal_quiet) { opts.type = NAME_REPAIR_universal; opts.quiet = true; } else { struct repair_error_info info = new_repair_error_info(&opts); KEEP(info.shelter); r_abort_call(info.input_error_call, "%s can't be \"%s\". See `?vctrs::vec_as_names`.", r_format_error_arg(info.input_error_repair_arg), r_str_c_string(c)); } return opts; } case R_TYPE_call: opts.fn = r_as_function(name_repair, ".name_repair"); opts.shelter = opts.fn; opts.type = NAME_REPAIR_custom; return opts; case R_TYPE_closure: opts.fn = name_repair; opts.type = NAME_REPAIR_custom; return opts; default: stop_name_repair(&opts); } r_stop_unreachable(); } const char* name_repair_arg_as_c_string(enum name_repair_type type) { switch (type) { case NAME_REPAIR_none: return "none"; case NAME_REPAIR_minimal: return "minimal"; case NAME_REPAIR_unique: return "unique"; case NAME_REPAIR_universal: return "universal"; case NAME_REPAIR_check_unique: return "check_unique"; case NAME_REPAIR_custom: return "custom"; } r_stop_unreachable(); } static void vec_validate_minimal_names(r_obj* names, r_ssize n, struct r_lazy call) { if (names == r_null) { r_abort_lazy_call(call, "Names repair functions can't return `NULL`."); } if (r_typeof(names) != R_TYPE_character) { r_abort_lazy_call(call, "Names repair functions must return a character vector."); } if (n >= 0 && r_length(names) != n) { r_abort_lazy_call(call, "Repaired names have length %d instead of length %d.", r_length(names), n); } if (r_chr_has_string(names, r_globals.na_str)) { r_abort_lazy_call(call, "Names repair functions can't return `NA` values."); } } r_obj* vctrs_validate_minimal_names(r_obj* names, r_obj* n_) { r_ssize n = -1; if (r_typeof(n_) == R_TYPE_integer) { if (r_length(n_) != 1) { r_stop_internal("`n` must be a single number."); } n = r_int_get(n_, 0); } vec_validate_minimal_names(names, n, r_lazy_null); return names; } struct name_repair_opts unique_repair_default_opts; struct name_repair_opts unique_repair_silent_opts; struct name_repair_opts no_repair_opts; void vctrs_init_names(r_obj* ns) { syms_set_rownames_dispatch = r_sym("set_rownames_dispatch"); syms_set_names_dispatch = r_sym("set_names_dispatch"); syms_as_universal_names = r_sym("as_universal_names"); syms_check_unique_names = r_sym("validate_unique"); fns_set_rownames_dispatch = r_env_get(ns, syms_set_rownames_dispatch); fns_set_names_dispatch = r_env_get(ns, syms_set_names_dispatch); fns_as_universal_names = r_env_get(ns, syms_as_universal_names); fns_check_unique_names = r_env_get(ns, syms_check_unique_names); syms_glue_as_name_spec = r_sym("glue_as_name_spec"); fns_glue_as_name_spec = r_env_get(ns, syms_glue_as_name_spec); syms_internal_spec = r_sym("_spec"); unique_repair_default_opts.type = NAME_REPAIR_unique; unique_repair_default_opts.fn = r_null; unique_repair_default_opts.quiet = false; unique_repair_silent_opts.type = NAME_REPAIR_unique; unique_repair_silent_opts.fn = r_null; unique_repair_silent_opts.quiet = true; no_repair_opts.type = NAME_REPAIR_none; no_repair_opts.fn = r_null; no_repair_opts.quiet = true; } static r_obj* syms_as_universal_names = NULL; static r_obj* syms_check_unique_names = NULL; static r_obj* syms_glue_as_name_spec = NULL; static r_obj* syms_internal_spec = NULL; static r_obj* syms_set_rownames_dispatch = NULL; static r_obj* syms_set_names_dispatch = NULL; static r_obj* fns_as_universal_names = NULL; static r_obj* fns_check_unique_names = NULL; static r_obj* fns_glue_as_name_spec = NULL; static r_obj* fns_set_rownames_dispatch = NULL; static r_obj* fns_set_names_dispatch = NULL; vctrs/src/size-common.h0000644000176200001440000000312714315060310014607 0ustar liggesusers#ifndef VCTRS_SIZE_COMMON_H #define VCTRS_SIZE_COMMON_H #include "vctrs-core.h" struct size_common_opts { struct vctrs_arg* p_arg; struct r_lazy call; }; r_ssize vec_size_common_opts(r_obj* xs, r_ssize absent, const struct size_common_opts* opts); r_obj* vec_recycle_common_opts(r_obj* xs, r_ssize size, const struct size_common_opts* opts); static inline r_ssize vec_size_common(r_obj* xs, r_ssize absent) { struct size_common_opts args = { .p_arg = vec_args.empty, .call = lazy_calls.vec_size_common }; return vec_size_common_opts(xs, absent, &args); } static inline r_obj* vec_recycle_common(r_obj* xs, r_ssize size) { struct size_common_opts args = { .p_arg = vec_args.empty, .call = lazy_calls.vec_recycle_common }; return vec_recycle_common_opts(xs, size, &args); } static inline r_ssize vec_check_size_common(r_obj* xs, r_ssize absent, struct vctrs_arg* p_arg, struct r_lazy call) { struct size_common_opts args = { .p_arg = p_arg, .call = call }; return vec_size_common_opts(xs, absent, &args); } static inline r_obj* vec_check_recycle_common(r_obj* xs, r_ssize size, struct vctrs_arg* p_arg, struct r_lazy call) { struct size_common_opts args = { .p_arg = p_arg, .call = call }; return vec_recycle_common_opts(xs, size, &args); } #endif vctrs/src/group.c0000644000176200001440000001343114346710200013501 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" // [[ register() ]] SEXP vctrs_group_id(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_out = INTEGER(out); R_len_t g = 1; for (int i = 0; i < n; ++i) { uint32_t hash = dict_hash_scalar(d, i); R_len_t key = d->key[hash]; if (key == DICT_EMPTY) { dict_put(d, hash, i); p_out[i] = g; ++g; } else { p_out[i] = p_out[key]; } } SEXP n_groups = PROTECT_N(Rf_ScalarInteger(d->used), &nprot); Rf_setAttrib(out, syms_n, n_groups); UNPROTECT(nprot); return out; } // ----------------------------------------------------------------------------- static SEXP new_group_rle(SEXP g, SEXP l, R_len_t n); // [[ register() ]] SEXP vctrs_group_rle(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(vec_normalize_encoding(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); const void* p_vec = d->p_poly_vec->p_vec; SEXP g = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_g = INTEGER(g); SEXP l = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_l = INTEGER(l); if (n == 0) { SEXP out = PROTECT_N(new_group_rle(g, l, 0), &nprot); UNPROTECT(nprot); return out; } // Integer vector that maps `hash` values to locations in `g` SEXP map = PROTECT_N(Rf_allocVector(INTSXP, d->size), &nprot); int* p_map = INTEGER(map); // Initialize first value uint32_t hash = dict_hash_scalar(d, 0); dict_put(d, hash, 0); p_map[hash] = 0; *p_g = 1; *p_l = 1; int loc = 1; for (int i = 1; i < n; ++i) { if (d->p_equal_na_equal(p_vec, i - 1, p_vec, i)) { ++(*p_l); continue; } ++p_l; *p_l = 1; // Check if we have seen this value before uint32_t hash = dict_hash_scalar(d, i); if (d->key[hash] == DICT_EMPTY) { dict_put(d, hash, i); p_map[hash] = loc; p_g[loc] = d->used; } else { p_g[loc] = p_g[p_map[hash]]; } ++loc; } g = PROTECT_N(Rf_lengthgets(g, loc), &nprot); l = PROTECT_N(Rf_lengthgets(l, loc), &nprot); SEXP out = new_group_rle(g, l, d->used); UNPROTECT(nprot); return out; } static SEXP new_group_rle(SEXP g, SEXP l, R_len_t n) { SEXP out = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(out, 0, g); SET_VECTOR_ELT(out, 1, l); SEXP names = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(names, 0, strings_group); SET_STRING_ELT(names, 1, strings_length); Rf_setAttrib(out, R_NamesSymbol, names); SEXP n_groups = PROTECT(Rf_ScalarInteger(n)); Rf_setAttrib(out, syms_n, n_groups); Rf_setAttrib(out, R_ClassSymbol, classes_vctrs_group_rle); UNPROTECT(3); return out; } // ----------------------------------------------------------------------------- // [[ include("vctrs.h"); register() ]] SEXP vec_group_loc(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); SEXP proxy = PROTECT_N(vec_proxy_equal(x), &nprot); proxy = PROTECT_N(vec_normalize_encoding(proxy), &nprot); struct dictionary* d = new_dictionary(proxy); PROTECT_DICT(d, &nprot); SEXP groups = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_groups = INTEGER(groups); R_len_t g = 0; // Identify groups, this is essentially `vec_group_id()` for (int i = 0; i < n; ++i) { const uint32_t hash = dict_hash_scalar(d, i); const R_len_t key = d->key[hash]; if (key == DICT_EMPTY) { dict_put(d, hash, i); p_groups[i] = g; ++g; } else { p_groups[i] = p_groups[key]; } } const int n_groups = d->used; // Location of first occurence of each group in `x` SEXP key_loc = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_key_loc = INTEGER(key_loc); int key_loc_current = 0; // Count of the number of elements in each group SEXP counts = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_counts = INTEGER(counts); memset(p_counts, 0, n_groups * sizeof(int)); for (int i = 0; i < n; ++i) { const int group = p_groups[i]; if (group == key_loc_current) { p_key_loc[key_loc_current] = i + 1; ++key_loc_current; } ++p_counts[group]; } SEXP out_loc = PROTECT_N(Rf_allocVector(VECSXP, n_groups), &nprot); // Direct pointer to the location vectors we store in `out_loc` int** p_elt_loc = (int**) R_alloc(n_groups, sizeof(int*)); // Initialize `out_loc` to a list of integers with sizes corresponding // to the number of elements in that group for (int i = 0; i < n_groups; ++i) { SEXP elt_loc = Rf_allocVector(INTSXP, p_counts[i]); p_elt_loc[i] = INTEGER(elt_loc); SET_VECTOR_ELT(out_loc, i, elt_loc); } // The current location we are updating, each group has its own counter SEXP locations = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_locations = INTEGER(locations); memset(p_locations, 0, n_groups * sizeof(int)); // Fill in the location values for each group for (int i = 0; i < n; ++i) { const int group = p_groups[i]; const int location = p_locations[group]; p_elt_loc[group][location] = i + 1; ++p_locations[group]; } SEXP out_key = PROTECT_N(vec_slice(x, key_loc), &nprot); // Construct output data frame SEXP out = PROTECT_N(Rf_allocVector(VECSXP, 2), &nprot); SET_VECTOR_ELT(out, 0, out_key); SET_VECTOR_ELT(out, 1, out_loc); SEXP names = PROTECT_N(Rf_allocVector(STRSXP, 2), &nprot); SET_STRING_ELT(names, 0, strings_key); SET_STRING_ELT(names, 1, strings_loc); Rf_setAttrib(out, R_NamesSymbol, names); out = new_data_frame(out, n_groups); UNPROTECT(nprot); return out; } vctrs/src/cast-dispatch.h0000644000176200001440000000051514315060310015074 0ustar liggesusers#ifndef VCTRS_CAST_DISPATCH_H #define VCTRS_CAST_DISPATCH_H #include "vctrs-core.h" #include "cast.h" r_obj* vec_cast_dispatch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy); #endif vctrs/src/missing.h0000644000176200001440000000527014315060310014021 0ustar liggesusers#ifndef VCTRS_MISSING_H #define VCTRS_MISSING_H #include "vctrs-core.h" #include "utils.h" // ----------------------------------------------------------------------------- r_obj* vec_detect_missing(r_obj* x); bool vec_any_missing(r_obj* x); r_ssize vec_first_missing(r_obj* x); // ----------------------------------------------------------------------------- static inline bool lgl_is_missing(int x) { return x == r_globals.na_int; } static inline bool int_is_missing(int x) { return x == r_globals.na_int; } static inline bool dbl_is_missing(double x) { return isnan(x); } static inline bool cpl_is_missing(r_complex x) { return dbl_is_missing(x.r) || dbl_is_missing(x.i); } static inline bool chr_is_missing(r_obj* x) { return x == r_globals.na_str; } static inline bool raw_is_missing(unsigned char x) { return false; } static inline bool list_is_missing(r_obj* x) { return x == r_null; } // ----------------------------------------------------------------------------- #define P_IS_MISSING(CTYPE, IS_MISSING) do { \ return IS_MISSING(((CTYPE const*) p_x)[i]); \ } while (0) static r_no_return inline bool p_nil_is_missing(const void* p_x, r_ssize i) { r_stop_internal("Can't check NULL for missingness."); } static inline bool p_lgl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(int, lgl_is_missing); } static inline bool p_int_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(int, int_is_missing); } static inline bool p_dbl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(double, dbl_is_missing); } static inline bool p_cpl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(r_complex, cpl_is_missing); } static inline bool p_chr_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(r_obj*, chr_is_missing); } static inline bool p_raw_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(unsigned char, raw_is_missing); } static inline bool p_list_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(r_obj*, list_is_missing); } #undef P_IS_MISSING static inline bool p_is_missing(const void* p_x, r_ssize i, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_is_missing(p_x, i); case VCTRS_TYPE_integer: return p_int_is_missing(p_x, i); case VCTRS_TYPE_double: return p_dbl_is_missing(p_x, i); case VCTRS_TYPE_complex: return p_cpl_is_missing(p_x, i); case VCTRS_TYPE_character: return p_chr_is_missing(p_x, i); case VCTRS_TYPE_raw: return p_raw_is_missing(p_x, i); case VCTRS_TYPE_list: return p_list_is_missing(p_x, i); default: stop_unimplemented_vctrs_type("p_is_missing", type); } } // ----------------------------------------------------------------------------- #endif vctrs/src/vctrs-core.c0000644000176200001440000000045514315060310014432 0ustar liggesusers#include "vctrs.h" enum vctrs_dbl dbl_classify(double x) { if (!isnan(x)) { return VCTRS_DBL_number; } union vctrs_dbl_indicator indicator; indicator.value = x; if (indicator.key[vctrs_indicator_pos] == 1954) { return VCTRS_DBL_missing; } else { return VCTRS_DBL_nan; } } vctrs/src/Makevars0000644000176200001440000000006614305640202013674 0ustar liggesusersPKG_CPPFLAGS = -I./rlang PKG_CFLAGS = $(C_VISIBILITY) vctrs/src/utils-dispatch.h0000644000176200001440000000136014315060310015301 0ustar liggesusers#ifndef VCTRS_UTILS_DISPATCH_H #define VCTRS_UTILS_DISPATCH_H #include "vctrs-core.h" enum vctrs_class_type { VCTRS_CLASS_list, VCTRS_CLASS_data_frame, VCTRS_CLASS_bare_asis, VCTRS_CLASS_bare_data_frame, VCTRS_CLASS_bare_tibble, VCTRS_CLASS_bare_factor, VCTRS_CLASS_bare_ordered, VCTRS_CLASS_bare_date, VCTRS_CLASS_bare_posixct, VCTRS_CLASS_bare_posixlt, VCTRS_CLASS_unknown, VCTRS_CLASS_none }; enum vctrs_class_type class_type(r_obj* x); static inline bool class_type_is_data_frame(enum vctrs_class_type type) { switch (type) { case VCTRS_CLASS_data_frame: case VCTRS_CLASS_bare_data_frame: case VCTRS_CLASS_bare_tibble: return true; default: return false; } } bool vec_is_partial(r_obj* x); #endif vctrs/src/typeof2.h0000644000176200001440000000517714315060310013746 0ustar liggesusers#ifndef VCTRS_TYPEOF2_H #define VCTRS_TYPEOF2_H #include "vctrs-core.h" enum vctrs_type2 { VCTRS_TYPE2_null_null, VCTRS_TYPE2_null_unspecified, VCTRS_TYPE2_null_logical, VCTRS_TYPE2_null_integer, VCTRS_TYPE2_null_double, VCTRS_TYPE2_null_complex, VCTRS_TYPE2_null_character, VCTRS_TYPE2_null_raw, VCTRS_TYPE2_null_list, VCTRS_TYPE2_null_dataframe, VCTRS_TYPE2_null_s3, VCTRS_TYPE2_null_scalar, VCTRS_TYPE2_unspecified_unspecified, VCTRS_TYPE2_unspecified_logical, VCTRS_TYPE2_unspecified_integer, VCTRS_TYPE2_unspecified_double, VCTRS_TYPE2_unspecified_complex, VCTRS_TYPE2_unspecified_character, VCTRS_TYPE2_unspecified_raw, VCTRS_TYPE2_unspecified_list, VCTRS_TYPE2_unspecified_dataframe, VCTRS_TYPE2_unspecified_s3, VCTRS_TYPE2_unspecified_scalar, VCTRS_TYPE2_logical_logical, VCTRS_TYPE2_logical_integer, VCTRS_TYPE2_logical_double, VCTRS_TYPE2_logical_complex, VCTRS_TYPE2_logical_character, VCTRS_TYPE2_logical_raw, VCTRS_TYPE2_logical_list, VCTRS_TYPE2_logical_dataframe, VCTRS_TYPE2_logical_s3, VCTRS_TYPE2_logical_scalar, VCTRS_TYPE2_integer_integer, VCTRS_TYPE2_integer_double, VCTRS_TYPE2_integer_complex, VCTRS_TYPE2_integer_character, VCTRS_TYPE2_integer_raw, VCTRS_TYPE2_integer_list, VCTRS_TYPE2_integer_dataframe, VCTRS_TYPE2_integer_s3, VCTRS_TYPE2_integer_scalar, VCTRS_TYPE2_double_double, VCTRS_TYPE2_double_complex, VCTRS_TYPE2_double_character, VCTRS_TYPE2_double_raw, VCTRS_TYPE2_double_list, VCTRS_TYPE2_double_dataframe, VCTRS_TYPE2_double_s3, VCTRS_TYPE2_double_scalar, VCTRS_TYPE2_complex_complex, VCTRS_TYPE2_complex_character, VCTRS_TYPE2_complex_raw, VCTRS_TYPE2_complex_list, VCTRS_TYPE2_complex_dataframe, VCTRS_TYPE2_complex_s3, VCTRS_TYPE2_complex_scalar, VCTRS_TYPE2_character_character, VCTRS_TYPE2_character_raw, VCTRS_TYPE2_character_list, VCTRS_TYPE2_character_dataframe, VCTRS_TYPE2_character_s3, VCTRS_TYPE2_character_scalar, VCTRS_TYPE2_raw_raw, VCTRS_TYPE2_raw_list, VCTRS_TYPE2_raw_dataframe, VCTRS_TYPE2_raw_s3, VCTRS_TYPE2_raw_scalar, VCTRS_TYPE2_list_list, VCTRS_TYPE2_list_dataframe, VCTRS_TYPE2_list_s3, VCTRS_TYPE2_list_scalar, VCTRS_TYPE2_dataframe_dataframe, VCTRS_TYPE2_dataframe_s3, VCTRS_TYPE2_dataframe_scalar, VCTRS_TYPE2_S3_s3, VCTRS_TYPE2_S3_scalar, VCTRS_TYPE2_scalar_scalar }; enum vctrs_type2 vec_typeof2_impl(enum vctrs_type type_x, enum vctrs_type type_y, int* left); enum vctrs_type2 vec_typeof2(r_obj* x, r_obj* y); const char* vctrs_type2_as_str(enum vctrs_type2 type); #endif vctrs/src/ptype.h0000644000176200001440000000027114315060310013505 0ustar liggesusers#ifndef VCTRS_PTYPE_H #define VCTRS_PTYPE_H #include "vctrs-core.h" r_obj* vec_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call); r_obj* vec_ptype_final(r_obj* x); #endif vctrs/src/missing.c0000644000176200001440000003144714315060310014021 0ustar liggesusers#include "vctrs.h" #include "decl/missing-decl.h" // [[ register() ]] r_obj* ffi_vec_detect_missing(r_obj* x) { return vec_detect_missing(x); } // [[ include("missing.h") ]] r_obj* vec_detect_missing(r_obj* x) { r_obj* proxy = KEEP(vec_proxy_equal(x)); r_obj* out = proxy_detect_missing(proxy); FREE(1); return out; } static inline r_obj* proxy_detect_missing(r_obj* proxy) { const enum vctrs_type type = vec_proxy_typeof(proxy); switch (type) { case VCTRS_TYPE_logical: return lgl_detect_missing(proxy); case VCTRS_TYPE_integer: return int_detect_missing(proxy); case VCTRS_TYPE_double: return dbl_detect_missing(proxy); case VCTRS_TYPE_complex: return cpl_detect_missing(proxy); case VCTRS_TYPE_raw: return raw_detect_missing(proxy); case VCTRS_TYPE_character: return chr_detect_missing(proxy); case VCTRS_TYPE_list: return list_detect_missing(proxy); case VCTRS_TYPE_dataframe: return df_detect_missing(proxy); case VCTRS_TYPE_null: return r_globals.empty_lgl; case VCTRS_TYPE_scalar: stop_scalar_type(proxy, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_detect_missing", type); } r_stop_unreachable(); } // ----------------------------------------------------------------------------- #define DETECT_MISSING(CTYPE, CBEGIN, IS_MISSING) do { \ const r_ssize size = vec_size(x); \ \ r_obj* out = KEEP(r_new_logical(size)); \ int* v_out = r_lgl_begin(out); \ \ CTYPE const* v_x = CBEGIN(x); \ \ for (r_ssize i = 0; i < size; ++i) { \ v_out[i] = IS_MISSING(v_x[i]); \ } \ \ FREE(1); \ return out; \ } while (0) static inline r_obj* lgl_detect_missing(r_obj* x) { DETECT_MISSING(int, r_lgl_cbegin, lgl_is_missing); } static inline r_obj* int_detect_missing(r_obj* x) { DETECT_MISSING(int, r_int_cbegin, int_is_missing); } static inline r_obj* dbl_detect_missing(r_obj* x) { DETECT_MISSING(double, r_dbl_cbegin, dbl_is_missing); } static inline r_obj* cpl_detect_missing(r_obj* x) { DETECT_MISSING(r_complex, r_cpl_cbegin, cpl_is_missing); } static inline r_obj* raw_detect_missing(r_obj* x) { DETECT_MISSING(unsigned char, r_uchar_cbegin, raw_is_missing); } static inline r_obj* chr_detect_missing(r_obj* x) { DETECT_MISSING(r_obj*, r_chr_cbegin, chr_is_missing); } static inline r_obj* list_detect_missing(r_obj* x) { DETECT_MISSING(r_obj*, r_list_cbegin, list_is_missing); } #undef DETECT_MISSING // ----------------------------------------------------------------------------- static inline r_obj* df_detect_missing(r_obj* x) { int n_prot = 0; const r_ssize n_col = r_length(x); const r_ssize size = vec_size(x); r_obj* const* v_x = r_list_cbegin(x); // A location vector to track rows where we still need to check for missing // values. After we iterate through all columns, `v_loc` points to the missing // rows. r_ssize loc_size = size; r_obj* loc_shelter = KEEP_N(r_alloc_raw(loc_size * sizeof(r_ssize)), &n_prot); r_ssize* v_loc = (r_ssize*) r_raw_begin(loc_shelter); for (r_ssize i = 0; i < loc_size; ++i) { v_loc[i] = i; } for (r_ssize i = 0; i < n_col; ++i) { r_obj* col = v_x[i]; loc_size = col_detect_missing(col, v_loc, loc_size); // If all rows have at least one non-missing value, break if (loc_size == 0) { break; } } r_obj* out = KEEP_N(r_new_logical(size), &n_prot); int* v_out = r_lgl_begin(out); r_p_lgl_fill(v_out, 0, size); for (r_ssize i = 0; i < loc_size; ++i) { const r_ssize loc = v_loc[i]; v_out[loc] = 1; } FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static inline r_ssize col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { const enum vctrs_type type = vec_proxy_typeof(x); switch (type) { case VCTRS_TYPE_logical: return lgl_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_integer: return int_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_double: return dbl_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_complex: return cpl_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_raw: return raw_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_character: return chr_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_list: return list_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should have been flattened by now."); case VCTRS_TYPE_null: r_abort("Unexpected `NULL` column found in a data frame."); case VCTRS_TYPE_scalar: stop_scalar_type(x, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_detect_missing", type); } } // ----------------------------------------------------------------------------- /* * The data frame algorithm for `vec_detect_missing()` is fast because this * inner for loop doesn't have any `if` branches in it. We utilize the fact that * this is a no-op when the element isn't missing: * `new_loc_size += IS_MISSING(v_x[loc])` * This is faster than doing `if (IS_MISSING())` at each iteration, especially * when there is a moderate amount of missing values, which makes that branch * fairly unpredictable. * * `r_ssize* v_loc` is a location vector that tracks which rows we still need * to check for missingness. It is "narrowed" after each column is processed to * only point to the rows that might still be missing. After all columns are * processed, it points to exactly where the missing rows are. Here is some * pseudo R code that demonstrates how `v_loc` changes: * * ``` * df <- data.frame( * x = c(1, NA, NA, 2, NA, 3), * y = c(NA, NA, 1, 2, NA, 4) * ) * df * #> x y * #> 1 1 NA * #> 2 NA NA * #> 3 NA 1 * #> 4 2 2 * #> 5 NA NA * #> 6 3 4 * * # Initially any row could be missing * loc_size <- 6 * loc <- 1:6 * * # After processing the first column, only rows 2, 3, and 5 could be missing * loc_size <- 3 * loc <- c(2, 3, 5) * * # After processing the second column, only 2 and 5 could be missing * # This is the last column, so these are the missing rows * loc_size <- 2 * loc <- c(2, 5) * ``` * * For more details, see: https://github.com/r-lib/vctrs/pull/1584 */ #define COL_DETECT_MISSING(CTYPE, CBEGIN, IS_MISSING) do { \ CTYPE const* v_x = CBEGIN(x); \ r_ssize new_loc_size = 0; \ \ for (r_ssize i = 0; i < loc_size; ++i) { \ const r_ssize loc = v_loc[i]; \ v_loc[new_loc_size] = loc; \ new_loc_size += IS_MISSING(v_x[loc]); \ } \ \ return new_loc_size; \ } while (0) static inline r_ssize lgl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(int, r_lgl_cbegin, lgl_is_missing); } static inline r_ssize int_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(int, r_int_cbegin, int_is_missing); } static inline r_ssize dbl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(double, r_dbl_cbegin, dbl_is_missing); } static inline r_ssize cpl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(r_complex, r_cpl_cbegin, cpl_is_missing); } static inline r_ssize raw_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(unsigned char, r_uchar_cbegin, raw_is_missing); } static inline r_ssize chr_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(r_obj*, r_chr_cbegin, chr_is_missing); } static inline r_ssize list_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(r_obj*, r_list_cbegin, list_is_missing); } #undef COL_DETECT_MISSING // ----------------------------------------------------------------------------- r_obj* ffi_vec_any_missing(r_obj* x) { return r_lgl(vec_any_missing(x)); } bool vec_any_missing(r_obj* x) { return vec_first_missing(x) != vec_size(x); } r_ssize vec_first_missing(r_obj* x) { r_obj* proxy = KEEP(vec_proxy_equal(x)); r_ssize out = proxy_first_missing(proxy); FREE(1); return out; } static inline r_ssize proxy_first_missing(r_obj* proxy) { const enum vctrs_type type = vec_proxy_typeof(proxy); switch (type) { case VCTRS_TYPE_logical: return lgl_first_missing(proxy); case VCTRS_TYPE_integer: return int_first_missing(proxy); case VCTRS_TYPE_double: return dbl_first_missing(proxy); case VCTRS_TYPE_complex: return cpl_first_missing(proxy); case VCTRS_TYPE_raw: return raw_first_missing(proxy); case VCTRS_TYPE_character: return chr_first_missing(proxy); case VCTRS_TYPE_list: return list_first_missing(proxy); case VCTRS_TYPE_dataframe: return df_first_missing(proxy); case VCTRS_TYPE_null: return 0; case VCTRS_TYPE_scalar: stop_scalar_type(proxy, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_first_missing", type); } r_stop_unreachable(); } // ----------------------------------------------------------------------------- #define FIRST_MISSING(CTYPE, CBEGIN, IS_MISSING) do { \ const r_ssize size = r_length(x); \ \ CTYPE const* v_x = CBEGIN(x); \ \ for (r_ssize i = 0; i < size; ++i) { \ if (IS_MISSING(v_x[i])) { \ return i; \ } \ } \ \ return size; \ } while (0) static inline r_ssize lgl_first_missing(r_obj* x) { FIRST_MISSING(int, r_lgl_cbegin, lgl_is_missing); } static inline r_ssize int_first_missing(r_obj* x) { FIRST_MISSING(int, r_int_cbegin, int_is_missing); } static inline r_ssize dbl_first_missing(r_obj* x) { FIRST_MISSING(double, r_dbl_cbegin, dbl_is_missing); } static inline r_ssize cpl_first_missing(r_obj* x) { FIRST_MISSING(r_complex, r_cpl_cbegin, cpl_is_missing); } static inline r_ssize raw_first_missing(r_obj* x) { FIRST_MISSING(unsigned char, r_uchar_cbegin, raw_is_missing); } static inline r_ssize chr_first_missing(r_obj* x) { FIRST_MISSING(r_obj*, r_chr_cbegin, chr_is_missing); } static inline r_ssize list_first_missing(r_obj* x) { FIRST_MISSING(r_obj*, r_list_cbegin, list_is_missing); } #undef FIRST_MISSING // ----------------------------------------------------------------------------- static inline r_ssize df_first_missing(r_obj* x) { const r_ssize n_cols = r_length(x); const r_ssize size = vec_size(x); r_ssize i = 0; if (n_cols > 0) { // First perform a very cheap check to see if there is at least 1 missing // value in the first column. If not, then we are done. If there is at least // 1 missing value, we start the loop below from there by updating `i`. This // avoids the more expensive rowwise poly-op loop when there aren't any // missing values. r_obj* col = r_list_get(x, 0); i = vec_first_missing(col); if (i == size) { return size; } } int n_prot = 0; poly_unary_bool_fn* const fn_is_missing = poly_p_is_missing(VCTRS_TYPE_dataframe); struct poly_vec* p_poly_x = new_poly_vec(x, VCTRS_TYPE_dataframe); KEEP_N(p_poly_x->shelter, &n_prot); const void* v_x = p_poly_x->p_vec; r_ssize out = size; for (; i < size; ++i) { if (fn_is_missing(v_x, i)) { out = i; break; } } FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static inline const unsigned char* r_uchar_cbegin(r_obj* x) { // TODO: Move to the rlang library return (const unsigned char*) r_raw_cbegin(x); } vctrs/src/conditions.c0000644000176200001440000001145314362266120014525 0ustar liggesusers#include "vctrs.h" #include "utils.h" // [[ include("vctrs.h") ]] void stop_scalar_type(r_obj* x, struct vctrs_arg* arg, struct r_lazy call) { r_obj* ffi_call = KEEP(r_lazy_eval(call)); ffi_call = KEEP(r_expr_protect(ffi_call)); r_obj* stop_call = KEEP(r_call4(r_sym("stop_scalar_type"), KEEP(r_protect(x)), KEEP(vctrs_arg(arg)), ffi_call)); r_eval(stop_call, vctrs_ns_env); r_stop_unreachable(); } // [[ include("vctrs.h") ]] void stop_assert_size(r_ssize actual, r_ssize required, struct vctrs_arg* arg, struct r_lazy call) { r_obj* ffi_call = KEEP(r_lazy_eval(call)); ffi_call = KEEP(r_expr_protect(ffi_call)); r_obj* syms[5] = { syms_actual, syms_required, r_syms.arg, r_syms.call, NULL }; r_obj* args[5] = { KEEP(r_int(actual)), KEEP(r_int(required)), KEEP(vctrs_arg(arg)), ffi_call, NULL }; r_obj* stop_call = KEEP(r_call_n(syms_stop_assert_size, syms, args)); r_eval(stop_call, vctrs_ns_env); never_reached("stop_assert_size"); } // [[ include("vctrs.h") ]] void stop_incompatible_type(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, bool cast) { SEXP syms[6] = { syms_x, syms_y, syms_x_arg, syms_y_arg, syms_action, NULL }; SEXP args[6] = { PROTECT(r_protect(x)), PROTECT(r_protect(y)), PROTECT(vctrs_arg(x_arg)), PROTECT(vctrs_arg(y_arg)), cast ? chrs_convert : chrs_combine, NULL }; SEXP call = PROTECT(r_call_n(syms_stop_incompatible_type, syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_incompatible_type"); } r_no_return void stop_incompatible_size(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { r_obj* syms[8] = { syms_x, syms_y, syms_x_size, syms_y_size, syms_x_arg, syms_y_arg, r_syms.call, NULL }; r_obj* args[8] = { KEEP(r_protect(x)), KEEP(r_protect(y)), KEEP(r_int(x_size)), KEEP(r_int(y_size)), KEEP(vctrs_arg(x_arg)), KEEP(vctrs_arg(y_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_incompatible_size, syms, args)); r_eval(ffi_call, vctrs_ns_env); r_stop_unreachable(); } void stop_recycle_incompatible_size(r_ssize x_size, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call) { r_obj* syms[5] = { r_sym("x_size"), r_sym("size"), r_sym("x_arg"), syms_call, NULL }; r_obj* args[5] = { KEEP(r_int(x_size)), KEEP(r_int(size)), KEEP(vctrs_arg(x_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* stop_call = KEEP(r_call_n(r_sym("stop_recycle_incompatible_size"), syms, args)); r_eval(stop_call, vctrs_ns_env); never_reached("stop_recycle_incompatible_size"); } void stop_incompatible_shape(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, int axis, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { SEXP syms[8] = { r_sym("x"), r_sym("y"), r_sym("x_size"), r_sym("y_size"), r_sym("axis"), r_sym("x_arg"), r_sym("y_arg"), NULL }; SEXP args[8] = { PROTECT(r_protect(x)), PROTECT(r_protect(y)), PROTECT(r_int(x_size)), PROTECT(r_int(y_size)), PROTECT(r_int(axis)), PROTECT(vctrs_arg(p_x_arg)), PROTECT(vctrs_arg(p_y_arg)), NULL }; SEXP call = PROTECT(r_call_n(r_sym("stop_incompatible_shape"), syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_incompatible_shape"); } void stop_corrupt_factor_levels(SEXP x, struct vctrs_arg* arg) { SEXP call = PROTECT(Rf_lang3(Rf_install("stop_corrupt_factor_levels"), PROTECT(r_protect(x)), PROTECT(vctrs_arg(arg)))); Rf_eval(call, vctrs_ns_env); never_reached("stop_corrupt_factor_levels"); } void stop_corrupt_ordered_levels(SEXP x, struct vctrs_arg* arg) { SEXP call = PROTECT(Rf_lang3(Rf_install("stop_corrupt_ordered_levels"), PROTECT(r_protect(x)), PROTECT(vctrs_arg(arg)))); Rf_eval(call, vctrs_ns_env); never_reached("stop_corrupt_ordered_levels"); } vctrs/src/assert.c0000644000176200001440000000716614401377400013661 0ustar liggesusers#include "vctrs.h" #include "decl/assert-decl.h" r_obj* ffi_obj_check_vector(r_obj* x, r_obj* frame) { struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_lazy = { .x = r_syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); obj_check_vector(x, &arg, call); return r_null; } void obj_check_vector(r_obj* x, struct vctrs_arg* arg, struct r_lazy call) { if (!obj_is_vector(x)) { stop_scalar_type(x, arg, call); } } r_obj* ffi_vec_check_size(r_obj* x, r_obj* ffi_size, r_obj* frame) { struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_lazy = { .x = r_syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); const r_ssize size = r_arg_as_ssize(ffi_size, "size"); vec_check_size(x, size, &arg, call); return r_null; } void vec_check_size(r_obj* x, r_ssize size, struct vctrs_arg* arg, struct r_lazy call) { const r_ssize x_size = vec_size_3(x, arg, call); if (x_size != size) { stop_assert_size(x_size, size, arg, call); } } static r_no_return void stop_non_list_type(r_obj* x, struct vctrs_arg* arg, struct r_lazy call) { r_eval_with_xyz(KEEP(r_parse("stop_non_list_type(x, y, z)")), x, KEEP(vctrs_arg(arg)), KEEP(r_lazy_eval(call)), vctrs_ns_env); r_stop_unreachable(); } r_obj* ffi_check_list(r_obj* x, r_obj* frame) { struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_data = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_data); obj_check_list(x, &arg, call); return r_null; } void obj_check_list(r_obj* x, struct vctrs_arg* arg, struct r_lazy call) { if (!obj_is_list(x)) { stop_non_list_type(x, arg, call); } } r_obj* ffi_list_check_all_vectors(r_obj* x, r_obj* frame) { // This is an internal error obj_check_list(x, vec_args.x, (struct r_lazy) {.x = frame, .env = r_null }); struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_caller_data = { .x = syms.arg, .env = frame }; struct vctrs_arg arg_caller = new_lazy_arg(&arg_caller_data); r_ssize i = 0; struct vctrs_arg* arg = new_subscript_arg_vec(&arg_caller, x, &i); KEEP(arg->shelter); r_ssize n = r_length(x); r_obj* const * v_x = r_list_cbegin(x); for (; i < n; ++i) { obj_check_vector(v_x[i], arg, call); } FREE(1); return r_null; } r_obj* ffi_list_check_all_size(r_obj* xs, r_obj* ffi_size, r_obj* frame) { // This is an internal error obj_check_list(xs, vec_args.x, (struct r_lazy) {.x = frame, .env = r_null }); struct r_lazy arg_lazy = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); struct r_lazy call = { .x = r_syms.call, .env = frame }; r_ssize size = r_arg_as_ssize(ffi_size, "size"); list_check_all_size(xs, size, &arg, call); return r_null; } static void list_check_all_size(r_obj* xs, r_ssize size, struct vctrs_arg* p_arg, struct r_lazy call) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } r_ssize i = 0; r_ssize xs_size = r_length(xs); r_obj* xs_names = r_names(xs); r_obj* const* v_xs = r_list_cbegin(xs); struct vctrs_arg* p_x_arg = new_subscript_arg(p_arg, xs_names, xs_size, &i); KEEP(p_x_arg->shelter); for (; i < xs_size; ++i) { vec_check_size(v_xs[i], size, p_x_arg, call); } FREE(1); } vctrs/src/globals.c0000644000176200001440000000645014402367170014002 0ustar liggesusers#include "vctrs.h" struct syms syms; struct strings strings; struct chrs chrs; struct fns fns; struct vec_args vec_args; struct lazy_args lazy_args; struct lazy_calls lazy_calls; struct r_dyn_array* globals_shelter = NULL; #define INIT_ARG(ARG) \ static struct vctrs_arg ARG; ARG = new_wrapper_arg(NULL, #ARG); \ vec_args.ARG = &ARG #define INIT_ARG2(ARG, STR) \ static struct vctrs_arg ARG; ARG = new_wrapper_arg(NULL, STR); \ vec_args.ARG = &ARG // Defines both a string and a length 1 character vector #define INIT_STRING(ARG) \ strings.ARG = r_str(#ARG); \ r_dyn_list_push_back(globals_shelter, strings.ARG); \ chrs.ARG = r_chr(#ARG); \ r_dyn_list_push_back(globals_shelter, chrs.ARG); #define INIT_LAZY_ARG(ARG) \ lazy_args.ARG = (struct r_lazy) { .x = r_chr(#ARG), .env = r_null }; \ r_dyn_list_push_back(globals_shelter, lazy_calls.ARG.x) #define INIT_LAZY_ARG_2(ARG, STR) \ lazy_args.ARG = (struct r_lazy) { .x = r_chr(STR), .env = r_null }; \ r_dyn_list_push_back(globals_shelter, lazy_args.ARG.x) #define INIT_CALL(ARG) \ lazy_calls.ARG = (struct r_lazy) { .x = r_parse(#ARG "()"), .env = r_null }; \ r_dyn_list_push_back(globals_shelter, lazy_calls.ARG.x) void vctrs_init_globals(r_obj* ns) { size_t n_strings = sizeof(struct lazy_calls) / sizeof(struct r_lazy); size_t n_lazy_calls = sizeof(struct strings) / sizeof(r_obj*); size_t n_globals = n_strings + n_lazy_calls; globals_shelter = r_new_dyn_vector(R_TYPE_list, n_globals); r_preserve(globals_shelter->shelter); // Symbols ----------------------------------------------------------- syms.arg = r_sym("arg"); syms.dot_arg = r_sym(".arg"); syms.dot_call = r_sym(".call"); syms.dot_error_arg = r_sym(".error_arg"); syms.dot_error_call = r_sym(".error_call"); syms.haystack_arg = r_sym("haystack_arg"); syms.needles_arg = r_sym("needles_arg"); syms.recurse = r_sym("recurse"); syms.repair_arg = r_sym("repair_arg"); syms.times_arg = r_sym("times_arg"); syms.to_arg = r_sym("to_arg"); syms.value_arg = r_sym("value_arg"); syms.x_arg = r_sym("x_arg"); syms.y_arg = r_sym("y_arg"); // Strings and characters -------------------------------------------- INIT_STRING(AsIs); INIT_STRING(repair); // Args -------------------------------------------------------------- INIT_ARG2(dot_name_repair, ".name_repair"); INIT_ARG2(dot_ptype, ".ptype"); INIT_ARG2(dot_size, ".size"); INIT_ARG2(empty, ""); INIT_ARG(i); INIT_ARG(max_fill); INIT_ARG(n); INIT_ARG(value); INIT_ARG(x); INIT_ARG(indices); INIT_ARG(sizes); // Lazy args --------------------------------------------------------- INIT_LAZY_ARG_2(dot_name_repair, ".name_repair"); // Calls ------------------------------------------------------------- INIT_CALL(vec_assign); INIT_CALL(vec_assign_params); INIT_CALL(vec_assign_seq); INIT_CALL(vec_init); INIT_CALL(vec_ptype_finalise); INIT_CALL(vec_recycle); INIT_CALL(vec_recycle_common); INIT_CALL(vec_size); INIT_CALL(vec_size_common); INIT_CALL(list_all_size); } vctrs/src/ptype2.c0000644000176200001440000002105014362266120013571 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/ptype2-decl.h" // [[ register() ]] r_obj* ffi_ptype2_opts(r_obj* x, r_obj* y, r_obj* ffi_opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = r_syms.call, .env = frame, }; struct ptype2_opts opts = new_ptype2_opts(x, y, &x_arg, &y_arg, call, ffi_opts); int _left; return vec_ptype2_opts(&opts, &_left); } r_obj* vec_ptype2_opts_impl(const struct ptype2_opts* opts, int* left, bool first_pass) { r_obj* x = opts->x; r_obj* y = opts->y; struct vctrs_arg* x_arg = opts->p_x_arg; struct vctrs_arg* y_arg = opts->p_y_arg; enum vctrs_type x_type = vec_typeof(x); enum vctrs_type y_type = vec_typeof(y); if (x_type == VCTRS_TYPE_null) { *left = y == r_null; return vec_ptype2_from_unspecified(opts, x_type, y, y_arg); } if (y_type == VCTRS_TYPE_null) { *left = x == r_null; return vec_ptype2_from_unspecified(opts, x_type, x, x_arg); } if (x_type == VCTRS_TYPE_unspecified) { return vec_ptype2_from_unspecified(opts, y_type, y, y_arg); } if (y_type == VCTRS_TYPE_unspecified) { return vec_ptype2_from_unspecified(opts, x_type, x, x_arg); } if (x_type == VCTRS_TYPE_scalar) { stop_scalar_type(x, x_arg, opts->call); } if (y_type == VCTRS_TYPE_scalar) { stop_scalar_type(y, y_arg, opts->call); } if (x_type != VCTRS_TYPE_s3 && y_type != VCTRS_TYPE_s3) { return vec_ptype2_switch_native(opts, x_type, y_type, left); } if (x_type == VCTRS_TYPE_s3 || y_type == VCTRS_TYPE_s3) { r_obj* out = KEEP(vec_ptype2_dispatch_native(opts, x_type, y_type, left)); if (out != r_null) { out = vec_shaped_ptype(out, x, y, x_arg, y_arg); FREE(1); return out; } FREE(1); } // Try native dispatch again with prototypes, in case the prototype // is another type. FIXME: Use R-level callback instead. if (first_pass) { struct ptype2_opts mut_opts = *opts; mut_opts.x = KEEP(vec_ptype(x, x_arg, opts->call)); mut_opts.y = KEEP(vec_ptype(y, y_arg, opts->call)); r_obj* out = vec_ptype2_opts_impl(&mut_opts, left, false); FREE(2); return out; } return vec_ptype2_dispatch_s3(opts); } r_obj* vec_ptype2_opts(const struct ptype2_opts* opts, int* left) { return vec_ptype2_opts_impl(opts, left, true); } static r_obj* vec_ptype2_switch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left) { r_obj* x = opts->x; r_obj* y = opts->y; struct vctrs_arg* x_arg = opts->p_x_arg; struct vctrs_arg* y_arg = opts->p_y_arg; enum vctrs_type2 type2 = vec_typeof2_impl(x_type, y_type, left); switch (type2) { case VCTRS_TYPE2_null_null: return r_null; case VCTRS_TYPE2_logical_logical: return vec_shaped_ptype(r_globals.empty_lgl, x, y, x_arg, y_arg); case VCTRS_TYPE2_logical_integer: case VCTRS_TYPE2_integer_integer: return vec_shaped_ptype(r_globals.empty_int, x, y, x_arg, y_arg); case VCTRS_TYPE2_logical_double: case VCTRS_TYPE2_integer_double: case VCTRS_TYPE2_double_double: return vec_shaped_ptype(r_globals.empty_dbl, x, y, x_arg, y_arg); case VCTRS_TYPE2_integer_complex: case VCTRS_TYPE2_double_complex: case VCTRS_TYPE2_complex_complex: return vec_shaped_ptype(r_globals.empty_cpl, x, y, x_arg, y_arg); case VCTRS_TYPE2_character_character: return vec_shaped_ptype(r_globals.empty_chr, x, y, x_arg, y_arg); case VCTRS_TYPE2_raw_raw: return vec_shaped_ptype(r_globals.empty_raw, x, y, x_arg, y_arg); case VCTRS_TYPE2_list_list: return vec_shaped_ptype(r_globals.empty_list, x, y, x_arg, y_arg); case VCTRS_TYPE2_dataframe_dataframe: return df_ptype2(opts); default: return vec_ptype2_dispatch_s3(opts); } } /** * Return non-unspecified type. * * This is normally the `vec_ptype()` of the other input, but if the * common class fallback is enabled we return the `vec_ptype2()` of * this input with itself. This way we may return a fallback sentinel which can be * treated specially, for instance in `vec_c(NA, x, NA)`. */ r_obj* vec_ptype2_from_unspecified(const struct ptype2_opts* opts, enum vctrs_type other_type, r_obj* other, struct vctrs_arg* other_arg) { if (other_type == VCTRS_TYPE_unspecified || other_type == VCTRS_TYPE_null) { return vec_ptype(other, other_arg, opts->call); } if (opts->fallback.s3) { const struct ptype2_opts self_self_opts = (const struct ptype2_opts) { .x = other, .y = other, .p_x_arg = other_arg, .p_y_arg = other_arg, .fallback = opts->fallback }; int _left = 0; return vec_ptype2_opts(&self_self_opts, &_left); } return vec_ptype(other, other_arg, opts->call); } struct is_coercible_data { const struct ptype2_opts* opts; int* dir; r_obj* out; }; static void vec_is_coercible_cb(void* data_) { struct is_coercible_data* data = (struct is_coercible_data*) data_; data->out = vec_ptype2_opts(data->opts, data->dir); } static void vec_is_coercible_e(const struct ptype2_opts* opts, int* dir, ERR* err) { struct is_coercible_data data = { .opts = opts, .dir = dir, .out = r_null }; *err = r_try_catch(&vec_is_coercible_cb, &data, syms_vctrs_error_incompatible_type, NULL, NULL); } bool vec_is_coercible(const struct ptype2_opts* opts, int* dir) { ERR err = NULL; vec_is_coercible_e(opts, dir, &err); return !err; } r_obj* vec_ptype2_e(const struct ptype2_opts* opts, int* dir, ERR* err) { struct is_coercible_data data = { .opts = opts, .dir = dir, .out = r_null }; *err = r_try_catch(&vec_is_coercible_cb, &data, syms_vctrs_error_incompatible_type, NULL, NULL); return data.out; } // [[ register() ]] r_obj* ffi_is_coercible(r_obj* x, r_obj* y, r_obj* opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &x_arg, &y_arg, call, opts); int dir = 0; return r_lgl(vec_is_coercible(&c_opts, &dir)); } // [[ register() ]] r_obj* ffi_ptype2(r_obj* x, r_obj* y, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; int _left; return vec_ptype2(x, y, &x_arg, &y_arg, &_left, call); } struct ptype2_opts new_ptype2_opts(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, r_obj* opts) { return (struct ptype2_opts) { .x = x, .y = y, .p_x_arg = p_x_arg, .p_y_arg = p_y_arg, .call = call, .fallback = new_fallback_opts(opts) }; } struct fallback_opts new_fallback_opts(r_obj* opts) { return (struct fallback_opts) { .s3 = r_int_get(r_list_get(opts, 0), 0) }; } void vctrs_init_ptype2(r_obj* ns) { } vctrs/src/c-unchop.h0000644000176200001440000000012614315060310014057 0ustar liggesusers#ifndef VCTRS_C_UNCHOP_H #define VCTRS_C_UNCHOP_H #include "vctrs-core.h" #endif vctrs/src/names.h0000644000176200001440000000412114317050265013457 0ustar liggesusers#ifndef VCTRS_NAMES_H #define VCTRS_NAMES_H #include "vctrs-core.h" #include "owned.h" #include "utils.h" r_obj* vec_names(r_obj* x); r_obj* vec_names2(r_obj* x); r_obj* vec_proxy_names(r_obj* x); r_obj* vec_unique_names(r_obj* x, bool quiet); r_obj* vec_unique_colnames(r_obj* x, bool quiet); r_obj* outer_names(r_obj* names, r_obj* outer, r_ssize n); r_obj* apply_name_spec(r_obj* name_spec, r_obj* outer, r_obj* inner, r_ssize n); enum name_repair_type { NAME_REPAIR_none = 0, NAME_REPAIR_minimal, NAME_REPAIR_unique, NAME_REPAIR_universal, NAME_REPAIR_check_unique, NAME_REPAIR_custom = 99 }; struct name_repair_opts { r_obj* shelter; enum name_repair_type type; struct r_lazy name_repair_arg; r_obj* fn; bool quiet; struct r_lazy call; }; struct name_repair_opts new_name_repair_opts(r_obj* name_repair, struct r_lazy name_repair_arg, bool quiet, struct r_lazy call); r_obj* vec_as_universal_names(r_obj* names, bool quiet); r_obj* vec_as_custom_names(r_obj* names, const struct name_repair_opts* opts); extern struct name_repair_opts unique_repair_default_opts; extern struct name_repair_opts unique_repair_silent_opts; extern struct name_repair_opts no_repair_opts; static struct name_repair_opts const * const p_unique_repair_default_opts = &unique_repair_default_opts; static struct name_repair_opts const * const p_unique_repair_silent_opts = &unique_repair_silent_opts; static struct name_repair_opts const * const p_no_repair_opts = &no_repair_opts; r_obj* vec_as_names(r_obj* names, const struct name_repair_opts* opts); const char* name_repair_arg_as_c_string(enum name_repair_type type); bool is_unique_names(r_obj* names); r_obj* vec_as_unique_names(r_obj* names, bool quiet); r_obj* r_seq_chr(const char* prefix, r_ssize n); r_obj* r_chr_paste_prefix(r_obj* names, const char* prefix, const char* sep); r_obj* vec_set_names(r_obj* x, r_obj* names); r_obj* vec_proxy_set_names(r_obj* x, r_obj* names, const enum vctrs_owned owned); #endif vctrs/src/size.c0000644000176200001440000002062014401377400013320 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/size-decl.h" // [[ register() ]] r_obj* ffi_size(r_obj* x, r_obj* frame) { struct r_lazy call = { .x = frame, .env = r_null }; return r_len(vec_size_3(x, vec_args.x, call)); } r_ssize vec_size(r_obj* x) { return vec_size_3(x, vec_args.x, lazy_calls.vec_size); } r_ssize vec_size_3(r_obj* x, struct vctrs_arg* p_arg, struct r_lazy call) { struct vec_error_opts err = { .p_arg = p_arg, .call = call }; return vec_size_opts(x, &err); } static r_ssize vec_size_opts(r_obj* x, const struct vec_error_opts* opts) { struct vctrs_proxy_info info = vec_proxy_info(x); KEEP(info.shelter); r_obj* data = info.proxy; r_ssize size; switch (info.type) { case VCTRS_TYPE_null: size = 0; break; case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: case VCTRS_TYPE_list: size = vec_raw_size(data); break; case VCTRS_TYPE_dataframe: size = df_size(data); break; default: stop_scalar_type(x, opts->p_arg, opts->call); } FREE(1); return size; } static r_ssize vec_raw_size(r_obj* x) { r_obj* dimensions = r_dim(x); if (dimensions == r_null || r_length(dimensions) == 0) { return r_length(x); } if (r_typeof(dimensions) != R_TYPE_integer) { r_stop_internal("Corrupt vector, `dim` attribute is not an integer vector."); } return r_int_get(dimensions, 0); } // [[ register() ]] r_obj* ffi_list_sizes(r_obj* x, r_obj* frame) { struct vec_error_opts err = { .p_arg = vec_args.x, .call = { .x = frame, .env = r_null } }; return list_sizes(x, &err); } r_obj* list_sizes(r_obj* x, const struct vec_error_opts* opts) { if (!obj_is_list(x)) { r_abort_lazy_call(opts->call, "%s must be a list, not %s.", r_c_str_format_error_arg("x"), r_obj_type_friendly(x)); } r_ssize size = vec_size(x); r_obj* const * v_x = r_list_cbegin(x); r_obj* out = KEEP(r_alloc_integer(size)); int* v_out = r_int_begin(out); r_obj* names = vec_names(x); r_attrib_poke_names(out, names); r_ssize i = 0; struct vctrs_arg* arg = new_subscript_arg_vec(opts->p_arg, x, &i); KEEP(arg->shelter); struct vec_error_opts local_opts = *opts; local_opts.p_arg = arg; for (; i < size; ++i) { v_out[i] = vec_size_opts(v_x[i], &local_opts); } FREE(2); return out; } r_obj* ffi_list_all_size(r_obj* xs, r_obj* ffi_size, r_obj* frame) { // This is an internal error obj_check_list(xs, vec_args.x, (struct r_lazy) {.x = frame, .env = r_null }); r_ssize size = r_arg_as_ssize(ffi_size, "size"); return r_lgl(list_all_size(xs, size)); } static bool list_all_size(r_obj* xs, r_ssize size) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } r_ssize i = 0; r_ssize xs_size = r_length(xs); r_obj* xs_names = r_names(xs); r_obj* const* v_xs = r_list_cbegin(xs); struct vctrs_arg* p_x_arg = new_subscript_arg(vec_args.x, xs_names, xs_size, &i); KEEP(p_x_arg->shelter); bool out = true; for (; i < xs_size; ++i) { r_obj* x = v_xs[i]; // Scalar list elements throw an error internal to `list_all_size()` r_ssize x_size = vec_size_3(x, p_x_arg, lazy_calls.list_all_size); if (x_size != size) { out = false; break; } } FREE(1); return out; } r_ssize df_rownames_size(r_obj* x) { for (r_obj* attr = r_attrib(x); attr != r_null; attr = r_node_cdr(attr)) { if (r_node_tag(attr) != r_syms.row_names) { continue; } return rownames_size(r_node_car(attr)); } return -1; } // For performance, avoid Rf_getAttrib() because it automatically transforms // the rownames into an integer vector r_ssize df_size(r_obj* x) { r_ssize n = df_rownames_size(x); if (n < 0) { r_stop_internal("Corrupt data frame: row.names are missing"); } return n; } // Supports bare lists as well r_ssize df_raw_size(r_obj* x) { r_ssize n = df_rownames_size(x); if (n >= 0) { return n; } return df_raw_size_from_list(x); } r_ssize df_raw_size_from_list(r_obj* x) { if (r_length(x) >= 1) { return vec_size(r_list_get(x, 0)); } else { return 0; } } // [[ register() ]] SEXP vctrs_df_size(SEXP x) { return r_int(df_raw_size(x)); } r_obj* vec_check_recycle(r_obj* x, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call) { if (x == r_null) { return r_null; } r_ssize n_x = vec_size(x); if (n_x == size) { return x; } if (n_x == 1L) { r_obj* i = KEEP(compact_rep(1, size)); r_obj* out = vec_slice_unsafe(x, i); FREE(1); return out; } stop_recycle_incompatible_size(n_x, size, x_arg, call); } // [[ register() ]] r_obj* ffi_recycle(r_obj* x, r_obj* size_obj, r_obj* frame) { if (x == r_null || size_obj == r_null) { return r_null; } struct r_lazy recycle_call = { .x = frame, .env = r_null }; size_obj = KEEP(vec_cast(size_obj, r_globals.empty_int, vec_args.empty, vec_args.empty, recycle_call)); R_len_t size = r_int_get(size_obj, 0); FREE(1); struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; return vec_check_recycle(x, size, &x_arg, call); } r_obj* vec_recycle_fallback(r_obj* x, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call) { if (x == r_null) { return r_null; } r_ssize x_size = vec_size(x); if (x_size == size) { return x; } if (x_size == 1) { r_obj* subscript = KEEP(r_alloc_integer(size)); r_int_fill(subscript, 1, size); r_obj* out = vec_slice_fallback(x, subscript); FREE(1); return out; } stop_recycle_incompatible_size(x_size, size, x_arg, call); } r_obj* ffi_as_short_length(r_obj* n, r_obj* frame) { struct r_lazy call = { .x = frame, .env = r_null }; struct r_lazy arg_lazy = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); return r_len(vec_as_short_length(n, &arg, call)); } r_ssize vec_as_short_length(r_obj* n, struct vctrs_arg* p_arg, struct r_lazy call) { r_ssize out = vec_as_ssize(n, p_arg, call); if (out < 0) { r_abort_lazy_call(call, "%s must be a positive number or zero.", vec_arg_format(p_arg)); } if (out > INT_MAX) { // Ideally we'd mention long vector support in an info bullets r_abort_lazy_call(call, "%s is too large a number and long vectors are not supported.", vec_arg_format(p_arg)); } return out; } // Adapted from `r_arg_as_ssize()` r_ssize vec_as_ssize(r_obj* n, struct vctrs_arg* p_arg, struct r_lazy call) { if (r_is_object(n)) { struct cast_opts cast_opts = { .x = n, .to = r_globals.empty_dbl, .p_x_arg = p_arg, .call = call }; ERR err = NULL; n = vec_cast_e(&cast_opts, &err); if (err) { goto invalid; } } KEEP(n); switch (r_typeof(n)) { case R_TYPE_double: { if (r_length(n) != 1) { goto invalid; } double out = r_dbl_get(n, 0); if (out == r_globals.na_int) { goto invalid; } if (out != floor(out)) { r_abort_lazy_call(call, "%s must be a whole number, not a fractional number.", vec_arg_format(p_arg)); } if (out > R_SSIZE_MAX) { r_abort_lazy_call(call, "%s is too large a number.", vec_arg_format(p_arg)); } FREE(1); return (r_ssize) out; } case R_TYPE_integer: { if (r_length(n) != 1) { goto invalid; } int out = r_int_get(n, 0); if (out == r_globals.na_int) { goto invalid; } FREE(1); return (r_ssize) out; } invalid: default: r_abort_lazy_call(call, "%s must be a single number, not %s.", vec_arg_format(p_arg), r_obj_type_friendly_length(n)); } } vctrs/src/order-groups.h0000644000176200001440000001241414422506663015015 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_GROUPS_H #define VCTRS_ORDER_GROUPS_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- // This seems to be a reasonable default to start with for tracking group sizes // and is what base R uses. It is expanded by 2x every time we need to // reallocate. It is also capped to the size of `x`. #define GROUP_DATA_SIZE_DEFAULT 100000 // ----------------------------------------------------------------------------- /* * Info related to 1 column / vector worth of groupings * * @member self A RAWSXP for the struct memory. * @member data An integer vector of group sizes. * @member p_data A pointer to `data`. * @member data_pi The protection index for `data` which allows us to * `REPROTECT()` on the fly. * @member data_size The current allocated size of `data`. * @member n_groups The current number of groups seen so far. * Always `<= data_size`. * @member max_group_size The maximum group size seen so far. */ struct group_info { SEXP self; SEXP data; int* p_data; PROTECT_INDEX data_pi; r_ssize data_size; r_ssize n_groups; r_ssize max_group_size; }; #define PROTECT_GROUP_INFO(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->data, &(p_info)->data_pi); \ *(p_n) += 2; \ } while(0) // ----------------------------------------------------------------------------- /* * `group_infos` contains information about 2 `group_info` structs. It contains * a pointer which points to 2 `group_info` pointers. * * For a single atomic vector, `current = 0` is always set and only one of the * structs is ever used. * * For a data frame with multiple columns, after every column `current` is * flipped between 0 and 1, giving us a chance to read the group information * off the previous column (which allows us to chunk the current column into * groups) while also updating the group information of the chunks of * the current one. * * @member self A RAWSXP for the struct memory. * @member p_p_group_info_data A RAWSXP for the p_p_group_info array memory. * @member p_p_group_info A pointer to two `group_info` pointers. * @member max_data_size The maximum data size that can be allocated when * reallocating an individual `p_group_info`. This is set to the size of * `x`. * @member current The current `group_info` pointer we are using. This is * either 0 or 1. * @member force_groups Was group information requested by the user? If so, we * always have to track group information. * @member ignore_groups Should group tracking be ignored? This is the default * for atomic vectors unless groups information is explicitly requested. For * data frames, this is true over all columns except the last one (for * performance) unless `force_groups` is true. */ struct group_infos { SEXP self; SEXP p_p_group_info_data; struct group_info** p_p_group_info; r_ssize max_data_size; int current; bool force_groups; bool ignore_groups; }; #define PROTECT_GROUP_INFOS(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT((p_info)->p_p_group_info_data); \ *(p_n) += 2; \ PROTECT_GROUP_INFO((p_info)->p_p_group_info[0], (p_n)); \ PROTECT_GROUP_INFO((p_info)->p_p_group_info[1], (p_n)); \ } while(0) // ----------------------------------------------------------------------------- struct group_info* new_group_info(void); struct group_infos* new_group_infos(struct group_info* p_group_info0, struct group_info* p_group_info1, r_ssize max_data_size, bool force_groups, bool ignore_groups); void groups_swap(struct group_infos* p_group_infos); // ----------------------------------------------------------------------------- /* * Extract the current `group_info*` */ static inline struct group_info* groups_current(struct group_infos* p_group_infos) { return p_group_infos->p_p_group_info[p_group_infos->current]; } // ----------------------------------------------------------------------------- void groups_size_push(r_ssize size, struct group_infos* p_group_infos); /* * Inline version of `groups_size_push()` that only attempts to push if * we aren't ignoring groups. Important for this to be inline for performance, * especially with atomic vectors where order generally isn't required. */ static inline void groups_size_maybe_push(r_ssize size, struct group_infos* p_group_infos) { if (p_group_infos->ignore_groups) { return; } else { groups_size_push(size, p_group_infos); } } // ----------------------------------------------------------------------------- #endif vctrs/src/utils-dispatch.c0000644000176200001440000000553014416313204015304 0ustar liggesusers#include "vctrs.h" #include "decl/utils-dispatch-decl.h" // [[ register() ]] r_obj* ffi_class_type(r_obj* x) { return r_chr(class_type_as_str(class_type(x))); } enum vctrs_class_type class_type(r_obj* x) { if (!r_is_object(x)) { return VCTRS_CLASS_none; } r_obj* cls = KEEP(r_class(x)); // Avoid corrupt objects where `x` is an object, but the class is NULL if (cls == r_null) { FREE(1); return VCTRS_CLASS_none; } enum vctrs_class_type type = class_type_impl(cls); FREE(1); return type; } static enum vctrs_class_type class_type_impl(r_obj* cls) { int n = r_length(cls); r_obj* const* p = r_chr_cbegin(cls); // First check for bare types for which we know how many strings are // the classes composed of switch (n) { case 1: { r_obj* p0 = p[0]; if (p0 == strings_data_frame) { return VCTRS_CLASS_bare_data_frame; } else if (p0 == strings_factor) { return VCTRS_CLASS_bare_factor; } else if (p0 == strings_date) { return VCTRS_CLASS_bare_date; } else if (p0 == strings.AsIs) { return VCTRS_CLASS_bare_asis; } break; } case 2: { r_obj* p0 = p[0]; r_obj* p1 = p[1]; if (p0 == strings_ordered && p1 == strings_factor) { return VCTRS_CLASS_bare_ordered; } if (p1 == strings_posixt) { if (p0 == strings_posixct) { return VCTRS_CLASS_bare_posixct; } else if (p0 == strings_posixlt) { return VCTRS_CLASS_bare_posixlt; } } break; } case 3: { if (p[0] == strings_tbl_df && p[1] == strings_tbl && p[2] == strings_data_frame) { return VCTRS_CLASS_bare_tibble; } break; }} // Now check for inherited classes p = p + n - 1; r_obj* last = *p; if (last == strings_data_frame) { return VCTRS_CLASS_data_frame; } else if (last == strings_list) { return VCTRS_CLASS_list; } return VCTRS_CLASS_unknown; } static const char* class_type_as_str(enum vctrs_class_type type) { switch (type) { case VCTRS_CLASS_list: return "list"; case VCTRS_CLASS_data_frame: return "data_frame"; case VCTRS_CLASS_bare_asis: return "bare_asis"; case VCTRS_CLASS_bare_data_frame: return "bare_data_frame"; case VCTRS_CLASS_bare_tibble: return "bare_tibble"; case VCTRS_CLASS_bare_factor: return "bare_factor"; case VCTRS_CLASS_bare_ordered: return "bare_ordered"; case VCTRS_CLASS_bare_date: return "bare_date"; case VCTRS_CLASS_bare_posixct: return "bare_posixct"; case VCTRS_CLASS_bare_posixlt: return "bare_posixlt"; case VCTRS_CLASS_unknown: return "unknown"; case VCTRS_CLASS_none: return "none"; } never_reached("class_type_as_str"); } bool vec_is_partial(r_obj* x) { return x == r_null || (r_typeof(x) == R_TYPE_list && r_inherits(x, "vctrs_partial")); } // [[ register() ]] r_obj* ffi_is_partial(r_obj* x) { return r_lgl(vec_is_partial(x)); } vctrs/src/typeof2.c0000644000176200001440000004620314315060310013734 0ustar liggesusers#include "vctrs.h" /** * Type for symmetric binary dispatch. * * Permuting `x` and `y` does not change the typeof2. * * After adding entries in `vec_typeof2()`, adjust the list of types * in helper-types.R. This will ensure the consistency of the new * entries. */ /** * [[ include("utils.h") ]] * * @param left Output parameter. Set to 1 when the common type comes * from the left, 0 when it comes from the right, and -1 when it * comes from both sides. This means that "left" is the default * when coerced to a boolean value. */ enum vctrs_type2 vec_typeof2_impl(enum vctrs_type type_x, enum vctrs_type type_y, int* left) { switch (type_x) { case VCTRS_TYPE_null: { switch (type_y) { case VCTRS_TYPE_null: *left = -1; return VCTRS_TYPE2_null_null; case VCTRS_TYPE_unspecified: *left = 0; return VCTRS_TYPE2_null_unspecified; case VCTRS_TYPE_logical: *left = 0; return VCTRS_TYPE2_null_logical; case VCTRS_TYPE_integer: *left = 0; return VCTRS_TYPE2_null_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_null_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_null_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_null_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_null_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_null_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_null_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_null_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_null_scalar; } } case VCTRS_TYPE_unspecified: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_unspecified; case VCTRS_TYPE_unspecified: *left = -1; return VCTRS_TYPE2_unspecified_unspecified; case VCTRS_TYPE_logical: *left = 0; return VCTRS_TYPE2_unspecified_logical; case VCTRS_TYPE_integer: *left = 0; return VCTRS_TYPE2_unspecified_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_unspecified_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_unspecified_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_unspecified_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_unspecified_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_unspecified_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_unspecified_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_unspecified_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_unspecified_scalar; } } case VCTRS_TYPE_logical: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_logical; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_logical; case VCTRS_TYPE_logical: *left = -1; return VCTRS_TYPE2_logical_logical; case VCTRS_TYPE_integer: *left = 0; return VCTRS_TYPE2_logical_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_logical_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_logical_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_logical_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_logical_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_logical_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_logical_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_logical_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_logical_scalar; } } case VCTRS_TYPE_integer: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_integer; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_integer; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_integer; case VCTRS_TYPE_integer: *left = -1; return VCTRS_TYPE2_integer_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_integer_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_integer_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_integer_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_integer_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_integer_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_integer_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_integer_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_integer_scalar; } } case VCTRS_TYPE_double: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_double; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_double; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_double; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_double; case VCTRS_TYPE_double: *left = -1; return VCTRS_TYPE2_double_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_double_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_double_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_double_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_double_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_double_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_double_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_double_scalar; } } case VCTRS_TYPE_complex: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_complex; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_complex; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_complex; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_complex; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_complex; case VCTRS_TYPE_complex: *left = -1; return VCTRS_TYPE2_complex_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_complex_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_complex_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_complex_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_complex_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_complex_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_complex_scalar; } } case VCTRS_TYPE_character: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_character; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_character; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_character; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_character; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_character; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_character; case VCTRS_TYPE_character: *left = -1; return VCTRS_TYPE2_character_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_character_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_character_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_character_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_character_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_character_scalar; } } case VCTRS_TYPE_raw: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_raw; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_raw; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_raw; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_raw; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_raw; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_raw; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_raw; case VCTRS_TYPE_raw: *left = -1; return VCTRS_TYPE2_raw_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_raw_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_raw_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_raw_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_raw_scalar; } } case VCTRS_TYPE_list: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_list; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_list; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_list; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_list; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_list; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_list; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_list; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_list; case VCTRS_TYPE_list: *left = -1; return VCTRS_TYPE2_list_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_list_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_list_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_list_scalar; } } case VCTRS_TYPE_dataframe: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_dataframe; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_dataframe; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_dataframe; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_dataframe; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_dataframe; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_dataframe; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_dataframe; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_dataframe; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_list_dataframe; case VCTRS_TYPE_dataframe: *left = -1; return VCTRS_TYPE2_dataframe_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_dataframe_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_dataframe_scalar; } } case VCTRS_TYPE_s3: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_s3; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_s3; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_s3; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_s3; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_s3; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_s3; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_s3; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_s3; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_list_s3; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_dataframe_s3; case VCTRS_TYPE_s3: *left = -1; return VCTRS_TYPE2_S3_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_S3_scalar; } } case VCTRS_TYPE_scalar: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_scalar; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_scalar; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_scalar; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_scalar; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_scalar; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_scalar; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_scalar; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_scalar; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_list_scalar; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_dataframe_scalar; case VCTRS_TYPE_s3: *left = 1; return VCTRS_TYPE2_S3_scalar; case VCTRS_TYPE_scalar: *left = -1; return VCTRS_TYPE2_scalar_scalar; } }} r_stop_unreachable(); } enum vctrs_type2 vec_typeof2(r_obj* x, r_obj* y) { int _; return vec_typeof2_impl(vec_typeof(x), vec_typeof(y), &_); } const char* vctrs_type2_as_str(enum vctrs_type2 type) { switch (type) { case VCTRS_TYPE2_null_null: return "VCTRS_TYPE2_null_null"; case VCTRS_TYPE2_null_logical: return "VCTRS_TYPE2_null_logical"; case VCTRS_TYPE2_null_integer: return "VCTRS_TYPE2_null_integer"; case VCTRS_TYPE2_null_double: return "VCTRS_TYPE2_null_double"; case VCTRS_TYPE2_null_complex: return "VCTRS_TYPE2_null_complex"; case VCTRS_TYPE2_null_character: return "VCTRS_TYPE2_null_character"; case VCTRS_TYPE2_null_raw: return "VCTRS_TYPE2_null_raw"; case VCTRS_TYPE2_null_list: return "VCTRS_TYPE2_null_list"; case VCTRS_TYPE2_null_dataframe: return "VCTRS_TYPE2_null_dataframe"; case VCTRS_TYPE2_null_s3: return "VCTRS_TYPE2_null_s3"; case VCTRS_TYPE2_null_unspecified: return "VCTRS_TYPE2_null_unspecified"; case VCTRS_TYPE2_null_scalar: return "VCTRS_TYPE2_null_scalar"; case VCTRS_TYPE2_unspecified_logical: return "VCTRS_TYPE2_unspecified_logical"; case VCTRS_TYPE2_unspecified_integer: return "VCTRS_TYPE2_unspecified_integer"; case VCTRS_TYPE2_unspecified_double: return "VCTRS_TYPE2_unspecified_double"; case VCTRS_TYPE2_unspecified_complex: return "VCTRS_TYPE2_unspecified_complex"; case VCTRS_TYPE2_unspecified_character: return "VCTRS_TYPE2_unspecified_character"; case VCTRS_TYPE2_unspecified_raw: return "VCTRS_TYPE2_unspecified_raw"; case VCTRS_TYPE2_unspecified_list: return "VCTRS_TYPE2_unspecified_list"; case VCTRS_TYPE2_unspecified_dataframe: return "VCTRS_TYPE2_unspecified_dataframe"; case VCTRS_TYPE2_unspecified_s3: return "VCTRS_TYPE2_unspecified_s3"; case VCTRS_TYPE2_unspecified_unspecified: return "VCTRS_TYPE2_unspecified_unspecified"; case VCTRS_TYPE2_unspecified_scalar: return "VCTRS_TYPE2_unspecified_scalar"; case VCTRS_TYPE2_logical_logical: return "VCTRS_TYPE2_logical_logical"; case VCTRS_TYPE2_logical_integer: return "VCTRS_TYPE2_logical_integer"; case VCTRS_TYPE2_logical_double: return "VCTRS_TYPE2_logical_double"; case VCTRS_TYPE2_logical_complex: return "VCTRS_TYPE2_logical_complex"; case VCTRS_TYPE2_logical_character: return "VCTRS_TYPE2_logical_character"; case VCTRS_TYPE2_logical_raw: return "VCTRS_TYPE2_logical_raw"; case VCTRS_TYPE2_logical_list: return "VCTRS_TYPE2_logical_list"; case VCTRS_TYPE2_logical_dataframe: return "VCTRS_TYPE2_logical_dataframe"; case VCTRS_TYPE2_logical_s3: return "VCTRS_TYPE2_logical_s3"; case VCTRS_TYPE2_logical_scalar: return "VCTRS_TYPE2_logical_scalar"; case VCTRS_TYPE2_integer_integer: return "VCTRS_TYPE2_integer_integer"; case VCTRS_TYPE2_integer_double: return "VCTRS_TYPE2_integer_double"; case VCTRS_TYPE2_integer_complex: return "VCTRS_TYPE2_integer_complex"; case VCTRS_TYPE2_integer_character: return "VCTRS_TYPE2_integer_character"; case VCTRS_TYPE2_integer_raw: return "VCTRS_TYPE2_integer_raw"; case VCTRS_TYPE2_integer_list: return "VCTRS_TYPE2_integer_list"; case VCTRS_TYPE2_integer_dataframe: return "VCTRS_TYPE2_integer_dataframe"; case VCTRS_TYPE2_integer_s3: return "VCTRS_TYPE2_integer_s3"; case VCTRS_TYPE2_integer_scalar: return "VCTRS_TYPE2_integer_scalar"; case VCTRS_TYPE2_double_double: return "VCTRS_TYPE2_double_double"; case VCTRS_TYPE2_double_complex: return "VCTRS_TYPE2_double_complex"; case VCTRS_TYPE2_double_character: return "VCTRS_TYPE2_double_character"; case VCTRS_TYPE2_double_raw: return "VCTRS_TYPE2_double_raw"; case VCTRS_TYPE2_double_list: return "VCTRS_TYPE2_double_list"; case VCTRS_TYPE2_double_dataframe: return "VCTRS_TYPE2_double_dataframe"; case VCTRS_TYPE2_double_s3: return "VCTRS_TYPE2_double_s3"; case VCTRS_TYPE2_double_scalar: return "VCTRS_TYPE2_double_scalar"; case VCTRS_TYPE2_complex_complex: return "VCTRS_TYPE2_complex_complex"; case VCTRS_TYPE2_complex_character: return "VCTRS_TYPE2_complex_character"; case VCTRS_TYPE2_complex_raw: return "VCTRS_TYPE2_complex_raw"; case VCTRS_TYPE2_complex_list: return "VCTRS_TYPE2_complex_list"; case VCTRS_TYPE2_complex_dataframe: return "VCTRS_TYPE2_complex_dataframe"; case VCTRS_TYPE2_complex_s3: return "VCTRS_TYPE2_complex_s3"; case VCTRS_TYPE2_complex_scalar: return "VCTRS_TYPE2_complex_scalar"; case VCTRS_TYPE2_character_character: return "VCTRS_TYPE2_character_character"; case VCTRS_TYPE2_character_raw: return "VCTRS_TYPE2_character_raw"; case VCTRS_TYPE2_character_list: return "VCTRS_TYPE2_character_list"; case VCTRS_TYPE2_character_dataframe: return "VCTRS_TYPE2_character_dataframe"; case VCTRS_TYPE2_character_s3: return "VCTRS_TYPE2_character_s3"; case VCTRS_TYPE2_character_scalar: return "VCTRS_TYPE2_character_scalar"; case VCTRS_TYPE2_raw_raw: return "VCTRS_TYPE2_raw_raw"; case VCTRS_TYPE2_raw_list: return "VCTRS_TYPE2_raw_list"; case VCTRS_TYPE2_raw_dataframe: return "VCTRS_TYPE2_raw_dataframe"; case VCTRS_TYPE2_raw_s3: return "VCTRS_TYPE2_raw_s3"; case VCTRS_TYPE2_raw_scalar: return "VCTRS_TYPE2_raw_scalar"; case VCTRS_TYPE2_list_list: return "VCTRS_TYPE2_list_list"; case VCTRS_TYPE2_list_dataframe: return "VCTRS_TYPE2_list_dataframe"; case VCTRS_TYPE2_list_s3: return "VCTRS_TYPE2_list_s3"; case VCTRS_TYPE2_list_scalar: return "VCTRS_TYPE2_list_scalar"; case VCTRS_TYPE2_dataframe_dataframe: return "VCTRS_TYPE2_dataframe_dataframe"; case VCTRS_TYPE2_dataframe_s3: return "VCTRS_TYPE2_dataframe_s3"; case VCTRS_TYPE2_dataframe_scalar: return "VCTRS_TYPE2_dataframe_scalar"; case VCTRS_TYPE2_S3_s3: return "VCTRS_TYPE2_S3_s3"; case VCTRS_TYPE2_S3_scalar: return "VCTRS_TYPE2_S3_scalar"; case VCTRS_TYPE2_scalar_scalar: return "VCTRS_TYPE2_scalar_scalar"; } r_stop_unreachable(); } r_obj* ffi_typeof2(r_obj* x, r_obj* y) { enum vctrs_type2 type = vec_typeof2(x, y); return r_chr(vctrs_type2_as_str(type)); } vctrs/src/rlang.c0000644000176200001440000000025314465445266013471 0ustar liggesusers// This is an include point for the implementations of the rlang // library. It should be included in a single and separate compilation // unit. #include "rlang/rlang.c" vctrs/src/c-unchop.c0000644000176200001440000001752314511027312014066 0ustar liggesusers#include "rlang-types.h" #include "vctrs.h" enum fallback_homogeneous { FALLBACK_HOMOGENEOUS_false = 0, FALLBACK_HOMOGENEOUS_true }; #include "decl/c-unchop-decl.h" static r_obj* list_unchop(r_obj* xs, r_obj* indices, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call) { obj_check_list(xs, p_error_arg, error_call); if (indices == r_null) { return vec_c(xs, ptype, name_spec, name_repair, p_error_arg, error_call); } // Apply size/type checking to `indices` before possibly early exiting from // having a `NULL` common type or needing to apply a fallback obj_check_list(indices, vec_args.indices, error_call); r_ssize xs_size = vec_size(xs); if (xs_size != vec_size(indices)) { r_abort("`x` and `indices` must be lists of the same size."); } ptype = KEEP(vec_ptype_common_params(xs, ptype, S3_FALLBACK_true, p_error_arg, error_call)); if (needs_vec_c_fallback(ptype)) { r_obj* out = list_unchop_fallback( ptype, xs, indices, name_spec, name_repair, FALLBACK_HOMOGENEOUS_false, p_error_arg, error_call ); FREE(1); return out; } // FIXME: Needed for dplyr::summarise() which passes a non-fallback ptype if (needs_vec_c_homogeneous_fallback(xs, ptype)) { r_obj* out = list_unchop_fallback( ptype, xs, indices, name_spec, name_repair, FALLBACK_HOMOGENEOUS_true, p_error_arg, error_call ); FREE(1); return out; } if (ptype == r_null) { FREE(1); return r_null; } bool assign_names = !r_inherits(name_spec, "rlang_zap"); r_obj* xs_names = KEEP(r_names(xs)); bool xs_is_named = xs_names != r_null && !is_data_frame(ptype); // `out_size` is computed from `indices` r_ssize out_size = 0; for (r_ssize i = 0; i < xs_size; ++i) { out_size += r_length(r_list_get(indices, i)); } r_obj* locs = KEEP(list_as_locations(indices, out_size, r_null)); r_obj* proxy = vec_proxy_recurse(ptype); r_keep_loc proxy_pi; KEEP_HERE(proxy, &proxy_pi); proxy = vec_init(proxy, out_size); KEEP_AT(proxy, proxy_pi); r_obj* out_names = r_null; r_keep_loc out_names_pi; KEEP_HERE(out_names, &out_names_pi); r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( p_error_arg, xs_names, xs_size, &i ); KEEP(p_x_arg->shelter); struct cast_opts unchop_cast_opts = { .to = ptype, .p_x_arg = p_x_arg, .call = error_call }; const struct vec_assign_opts unchop_assign_opts = { .recursive = true, .assign_names = assign_names, .ignore_outer_names = true, .call = error_call }; for (; i < xs_size; ++i) { r_obj* x = r_list_get(xs, i); if (x == r_null) { continue; } r_obj* loc = r_list_get(locs, i); const r_ssize loc_size = r_length(loc); // Each element of `xs` is recycled to its corresponding index's size x = KEEP(vec_check_recycle(x, loc_size, p_x_arg, error_call)); if (assign_names) { r_obj* outer = xs_is_named ? r_chr_get(xs_names, i) : r_null; r_obj* inner = KEEP(vec_names(x)); r_obj* x_nms = KEEP(apply_name_spec(name_spec, outer, inner, loc_size)); if (x_nms != r_null) { R_LAZY_ALLOC(out_names, out_names_pi, R_TYPE_character, out_size); // If there is no name to assign, skip the assignment since // `out_names` already contains empty strings if (x_nms != chrs_empty) { out_names = chr_assign(out_names, loc, x_nms, VCTRS_OWNED_true); KEEP_AT(out_names, out_names_pi); } } FREE(2); } unchop_cast_opts.x = x; x = KEEP(vec_cast_opts(&unchop_cast_opts)); // Total ownership of `proxy` because it was freshly created with `vec_init()` proxy = vec_proxy_assign_opts(proxy, loc, x, VCTRS_OWNED_true, &unchop_assign_opts); KEEP_AT(proxy, proxy_pi); FREE(2); } if (is_data_frame(proxy)) { df_c_fallback(proxy, ptype, xs, out_size, name_spec, name_repair, error_call); } r_obj* out = KEEP(vec_restore_recurse(proxy, ptype, VCTRS_OWNED_true)); if (out_names != r_null) { out_names = KEEP(vec_as_names(out_names, name_repair)); out = vec_set_names(out, out_names); FREE(1); } else if (!assign_names) { // FIXME: `vec_ptype2()` doesn't consistently zaps names, so `out` // might have been initialised with names. This branch can be // removed once #1020 is resolved. out = vec_set_names(out, r_null); } FREE(7); return out; } r_obj* ffi_list_unchop(r_obj* x, r_obj* indices, r_obj* ptype, r_obj* name_spec, r_obj* name_repair, r_obj* frame) { struct r_lazy error_arg_lazy = { .x = r_syms.error_arg, .env = frame }; struct vctrs_arg error_arg = new_lazy_arg(&error_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, r_lazy_null, false, error_call); KEEP(name_repair_opts.shelter); r_obj* out = list_unchop( x, indices, ptype, name_spec, &name_repair_opts, &error_arg, error_call ); FREE(1); return out; } // This is essentially: // vec_slice_fallback(vec_c_fallback_invoke(!!!x), order(vec_c(!!!indices))) // with recycling of each element of `x` to the corresponding index size static r_obj* list_unchop_fallback(r_obj* ptype, r_obj* xs, r_obj* indices, r_obj* name_spec, const struct name_repair_opts* name_repair, enum fallback_homogeneous homogeneous, struct vctrs_arg* p_error_arg, struct r_lazy error_call) { r_ssize xs_size = vec_size(xs); r_obj* xs_names = r_names(xs); xs = KEEP(r_clone_referenced(xs)); r_ssize out_size = 0; r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( p_error_arg, xs_names, xs_size, &i ); KEEP(p_x_arg->shelter); // Recycle `xs` elements to the size of their corresponding index for (; i < xs_size; ++i) { r_obj* x = r_list_get(xs, i); r_ssize index_size = r_length(r_list_get(indices, i)); out_size += index_size; r_list_poke(xs, i, vec_recycle_fallback(x, index_size, p_x_arg, error_call)); } indices = KEEP(list_as_locations(indices, out_size, r_null)); r_obj* out = r_null; if (homogeneous) { out = KEEP(vec_c_fallback_invoke(xs, name_spec, error_call)); } else { out = KEEP(vec_c_fallback(ptype, xs, name_spec, name_repair, p_error_arg, error_call)); } const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = r_null, .call = error_call }; indices = KEEP(vec_c( indices, r_globals.empty_int, r_null, &name_repair_opts, vec_args.indices, error_call )); const int* p_indices = r_int_cbegin(indices); r_obj* locations = KEEP(r_alloc_integer(out_size)); int* p_locations = r_int_begin(locations); // Initialize with missing to handle locations that are never selected for (r_ssize i = 0; i < out_size; ++i) { p_locations[i] = r_globals.na_int; } for (r_ssize i = 0; i < out_size; ++i) { const int index = p_indices[i]; if (index == r_globals.na_int) { continue; } p_locations[index - 1] = i + 1; } out = KEEP(vec_slice_fallback(out, locations)); FREE(7); return out; } vctrs/src/order-collate.c0000644000176200001440000000543514404336165015117 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" // ----------------------------------------------------------------------------- static SEXP chr_apply(SEXP x, SEXP chr_proxy_collate); static SEXP df_apply(SEXP x, SEXP chr_proxy_collate); // [[ include("order-collate.h") ]] SEXP proxy_apply_chr_proxy_collate(SEXP proxy, SEXP chr_proxy_collate) { if (chr_proxy_collate == r_null) { return proxy; } chr_proxy_collate = PROTECT(r_as_function(chr_proxy_collate, "chr_proxy_collate")); SEXP out; switch (vec_proxy_typeof(proxy)) { case VCTRS_TYPE_character: out = chr_apply(proxy, chr_proxy_collate); break; case VCTRS_TYPE_dataframe: out = df_apply(proxy, chr_proxy_collate); break; default: out = proxy; } UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- static SEXP chr_apply(SEXP x, SEXP chr_proxy_collate) { // Don't use vctrs dispatch utils because we match argument positionally SEXP call = PROTECT(Rf_lang2(syms_chr_proxy_collate, syms_x)); SEXP mask = PROTECT(r_alloc_empty_environment(R_GlobalEnv)); Rf_defineVar(syms_chr_proxy_collate, chr_proxy_collate, mask); Rf_defineVar(syms_x, x, mask); SEXP out = PROTECT(Rf_eval(call, mask)); if (vec_typeof(out) != VCTRS_TYPE_character) { Rf_errorcall( R_NilValue, "`chr_proxy_collate` must return a character vector." ); } R_len_t x_size = vec_size(x); R_len_t out_size = vec_size(out); if (x_size != out_size) { Rf_errorcall( R_NilValue, "`chr_proxy_collate` must return a vector of the same length (%i, not %i).", x_size, out_size ); } UNPROTECT(3); return out; } // ----------------------------------------------------------------------------- static SEXP df_apply(SEXP x, SEXP chr_proxy_collate) { const r_ssize n_cols = r_length(x); const SEXP* v_x = VECTOR_PTR_RO(x); r_ssize i = 0; for (; i < n_cols; ++i) { SEXP col = v_x[i]; if (vec_proxy_typeof(col) == VCTRS_TYPE_character) { break; } } if (i == n_cols) { // No character columns return x; } SEXP out = PROTECT(r_clone_referenced(x)); for (; i < n_cols; ++i) { SEXP col = v_x[i]; if (vec_proxy_typeof(col) != VCTRS_TYPE_character) { continue; } col = chr_apply(col, chr_proxy_collate); SET_VECTOR_ELT(out, i, col); } UNPROTECT(1); return out; } vctrs/src/decl/0000755000176200001440000000000014511320527013111 5ustar liggesusersvctrs/src/decl/interval-decl.h0000644000176200001440000000225614315060310016011 0ustar liggesusers// Initialized at load time struct vctrs_arg args_start_; static struct vctrs_arg* const args_start = &args_start_; struct vctrs_arg args_end_; static struct vctrs_arg* const args_end = &args_end_; struct vctrs_arg args_lower_; static struct vctrs_arg* const args_lower = &args_lower_; struct vctrs_arg args_upper_; static struct vctrs_arg* const args_upper = &args_upper_; static r_obj* vec_interval_group_info(r_obj* start, r_obj* end, bool abutting, enum vctrs_interval_missing missing, bool locations); static r_obj* vec_interval_complement(r_obj* start, r_obj* end, r_obj* lower, r_obj* upper); static r_obj* vec_interval_locate_containers(r_obj* start, r_obj* end); static inline r_obj* interval_order(r_obj* start, r_obj* end, r_obj* direction, r_obj* na_value, r_ssize size); static inline enum vctrs_interval_missing parse_missing(r_obj* missing); vctrs/src/decl/size-decl.h0000644000176200001440000000025014362266120015140 0ustar liggesusersstatic r_ssize vec_size_opts(r_obj* x, const struct vec_error_opts* opts); static r_ssize vec_raw_size(r_obj* x); static bool list_all_size(r_obj* xs, r_ssize size); vctrs/src/decl/proxy-restore-decl.h0000644000176200001440000000046114315060310017023 0ustar liggesusersstatic r_obj* syms_vec_restore_dispatch; static r_obj* fns_vec_restore_dispatch; static r_obj* vec_restore_4(r_obj* x, r_obj* to, enum vctrs_owned owned, enum vctrs_recurse recurse); static r_obj* vec_restore_dispatch(r_obj* x, r_obj* to); vctrs/src/decl/type-info-decl.h0000644000176200001440000000010014373202700016066 0ustar liggesusersstatic enum vctrs_type vec_base_typeof(r_obj* x, bool proxied); vctrs/src/decl/c-unchop-decl.h0000644000176200001440000000133414362266120015706 0ustar liggesusersstatic r_obj* list_unchop(r_obj* xs, r_obj* indices, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call); static r_obj* list_unchop_fallback(r_obj* ptype, r_obj* xs, r_obj* indices, r_obj* name_spec, const struct name_repair_opts* name_repair, enum fallback_homogeneous homogenous, struct vctrs_arg* p_error_arg, struct r_lazy error_call); vctrs/src/decl/set-decl.h0000644000176200001440000000000014362266120014752 0ustar liggesusersvctrs/src/decl/poly-op-decl.h0000644000176200001440000000147314315060310015564 0ustar liggesusersstatic int p_df_equal_na_equal(const void* x, r_ssize i, const void* y, r_ssize j); static int p_df_compare_na_equal(const void* x, r_ssize i, const void* y, r_ssize j); static bool p_df_is_missing(const void* x, r_ssize i); static bool p_df_is_incomplete(const void* x, r_ssize i); static void init_nil_poly_vec(struct poly_vec* p_poly_vec); static void init_lgl_poly_vec(struct poly_vec* p_poly_vec); static void init_int_poly_vec(struct poly_vec* p_poly_vec); static void init_dbl_poly_vec(struct poly_vec* p_poly_vec); static void init_cpl_poly_vec(struct poly_vec* p_poly_vec); static void init_chr_poly_vec(struct poly_vec* p_poly_vec); static void init_raw_poly_vec(struct poly_vec* p_poly_vec); static void init_list_poly_vec(struct poly_vec* p_poly_vec); static void init_df_poly_vec(struct poly_vec* p_poly_vec); vctrs/src/decl/shape-decl.h0000644000176200001440000000115214315060310015257 0ustar liggesusersstatic r_obj* vec_shape2(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); static r_obj* vec_shape2_impl(r_obj* x_dimensions, r_obj* y_dimensions, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); static r_obj* vec_shape(r_obj* dimensions); static inline int vec_dimension2(int x_dimension, int y_dimension, int axis, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); vctrs/src/decl/proxy-decl.h0000644000176200001440000000204114315060310015336 0ustar liggesusersr_obj* syms_vec_proxy; r_obj* syms_vec_proxy_equal; r_obj* syms_vec_proxy_equal_array; r_obj* syms_vec_proxy_compare; r_obj* syms_vec_proxy_compare_array; r_obj* syms_vec_proxy_order; r_obj* syms_vec_proxy_order_array; r_obj* fns_vec_proxy_equal_array; r_obj* fns_vec_proxy_compare_array; r_obj* fns_vec_proxy_order_array; static r_obj* vec_proxy_2(r_obj* x, enum vctrs_recurse recurse); static inline r_obj* vec_proxy_equal_impl(r_obj* x); static inline r_obj* vec_proxy_compare_impl(r_obj* x); static inline r_obj* vec_proxy_order_impl(r_obj* x); static inline r_obj* vec_proxy_equal_method(r_obj* x); static inline r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method); static inline r_obj* vec_proxy_compare_method(r_obj* x); static inline r_obj* vec_proxy_compare_invoke(r_obj* x, r_obj* method); static inline r_obj* vec_proxy_order_method(r_obj* x); static inline r_obj* vec_proxy_order_invoke(r_obj* x, r_obj* method); static inline r_obj* df_proxy(r_obj* x, enum vctrs_proxy_kind kind); static inline r_obj* df_proxy_recurse(r_obj* x); vctrs/src/decl/slice-chop-decl.h0000644000176200001440000000140214402367170016216 0ustar liggesusersstatic r_obj* vec_chop_base(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop_shaped(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop_df(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop_fallback(r_obj* x, struct vctrs_chop_indices* p_indices); static r_obj* chop_fallback_shaped(r_obj* x, struct vctrs_chop_indices* p_indices); static r_obj* vec_as_chop_sizes(r_obj* sizes, r_ssize size); vctrs/src/decl/ptype-decl.h0000644000176200001440000000126214315060310015322 0ustar liggesusersstatic SEXP syms_vec_ptype; static SEXP syms_vec_ptype_finalise_dispatch; static SEXP fns_vec_ptype_finalise_dispatch; static inline r_obj* vec_ptype_slice(r_obj* x, r_obj* empty); static r_obj* df_ptype(r_obj* x, bool bare); static r_obj* col_ptype(r_obj* x); static r_obj* s3_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call); static inline r_obj* vec_ptype_method(r_obj* x); static inline r_obj* vec_ptype_invoke(r_obj* x, r_obj* method); static r_obj* vec_ptype_finalise_unspecified(r_obj* x); static r_obj* vec_ptype_finalise_dispatch(r_obj* x); static r_obj* vec_ptype_final_call; static struct r_lazy vec_ptype_final_lazy_call; vctrs/src/decl/subscript-loc-decl.h0000644000176200001440000000631314315060310016754 0ustar liggesusersstatic r_obj* lgl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* int_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* int_invert_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* int_filter_zero(r_obj* subscript, r_ssize n_zero); static r_obj* int_filter_missing(r_obj* subscript, r_ssize n_missing); static r_obj* int_filter_oob(r_obj* subscript, r_ssize n, r_ssize n_oob); static void int_check_consecutive(r_obj* subscript, r_ssize n, r_ssize n_extend, const struct location_opts* opts); static r_obj* dbl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* chr_as_location(r_obj* subscript, r_obj* names, const struct location_opts* opts); static void stop_subscript_missing(r_obj* i, const struct location_opts* opts); static void stop_subscript_empty(r_obj* i, const struct location_opts* opts); static void stop_subscript_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts); static void stop_subscript_negative_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts); static void stop_subscript_oob_name(r_obj* i, r_obj* names, const struct location_opts* opts); static void stop_location_negative(r_obj* i, const struct location_opts* opts); static void stop_location_zero(r_obj* i, const struct location_opts* opts); static void stop_indicator_size(r_obj* i, r_obj* n, const struct location_opts* opts); static void stop_location_negative_missing(r_obj* i, const struct location_opts* opts); static void stop_location_negative_positive(r_obj* i, const struct location_opts* opts); static void stop_location_oob_non_consecutive(r_obj* i, r_ssize size, const struct location_opts* opts); static enum subscript_missing parse_subscript_arg_missing(r_obj* x, struct r_lazy call); static enum num_loc_negative parse_loc_negative(r_obj* x, struct r_lazy call); static enum num_loc_oob parse_loc_oob(r_obj* x, struct r_lazy call); static enum num_loc_zero parse_loc_zero(r_obj* x, struct r_lazy call); static void stop_subscript_arg_missing(struct r_lazy call); static void stop_bad_negative(struct r_lazy call); static void stop_bad_oob(struct r_lazy call); static void stop_bad_zero(struct r_lazy call); vctrs/src/decl/subscript-decl.h0000644000176200001440000000167514315060310016207 0ustar liggesusersstatic r_obj* fns_cnd_body_subscript_dim; static r_obj* new_error_subscript_type(r_obj* subscript, const struct subscript_opts* opts, r_obj* body); static enum subscript_type_action parse_subscript_arg_type(r_obj* x, const char* kind); static r_obj* obj_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err); static r_obj* dbl_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err); static r_obj* dbl_cast_subscript_fallback(r_obj* subscript, const struct subscript_opts* opts, ERR* err); static r_obj* syms_new_dbl_cast_subscript_body; static r_obj* syms_lossy_err; static r_obj* syms_new_error_subscript_type; vctrs/src/decl/slice-assign-decl.h0000644000176200001440000000134314315060310016542 0ustar liggesusersstatic r_obj* syms_vec_assign_fallback; static r_obj* fns_vec_assign_fallback; static r_obj* vec_assign_fallback(r_obj* x, r_obj* index, r_obj* value); static r_obj* vec_proxy_assign_names(r_obj* proxy, r_obj* index, r_obj* value, const enum vctrs_owned owned); static r_obj* lgl_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned); static r_obj* int_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned); static r_obj* dbl_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned); static r_obj* cpl_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned); static r_obj* raw_assign(r_obj* x, r_obj* index, r_obj* value, const enum vctrs_owned owned); vctrs/src/decl/arg-decl.h0000644000176200001440000000100114315060310014721 0ustar liggesusersstatic int fill_arg_buffer(struct vctrs_arg* arg, char* buf, r_ssize cur_size, r_ssize tot_size); static r_ssize counter_arg_fill(void* data, char* buf, r_ssize remaining); static r_ssize wrapper_arg_fill(void* data, char* buf, r_ssize remaining); static r_ssize lazy_arg_fill(void* data, char* buf, r_ssize remaining); static r_ssize subscript_arg_fill(void* p_data, char* buf, r_ssize remaining); static bool is_empty_arg(struct vctrs_arg* arg); vctrs/src/decl/empty-decl.h0000644000176200001440000000005114315060310015312 0ustar liggesusersstatic r_obj* list_drop_empty(r_obj* x); vctrs/src/decl/utils-dispatch-decl.h0000644000176200001440000000025214416313204017121 0ustar liggesusersenum vctrs_class_type class_type(r_obj* x); static enum vctrs_class_type class_type_impl(r_obj* cls); static const char* class_type_as_str(enum vctrs_class_type type); vctrs/src/decl/match-joint-decl.h0000644000176200001440000000053014350637775016423 0ustar liggesusersstatic inline r_obj* vec_joint_proxy_order(r_obj* x, r_obj* y); static inline r_obj* vec_joint_proxy_order_independent(r_obj* x, r_obj* y); static inline r_obj* vec_joint_proxy_order_dependent(r_obj* x, r_obj* y); static inline r_obj* vec_joint_proxy_order_s3(r_obj* x, r_obj* y); static inline r_obj* df_joint_proxy_order(r_obj* x, r_obj* y); vctrs/src/decl/expand-decl.h0000644000176200001440000000007614362266120015453 0ustar liggesusersstatic inline enum vctrs_expand_vary parse_vary(r_obj* vary); vctrs/src/decl/slice-interleave-decl.h0000644000176200001440000000007714315060310017417 0ustar liggesusersstatic r_obj* vec_interleave_indices(r_ssize n, r_ssize size); vctrs/src/decl/ptype2-decl.h0000644000176200001440000000034314315060310015403 0ustar liggesusersstatic r_obj* vec_ptype2_switch_native(const struct ptype2_opts* opts, enum vctrs_type x_type, enum vctrs_type y_type, int* left); vctrs/src/decl/dictionary-decl.h0000644000176200001440000000005614276722575016356 0ustar liggesusersstatic inline uint32_t dict_key_size(SEXP x); vctrs/src/decl/c-decl.h0000644000176200001440000000037214362266120014415 0ustar liggesusersstatic inline bool vec_implements_base_c(r_obj* x); static inline int vec_c_fallback_validate_args(r_obj* x, r_obj* name_spec); static void stop_vec_c_fallback(r_obj* xs, int err_type, struct r_lazy call); static bool df_needs_fallback(r_obj* x); vctrs/src/decl/cast-decl.h0000644000176200001440000000043714315060310015116 0ustar liggesusersstatic r_obj* vec_cast_switch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy); static r_obj* vec_cast_dispatch_s3(const struct cast_opts* opts); vctrs/src/decl/compare-decl.h0000644000176200001440000000073114315060310015607 0ustar liggesusersstatic r_obj* df_compare(r_obj* x, r_obj* y, bool na_equal, r_ssize size); static void df_compare_impl(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal); static void vec_compare_col(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal); vctrs/src/decl/size-common-decl.h0000644000176200001440000000025314315060310016420 0ustar liggesusersstatic r_obj* vctrs_size2_common(r_obj* x, r_obj* y, struct counters* counters, void* data); vctrs/src/decl/runs-decl.h0000644000176200001440000000466714364250244015177 0ustar liggesusersstatic r_obj* vec_detect_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call); static r_obj* vec_locate_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call); static struct r_vector_bool* vec_detect_run_bounds_bool(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call); static inline void lgl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void int_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void dbl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void cpl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void chr_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void raw_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void list_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void df_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void lgl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void int_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void dbl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void cpl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void chr_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void raw_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void list_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline r_ssize compute_iter_loc(r_ssize size, enum vctrs_run_bound which); static inline r_ssize compute_iter_step(enum vctrs_run_bound which); static inline enum vctrs_run_bound as_run_bound(r_obj* which, struct r_lazy error_call); vctrs/src/decl/ptype-common-decl.h0000644000176200001440000000024014315060310016603 0ustar liggesusersstatic r_obj* ptype2_common(r_obj* current, r_obj* next, struct counters* counters, void* data); vctrs/src/decl/typeof2-s3-decl.h0000644000176200001440000000033714315060310016076 0ustar liggesusersstatic enum vctrs_type2_s3 vec_typeof2_s3_impl2(SEXP x, SEXP y, enum vctrs_type type_y, int* left); vctrs/src/decl/type-integer64-decl.h0000644000176200001440000000022614276722575016776 0ustar liggesusersstatic inline void int64_unpack(int64_t x, r_ssize i, double* v_left, double* v_right); static inline int64_t int64_pack(double left, double right); vctrs/src/decl/assert-decl.h0000644000176200001440000000050414362266120015471 0ustar liggesusersstatic r_no_return void stop_non_list_type(r_obj* x, struct vctrs_arg* arg, struct r_lazy call); static void list_check_all_size(r_obj* xs, r_ssize size, struct vctrs_arg* p_arg, struct r_lazy call); vctrs/src/decl/rank-decl.h0000644000176200001440000000230014315060310015106 0ustar liggesusersstatic inline enum ties parse_ties(r_obj* ties); static inline enum incomplete parse_incomplete(r_obj* incomplete); static inline bool r_lgl_all(r_obj* x); static r_obj* vec_rank(r_obj* x, enum ties ties_type, enum incomplete incomplete_type, r_obj* direction, r_obj* na_value, bool nan_distinct, r_obj* chr_proxy_collate); static void vec_rank_min(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); static void vec_rank_max(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); static void vec_rank_sequential(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); static void vec_rank_dense(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); vctrs/src/decl/rep-decl.h0000644000176200001440000000335714363556517015004 0ustar liggesusersstatic inline void stop_rep_times_size(struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void check_rep_times(int times, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void check_rep_each_times(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline bool multiply_would_overflow(r_ssize x, r_ssize y); static inline bool plus_would_overflow(r_ssize x, r_ssize y); static inline void stop_rep_size_oob(struct r_lazy call); static r_obj* vec_rep_each_uniform(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_times_arg); static r_obj* vec_rep_each_impl(r_obj* x, r_obj* times, const r_ssize times_size, struct r_lazy error_call, struct vctrs_arg* p_times_arg); static inline void stop_rep_times_negative(struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_times_missing(struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_times_oob(int times, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_each_times_negative(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_each_times_missing(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_each_times_oob(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static r_obj* vec_unrep(r_obj* x, struct r_lazy error_call); vctrs/src/decl/ptype2-dispatch-decl.h0000644000176200001440000000004714315060310017201 0ustar liggesusersstatic r_obj* syms_vec_ptype2_default; vctrs/src/decl/type-data-frame-decl.h0000644000176200001440000000246414511320527017155 0ustar liggesusersstatic r_obj* syms_df_lossy_cast; static r_obj* fns_df_lossy_cast; static r_obj* new_compact_rownames(r_ssize n); static r_ssize df_size_from_n(r_obj* n); static r_obj* c_data_frame_class(r_obj* cls); static r_obj* data_frame(r_obj* x, r_ssize size, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call); static r_obj* df_list(r_obj* x, r_ssize size, bool unpack, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call); static r_obj* df_list_drop_null(r_obj* x); static r_obj* df_list_unpack(r_obj* x); static void init_bare_data_frame(r_obj* x, r_ssize n); static r_obj* df_ptype2_match(const struct ptype2_opts* opts, r_obj* x_names, r_obj* y_names); static r_obj* df_ptype2_loop(const struct ptype2_opts* opts, r_obj* y_names); static r_obj* df_cast_match(const struct cast_opts* opts, r_obj* x_names, r_obj* to_names); static r_obj* df_cast_loop(const struct cast_opts* opts, r_obj* names); static r_ssize df_flatten_loop(r_obj* x, r_obj* out, r_obj* out_names, r_ssize counter); vctrs/src/decl/match-decl.h0000644000176200001440000003061314511320527015266 0ustar liggesusers// Initialised at load time struct vctrs_arg args_incomplete_; static struct vctrs_arg* const args_incomplete = &args_incomplete_; struct vctrs_arg args_no_match_; static struct vctrs_arg* const args_no_match = &args_no_match_; struct vctrs_arg args_remaining_; static struct vctrs_arg* const args_remaining = &args_remaining_; static r_obj* vec_locate_matches(r_obj* needles, r_obj* haystack, r_obj* condition, r_obj* filter, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool nan_distinct, r_obj* chr_proxy_collate, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call); static r_obj* df_locate_matches(r_obj* needles, r_obj* haystack, r_obj* needles_complete, r_obj* haystack_complete, r_ssize size_needles, r_ssize size_haystack, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call); static void df_locate_matches_recurse(r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack); static void df_locate_matches_with_containers(int n_containers, const int* v_container_ids, r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack); static inline r_ssize int_locate_upper_incomplete(const int* v_haystack_complete, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static inline r_ssize int_locate_lower_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static inline r_ssize int_locate_upper_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static inline struct vctrs_match_bounds int_locate_match(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static r_obj* df_joint_xtfrm_by_col(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, r_ssize n_cols, bool nan_distinct, r_obj* chr_proxy_collate); static r_obj* df_detect_complete_by_col(r_obj* x, r_ssize x_size, r_ssize n_cols); static inline void parse_condition(r_obj* condition, r_ssize n_cols, enum vctrs_ops* v_ops); static inline struct vctrs_no_match parse_no_match(r_obj* no_match, struct r_lazy call); static inline struct vctrs_remaining parse_remaining(r_obj* remaining, struct r_lazy call); static inline struct vctrs_incomplete parse_incomplete(r_obj* incomplete, struct r_lazy call); static inline enum vctrs_multiple parse_multiple(r_obj* multiple, struct r_lazy call); static inline enum vctrs_relationship parse_relationship(r_obj* relationship, struct r_lazy call); static inline void parse_filter(r_obj* filter, r_ssize n_cols, enum vctrs_filter* v_filters); static r_obj* expand_compact_indices(const int* v_o_haystack, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, bool skip_size_match, bool skip_loc_needles, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, r_ssize size_needles, r_ssize size_haystack, bool any_non_equi, bool has_loc_filter_match_o_haystack, const enum vctrs_filter* v_filters, const int* v_loc_filter_match_o_haystack, const struct poly_df_data* p_haystack, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call); static r_obj* compute_nesting_container_info(r_obj* haystack, r_ssize size_haystack, const enum vctrs_ops* v_ops); static r_obj* compute_nesting_container_ids(r_obj* x, const int* v_order, const int* v_group_sizes, const int* v_outer_group_sizes, r_ssize size, r_ssize n_groups, bool has_outer_group_sizes); static inline bool p_nesting_container_df_compare_fully_ge_na_equal(const void* x, r_ssize i, const void* y, r_ssize j); static inline int p_matches_df_compare_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters); static inline bool p_matches_df_equal_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters); static inline r_ssize midpoint(r_ssize lhs, r_ssize rhs); static inline void stop_matches_overflow(double size, struct r_lazy call); static inline void stop_matches_nothing(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_remaining(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_incomplete(r_ssize i, struct vctrs_arg* needles_arg, struct r_lazy call); static inline void stop_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void warn_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_relationship_one_to_one(r_ssize i, const char* which, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_relationship_one_to_many(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_relationship_many_to_one(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void warn_matches_relationship_many_to_many(r_ssize i, r_ssize j, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); vctrs/src/decl/rlang-dev-decl.h0000644000176200001440000000000014315060310016025 0ustar liggesusersvctrs/src/decl/missing-decl.h0000644000176200001440000000423614315060310015636 0ustar liggesusersstatic inline r_obj* proxy_detect_missing(r_obj* proxy); static inline r_obj* lgl_detect_missing(r_obj* x); static inline r_obj* int_detect_missing(r_obj* x); static inline r_obj* dbl_detect_missing(r_obj* x); static inline r_obj* cpl_detect_missing(r_obj* x); static inline r_obj* raw_detect_missing(r_obj* x); static inline r_obj* chr_detect_missing(r_obj* x); static inline r_obj* list_detect_missing(r_obj* x); static inline r_obj* df_detect_missing(r_obj* x); static inline r_ssize col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize lgl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize int_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize dbl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize cpl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize raw_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize chr_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize list_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize proxy_first_missing(r_obj* proxy); static inline r_ssize lgl_first_missing(r_obj* x); static inline r_ssize int_first_missing(r_obj* x); static inline r_ssize dbl_first_missing(r_obj* x); static inline r_ssize cpl_first_missing(r_obj* x); static inline r_ssize raw_first_missing(r_obj* x); static inline r_ssize chr_first_missing(r_obj* x); static inline r_ssize list_first_missing(r_obj* x); static inline r_ssize df_first_missing(r_obj* x); static inline const unsigned char* r_uchar_cbegin(r_obj* x); vctrs/src/decl/arg-counter-decl.h0000644000176200001440000000161014315060310016404 0ustar liggesusersstatic r_obj* reduce_impl(r_obj* current, r_obj* rest, struct vctrs_arg* p_parent_arg, struct counters* counters, bool spliced, r_obj* (*impl)(r_obj* current, r_obj* next, struct counters* counters, void* data), void* data); static r_obj* reduce_splice_box(r_obj* current, r_obj* rest, struct vctrs_arg* p_parent_arg, struct counters* counters, r_obj* (*impl)(r_obj* current, r_obj* rest, struct counters* counters, void* data), void* data); vctrs/src/decl/names-decl.h0000644000176200001440000000221514362266120015274 0ustar liggesusersstatic r_obj* syms_as_universal_names; static r_obj* syms_check_unique_names; static r_obj* fns_as_universal_names; static r_obj* fns_check_unique_names; static r_obj* syms_glue_as_name_spec; static r_obj* fns_glue_as_name_spec; static r_obj* syms_internal_spec; static r_obj* syms_set_rownames_dispatch; static r_obj* fns_set_rownames_dispatch; static r_obj* syms_set_names_dispatch; static r_obj* fns_set_names_dispatch; static void describe_repair(r_obj* old_names, r_obj* new_names); static r_obj* check_unique_names(r_obj* names, const struct name_repair_opts* opts); static void vec_validate_minimal_names(r_obj* names, r_ssize n, struct r_lazy call); r_obj* ffi_as_minimal_names(r_obj* names); static bool any_has_suffix(r_obj* names); static r_obj* as_unique_names_impl(r_obj* names, bool quiet); static void stop_large_name(void); static bool is_dotdotint(const char* name); static ptrdiff_t suffix_pos(const char* name); static bool needs_suffix(r_obj* str); static r_obj* names_iota(r_ssize n); static r_obj* vec_unique_names_impl(r_obj* names, r_ssize n, bool quiet); static r_obj* glue_as_name_spec(r_obj* spec); vctrs/src/decl/bind-decl.h0000644000176200001440000000253714362266120015114 0ustar liggesusersstatic r_obj* vec_rbind(r_obj* xs, r_obj* ptype, r_obj* id, struct name_repair_opts* name_repair, r_obj* name_spec, struct r_lazy error_call); static r_obj* as_df_row(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call); static r_obj* as_df_row_impl(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call); static struct name_repair_opts validate_bind_name_repair(r_obj* name_repair, bool allow_minimal); static r_obj* vec_cbind(r_obj* xs, r_obj* ptype, r_obj* size, struct name_repair_opts* name_repair, struct r_lazy error_call); static r_obj* cbind_names_to(bool has_names, r_obj* names_to, r_obj* ptype, struct r_lazy error_call); static r_obj* as_df_col(r_obj* x, r_obj* outer, bool* allow_pack, struct r_lazy error_call); static r_obj* cbind_container_type(r_obj* x, void* data); static r_obj* syms_vec_cbind_frame_ptype; static r_obj* fns_vec_cbind_frame_ptype; static r_obj* shaped_as_df_col(r_obj* x, r_obj* outer); static r_obj* vec_as_df_col(r_obj* x, r_obj* outer); vctrs/src/slice-interleave.c0000644000176200001440000000223614315060310015575 0ustar liggesusers#include "vctrs.h" #include "decl/slice-interleave-decl.h" // [[ register() ]] r_obj* ffi_interleave_indices(r_obj* n, r_obj* size) { r_ssize c_n = r_arg_as_ssize(n, "n"); r_ssize c_size = r_arg_as_ssize(size, "size"); return vec_interleave_indices(c_n, c_size); } static r_obj* vec_interleave_indices(r_ssize n, r_ssize size) { if (n < 0) { r_stop_internal( "`n` must be greater than or equal to 0." ); } if (size < 0) { r_stop_internal( "`size` must be greater than or equal to 0." ); } const r_ssize total_size = r_ssize_mult(n, size); if (total_size > R_LEN_T_MAX) { r_abort( "Long vectors are not yet supported in `vec_interleave()`. " "Result from interleaving would have size %td, which is larger " "than the maximum supported size of 2^31 - 1.", total_size ); } r_obj* out = KEEP(r_alloc_list(n)); for (r_ssize i = 0; i < n; ++i) { const r_ssize start = i + 1; r_obj* elt = r_alloc_integer(size); r_list_poke(out, i, elt); int* v_elt = r_int_begin(elt); for (r_ssize j = 0; j < size; ++j) { v_elt[j] = start + n * j; } } FREE(1); return out; } vctrs/src/order.c0000644000176200001440000043024414511320527013470 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" #include "type-data-frame.h" #include "type-complex.h" // ----------------------------------------------------------------------------- /* * High level description of `vec_order()` * * Heavily inspired by `radixsort.c` in base R and `forder()` from data.table * https://github.com/wch/r-source/blob/trunk/src/main/radixsort.c * https://github.com/Rdatatable/data.table/blob/master/src/forder.c * * Additional resources about radix sorting: * http://codercorner.com/RadixSortRevisited.htm * http://stereopsis.com/radix.html * * The very end of this has a MSB radix sort implementation * https://eternallyconfuzzled.com/sorting-c-introduction-to-the-automatic-ordering-of-data * * ----------------------------------------------------------------------------- * Integers * * This uses a combination of 3 ordering algorithms. * * - `int_order_insertion()` - An insertion sort is used when `x` is very * small. This has less overhead than the counting or radix sort and is * faster for small input. * * - `int_order_counting()` - A counting sort is used when `x` has a range * of less than `INT_ORDER_COUNTING_RANGE_BOUNDARY`. For integers with a * small range like this, the bucketing in the counting sort can be very * fast when compared with the recursive multipass approach of the radix sort. * * - `int_order_radix()` - A radix sort is used for everything else. * This is a MSB radix sort. It orders the vector 1 byte (8 bits) at a time, * so for a 4 byte int this makes a maximum of 4 passes over each integer. * It orders from most significant byte to least significant. After each * pass, there are 256 buckets (1 for each possible byte value). Each bucket * is then ordered separately on the next byte. This happens recursively for * the 4 passes. When the buckets get small enough in size, the insertion * sort is used to finish them off. * * For radix sorting, we have to use unsigned types for bit shifting to * work reliably. We map `int` to `uint32_t` in a way that preserves order, * and also handle `na_last` and `decreasing` in this mapping. This all happens * in `int_adjust()`. It is assumed and checked at load time that * `sizeof(int) == 4`. * * ----------------------------------------------------------------------------- * Doubles * * This uses a combination of 2 ordering algorithms: * * - `dbl_order_insertion()` - An insertion sort is used when `x` is very small. * * - `dbl_order_radix()` - This is similar to `int_order_radix()`, see above, * but makes a max of 8 passes over the data. * * For doubles, we assume `sizeof(double) == 8`, which should pretty much be * ensured by IEEE 754 specifications. * * For the mapping here, it is possible to map `double -> uint64_t` in an * order preserving way. This is very cool, and involves always flipping the * sign bit of the value, and flipping all other bits if the value was negative. * This is described more in: http://stereopsis.com/radix.html. * This is implemented in `dbl_adjust()` which also handles `na_last` and * `decreasing`. For `na_last`, we treat `NA_real_` and `NaN` equivalently. * Base R does as well, but data.table does not. * * ----------------------------------------------------------------------------- * Characters * * Character vector ordering is a bit trickier than integers or doubles. It * uses two internal details in R: * * - CHARSXP values in R are in a global string pool, and * repeated strings like `c("hi", "hi")` both point to the same CHARSXP. * * - There is a property that all vectors have called the TRUELENGTH. This is * used to overallocate in lists, but is otherwise unused * in CHARSXPs. The truelength is stored as a `r_ssize`. * * Character ordering is achieved by first iterating through `x` and extracting * the unique values by using the TRUELENGTH. If the truelength is positive * or zero, we save it as an unseen string and set the truelength to `-1`. * Otherwise if it is negative we assume we have seen it already. At the * end of this we have a vector of unique strings. We order this with * `chr_order_radix()`. Then we iterate over the now sorted unique strings * and set their truelength to `-i - 1` (where `i` is the index while * iterating). This marks the unique strings with their ordering (as a negative * value to be different from R) and also updates the original character vector. * We then iterate through the original vector again, plucking off the now * updated truelength integer values. This gives us an integer proxy for the * ordering, which we can run through `int_order_chunk()` to get the final * ordering. * * The ordering of unique characters uses a combination of 2 ordering * algorithms: * * - `chr_order_insertion()` - Used when `x` is small. * * - `chr_order_radix()` - Same principle as integer/double ordering, but * we iterate 1 character at a time. We assume a C locale here. Any non-ASCII * and non-UTF-8 strings are translated up front by * `vec_normalize_encoding()`. * * ----------------------------------------------------------------------------- * Logicals * * Uses the same infrastructure as integers. Because the number of possible * unique values is low, this will always use either an insertion sort for * small vectors, or a counting sort for large ones. * * ----------------------------------------------------------------------------- * Complex * * We treat complex as a data frame of two double columns. We order the * real part first using `dbl_order_chunk()`, then order the imaginary part also * using `dbl_order_chunk()`. * * ----------------------------------------------------------------------------- * Data frames * * Multi-column data frame ordering uses the same principle as MSB ordering. * It starts with the first column (the most "significant" one) and orders it. * While ordering the column, group sizes are tracked ("groups" are duplicate * values in the column). The next column is broken into chunks corresponding * to these group sizes from the first column, and the chunks are ordered * individually. While ordering the chunks of the 2nd column, group sizes are * again tracked to use in subsequent columns. */ // ----------------------------------------------------------------------------- #define UINT8_MAX_SIZE (UINT8_MAX + 1) /* * Maximum number of passes required to completely sort ints and doubles */ #define INT_MAX_RADIX_PASS 4 #define DBL_MAX_RADIX_PASS 8 /* * Maximum range allowed when deciding whether or not to use a counting sort * vs a radix sort. Counting sort is somewhat faster when less than this * boundary value. */ #define INT_ORDER_COUNTING_RANGE_BOUNDARY 100000 /* * Size of `x` that determines when an insertion sort should be used. Seems * to work better than 256 (from limited testing), base R uses 200. * Somewhat based on this post: * https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/ */ #define ORDER_INSERTION_BOUNDARY 128 /* * Adjustments for translating current `pass` into the current `radix` byte * that we need to shift to. */ #define PASS_TO_RADIX(X, MAX) (MAX - 1 - X) #define SHIFT_ADJUSTMENT -CHAR_BIT // ----------------------------------------------------------------------------- static inline bool parse_nan_distinct(SEXP nan_distinct); // [[ register() ]] SEXP vctrs_order(SEXP x, SEXP direction, SEXP na_value, SEXP nan_distinct, SEXP chr_proxy_collate) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); return vec_order(x, direction, na_value, c_nan_distinct, chr_proxy_collate); } static SEXP vec_order_info_impl(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, bool chr_ordered, bool group_sizes); // [[ include("order.h") ]] SEXP vec_order(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate) { const bool chr_ordered = true; const bool group_sizes = false; SEXP info = vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered, group_sizes); return r_list_get(info, 0); } // ----------------------------------------------------------------------------- static SEXP vec_locate_sorted_groups(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate); // [[ register() ]] SEXP vctrs_locate_sorted_groups(SEXP x, SEXP direction, SEXP na_value, SEXP nan_distinct, SEXP chr_proxy_collate) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); return vec_locate_sorted_groups( x, direction, na_value, c_nan_distinct, chr_proxy_collate ); } static SEXP vec_locate_sorted_groups(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate) { const bool chr_ordered = true; SEXP info = KEEP(vec_order_info( x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered )); SEXP o = r_list_get(info, 0); const int* p_o = r_int_cbegin(o); SEXP sizes = r_list_get(info, 1); const int* p_sizes = r_int_cbegin(sizes); r_ssize n_groups = r_length(sizes); SEXP loc = KEEP(r_alloc_list(n_groups)); SEXP key_loc = KEEP(r_alloc_integer(n_groups)); int* p_key_loc = r_int_begin(key_loc); int start = 0; for (r_ssize i = 0; i < n_groups; ++i) { p_key_loc[i] = p_o[start]; const int size = p_sizes[i]; SEXP elt = r_alloc_integer(size); r_list_poke(loc, i, elt); int* p_elt = r_int_begin(elt); R_len_t k = 0; for (int j = 0; j < size; ++j) { p_elt[k] = p_o[start]; ++start; ++k; } } SEXP key = KEEP(vec_slice(x, key_loc)); // Construct output data frame SEXP out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, key); r_list_poke(out, 1, loc); SEXP names = KEEP(r_alloc_character(2)); r_chr_poke(names, 0, strings_key); r_chr_poke(names, 1, strings_loc); r_attrib_poke(out, r_syms.names, names); out = new_data_frame(out, n_groups); FREE(6); return out; } // ----------------------------------------------------------------------------- /* * Returns a list of size three. * - The first element of the list contains the ordering as an integer vector. * - The second element of the list contains the group sizes as an integer * vector. * - The third element of the list contains the max group size as an integer. */ // [[ include("order.h") ]] SEXP vec_order_info(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, bool chr_ordered) { const bool group_sizes = true; return vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered, group_sizes); } // [[ register() ]] SEXP vctrs_order_info(SEXP x, SEXP direction, SEXP na_value, SEXP nan_distinct, SEXP chr_proxy_collate, SEXP chr_ordered) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); bool c_chr_ordered = r_bool_as_int(chr_ordered); return vec_order_info(x, direction, na_value, c_nan_distinct, chr_proxy_collate, c_chr_ordered); } static inline size_t vec_compute_n_bytes_lazy_raw(SEXP x, const enum vctrs_type type); static inline size_t vec_compute_n_bytes_lazy_counts(SEXP x, const enum vctrs_type type); static SEXP parse_na_value(SEXP na_value); static SEXP parse_direction(SEXP direction); static SEXP vec_order_expand_args(SEXP x, SEXP decreasing, SEXP na_largest); static SEXP vec_order_compute_na_last(SEXP na_largest, SEXP decreasing); static void vec_order_switch(SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, bool chr_ordered, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static SEXP vec_order_info_impl(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, bool chr_ordered, bool group_sizes) { // TODO call struct r_lazy call = r_lazy_null; int n_prot = 0; SEXP decreasing = PROTECT_N(parse_direction(direction), &n_prot); SEXP na_largest = PROTECT_N(parse_na_value(na_value), &n_prot); // Call on `x` before potentially flattening cols with `vec_proxy_order()` SEXP args = PROTECT_N(vec_order_expand_args(x, decreasing, na_largest), &n_prot); R_len_t arg_size = vec_check_size_common(args, 0, vec_args.empty, call); args = PROTECT_N(vec_check_recycle_common(args, arg_size, vec_args.empty, call), &n_prot); decreasing = VECTOR_ELT(args, 0); na_largest = VECTOR_ELT(args, 1); SEXP na_last = PROTECT_N(vec_order_compute_na_last(na_largest, decreasing), &n_prot); SEXP proxy = PROTECT_N(vec_proxy_order(x), &n_prot); proxy = PROTECT_N(vec_normalize_encoding(proxy), &n_prot); proxy = PROTECT_N(proxy_apply_chr_proxy_collate(proxy, chr_proxy_collate), &n_prot); r_ssize size = vec_size(proxy); const enum vctrs_type type = vec_proxy_typeof(proxy); // Compute the maximum size required for auxiliary working memory const size_t n_bytes_lazy_raw = vec_compute_n_bytes_lazy_raw(proxy, type); // Auxiliary vectors to hold intermediate results while ordering. // If `x` is a data frame we allocate enough room for the largest column type. struct lazy_raw* p_lazy_x_chunk = new_lazy_raw(size, n_bytes_lazy_raw); PROTECT_LAZY_VEC(p_lazy_x_chunk, &n_prot); struct lazy_raw* p_lazy_x_aux = new_lazy_raw(size, n_bytes_lazy_raw); PROTECT_LAZY_VEC(p_lazy_x_aux, &n_prot); struct lazy_raw* p_lazy_o_aux = new_lazy_raw(size, sizeof(int)); PROTECT_LAZY_VEC(p_lazy_o_aux, &n_prot); struct lazy_raw* p_lazy_bytes = new_lazy_raw(size, sizeof(uint8_t)); PROTECT_LAZY_VEC(p_lazy_bytes, &n_prot); // Compute the maximum size of the `counts` vector needed during radix // ordering. 4 * 256 for integers, 8 * 256 for doubles. size_t n_bytes_lazy_counts = vec_compute_n_bytes_lazy_counts(proxy, type); r_ssize size_lazy_counts = UINT8_MAX_SIZE * n_bytes_lazy_counts; struct lazy_raw* p_lazy_counts = new_lazy_raw(size_lazy_counts, sizeof(r_ssize)); PROTECT_LAZY_VEC(p_lazy_counts, &n_prot); // Determine if group tracking can be turned off. // We turn if off if ordering non-data frame input as long as // locations haven't been requested by the user. // It is more efficient to ignore it when possible. bool force_groups = group_sizes; bool ignore_groups = force_groups ? false : (is_data_frame(proxy) ? false : true); // Construct the two sets of group info needed for tracking groups. // We switch between them after each data frame column is processed. struct group_info* p_group_info0 = new_group_info(); PROTECT_GROUP_INFO(p_group_info0, &n_prot); struct group_info* p_group_info1 = new_group_info(); PROTECT_GROUP_INFO(p_group_info1, &n_prot); struct group_infos* p_group_infos = new_group_infos( p_group_info0, p_group_info1, size, force_groups, ignore_groups ); PROTECT_GROUP_INFOS(p_group_infos, &n_prot); // Used for character ordering - lazily generated to be fast // when not ordering character vectors struct truelength_info* p_truelength_info = new_truelength_info(size); PROTECT_TRUELENGTH_INFO(p_truelength_info, &n_prot); struct order* p_order = new_order(size); PROTECT_ORDER(p_order, &n_prot); vec_order_switch( proxy, decreasing, na_last, nan_distinct, chr_ordered, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); SEXP out = PROTECT_N(r_alloc_list(3), &n_prot); r_list_poke(out, 0, p_order->data); if (group_sizes) { struct group_info* p_group_info = groups_current(p_group_infos); SEXP sizes = p_group_info->data; sizes = r_int_resize(sizes, p_group_info->n_groups); r_list_poke(out, 1, sizes); r_list_poke(out, 2, r_int((int) p_group_info->max_group_size)); } UNPROTECT(n_prot); return out; } // ----------------------------------------------------------------------------- static void df_order(SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, bool chr_ordered, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static void vec_order_base_switch(SEXP x, bool decreasing, bool na_last, bool nan_distinct, bool chr_ordered, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static void vec_order_switch(SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, bool chr_ordered, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { if (type == VCTRS_TYPE_dataframe) { df_order( x, decreasing, na_last, nan_distinct, chr_ordered, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); return; } if (r_length(decreasing) != 1) { Rf_errorcall( R_NilValue, "Internal error: Size of decreasing != 1, but " "`vec_order_expand_args()` didn't catch it." ); } if (r_length(na_last) != 1) { Rf_errorcall( R_NilValue, "Internal error: Size of na_last != 1, but " "`vec_order_expand_args()` didn't catch it." ); } bool c_decreasing = LOGICAL(decreasing)[0]; bool c_na_last = LOGICAL(na_last)[0]; vec_order_base_switch( x, c_decreasing, c_na_last, nan_distinct, chr_ordered, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); } // ----------------------------------------------------------------------------- static void int_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void lgl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void dbl_order(SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void cpl_order(SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void chr_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static void chr_appearance(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); // Used on bare vectors and the first column of data frame `x`s static void vec_order_base_switch(SEXP x, bool decreasing, bool na_last, bool nan_distinct, bool chr_ordered, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { switch (type) { case VCTRS_TYPE_integer: { int_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_logical: { lgl_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_double: { dbl_order( x, decreasing, na_last, nan_distinct, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_complex: { cpl_order( x, decreasing, na_last, nan_distinct, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_character: { if (chr_ordered) { chr_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); } else { chr_appearance( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); } break; } case VCTRS_TYPE_dataframe: { Rf_errorcall(R_NilValue, "Internal error: Data frames should have been handled by now"); } default: { Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } } // ----------------------------------------------------------------------------- static void int_order_chunk_impl(bool decreasing, bool na_last, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); /* * These are the main entry points for integer ordering. They are nearly * identical except `int_order()` assumes that `p_x` cannot be * modified directly and is user input. * * `int_order_chunk()` assumes `p_x` is modifiable by reference. It is called * when iterating over data frame columns and `p_x` is the 2nd or greater * column, in which case `p_x` is really a chunk of that column that has been * copied into `x_chunk`. * * `int_order()` assumes `p_x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly unless a * counting sort is going to be used, in which case `p_x` can be used directly. */ static void int_order_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { void* p_x_chunk = p_lazy_x_chunk->p_data; const enum vctrs_sortedness sortedness = int_sortedness( p_x_chunk, size, decreasing, na_last, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } int_order_chunk_impl( decreasing, na_last, size, p_x_chunk, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void int_order_impl(const int* p_x, bool decreasing, bool na_last, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void int_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const int* p_x = INTEGER_RO(x); const enum vctrs_sortedness sortedness = int_sortedness( p_x, size, decreasing, na_last, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } int_order_impl( p_x, decreasing, na_last, size, true, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void int_adjust(const bool decreasing, const bool na_last, const r_ssize size, void* p_x); static void int_compute_range(const int* p_x, r_ssize size, int* p_x_min, uint32_t* p_range); static void int_order_counting(const int* p_x, r_ssize size, int x_min, uint32_t range, bool initialized, bool decreasing, bool na_last, int* p_o, int* p_o_aux, struct group_infos* p_group_infos); static void int_order_insertion(const r_ssize size, uint32_t* p_x, int* p_o, struct group_infos* p_group_infos); static void int_order_radix(const r_ssize size, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos); /* * `int_order_chunk_impl()` is used by both `int_order_chunk()` and by * `chr_order_chunk()` */ static void int_order_chunk_impl(bool decreasing, bool na_last, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { if (size <= ORDER_INSERTION_BOUNDARY) { int_adjust(decreasing, na_last, size, p_x); int_order_insertion(size, p_x, p_o, p_group_infos); return; } int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint32_t range; int x_min; int_compute_range(p_x, size, &x_min, &range); /* * If in counting order range and on the second or higher column, we will * need `p_o_aux` as working memory. At this point, `p_o` will have been * initialized from ordering the first column. */ if (range < INT_ORDER_COUNTING_RANGE_BOUNDARY) { const bool initialized = true; int_order_counting( p_x, size, x_min, range, initialized, decreasing, na_last, p_o, p_o_aux, p_group_infos ); return; } uint32_t* p_x_aux = (uint32_t*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); int_adjust(decreasing, na_last, size, p_x); int_order_radix( size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } /* * `int_order_impl()` is used by both `int_order()` and by * `chr_order()`. * * For `chr_order()`, the TRUELENGTH values will already be extracted into * `p_lazy_x_chunk`. In this case, we set `copy = false` to tell * `int_order_impl()` to use `p_lazy_x_chunk` directly rather than copying `p_x` * over to `p_x_chunk`. */ static void int_order_impl(const int* p_x, bool decreasing, bool na_last, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { if (size <= ORDER_INSERTION_BOUNDARY) { int* p_o = init_order(p_order); void* p_x_chunk; if (copy) { p_x_chunk = init_lazy_raw(p_lazy_x_chunk); memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); } else { p_x_chunk = p_lazy_x_chunk->p_data; } int_adjust(decreasing, na_last, size, p_x_chunk); int_order_insertion(size, p_x_chunk, p_o, p_group_infos); return; } uint32_t range; int x_min; int_compute_range(p_x, size, &x_min, &range); /* * If in counting order range and on the first column / single vector, * `p_o_aux` won't be used, so no need to initialize it. * * Also, `p_o` will be filled directly, so for performance we don't * initialize its order. */ if (range < INT_ORDER_COUNTING_RANGE_BOUNDARY) { const bool initialized = false; int* p_o = p_order->p_data; int* p_o_aux = (int*) p_lazy_o_aux->p_data; int_order_counting( p_x, size, x_min, range, initialized, decreasing, na_last, p_o, p_o_aux, p_group_infos ); p_order->initialized = true; return; } int* p_o = init_order(p_order); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint32_t* p_x_aux = (uint32_t*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); void* p_x_chunk; if (copy) { p_x_chunk = init_lazy_raw(p_lazy_x_chunk); memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); } else { p_x_chunk = p_lazy_x_chunk->p_data; } int_adjust(decreasing, na_last, size, p_x_chunk); int_order_radix( size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } // ----------------------------------------------------------------------------- static inline uint32_t int_map_to_uint32(int x); /* * - Shifts the integer elements of `p_x` in a way that correctly maintains * ordering for `na_last` and `decreasing` * * - After shifting, also maps the elements from `int32_t` to `uint32_t` and * stores them back in `p_x`. * * - Used before both the integer insertion sort and radix sort, which both * expect their input to already have been "adjusted" for `na_last` and * `decreasing` and expect a `uint32_t` pointer input. * * - If `na_last = true`, `NA` is always the maximum element, so we set it to * `UINT32_MAX`. In that case, we also shift all non-NA values down by 1 to * make room for it (defined by `na_shift`). * * - If `na_last = false`, we set `NA` to the minimum element of 0. * * - The multiplication by `direction` applies to non-NA values and correctly * orders inputs based on whether we are in a decreasing order or not. */ static void int_adjust(const bool decreasing, const bool na_last, const r_ssize size, void* p_x) { const int direction = decreasing ? -1 : 1; const uint32_t na_u32 = na_last ? UINT32_MAX : 0; const int na_shift = na_last ? -1 : 0; const int* p_x_int = (const int*) p_x; uint32_t* p_x_u32 = (uint32_t*) p_x; for (r_ssize i = 0; i < size; ++i) { int elt = p_x_int[i]; if (elt == NA_INTEGER) { p_x_u32[i] = na_u32; continue; } elt = elt * direction + na_shift; p_x_u32[i] = int_map_to_uint32(elt); } } #define HEX_UINT32_SIGN_BIT 0x80000000u // Flipping the sign bit is all we need to do to map in an order preserving way. // [INT32_MIN, INT32_MAX] => [0, UINT32_MAX] static inline uint32_t int_map_to_uint32(int x) { return ((uint32_t) x) ^ HEX_UINT32_SIGN_BIT; } #undef HEX_UINT32_SIGN_BIT // ----------------------------------------------------------------------------- /* * `int_compute_range()` computes the range of all values in `p_x`. * It is used by counting sort to computes buckets with `p_x[i] - x_min`. * * - `p_range` and `p_x_min` are updated on the way out to retain both the * range and the minimum value. * * - `NA` values are skipped over. If all values are `NA`, we defer to radix * sort (which definitely can handle that case) by returning a `range` of the * maximum uint32 value (which will be greater than * INT_ORDER_COUNTING_RANGE_BOUNDARY). */ static void int_compute_range(const int* p_x, r_ssize size, int* p_x_min, uint32_t* p_range) { uint32_t range = UINT32_MAX; int x_min = NA_INTEGER; int x_max = NA_INTEGER; r_ssize i = 0; // Find first non-NA value while (i < size) { const int elt = p_x[i]; if (elt == NA_INTEGER) { ++i; continue; } x_min = elt; x_max = elt; range = 0; // Bump to next `i` since we know this one's value ++i; break; } // All NAs - Return max range to signal to use radix sort if (x_min == NA_INTEGER) { *p_x_min = x_min; *p_range = range; return; } // Now that we have initial values, iterate through the rest // to compute the final min/max. for (r_ssize j = i; j < size; ++j) { const int elt = p_x[j]; if (elt == NA_INTEGER) { continue; } if (elt > x_max) { x_max = elt; } else if (elt < x_min) { x_min = elt; } } /* * - Max possible range is from * `c(.Machine$integer.max, -.Machine$integer.max)` which is exactly the * max of a `uint32_t`. * - We need to go up to `intmax_t` to avoid intermediate overflow. * - `+ 1` to get an inclusive range on both ends. */ range = (uint32_t) r__intmax_add(r__intmax_subtract(x_max, x_min), 1); *p_x_min = x_min; *p_range = range; } // ----------------------------------------------------------------------------- /* * The counting sort expects `p_x` to be unadjusted (i.e. `int_adjust()` has * not been used). It handles `decreasing` and `na_last` internally. * * Counting sort is used when `p_x` has a range less than * `INT_ORDER_COUNTING_RANGE_BOUNDARY`. In these cases radix sort * doesn't spread out values as much when looking at individual radixes. * * Counting sort does not modify `p_x` in any way. */ static void int_order_counting(const int* p_x, r_ssize size, int x_min, uint32_t range, bool initialized, bool decreasing, bool na_last, int* p_o, int* p_o_aux, struct group_infos* p_group_infos) { // - Only allocate this once (counts are reset to 0 at end) // - Allocating as static allows us to allocate an array this large // - `+ 1` to ensure there is room for the extra `NA` bucket static r_ssize p_counts[INT_ORDER_COUNTING_RANGE_BOUNDARY + 1] = { 0 }; // `NA` values get counted in the last used bucket uint32_t na_bucket = range; r_ssize na_count = 0; // Sanity check if (range > INT_ORDER_COUNTING_RANGE_BOUNDARY) { Rf_errorcall(R_NilValue, "Internal error: `range > INT_ORDER_COUNTING_RANGE_BOUNDARY`."); } // Histogram pass for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; if (elt == NA_INTEGER) { ++na_count; } else { const uint32_t bucket = elt - x_min; ++p_counts[bucket]; } } // Add `NA` counts once at the end p_counts[na_bucket] = na_count; r_ssize cumulative = 0; // Handle decreasing/increasing by altering the order in which // counts are accumulated const int direction = decreasing ? -1 : 1; r_ssize j = decreasing ? range - 1 : 0; // `na_last = false` pushes NA counts to the front if (!na_last && na_count != 0) { p_counts[na_bucket] = cumulative; cumulative += na_count; groups_size_maybe_push(na_count, p_group_infos); } // Accumulate counts, skip zeros for (uint32_t i = 0; i < range; ++i) { r_ssize count = p_counts[j]; if (count == 0) { j += direction; continue; } // Insert current cumulative value, then increment p_counts[j] = cumulative; cumulative += count; // At this point we will handle this group completely groups_size_maybe_push(count, p_group_infos); j += direction; } // `na_last = true` pushes NA counts to the back if (na_last && na_count != 0) { p_counts[na_bucket] = cumulative; groups_size_maybe_push(na_count, p_group_infos); } // If order is not initialized, we are on the first column / atomic vector // and can place the order directly into the result. Much faster than // initializing, placing in `p_o_aux`, and copying back over. if (initialized) { for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; uint32_t bucket = (elt == NA_INTEGER) ? na_bucket : elt - x_min; const r_ssize loc = p_counts[bucket]++; p_o_aux[loc] = p_o[i]; } memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); } else { for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; uint32_t bucket = (elt == NA_INTEGER) ? na_bucket : elt - x_min; const r_ssize loc = p_counts[bucket]++; p_o[loc] = i + 1; } } // Reset counts for next column. // Only reset what we might have touched. // `+ 1` to reset the NA bucket too. memset(p_counts, 0, (range + 1) * sizeof(r_ssize)); } // ----------------------------------------------------------------------------- /* * `int_order_insertion()` is used in two ways: * - It is how we "finish off" radix sorts rather than deep recursion. * - If we have an original `x` input that is small enough, we just immediately * insertion sort it. * * For small inputs, it is much faster than deeply recursing with * radix ordering. * * Insertion ordering expects that `p_x` has been adjusted with `int_adjust()` * which takes care of `na_last` and `decreasing` and also maps `int32_t` to * `uint32_t` ahead of time. */ static void int_order_insertion(const r_ssize size, uint32_t* p_x, int* p_o, struct group_infos* p_group_infos) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const uint32_t x_elt = p_x[i]; const int o_elt = p_o[i]; r_ssize j = i - 1; while (j >= 0) { const uint32_t x_cmp_elt = p_x[j]; if (x_elt >= x_cmp_elt) { break; } int o_cmp_elt = p_o[j]; // Swap p_x[j + 1] = x_cmp_elt; p_o[j + 1] = o_cmp_elt; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_o[j + 1] = o_elt; } // We've ordered a small chunk, we need to push at least one group size. // Depends on the post-ordered results so we have to do this // in a separate loop. r_ssize group_size = 1; uint32_t previous = p_x[0]; for (r_ssize i = 1; i < size; ++i) { const uint32_t current = p_x[i]; // Continue the current group run if (current == previous) { ++group_size; continue; } // Push current run size and reset size tracker groups_size_maybe_push(group_size, p_group_infos); group_size = 1; previous = current; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); } // ----------------------------------------------------------------------------- static uint8_t int_compute_skips(const uint32_t* p_x, r_ssize size, bool* p_skips); static void int_order_radix_recurse(const r_ssize size, const uint8_t pass, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos); /* * Integer radix ordering entry point * * Expects that `int_adjust()` has been called on `p_x`, which takes care * of `na_last` and `decreasing` and also maps `int32_t` to `uint32_t` once * up front so we don't have to do it for each radix pass. * * Sorts `p_x` and `p_o` in place */ static void int_order_radix(const r_ssize size, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos) { bool p_skips[INT_MAX_RADIX_PASS]; uint8_t pass = int_compute_skips(p_x, size, p_skips); // Skipped all passes - Happens when `x` is 1 value repeated if (pass == INT_MAX_RADIX_PASS) { groups_size_maybe_push(size, p_group_infos); return; } int_order_radix_recurse( size, pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_skips, p_group_infos ); } // ----------------------------------------------------------------------------- static inline uint8_t int_extract_uint32_byte(uint32_t x, uint8_t shift); /* * Recursive function for radix ordering. Orders the current byte, then iterates * over the sub groups and recursively calls itself on each subgroup to order * the next byte. */ static void int_order_radix_recurse(const r_ssize size, const uint8_t pass, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { int_order_insertion(size, p_x, p_o, p_group_infos); return; } // Skip passes where our up front check told us that all bytes were the same uint8_t next_pass = pass + 1; r_ssize* p_counts_next_pass = p_counts + UINT8_MAX_SIZE; while (next_pass < INT_MAX_RADIX_PASS && p_skips[next_pass]) { ++next_pass; p_counts_next_pass += UINT8_MAX_SIZE; } const uint8_t radix = PASS_TO_RADIX(pass, INT_MAX_RADIX_PASS); const uint8_t shift = radix * 8; uint8_t byte = 0; // Histogram for this pass for (r_ssize i = 0; i < size; ++i) { const uint32_t x_elt = p_x[i]; byte = int_extract_uint32_byte(x_elt, shift); p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass == INT_MAX_RADIX_PASS) { // If we are already at the last pass, we are done groups_size_maybe_push(size, p_group_infos); } else { // Otherwise, recurse on next byte using the same `size` since // the group size hasn't changed int_order_radix_recurse( size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Replace with `cumulative` first, then bump `cumulative`. // `p_counts` now represents starting locations for each radix group. p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_o_aux[loc] = p_o[i]; p_x_aux[loc] = p_x[i]; } // Copy back over memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } // Set to zero to clear for subsequent groups p_counts[i] = 0; // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { groups_size_maybe_push(1, p_group_infos); ++p_x; ++p_o; continue; } // Can get here in the case of ties, like c(1L, 1L), which have a // `group_size` of 2 in the last radix, but there is nothing left to // compare so we are done. if (next_pass == INT_MAX_RADIX_PASS) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // Order next byte of this subgroup int_order_radix_recurse( group_size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); p_x += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- /* * Do a parallel histogram run over all 4 passes to determine if any passes * can be skipped (because all bytes were the same) */ static uint8_t int_compute_skips(const uint32_t* p_x, r_ssize size, bool* p_skips) { uint8_t radix_start = PASS_TO_RADIX(0, INT_MAX_RADIX_PASS); uint8_t shift_start = radix_start * 8; for (uint8_t i = 0; i < INT_MAX_RADIX_PASS; ++i) { p_skips[i] = true; } uint8_t p_bytes[INT_MAX_RADIX_PASS]; const uint32_t elt0 = p_x[0]; // Get bytes of first element in MSD->LSD order. // Placed in `p_bytes` in a way that aligns with the `pass` variable for (uint8_t pass = 0, shift = shift_start; pass < INT_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { p_bytes[pass] = int_extract_uint32_byte(elt0, shift); } // Check to see which passes are skippable for (r_ssize i = 1; i < size; ++i) { uint8_t n_skips = INT_MAX_RADIX_PASS; const uint32_t elt = p_x[i]; for (uint8_t pass = 0, shift = shift_start; pass < INT_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { bool skip = p_skips[pass]; if (skip) { p_skips[pass] = (p_bytes[pass] == int_extract_uint32_byte(elt, shift)); } else { --n_skips; } } // No passes are skippable if (n_skips == 0) { break; } } uint8_t pass = 0; // Shift forward to the first pass with varying bytes while (pass < INT_MAX_RADIX_PASS && p_skips[pass]) { ++pass; } return pass; } // ----------------------------------------------------------------------------- // Bytes will be extracted 8 bits at a time. // This is a MSB radix sort, so they are extracted MSB->LSB. static inline uint8_t int_extract_uint32_byte(uint32_t x, uint8_t shift) { return (x >> shift) & UINT8_MAX; } // ----------------------------------------------------------------------------- /* * Entry points for logical ordering. These just use integer infrastructure. */ static void lgl_order_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { int_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void lgl_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { int_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } // ----------------------------------------------------------------------------- static void dbl_order_chunk_impl(bool decreasing, bool na_last, bool nan_distinct, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); /* * These are the main entry points for double ordering. They are nearly * identical except `dbl_order()` assumes that `p_x` cannot be * modified directly and is user input. * * `dbl_order_chunk()` assumes `p_x` is modifiable by reference. It is called * when iterating over data frame columns and `p_x` is the 2nd or greater * column, in which case `p_x` is really a chunk of that column that has been * copied into `x_chunk`. * * `dbl_order()` assumes `p_x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly. * * Unlike `int_order_chunk()`, there is no intermediate counting sort, as it is * sort of unclear how to compute the range of a double vector in the same * way, and even after adjusting to a `uint64_t`, it is unlikely that they * have a very small range of values. */ static void dbl_order_chunk(bool decreasing, bool na_last, bool nan_distinct, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { void* p_x_chunk = p_lazy_x_chunk->p_data; dbl_order_chunk_impl( decreasing, na_last, nan_distinct, size, p_x_chunk, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void dbl_order_impl(const double* p_x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos); static void dbl_order(SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const double* p_x = REAL_RO(x); dbl_order_impl( p_x, decreasing, na_last, nan_distinct, size, true, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void dbl_adjust(const bool decreasing, const bool na_last, const bool nan_distinct, const r_ssize size, void* p_x); static void dbl_order_insertion(const r_ssize size, uint64_t* p_x, int* p_o, struct group_infos* p_group_infos); static void dbl_order_radix(const r_ssize size, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos); /* * Used by `dbl_order_chunk()` and by `cpl_order()` * * Unlike `int_order_chunk_impl()`, `dbl_order_chunk_impl()` also deals with * sortedness since we don't have an up front sortedness check on complex * vectors. */ static void dbl_order_chunk_impl(bool decreasing, bool na_last, bool nan_distinct, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const enum vctrs_sortedness sortedness = dbl_sortedness( p_x, size, decreasing, na_last, nan_distinct, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } dbl_adjust(decreasing, na_last, nan_distinct, size, p_x); if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x, p_o, p_group_infos); return; } uint64_t* p_x_aux = (uint64_t*) init_lazy_raw(p_lazy_x_aux); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); dbl_order_radix( size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } /* * Used by `dbl_order()` and by `cpl_order()` * * Unlike `int_order_impl()`, `dbl_order_impl()` also deals with sortedness * since we don't have an up front sortedness check on complex vectors. * * When dealing with complex vectors, `p_x` and `p_lazy_x_chunk->p_data` will * already point to the same memory. In this case, we don't need to copy `p_x` * into `p_lazy_x_chunk`, so we set `copy = false` which tells * `dbl_order_impl()` to just use `p_lazy_x_chunk` directly. */ static void dbl_order_impl(const double* p_x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { const enum vctrs_sortedness sortedness = dbl_sortedness( p_x, size, decreasing, na_last, nan_distinct, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } int* p_o = init_order(p_order); void* p_x_chunk; if (copy) { p_x_chunk = init_lazy_raw(p_lazy_x_chunk); memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); } else { p_x_chunk = p_lazy_x_chunk->p_data; } dbl_adjust(decreasing, na_last, nan_distinct, size, p_x_chunk); if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x_chunk, p_o, p_group_infos); return; } uint64_t* p_x_aux = (uint64_t*) init_lazy_raw(p_lazy_x_aux); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); memset(p_counts, 0, p_lazy_counts->size); dbl_order_radix( size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } // ----------------------------------------------------------------------------- static inline void dbl_adjust_nan_identical(const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64); static inline void dbl_adjust_nan_distinct(const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64); /* * When mapping double -> uint64_t: * * Smallest possible value comes from: * dbl_map_to_uint64(-Inf) -> 4503599627370495 * * One larger is: * dbl_map_to_uint64(-.Machine$double.xmax) -> 4503599627370496 * * Largest possible value comes from: * dbl_map_to_uint64(Inf) -> 18442240474082181120 * * One smaller is: * dbl_map_to_uint64(.Machine$double.xmax) -> 18442240474082181119 * * This gives us room to manually map: * If (!nan_distinct): * dbl_map_to_uint64(NA_real_) -> UINT64_MAX (or 0 if `na_last = false`) * dbl_map_to_uint64(NaN) -> UINT64_MAX (or 0 if `na_last = false`) * If (nan_distinct): * dbl_map_to_uint64(NA_real_) -> UINT64_MAX (or 0 if `na_last = false`) * dbl_map_to_uint64(NaN) -> UINT64_MAX - 1 (or 1 if `na_last = false`) * When using `nan_distinct`, NaN is always ordered between NA_real_ and * non-missing numbers, regardless of `decreasing`. */ static void dbl_adjust(const bool decreasing, const bool na_last, const bool nan_distinct, const r_ssize size, void* p_x) { double* p_x_dbl = (double*) p_x; uint64_t* p_x_u64 = (uint64_t*) p_x; if (nan_distinct) { dbl_adjust_nan_distinct(decreasing, na_last, size, p_x_dbl, p_x_u64); } else { dbl_adjust_nan_identical(decreasing, na_last, size, p_x_dbl, p_x_u64); } } static inline uint64_t dbl_map_to_uint64(double x); static inline void dbl_adjust_nan_identical(const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64) { const int direction = decreasing ? -1 : 1; const uint64_t na_u64 = na_last ? UINT64_MAX : 0; for (r_ssize i = 0; i < size; ++i) { double elt = p_x_dbl[i]; if (isnan(elt)) { p_x_u64[i] = na_u64; continue; } elt = elt * direction; p_x_u64[i] = dbl_map_to_uint64(elt); } } static inline void dbl_adjust_nan_distinct(const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64) { const int direction = decreasing ? -1 : 1; const uint64_t na_u64 = na_last ? UINT64_MAX : 0; const uint64_t nan_u64 = na_last ? UINT64_MAX - 1 : 1; for (r_ssize i = 0; i < size; ++i) { double elt = p_x_dbl[i]; const enum vctrs_dbl type = dbl_classify(elt); switch (type) { case VCTRS_DBL_number: { elt = elt * direction; p_x_u64[i] = dbl_map_to_uint64(elt); break; } case VCTRS_DBL_missing: { p_x_u64[i] = na_u64; break; } case VCTRS_DBL_nan: { p_x_u64[i] = nan_u64; break; } } } } static inline uint64_t dbl_flip_uint64(uint64_t x); static union { double d; uint64_t u64; } d_u64; /* * Map `double -> `uint64_t` retaining ordering. * * Assumes `x` is not a `NA_real_` or `NaN` value. * Correctly handles `Inf` and `-Inf`. */ static inline uint64_t dbl_map_to_uint64(double x) { // Catch `-0` vs `0` if (x == 0) { x = 0; } // Reinterpret as uint64_t without changing bytes d_u64.d = x; d_u64.u64 = dbl_flip_uint64(d_u64.u64); return d_u64.u64; } #define HEX_UINT64_SIGN 0x8000000000000000u #define HEX_UINT64_ONES 0xffffffffffffffffu // To retain ordering in mapping from double -> uint64_t we always have to // flip the sign bit, and for negative numbers we also flip all of the other // bits. Described more here: http://stereopsis.com/radix.html static inline uint64_t dbl_flip_uint64(uint64_t x) { const uint64_t mask = (x & HEX_UINT64_SIGN) ? HEX_UINT64_ONES : HEX_UINT64_SIGN; return x ^ mask; } #undef HEX_UINT64_SIGN #undef HEX_UINT64_ONES // ----------------------------------------------------------------------------- /* * `dbl_order_insertion()` is used in two ways: * - It is how we "finish off" radix sorts rather than deep recursion. * - If we have an original `x` input that is small enough, we just immediately * insertion sort it. * * For small inputs, it is much faster than deeply recursing with * radix ordering. * * Insertion ordering expects that `p_x` has been adjusted with `dbl_adjust()` * which takes care of `na_last` and `decreasing` and also maps `double` to * `uint64_t` ahead of time. * * It is essentially the same as `int_order_insertion()` with different types. */ static void dbl_order_insertion(const r_ssize size, uint64_t* p_x, int* p_o, struct group_infos* p_group_infos) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const uint64_t x_elt = p_x[i]; const int o_elt = p_o[i]; r_ssize j = i - 1; while (j >= 0) { const uint64_t x_cmp_elt = p_x[j]; if (x_elt >= x_cmp_elt) { break; } int o_cmp_elt = p_o[j]; // Swap p_x[j + 1] = x_cmp_elt; p_o[j + 1] = o_cmp_elt; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_o[j + 1] = o_elt; } // We've ordered a small chunk, we need to push at least one group size. // Depends on the post-ordered results so we have to do this // in a separate loop. r_ssize group_size = 1; uint64_t previous = p_x[0]; for (r_ssize i = 1; i < size; ++i) { const uint64_t current = p_x[i]; // Continue the current group run if (current == previous) { ++group_size; continue; } // Push current run size and reset size tracker groups_size_maybe_push(group_size, p_group_infos); group_size = 1; previous = current; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); } // ----------------------------------------------------------------------------- static uint8_t dbl_compute_skips(const uint64_t* p_x, r_ssize size, bool* p_skips); static void dbl_order_radix_recurse(const r_ssize size, const uint8_t pass, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos); /* * Double radix ordering entry point * * Expects that `dbl_adjust()` has been called on `p_x`, which takes care * of `na_last` and `decreasing` and also maps `double` to `uint64_t` once * up front so we don't have to do it for each radix pass. * * Sorts `p_x` and `p_o` in place */ static void dbl_order_radix(const r_ssize size, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos) { bool p_skips[DBL_MAX_RADIX_PASS]; uint8_t pass = dbl_compute_skips(p_x, size, p_skips); // Skipped all passes - Happens when `x` is 1 value repeated if (pass == DBL_MAX_RADIX_PASS) { groups_size_maybe_push(size, p_group_infos); return; } dbl_order_radix_recurse( size, pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_skips, p_group_infos ); } // ----------------------------------------------------------------------------- static inline uint8_t dbl_extract_uint64_byte(uint64_t x, uint8_t shift); /* * Recursive function for radix ordering. Orders the current byte, then iterates * over the sub groups and recursively calls itself on each subgroup to order * the next byte. * * This needs 8 passes, unlike the 4 required by `int_order_radix()`. */ static void dbl_order_radix_recurse(const r_ssize size, const uint8_t pass, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x, p_o, p_group_infos); return; } // Skip passes where our up front check told us that all bytes were the same uint8_t next_pass = pass + 1; r_ssize* p_counts_next_pass = p_counts + UINT8_MAX_SIZE; while (next_pass < DBL_MAX_RADIX_PASS && p_skips[next_pass]) { ++next_pass; p_counts_next_pass += UINT8_MAX_SIZE; } const uint8_t radix = PASS_TO_RADIX(pass, DBL_MAX_RADIX_PASS); const uint8_t shift = radix * 8; uint8_t byte = 0; // Histogram for (r_ssize i = 0; i < size; ++i) { const uint64_t x_elt = p_x[i]; byte = dbl_extract_uint64_byte(x_elt, shift); p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass == DBL_MAX_RADIX_PASS) { // If we are already at the last pass, we are done groups_size_maybe_push(size, p_group_infos); } else { // Otherwise, recurse on next byte using the same `size` since // the group size hasn't changed dbl_order_radix_recurse( size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Replace with `cumulative` first, then bump `cumulative`. // `p_counts` now represents starting locations for each radix group. p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_o_aux[loc] = p_o[i]; p_x_aux[loc] = p_x[i]; } // Copy back over memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } p_counts[i] = 0; // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { groups_size_maybe_push(1, p_group_infos); ++p_x; ++p_o; continue; } // Can get here in the case of ties, like c(1, 1), which have a // `group_size` of 2 in the last radix, but there is nothing left to // compare so we are done. if (next_pass == DBL_MAX_RADIX_PASS) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // Order next byte of this subgroup dbl_order_radix_recurse( group_size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); p_x += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- /* * Detect completely skippable bytes * * There are 8 passes over a double, 1 for each byte. Often times for the entire * `x` vector a few of those passes are useless because all of the bytes are * the same. This does an up front computation in 1 pass over the data to * determine which bytes are completely skippable. * * It is worth noting that just because byte 0 wasn't skippable doesn't mean * that byte 1 isn't. With the way that doubles are mapped to uint64_t, it * is often the case that, for small doubles, bytes 0-2 aren't skippable but * the rest of them are (for example, this happens with doubles in the range * of 1:128). This provides a nice performance increase there. */ static uint8_t dbl_compute_skips(const uint64_t* p_x, r_ssize size, bool* p_skips) { uint8_t radix_start = PASS_TO_RADIX(0, DBL_MAX_RADIX_PASS); uint8_t shift_start = radix_start * 8; for (uint8_t i = 0; i < DBL_MAX_RADIX_PASS; ++i) { p_skips[i] = true; } uint8_t p_bytes[DBL_MAX_RADIX_PASS]; const uint64_t elt0 = p_x[0]; // Get bytes of first element in MSD->LSD order. // Placed in `p_bytes` in a way that aligns with the `pass` variable for (uint8_t pass = 0, shift = shift_start; pass < DBL_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { p_bytes[pass] = dbl_extract_uint64_byte(elt0, shift); } // Check to see which passes are skippable for (r_ssize i = 1; i < size; ++i) { uint8_t n_skips = DBL_MAX_RADIX_PASS; const uint64_t elt = p_x[i]; for (uint8_t pass = 0, shift = shift_start; pass < DBL_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { bool skip = p_skips[pass]; if (skip) { p_skips[pass] = (p_bytes[pass] == dbl_extract_uint64_byte(elt, shift)); } else { --n_skips; } } // No passes are skippable if (n_skips == 0) { break; } } uint8_t pass = 0; // Shift forward to the first pass with varying bytes while (pass < DBL_MAX_RADIX_PASS && p_skips[pass]) { ++pass; } return pass; } // ----------------------------------------------------------------------------- // Bytes will be extracted 8 bits at a time. // This is a MSB radix sort, so they are extracted MSB->LSB. static inline uint8_t dbl_extract_uint64_byte(uint64_t x, uint8_t shift) { return (x >> shift) & UINT8_MAX; } // ----------------------------------------------------------------------------- /* * `cpl_order()` uses the fact that Rcomplex is really just a rcrd * type of two double vectors. It orders first on the real vector, and then on * the imaginary vector. * * `cpl_order_chunk()` isn't required. It would only be called from data frames * when there is a complex column, but in those cases we split the column * into two double vectors (real / imaginary) and "rerun" the column using * `dbl_order_chunk()`. */ static void cpl_order(SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { // We treat complex as a two column data frame, so we have to use group // information for at least the first column. // - If a complex atomic vector is used, `ignore_groups` will be true unless // the user also requested group information. // - If the first column of a df is a complex column, `ignore_groups` will // be false. bool reset_ignore_groups = false; if (p_group_infos->ignore_groups) { p_group_infos->ignore_groups = false; reset_ignore_groups = true; } const Rcomplex* p_x_cpl = COMPLEX_RO(x); // When a complex column is present, // `lazy_x_chunk` and `lazy_x_aux` are created to have the // size of a double vector. double* p_x_chunk_dbl = (double*) init_lazy_raw(p_lazy_x_chunk); // Handle the real portion first for (r_ssize i = 0; i < size; ++i) { p_x_chunk_dbl[i] = cpl_normalise_missing(p_x_cpl[i]).r; } /* * Call double ordering algorithm on real section. * * In this case, both `p_x_chunk_dbl` and `p_lazy_x_chunk` are passed through, * but we set `copy = false` which tells `dbl_order_impl()` not to copy * the input (`p_x_chunk_dbl`) over to the chunk vector of (`p_lazy_x_chunk`). * It has already been done when we extracted the real section. */ dbl_order_impl( p_x_chunk_dbl, decreasing, na_last, nan_distinct, size, false, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); // Ordering will now be initialized int* p_o = p_order->p_data; // Reset `ignore_groups` for the second pass if we don't need to track groups. // This happens if an atomic complex vector is passed in and the user // hasn't requested group information. if (reset_ignore_groups) { p_group_infos->ignore_groups = true; } // Get the number of group chunks from the first pass struct group_info* p_group_info_pre = groups_current(p_group_infos); r_ssize n_groups = p_group_info_pre->n_groups; // If there were no ties, we are completely done if (n_groups == size) { return; } // Swap to other group info to prepare for the imaginary section groups_swap(p_group_infos); // Fill with the imaginary portion. // Uses updated ordering to place it in sequential order. for (r_ssize i = 0; i < size; ++i) { const int loc = p_o[i] - 1; p_x_chunk_dbl[i] = cpl_normalise_missing(p_x_cpl[loc]).i; } // Iterate over the group chunks from the first pass for (r_ssize group = 0; group < n_groups; ++group) { r_ssize group_size = p_group_info_pre->p_data[group]; // Fast handling of simplest case if (group_size == 1) { ++p_x_chunk_dbl; ++p_o; groups_size_maybe_push(1, p_group_infos); continue; } dbl_order_chunk_impl( decreasing, na_last, nan_distinct, group_size, p_x_chunk_dbl, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); p_x_chunk_dbl += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- static void chr_mark_sorted_uniques(const SEXP* p_x, r_ssize size, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_bytes, struct truelength_info* p_truelength_info); static inline void chr_extract_ordering(const SEXP* p_x, r_ssize size, int* p_x_aux); static void chr_order_radix(const r_ssize size, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes); /* * These are the main entry points for character ordering. * * `chr_order_chunk()` assumes `p_x` is modifiable by reference. It also * assumes that `chr_mark_sorted_uniques()` has already been called. For data * frame columns where `chr_order_chunk()` is called on each group chunk, * `chr_mark_sorted_uniques()` is only called once on the entire column. * * `chr_order()` assumes `x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly. * * `chr_order_chunk()` essentially calls `int_order_chunk()`, however we can't * call it directly because we don't have access to all the required arguments. * Specifically we reuse `p_x` here as the auxiliary data structure for integer * ordering, but to call `int_order_chunk()` we would need the lazy wrapper * for it. * * Because these functions modify TRUELENGTHs, we have to reset them on the * way out. `chr_order()` does it directly, but `chr_order_chunk()` relies * on `df_order()` to do it after the entire column is processed. It is * important to not error inside these functions because the TRUELENGTHs won't * be reset if we do. */ static void chr_order_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos) { void* p_x_chunk = p_lazy_x_chunk->p_data; const enum vctrs_sortedness sortedness = chr_sortedness( p_x_chunk, size, decreasing, na_last, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } void* p_x_aux = init_lazy_raw(p_lazy_x_aux); // Move integer ordering into `p_x_aux`. // `p_x_aux` is allocated as the larger of `int` and `SEXP*`. chr_extract_ordering(p_x_chunk, size, p_x_aux); /* * Call integer ordering algorithm on TRUELENGTHs. Reuse the chunk memory of * `p_lazy_x_chunk` that held the current `SEXP*` chunk as the new auxiliary * memory since those are no longer needed. It is allocated as the * larger of `int` and `SEXP*` so it is large enough. */ int_order_chunk_impl( decreasing, na_last, size, p_x_aux, p_o, p_lazy_x_chunk, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } struct chr_order_info { SEXP x; bool decreasing; bool na_last; r_ssize size; struct order* p_order; struct lazy_raw* p_lazy_x_chunk; struct lazy_raw* p_lazy_x_aux; struct lazy_raw* p_lazy_o_aux; struct lazy_raw* p_lazy_bytes; struct lazy_raw* p_lazy_counts; struct group_infos* p_group_infos; struct truelength_info* p_truelength_info; }; struct chr_order_cleanup_info { struct truelength_info* p_truelength_info; }; static SEXP chr_order_exec(void* p_data); static void chr_order_cleanup(void* p_data); /* * `chr_order()` directly modifies the `TRUELENGTH()` values of the CHARSXPs * in `x`. These must be reset after the call with `truelength_reset()`. To * ensure that this function is called (even on a longjump), * `R_ExecWithCleanup()` is used. */ static void chr_order(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { struct chr_order_info info = { .x = x, .decreasing = decreasing, .na_last = na_last, .size = size, .p_order = p_order, .p_lazy_x_chunk = p_lazy_x_chunk, .p_lazy_x_aux = p_lazy_x_aux, .p_lazy_o_aux = p_lazy_o_aux, .p_lazy_bytes = p_lazy_bytes, .p_lazy_counts = p_lazy_counts, .p_group_infos = p_group_infos, .p_truelength_info = p_truelength_info }; struct chr_order_cleanup_info cleanup_info = { .p_truelength_info = p_truelength_info }; R_ExecWithCleanup( chr_order_exec, &info, chr_order_cleanup, &cleanup_info ); } static void chr_order_internal(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static SEXP chr_order_exec(void* p_data) { struct chr_order_info* p_info = (struct chr_order_info*) p_data; chr_order_internal( p_info->x, p_info->decreasing, p_info->na_last, p_info->size, p_info->p_order, p_info->p_lazy_x_chunk, p_info->p_lazy_x_aux, p_info->p_lazy_o_aux, p_info->p_lazy_bytes, p_info->p_lazy_counts, p_info->p_group_infos, p_info->p_truelength_info ); return R_NilValue; } static void chr_order_cleanup(void* p_data) { struct chr_order_cleanup_info* p_info = (struct chr_order_cleanup_info*) p_data; truelength_reset(p_info->p_truelength_info); } static void chr_order_internal(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { const SEXP* p_x = STRING_PTR_RO(x); const enum vctrs_sortedness sortedness = chr_sortedness( p_x, size, decreasing, na_last, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } // Sort unique strings and mark their truelengths with ordering. // Use `p_lazy_x_chunk` as auxiliary memory for `chr_order_radix()` so we // hopefully don't have to also allocate `p_lazy_x_aux`. chr_mark_sorted_uniques( p_x, size, p_lazy_x_chunk, p_lazy_bytes, p_truelength_info ); void* p_x_chunk = init_lazy_raw(p_lazy_x_chunk); // Move integer ordering into `p_x_chunk`. // `p_x_chunk` is allocated as the larger of `int` and `SEXP*`. chr_extract_ordering(p_x, size, p_x_chunk); /* * Call integer ordering algorithm on TRUELENGTHs. * * In this case, both `p_x_chunk` and `p_lazy_x_chunk` are passed through, * but we set `copy = false` which tells `int_order_impl()` not to copy * the input (`p_x_chunk`) over to the chunk vector of (`p_lazy_x_chunk`). * It has already been done when we extracted the truelength ordering. */ int_order_impl( p_x_chunk, decreasing, na_last, size, false, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } // ----------------------------------------------------------------------------- /* * Pull ordering off of marked `p_x` and place it into `p_x_aux` working memory. * We mark the CHARSXP TRUELENGTHs with negative ordering to be different from * what R might use, so that gets reversed here to get the true ordering back. */ static inline void chr_extract_ordering(const SEXP* p_x, r_ssize size, int* p_x_aux) { for (r_ssize i = 0; i < size; ++i) { SEXP elt = p_x[i]; if (elt == NA_STRING) { p_x_aux[i] = NA_INTEGER; continue; } // Negative to flip where we set the order using a negative value. // Cast to `int` because `TRUELENGTH()` returns a `r_ssize`. p_x_aux[i] = (int) -TRUELENGTH(elt); } } // ----------------------------------------------------------------------------- static void chr_mark_uniques(const SEXP* p_x, r_ssize size, struct truelength_info* p_truelength_info); /* * `chr_mark_sorted_uniques()` runs through the strings in `p_x` and places the * unique strings in `p_truelength_info->p_uniques`. It marks the unique ones * with a negative TRUELENGTH as it goes. Since identical strings share the * same CHARSXP, this marks all strings in the vector at once. * * After detecting all unique strings, it sorts them in place with * `chr_order_radix()`. * * Finally, it loops over the now sorted unique strings and marks them with * their ordering (as a negative value). This allows `chr_order_chunk()` to loop * through `p_x` and just pluck off the TRUELENGTH value, which will be an * integer proxy for the value's ordering. * * `truelength_save()` also saves the unique strings and their original * TRUELENGTH values so they can be reset after each column with * `truelength_reset()`. */ static void chr_mark_sorted_uniques(const SEXP* p_x, r_ssize size, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_bytes, struct truelength_info* p_truelength_info) { chr_mark_uniques(p_x, size, p_truelength_info); r_ssize n_uniques = p_truelength_info->n_uniques_used; SEXP* p_x_aux = (SEXP*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); // Sorts uniques in ascending order using `p_x_aux` for working memory. // Assumes no `NA`! chr_order_radix( n_uniques, p_truelength_info->max_string_size, p_truelength_info->p_uniques, p_x_aux, p_truelength_info->p_sizes, p_truelength_info->p_sizes_aux, p_bytes ); // Mark unique sorted strings with their order. // Use a negative value to differentiate with R. for (r_ssize i = 0; i < n_uniques; ++i) { SEXP elt = p_truelength_info->p_uniques[i]; SET_TRUELENGTH(elt, -i - 1); } } static void chr_mark_uniques(const SEXP* p_x, r_ssize size, struct truelength_info* p_truelength_info) { for (r_ssize i = 0; i < size; ++i) { SEXP elt = p_x[i]; // `NA_STRING` is replaced by `NA_INTEGER` for use in integer ordering if (elt == NA_STRING) { continue; } r_ssize truelength = TRUELENGTH(elt); if (truelength < 0) { // We have already seen and saved this string continue; } if (truelength > 0) { // Retain R's usage of TRUELENGTH. Normally defaults to 0, so if the value // is positive, it means R is using it. Should be extremely rare. truelength_save_string(elt, truelength, p_truelength_info); } // CHARSXP string lengths are never "long" int elt_size = (int) r_length(elt); // Track max string size to know how deep to recurse if (p_truelength_info->max_string_size < elt_size) { p_truelength_info->max_string_size = elt_size; } // Save this unique value and its size so we can order uniques truelength_save_unique(elt, p_truelength_info); truelength_save_size(elt_size, p_truelength_info); // Mark as negative to note that we have seen this string. // R uses positive or zero truelengths. SET_TRUELENGTH(elt, -1); } } // ----------------------------------------------------------------------------- static bool chr_str_ge(SEXP x, SEXP y, int x_size, const R_len_t pass); /* * Insertion order for character vectors. This occurs in the radix ordering * once we drop below a certain chunk size. * * One optimization done here is to take advantage of the `pass` info, which * will indicate that all characters before this pass are identical already * and don't need to be checked by `strcmp()`. */ static void chr_order_insertion(const r_ssize size, const R_len_t pass, SEXP* p_x, int* p_sizes) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const SEXP x_elt = p_x[i]; const int x_size = p_sizes[i]; r_ssize j = i - 1; while (j >= 0) { const SEXP x_cmp_elt = p_x[j]; if (chr_str_ge(x_elt, x_cmp_elt, x_size, pass)) { break; } int x_cmp_size = p_sizes[j]; // Swap p_x[j + 1] = x_cmp_elt; p_sizes[j + 1] = x_cmp_size; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_sizes[j + 1] = x_size; } } // ----------------------------------------------------------------------------- static void chr_order_radix_recurse(const r_ssize size, const R_len_t pass, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes); /* * Entry point for radix ordering of characters. * * This is different from with integers / doubles because: * - `p_x` will contain only unique strings * - `p_x` will not contain any `NA` strings * - We just need to sort `p_x` in place, no need to track group information, * which is instead done by `int_order_chunk()` later * - The number of passes is variable here, because strings have a variable * length. * - We also track the character sizes because repeated `r_length()` calls * can get expensive over just indexing into the array. */ static void chr_order_radix(const r_ssize size, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes) { R_len_t pass = 0; chr_order_radix_recurse( size, pass, max_size, p_x, p_x_aux, p_sizes, p_sizes_aux, p_bytes ); } // ----------------------------------------------------------------------------- /* * Recursive function for ordering the `p_x` unique strings * * For ASCII strings, 1 character aligns with 1 byte, so we can order them * 1 character at a time from left to right (MSB to LSB). * * For UTF-8 strings, the implementation of UTF-8 is done so that UTF-8 * characters are made up of between 1-4 bytes. Luckily, treating them as * a sequence of single bytes like we do for ASCII orders identically to * treating them as their full 1-4 byte sequence. * * Because these are variable length, some strings are shorter than others. * Shorter strings should order lower than longer strings if they are otherwise * equivalent, so we reserve the 0-th bucket of `p_counts` for counting * implicit empty strings. Normally this would be an issue because this is * the bucket for ASCII value 0, but this is the null value, which is not * allowed in R strings! * * Additionally, we don't have to worry about having an `NA` bucket because * there will be no missing values in the unique set. */ static void chr_order_radix_recurse(const r_ssize size, const R_len_t pass, const R_len_t max_size, SEXP* p_x, SEXP* p_x_aux, int* p_sizes, int* p_sizes_aux, uint8_t* p_bytes) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { chr_order_insertion(size, pass, p_x, p_sizes); return; } // We don't carry along `p_counts` from an up front allocation since // the strings have variable length r_ssize p_counts[UINT8_MAX_SIZE] = { 0 }; const int next_pass = pass + 1; // NA values won't be in `p_x` so we can reserve the 0th bucket for "" const uint8_t missing_bucket = 0; uint8_t byte = 0; // Histogram for (r_ssize i = 0; i < size; ++i) { const R_len_t x_elt_size = p_sizes[i]; // Check if there are characters left in the string and extract the next // one if so, otherwise assume implicit "". if (pass < x_elt_size) { const SEXP x_elt = p_x[i]; const char* c_x_elt = CHAR(x_elt); byte = (uint8_t) c_x_elt[pass]; } else { byte = missing_bucket; } p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass != max_size) { // If we are not at the last pass, recurse on next byte using // the same `size` since the group size hasn't changed chr_order_radix_recurse( size, next_pass, max_size, p_x, p_x_aux, p_sizes, p_sizes_aux, p_bytes ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Insert current cumulative value, then increment p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_x_aux[loc] = p_x[i]; p_sizes_aux[loc] = p_sizes[i]; } // Copy back over memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); memcpy(p_sizes, p_sizes_aux, size * sizeof(*p_sizes_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { ++p_x; ++p_sizes; continue; } if (next_pass == max_size) { p_x += group_size; p_sizes += group_size; continue; } // Order next byte of this subgroup chr_order_radix_recurse( group_size, next_pass, max_size, p_x, p_x_aux, p_sizes, p_sizes_aux, p_bytes ); p_x += group_size; p_sizes += group_size; } } // ----------------------------------------------------------------------------- /* * Check if `x` is greater than `y` lexicographically in a C-locale. * * - `x` and `y` are guaranteed to be different and not `NA`, so we don't gain * anything from pointer comparisons. * * - This is called from `chr_order_insertion()` from inside the radix ordering, * so we can use information about the current `pass` to only compare * characters that are actually different. */ static bool chr_str_ge(SEXP x, SEXP y, int x_size, const R_len_t pass) { // Pure insertion sort - we know nothing yet if (pass == 0) { const char* c_x = CHAR(x); const char* c_y = CHAR(y); int cmp = strcmp(c_x, c_y); return cmp >= 0; } // Otherwise we know they are equal up to the position before `pass`, but // it might have been equality with implicit "" so we need to check the // length of one of them const int last_pass = pass - 1; // We are comparing length with C 0-based indexing so we have to do +1. if (x_size < last_pass + 1) { return true; } const char* c_x = CHAR(x); const char* c_y = CHAR(y); // Now start the comparison at `last_pass`, which we know exists c_x = c_x + last_pass; c_y = c_y + last_pass; int cmp = strcmp(c_x, c_y); return cmp >= 0; } // ----------------------------------------------------------------------------- static void chr_appearance_counting(const SEXP* p_x, r_ssize size, bool initialized, int* p_o, int* p_o_aux, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static void chr_appearance_chunk(bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { const SEXP* p_x_chunk = (const SEXP*) p_lazy_x_chunk->p_data; const bool initialized = true; int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); chr_appearance_counting( p_x_chunk, size, initialized, p_o, p_o_aux, p_group_infos, p_truelength_info ); } struct chr_appearance_info { SEXP x; bool decreasing; bool na_last; r_ssize size; struct order* p_order; struct lazy_raw* p_lazy_x_chunk; struct lazy_raw* p_lazy_x_aux; struct lazy_raw* p_lazy_o_aux; struct lazy_raw* p_lazy_bytes; struct lazy_raw* p_lazy_counts; struct group_infos* p_group_infos; struct truelength_info* p_truelength_info; }; struct chr_appearance_cleanup_info { struct truelength_info* p_truelength_info; }; static SEXP chr_appearance_exec(void* p_data); static void chr_appearance_cleanup(void* p_data); /* * `chr_appearance()` directly modifies the `TRUELENGTH()` values of the * CHARSXPs in `x`. These must be reset after the call with * `truelength_reset()`. In practice, `chr_appearance_counting()` will call * `truelength_reset()` for us, however, to ensure that this function is called * (even on a longjump), `R_ExecWithCleanup()` is used. */ static void chr_appearance(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { struct chr_appearance_info info = { .x = x, .decreasing = decreasing, .na_last = na_last, .size = size, .p_order = p_order, .p_lazy_x_chunk = p_lazy_x_chunk, .p_lazy_x_aux = p_lazy_x_aux, .p_lazy_o_aux = p_lazy_o_aux, .p_lazy_bytes = p_lazy_bytes, .p_lazy_counts = p_lazy_counts, .p_group_infos = p_group_infos, .p_truelength_info = p_truelength_info }; struct chr_appearance_cleanup_info cleanup_info = { .p_truelength_info = p_truelength_info }; R_ExecWithCleanup( chr_appearance_exec, &info, chr_appearance_cleanup, &cleanup_info ); } static void chr_appearance_internal(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static SEXP chr_appearance_exec(void* p_data) { struct chr_appearance_info* p_info = (struct chr_appearance_info*) p_data; chr_appearance_internal( p_info->x, p_info->decreasing, p_info->na_last, p_info->size, p_info->p_order, p_info->p_lazy_x_chunk, p_info->p_lazy_x_aux, p_info->p_lazy_o_aux, p_info->p_lazy_bytes, p_info->p_lazy_counts, p_info->p_group_infos, p_info->p_truelength_info ); return R_NilValue; } static void chr_appearance_cleanup(void* p_data) { struct chr_appearance_cleanup_info* p_info = (struct chr_appearance_cleanup_info*) p_data; truelength_reset(p_info->p_truelength_info); } static void chr_appearance_internal(SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { const SEXP* p_x = STRING_PTR_RO(x); const bool initialized = false; int* p_o = p_order->p_data; int* p_o_aux = NULL; chr_appearance_counting( p_x, size, initialized, p_o, p_o_aux, p_group_infos, p_truelength_info ); } /* * `chr_appearance_counting()` groups elements of `p_x` in order * of appearance using an algorithm that is extremely similar to the * counting sort used in `int_order_counting()`. * * For algorithms that end up calling `vec_order()` twice to compute order by * appearance, the first ordering can use appearance order for character * vectors, which is faster than lexicographical order, while still actually * sorting other atomic types. The final result after the second ordering will * still end up being in appearance order. */ static void chr_appearance_counting(const SEXP* p_x, r_ssize size, bool initialized, int* p_o, int* p_o_aux, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { for (r_ssize i = 0; i < size; ++i) { SEXP elt = p_x[i]; r_ssize truelength = TRUELENGTH(elt); // We have already seen and saved this string, so "increment" its counter if (truelength < 0) { SET_TRUELENGTH(elt, truelength - 1); continue; } if (truelength > 0) { // Retain R's usage of TRUELENGTH. Normally defaults to 0, so if the value // is positive, it means R is using it. Should be extremely rare. truelength_save_string(elt, truelength, p_truelength_info); } // Save the unique string for appearance ordering below truelength_save_unique(elt, p_truelength_info); // Mark as negative to note that we have seen this string. // R uses positive or zero truelengths. SET_TRUELENGTH(elt, -1); } r_ssize cumulative = 0; SEXP* p_uniques = p_truelength_info->p_uniques; r_ssize n_uniques = p_truelength_info->n_uniques_used; for (r_ssize i = 0; i < n_uniques; ++i) { SEXP elt = p_uniques[i]; r_ssize group_size = -TRUELENGTH(elt); // Push group sizes accumulated in order of appearance groups_size_maybe_push(group_size, p_group_infos); // Set cumulative value (i.e. group start location), then increment SET_TRUELENGTH(elt, cumulative); cumulative += group_size; } // If order is not initialized, we are on the first column / atomic vector // and can place the order directly into the result. Much faster than // initializing, placing in `p_o_aux`, and copying back over. if (initialized) { for (r_ssize i = 0; i < size; ++i) { const SEXP elt = p_x[i]; const r_ssize loc = TRUELENGTH(elt); SET_TRUELENGTH(elt, loc + 1); p_o_aux[loc] = p_o[i]; } memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); } else { for (r_ssize i = 0; i < size; ++i) { const SEXP elt = p_x[i]; const r_ssize loc = TRUELENGTH(elt); SET_TRUELENGTH(elt, loc + 1); p_o[loc] = i + 1; } } // Reset truelengths for next chunk/column truelength_reset(p_truelength_info); } // ----------------------------------------------------------------------------- struct df_order_info { SEXP x; SEXP decreasing; SEXP na_last; bool nan_distinct; bool chr_ordered; r_ssize size; struct order* p_order; struct lazy_raw* p_lazy_x_chunk; struct lazy_raw* p_lazy_x_aux; struct lazy_raw* p_lazy_o_aux; struct lazy_raw* p_lazy_bytes; struct lazy_raw* p_lazy_counts; struct group_infos* p_group_infos; struct truelength_info* p_truelength_info; }; struct df_order_cleanup_info { struct truelength_info* p_truelength_info; }; static SEXP df_order_exec(void* p_data); static void df_order_cleanup(void* p_data); /* * `df_order()` is the main user of `p_group_infos`. It uses the grouping * of the current column to break up the next column into sub groups. That * process is continued until either all columns have been processed or we * can tell all of the values apart. * * Internally `df_order()` may call `chr_order_chunk()` to order character * columns. The TRUELENGTHs of the column are marked with * `chr_mark_sorted_uniques()`, and generally they are reset after each * column is processed by using `truelength_reset()`. However, if a longjump * occurs after the column is marked but before it is reset, then the * truelengths won't be reset. This might happen if an allocation fails, or * if an error is thrown. To carefully handle this case, * `R_ExecWithCleanup()` is used to ensure that `truelength_reset()` is * always called. When there aren't any character columns or if there are * character columns and the truelengths were reset normally, this does * nothing. */ static void df_order(SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, bool chr_ordered, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { struct df_order_info info = { .x = x, .decreasing = decreasing, .na_last = na_last, .nan_distinct = nan_distinct, .chr_ordered = chr_ordered, .size = size, .p_order = p_order, .p_lazy_x_chunk = p_lazy_x_chunk, .p_lazy_x_aux = p_lazy_x_aux, .p_lazy_o_aux = p_lazy_o_aux, .p_lazy_bytes = p_lazy_bytes, .p_lazy_counts = p_lazy_counts, .p_group_infos = p_group_infos, .p_truelength_info = p_truelength_info }; struct df_order_cleanup_info cleanup_info = { .p_truelength_info = p_truelength_info }; R_ExecWithCleanup( df_order_exec, &info, df_order_cleanup, &cleanup_info ); } static void df_order_internal(SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, bool chr_ordered, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); static SEXP df_order_exec(void* p_data) { struct df_order_info* p_info = (struct df_order_info*) p_data; df_order_internal( p_info->x, p_info->decreasing, p_info->na_last, p_info->nan_distinct, p_info->chr_ordered, p_info->size, p_info->p_order, p_info->p_lazy_x_chunk, p_info->p_lazy_x_aux, p_info->p_lazy_o_aux, p_info->p_lazy_bytes, p_info->p_lazy_counts, p_info->p_group_infos, p_info->p_truelength_info ); return R_NilValue; } static void df_order_cleanup(void* p_data) { struct df_order_cleanup_info* p_info = (struct df_order_cleanup_info*) p_data; truelength_reset(p_info->p_truelength_info); } static void vec_order_chunk_switch(bool decreasing, bool na_last, bool nan_distinct, bool chr_ordered, r_ssize size, const enum vctrs_type type, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info); #define DF_ORDER_EXTRACT_CHUNK(CONST_DEREF, CTYPE) do { \ const CTYPE* p_col = CONST_DEREF(col); \ CTYPE* p_x_chunk_col = (CTYPE*) p_x_chunk; \ \ /* Extract the next group chunk and place in */ \ /* sequential order for cache friendliness */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk_col[j] = p_col[loc]; \ } \ } while (0) #define DF_ORDER_EXTRACT_CHUNK_CPL() do { \ const Rcomplex* p_col = COMPLEX_RO(col); \ double* p_x_chunk_col = (double*) p_x_chunk; \ \ if (rerun_complex) { \ /* First pass - real */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk_col[j] = cpl_normalise_missing(p_col[loc]).r; \ } \ \ /* Decrement `i` to rerun column */ \ --i; \ } else { \ /* Second pass - imaginary */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk_col[j] = cpl_normalise_missing(p_col[loc]).i; \ } \ } \ } while (0) static void df_order_internal(SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, bool chr_ordered, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { r_ssize n_cols = r_length(x); bool recycle_decreasing; r_ssize n_decreasing = r_length(decreasing); int* p_decreasing = LOGICAL(decreasing); if (n_decreasing == 1) { recycle_decreasing = true; } else if (n_decreasing == n_cols) { recycle_decreasing = false; } else { Rf_errorcall( R_NilValue, "Internal error: `vec_order_expand_args()` should expand " "`decreasing` to have length 1 or length equal " "to the number of columns of `x` after calling `vec_proxy_order()`." ); } bool recycle_na_last; r_ssize n_na_last = r_length(na_last); int* p_na_last = LOGICAL(na_last); if (n_na_last == 1) { recycle_na_last = true; } else if (n_na_last == n_cols) { recycle_na_last = false; } else { Rf_errorcall( R_NilValue, "Internal error: `vec_order_expand_args()` should expand " "`na_last` to have length 1 or length equal " "to the number of columns of `x` after calling `vec_proxy_order()`." ); } // Special case no columns if (n_cols == 0) { init_order(p_order); if (size != 0) { groups_size_maybe_push(size, p_group_infos); } return; } SEXP col = VECTOR_ELT(x, 0); bool col_decreasing = p_decreasing[0]; bool col_na_last = p_na_last[0]; enum vctrs_type type = vec_proxy_typeof(col); // Apply on one column to fill `p_group_infos`. // First column is immutable and we must copy into `x_chunk`. vec_order_base_switch( col, col_decreasing, col_na_last, nan_distinct, chr_ordered, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); // For complex, we have to rerun the column a second time on the // imaginary part. This is done by decrementing `i` after processing // the real part so the column is rerun. bool rerun_complex = false; // Iterate over remaining columns by group chunk for (r_ssize i = 1; i < n_cols; ++i) { // Get the number of group chunks from previous column group info struct group_info* p_group_info_pre = groups_current(p_group_infos); r_ssize n_groups = p_group_info_pre->n_groups; // If there were no ties, we are completely done if (n_groups == size) { break; } if (!recycle_decreasing) { col_decreasing = p_decreasing[i]; } if (!recycle_na_last) { col_na_last = p_na_last[i]; } // Reset pointer between columns since we increment it as // we iterate through the groups, but need it to start from the beginning // on the next column. `p_o` is initialized now that we have already // processed at least one column. int* p_o_col = p_order->p_data; col = VECTOR_ELT(x, i); type = vec_proxy_typeof(col); // If we are on the rerun pass, flip this back off so the // imaginary part is extracted below. if (type == VCTRS_TYPE_complex) { rerun_complex = rerun_complex ? false : true; } // Pre-sort unique characters once for the whole column. // Don't sort uniques if computing appearance ordering. if (chr_ordered && type == VCTRS_TYPE_character) { const SEXP* p_col = STRING_PTR_RO(col); chr_mark_sorted_uniques( p_col, size, p_lazy_x_aux, p_lazy_bytes, p_truelength_info ); } // Turn off group tracking if: // - We are on the last column // - The user didn't request group information // - That column isn't the first pass of a complex column if (i == n_cols - 1 && !p_group_infos->force_groups && !rerun_complex) { p_group_infos->ignore_groups = true; } // Swap to other group info to prepare for this column groups_swap(p_group_infos); // Ensure `x_chunk` is initialized to hold chunks void* p_x_chunk = init_lazy_raw(p_lazy_x_chunk); // Iterate over this column's group chunks for (r_ssize group = 0; group < n_groups; ++group) { r_ssize group_size = p_group_info_pre->p_data[group]; // Fast handling of simplest case if (group_size == 1) { ++p_o_col; groups_size_maybe_push(1, p_group_infos); continue; } // Extract current chunk and place into `x_chunk` in sequential order switch (type) { case VCTRS_TYPE_integer: DF_ORDER_EXTRACT_CHUNK(INTEGER_RO, int); break; case VCTRS_TYPE_logical: DF_ORDER_EXTRACT_CHUNK(LOGICAL_RO, int); break; case VCTRS_TYPE_double: DF_ORDER_EXTRACT_CHUNK(REAL_RO, double); break; case VCTRS_TYPE_character: DF_ORDER_EXTRACT_CHUNK(STRING_PTR_RO, SEXP); break; case VCTRS_TYPE_complex: DF_ORDER_EXTRACT_CHUNK_CPL(); break; default: Rf_errorcall(R_NilValue, "Unknown data frame column type in `vec_order()`."); } vec_order_chunk_switch( col_decreasing, col_na_last, nan_distinct, chr_ordered, group_size, type, p_o_col, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); p_o_col += group_size; } // Reset TRUELENGTHs between columns if ordering character vectors. // When ordering by appearance, `chr_appearance_counting()` resets the // TRUELENGTHs between chunks. if (chr_ordered && type == VCTRS_TYPE_character) { truelength_reset(p_truelength_info); } } } #undef DF_ORDER_EXTRACT_CHUNK #undef DF_ORDER_EXTRACT_CHUNK_CPL // ----------------------------------------------------------------------------- /* * Switch function specifically for column chunks generated when * processing a data frame */ static void vec_order_chunk_switch(bool decreasing, bool na_last, bool nan_distinct, bool chr_ordered, r_ssize size, const enum vctrs_type type, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos, struct truelength_info* p_truelength_info) { switch (type) { case VCTRS_TYPE_integer: { int_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_logical: { lgl_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_double: { dbl_order_chunk( decreasing, na_last, nan_distinct, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_complex: { // Complex types are run in two passes, once over real then over imaginary dbl_order_chunk( decreasing, na_last, nan_distinct, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_character: { if (chr_ordered) { chr_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } else { chr_appearance_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos, p_truelength_info ); } break; } case VCTRS_TYPE_dataframe: { Rf_errorcall(R_NilValue, "Internal error: df-cols should have already been flattened."); break; } default: { Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`"); } } } // ----------------------------------------------------------------------------- static inline size_t df_compute_n_bytes_lazy_raw(SEXP x); /* * Compute the minimum size required for `lazy_x_aux` and `lazy_x_chunk`. * * For complex, we split the vector into two double vectors. We only need to * allocate 1 double vector though, and it will be reused for both the real * and imaginary parts. */ static inline size_t vec_compute_n_bytes_lazy_raw(SEXP x, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_integer: case VCTRS_TYPE_logical: return sizeof(int); case VCTRS_TYPE_double: return sizeof(double); case VCTRS_TYPE_complex: // Complex types will be split into two double vectors return sizeof(double); case VCTRS_TYPE_character: // Auxiliary data will store SEXP and ints, so return the larger return sizeof(SEXP) > sizeof(int) ? sizeof(SEXP) : sizeof(int); case VCTRS_TYPE_dataframe: return df_compute_n_bytes_lazy_raw(x); default: Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } // `x` should be a flattened df with no df-cols static inline size_t df_compute_n_bytes_lazy_raw(SEXP x) { r_ssize n_cols = r_length(x); size_t multiplier = 0; for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); const enum vctrs_type type = vec_proxy_typeof(col); size_t col_multiplier = vec_compute_n_bytes_lazy_raw(col, type); if (col_multiplier > multiplier) { multiplier = col_multiplier; } } return multiplier; } // ----------------------------------------------------------------------------- static size_t df_compute_n_bytes_lazy_counts(SEXP x); /* * Compute the minimum size required for `p_counts` * * - For integer, we use 4 passes. * - For double, we use 8 passes. * - Logical uses integer radix sorting. * - Character uses integer radix sorting. * - Complex uses double radix sorting. */ static inline size_t vec_compute_n_bytes_lazy_counts(SEXP x, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_integer: case VCTRS_TYPE_logical: case VCTRS_TYPE_character: return INT_MAX_RADIX_PASS; case VCTRS_TYPE_double: case VCTRS_TYPE_complex: return DBL_MAX_RADIX_PASS; case VCTRS_TYPE_dataframe: return df_compute_n_bytes_lazy_counts(x); default: Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } // `x` should be a flattened df with no df-cols static size_t df_compute_n_bytes_lazy_counts(SEXP x) { r_ssize n_cols = r_length(x); size_t multiplier = 0; for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); const enum vctrs_type type = vec_proxy_typeof(col); size_t col_multiplier = vec_compute_n_bytes_lazy_counts(col, type); if (col_multiplier > multiplier) { multiplier = col_multiplier; } } return multiplier; } // ----------------------------------------------------------------------------- static SEXP df_expand_args(SEXP x, SEXP args); /* * `vec_order_expand_args()` checks the type and length of `decreasing` and * `na_largest` and possibly expands them. * * `x` is expected to be the original input, before `vec_proxy_order()` is * called on it. * * If `x` is not a data frame, `decreasing` and `na_largest` must be boolean * values. If `x` is something like a rcrd type with a multi-column data frame * proxy, then restricting to a boolean argument is correct, and works because * the single value will be recycled across the columns. * * If `x` is a data frame, and `decreasing` or `na_largest` is size 1, we return * it untouched and it will be recycled correctly. * * If `x` is a data frame and the size of the arg matches the number of * columns of `x`, we have to be careful to "expand" the arg to match * the number of columns of `x` that will exist after `vec_proxy_order()` * is called. It flattens df-cols which might either already exist in `x`, * or may arise from rcrd columns that have data frame proxies. The majority * of the code here is for tracking this expansion. */ static SEXP vec_order_expand_args(SEXP x, SEXP decreasing, SEXP na_largest) { SEXP args = PROTECT(r_new_list(2)); SET_VECTOR_ELT(args, 0, decreasing); SET_VECTOR_ELT(args, 1, na_largest); // Don't check length here. These might be vectorized if `x` is a data frame. if (TYPEOF(decreasing) != LGLSXP) { Rf_errorcall(R_NilValue, "Internal error: `decreasing` must be logical"); } if (lgl_any_na(decreasing)) { Rf_errorcall(R_NilValue, "Internal error: `decreasing` can't contain missing values."); } if (TYPEOF(na_largest) != LGLSXP) { Rf_errorcall(R_NilValue, "Internal error: `na_largest` must be logical"); } if (lgl_any_na(na_largest)) { Rf_errorcall(R_NilValue, "Internal error: `na_largest` can't contain missing values."); } if (is_data_frame(x)) { args = df_expand_args(x, args); UNPROTECT(1); return args; } if (r_length(decreasing) != 1) { Rf_errorcall(R_NilValue, "`direction` must be a single value when `x` is not a data frame."); } if (r_length(na_largest) != 1) { Rf_errorcall(R_NilValue, "`na_value` must be a single value when `x` is not a data frame."); } UNPROTECT(1); return args; } static SEXP expand_arg(SEXP arg, const int* p_expansions, r_ssize arg_size, r_ssize size); static int vec_decreasing_expansion(SEXP x); static SEXP df_expand_args(SEXP x, SEXP args) { SEXP decreasing = VECTOR_ELT(args, 0); SEXP na_largest = VECTOR_ELT(args, 1); r_ssize n_decreasing = r_length(decreasing); r_ssize n_na_largest = r_length(na_largest); r_ssize n_cols = r_length(x); // They will be recycled correctly even if columns get flattened if (n_decreasing == 1 && n_na_largest == 1) { return args; } // Must start out with the same length as the number of columns if (n_decreasing != 1 && n_decreasing != n_cols) { Rf_errorcall( R_NilValue, "`direction` should have length 1 or length equal to the number of " "columns of `x` when `x` is a data frame." ); } if (n_na_largest != 1 && n_na_largest != n_cols) { Rf_errorcall( R_NilValue, "`na_value` should have length 1 or length equal to the number of " "columns of `x` when `x` is a data frame." ); } SEXP expansions = PROTECT(Rf_allocVector(INTSXP, n_cols)); int* p_expansions = INTEGER(expansions); int size = 0; bool needs_expansion = false; // Compute expansion factor for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); int expansion = vec_decreasing_expansion(col); if (expansion != 1) { needs_expansion = true; } p_expansions[i] = expansion; size += expansion; } if (!needs_expansion) { UNPROTECT(1); return args; } decreasing = expand_arg(decreasing, p_expansions, n_decreasing, size); SET_VECTOR_ELT(args, 0, decreasing); na_largest = expand_arg(na_largest, p_expansions, n_na_largest, size); SET_VECTOR_ELT(args, 1, na_largest); UNPROTECT(1); return args; } static SEXP expand_arg(SEXP arg, const int* p_expansions, r_ssize n_arg, r_ssize size) { if (n_arg == 1) { return arg; } SEXP out = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_out = LOGICAL(out); int* p_arg = LOGICAL(arg); int k = 0; // Fill `out` with repeated `arg` values to match expanded size for (r_ssize i = 0; i < n_arg; ++i) { int col_arg = p_arg[i]; int expansion = p_expansions[i]; for (r_ssize j = 0; j < expansion; ++j) { p_out[k] = col_arg; ++k; } } UNPROTECT(1); return out; } static int df_decreasing_expansion(SEXP x); static int vec_decreasing_expansion(SEXP x) { // Bare vectors if (!OBJECT(x) && !has_dim(x)) { return 1; } // Compute number of cols in df-cols, // and do proxy-compare on the cols as needed if (is_data_frame(x)) { return df_decreasing_expansion(x); } int expansion; // Otherwise we have an S3 column that could have a data frame // ordering proxy containing multiple columns, so we need to check for that SEXP proxy = PROTECT(vec_proxy_order(x)); // If the `proxy` is a data frame, the expansion factor is the // number of columns. Otherwise it is 1. if (is_data_frame(proxy)) { expansion = Rf_length(proxy); } else { expansion = 1; } UNPROTECT(1); return expansion; } // 0-col df-cols get dropped from the comparison proxy, so returning `0` here // when a df-col has no columns should be correct static int df_decreasing_expansion(SEXP x) { r_ssize n_cols = r_length(x); int out = 0; // Accumulate the expansion factors of the cols of the df-col for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); out += vec_decreasing_expansion(col); } return out; } // ----------------------------------------------------------------------------- /* * `na_value` -> `na_largest` is parsed as: * largest -> TRUE * smallest -> FALSE * `na_largest` maps directly to `na_last` unless we are in decreasing order, * in which case `na_last = !na_largest`. */ static SEXP vec_order_compute_na_last(SEXP na_largest, SEXP decreasing) { const r_ssize size = r_length(na_largest); if (size != r_length(decreasing)) { r_stop_internal( "`na_largest` and `decreasing` should already match in size." ); } SEXP na_last = PROTECT(r_new_logical(size)); int* p_na_last = LOGICAL(na_last); const int* p_na_largest = LOGICAL_RO(na_largest); const int* p_decreasing = LOGICAL_RO(decreasing); for (r_ssize i = 0; i < size; ++i) { p_na_last[i] = p_decreasing[i] ? !p_na_largest[i] : p_na_largest[i]; } UNPROTECT(1); return na_last; } // ----------------------------------------------------------------------------- static int parse_na_value_one(SEXP x); static SEXP parse_na_value(SEXP na_value) { // Don't care about length here, checked later if (TYPEOF(na_value) != STRSXP) { Rf_errorcall(R_NilValue, "`na_value` must be a character vector."); } R_len_t size = Rf_length(na_value); const SEXP* p_na_value = STRING_PTR_RO(na_value); SEXP na_largest = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_na_largest = LOGICAL(na_largest); for (R_len_t i = 0; i < size; ++i) { p_na_largest[i] = parse_na_value_one(p_na_value[i]); } UNPROTECT(1); return na_largest; } static int parse_na_value_one(SEXP x) { if (x == NA_STRING) { Rf_errorcall(R_NilValue, "`na_value` can't be missing."); } const char* c_x = CHAR(x); if (!strcmp(c_x, "largest")) return 1; if (!strcmp(c_x, "smallest")) return 0; Rf_errorcall( R_NilValue, "`na_value` must contain only \"largest\" or \"smallest\"." ); } static int parse_direction_one(SEXP x); static SEXP parse_direction(SEXP direction) { // Don't care about length here, checked later if (TYPEOF(direction) != STRSXP) { Rf_errorcall(R_NilValue, "`direction` must be a character vector."); } R_len_t size = Rf_length(direction); const SEXP* p_direction = STRING_PTR_RO(direction); SEXP decreasing = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_decreasing = LOGICAL(decreasing); for (R_len_t i = 0; i < size; ++i) { p_decreasing[i] = parse_direction_one(p_direction[i]); } UNPROTECT(1); return decreasing; } static int parse_direction_one(SEXP x) { if (x == NA_STRING) { Rf_errorcall(R_NilValue, "`direction` can't be missing."); } const char* c_x = CHAR(x); if (!strcmp(c_x, "asc")) return 0; if (!strcmp(c_x, "desc")) return 1; Rf_errorcall( R_NilValue, "`direction` must contain only \"asc\" or \"desc\"." ); } static inline bool parse_nan_distinct(SEXP nan_distinct) { if (TYPEOF(nan_distinct) != LGLSXP) { Rf_errorcall(R_NilValue, "`nan_distinct` must be a logical vector."); } if (Rf_length(nan_distinct) != 1) { Rf_errorcall(R_NilValue, "`nan_distinct` must be length 1."); } int c_nan_distinct = LOGICAL_RO(nan_distinct)[0]; if (c_nan_distinct == NA_LOGICAL) { Rf_errorcall(R_NilValue, "`nan_distinct` can't be missing."); } return (bool) c_nan_distinct; } vctrs/src/ptype2.h0000644000176200001440000000416214362266120013603 0ustar liggesusers#ifndef VCTRS_PTYPE2_H #define VCTRS_PTYPE2_H #include "vctrs-core.h" // Sync with R constants in ptype2.R #define S3_FALLBACK_DEFAULT 0 enum s3_fallback { S3_FALLBACK_false = 0, S3_FALLBACK_true }; struct fallback_opts { enum s3_fallback s3; }; struct ptype2_opts { r_obj* x; r_obj* y; struct vctrs_arg* p_x_arg; struct vctrs_arg* p_y_arg; struct r_lazy call; struct fallback_opts fallback; }; r_obj* vec_ptype2_opts(const struct ptype2_opts* opts, int* left); static inline r_obj* vec_ptype2_params(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, int* left) { const struct ptype2_opts opts = { .x = x, .y = y, .p_x_arg = p_x_arg, .p_y_arg = p_y_arg, .call = call }; return vec_ptype2_opts(&opts, left); } static inline r_obj* vec_ptype2(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, int* left, struct r_lazy call) { const struct ptype2_opts opts = { .x = x, .y = y, .p_x_arg = p_x_arg, .p_y_arg = p_y_arg, .call = call }; return vec_ptype2_opts(&opts, left); } bool vec_is_coercible(const struct ptype2_opts* opts, int* dir); r_obj* vec_ptype2_e(const struct ptype2_opts* opts, int* dir, ERR* err); struct ptype2_opts new_ptype2_opts(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, r_obj* opts); struct fallback_opts new_fallback_opts(r_obj* opts); r_obj* vec_ptype2_from_unspecified(const struct ptype2_opts* opts, enum vctrs_type other_type, r_obj* other, struct vctrs_arg* other_arg); #endif vctrs/src/ptype-common.c0000644000176200001440000000631114362266120015000 0ustar liggesusers#include "vctrs.h" #include "decl/ptype-common-decl.h" // [[ register(external = TRUE) ]] r_obj* ffi_ptype_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* types = KEEP(rlang_env_dots_list(env)); r_obj* ptype = KEEP(r_eval(r_node_car(args), env)); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); r_obj* out = vec_ptype_common_params(types, ptype, S3_FALLBACK_false, &arg, call); FREE(2); return out; } // [[ register(external = TRUE) ]] r_obj* ffi_ptype_common_opts(r_obj* call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* types = KEEP(rlang_env_dots_list(env)); r_obj* ptype = KEEP(r_eval(r_node_car(args), env)); args = r_node_cdr(args); r_obj* opts = KEEP(r_eval(r_node_car(args), env)); struct ptype_common_opts ptype_opts = { .call = { .x = syms.dot_call, .env = env }, .fallback = new_fallback_opts(opts) }; r_obj* out = vec_ptype_common_opts(types, ptype, &ptype_opts); FREE(3); return out; } r_obj* vec_ptype_common_opts(r_obj* dots, r_obj* ptype, const struct ptype_common_opts* opts) { if (!vec_is_partial(ptype)) { return vec_ptype(ptype, vec_args.dot_ptype, opts->call); } if (r_is_true(r_peek_option("vctrs.no_guessing"))) { r_abort_lazy_call(r_lazy_null, "strict mode is activated; you must supply complete `.ptype`."); } // Remove constness struct ptype_common_opts mut_opts = *opts; // Start reduction with the `.ptype` argument r_obj* type = KEEP(reduce(ptype, vec_args.dot_ptype, mut_opts.p_arg, dots, &ptype2_common, &mut_opts)); type = vec_ptype_finalise(type); FREE(1); return type; } r_obj* vec_ptype_common_params(r_obj* dots, r_obj* ptype, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call) { struct ptype_common_opts opts = { .call = call, .p_arg = p_arg, .fallback = { .s3 = s3_fallback } }; return vec_ptype_common_opts(dots, ptype, &opts); } static r_obj* ptype2_common(r_obj* current, r_obj* next, struct counters* counters, void* p_data) { int left = -1; struct ptype_common_opts* p_common_opts = (struct ptype_common_opts*) p_data; const struct ptype2_opts opts = { .x = current, .y = next, .p_x_arg = counters->curr_arg, .p_y_arg = counters->next_arg, .call = p_common_opts->call, .fallback = p_common_opts->fallback }; current = vec_ptype2_opts(&opts, &left); // Update current if RHS is the common type. Otherwise the previous // counter stays in effect. if (!left) { counters_shift(counters); } return current; } vctrs/src/complete.h0000644000176200001440000000017214315060310014154 0ustar liggesusers#ifndef VCTRS_COMPLETE_H #define VCTRS_COMPLETE_H #include "vctrs-core.h" r_obj* vec_detect_complete(r_obj* x); #endif vctrs/src/cast-bare.c0000644000176200001440000000667514315060310014216 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" r_obj* int_as_logical(r_obj* x, bool* lossy) { int* data = r_int_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_logical(n)); int* out_data = r_lgl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; if (elt == r_globals.na_int) { *out_data = r_globals.na_lgl; continue; } if (elt != 0 && elt != 1) { *lossy = true; FREE(1); return r_null; } *out_data = elt; } FREE(1); return out; } r_obj* dbl_as_logical(r_obj* x, bool* lossy) { double* data = r_dbl_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_logical(n)); int* out_data = r_lgl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { double elt = *data; if (isnan(elt)) { *out_data = r_globals.na_lgl; continue; } if (elt != 0 && elt != 1) { *lossy = true; FREE(1); return r_null; } *out_data = (int) elt; } FREE(1); return out; } r_obj* chr_as_logical(r_obj* x, bool* lossy) { r_obj* const* x_p = r_chr_cbegin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_logical(n)); int* p_out = r_lgl_begin(out); for (r_ssize i = 0; i < n; ++i) { r_obj* str = x_p[i]; if (str == r_globals.na_str) { p_out[i] = r_globals.na_lgl; continue; } const char* elt = r_str_c_string(str); switch (elt[0]) { case 'T': if (elt[1] == '\0' || strcmp(elt, "TRUE") == 0) { p_out[i] = 1; continue; } break; case 'F': if (elt[1] == '\0' || strcmp(elt, "FALSE") == 0) { p_out[i] = 0; continue; } break; case 't': if (strcmp(elt, "true") == 0) { p_out[i] = 1; continue; } break; case 'f': if (strcmp(elt, "false") == 0) { p_out[i] = 0; continue; } break; default: break; } *lossy = true; FREE(1); return r_null; } FREE(1); return out; } r_obj* lgl_as_integer(r_obj* x, bool* lossy) { return Rf_coerceVector(x, INTSXP); } r_obj* dbl_as_integer(r_obj* x, bool* lossy) { double* data = r_dbl_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_integer(n)); int* out_data = r_int_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { double elt = *data; if (elt <= INT_MIN || elt >= INT_MAX + 1.0) { *lossy = true; FREE(1); return r_null; } if (isnan(elt)) { *out_data = r_globals.na_int; continue; } int value = (int) elt; if (value != elt) { *lossy = true; FREE(1); return r_null; } *out_data = value; } FREE(1); return out; } r_obj* lgl_as_double(r_obj* x, bool* lossy) { int* data = r_lgl_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_double(n)); double* out_data = r_dbl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; *out_data = (elt == r_globals.na_lgl) ? r_globals.na_dbl : elt; } FREE(1); return out; } r_obj* int_as_double(r_obj* x, bool* lossy) { int* data = r_int_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_double(n)); double* out_data = r_dbl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; *out_data = (elt == r_globals.na_int) ? r_globals.na_dbl : elt; } FREE(1); return out; } vctrs/src/ptype.c0000644000176200001440000001204514373202700013507 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/ptype-decl.h" // [[ register() ]] r_obj* ffi_ptype(r_obj* x, r_obj* x_arg_ffi, r_obj* frame) { struct vctrs_arg x_arg = vec_as_arg(x_arg_ffi); struct r_lazy call = { .x = r_syms.call, .env = frame }; return vec_ptype(x, &x_arg, call); } r_obj* vec_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call) { switch (vec_typeof(x)) { case VCTRS_TYPE_null: return r_null; case VCTRS_TYPE_unspecified: return vctrs_shared_empty_uns; case VCTRS_TYPE_logical: return vec_ptype_slice(x, r_globals.empty_lgl); case VCTRS_TYPE_integer: return vec_ptype_slice(x, r_globals.empty_int); case VCTRS_TYPE_double: return vec_ptype_slice(x, r_globals.empty_dbl); case VCTRS_TYPE_complex: return vec_ptype_slice(x, r_globals.empty_cpl); case VCTRS_TYPE_character: return vec_ptype_slice(x, r_globals.empty_chr); case VCTRS_TYPE_raw: return vec_ptype_slice(x, r_globals.empty_raw); case VCTRS_TYPE_list: return vec_ptype_slice(x, r_globals.empty_list); case VCTRS_TYPE_dataframe: return df_ptype(x, true); case VCTRS_TYPE_s3: return s3_ptype(x, x_arg, call); case VCTRS_TYPE_scalar: stop_scalar_type(x, x_arg, call); } r_stop_unreachable(); } static r_obj* col_ptype(r_obj* x) { return vec_ptype(x, vec_args.empty, r_lazy_null); } static inline r_obj* vec_ptype_slice(r_obj* x, r_obj* empty) { if (r_attrib(x) == r_null) { return empty; } else { // Slicing preserves attributes return vec_slice(x, r_null); } } static r_obj* s3_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call) { switch (class_type(x)) { case VCTRS_CLASS_bare_tibble: return df_ptype(x, true); case VCTRS_CLASS_data_frame: return df_ptype(x, false); case VCTRS_CLASS_bare_data_frame: r_stop_internal("Bare data frames should be handled by `vec_ptype()`."); case VCTRS_CLASS_none: r_stop_internal("Non-S3 classes should be handled by `vec_ptype()`."); default: break; } if (vec_is_partial(x)) { return x; } r_obj* method = KEEP(vec_ptype_method(x)); r_obj* out; if (method == r_null) { obj_check_vector(x, x_arg, call); out = vec_slice(x, r_null); } else { out = vec_ptype_invoke(x, method); } FREE(1); return out; } static inline r_obj* vec_ptype_method(r_obj* x) { r_obj* cls = KEEP(s3_get_class(x)); r_obj* method = s3_class_find_method("vec_ptype", cls, vctrs_method_table); FREE(1); return method; } static inline r_obj* vec_ptype_invoke(r_obj* x, r_obj* method) { return vctrs_dispatch1(syms_vec_ptype, method, syms_x, x); } r_obj* df_ptype(r_obj* x, bool bare) { r_obj* row_nms = KEEP(df_rownames(x)); r_obj* ptype = r_null; if (bare) { ptype = KEEP(bare_df_map(x, &col_ptype)); } else { ptype = KEEP(df_map(x, &col_ptype)); } if (r_typeof(row_nms) == R_TYPE_character) { r_attrib_poke(ptype, r_syms.row_names, r_globals.empty_chr); } FREE(2); return ptype; } // [[ register() ]] r_obj* vec_ptype_finalise(r_obj* x) { if (x == r_null) { return x; } struct r_lazy call = lazy_calls.vec_ptype_finalise; if (!r_is_object(x)) { obj_check_vector(x, vec_args.x, call); return x; } if (vec_is_unspecified(x)) { return vec_ptype_finalise_unspecified(x); } if (vec_is_partial(x)) { return vec_ptype_finalise_dispatch(x); } obj_check_vector(x, vec_args.x, call); switch (class_type(x)) { case VCTRS_CLASS_bare_tibble: case VCTRS_CLASS_bare_data_frame: return bare_df_map(x, &vec_ptype_finalise); case VCTRS_CLASS_data_frame: return df_map(x, &vec_ptype_finalise); case VCTRS_CLASS_none: r_stop_internal("Non-S3 classes should have returned by now."); default: return vec_ptype_finalise_dispatch(x); } } static r_obj* vec_ptype_finalise_unspecified(r_obj* x) { r_ssize size = r_length(x); if (size == 0) { return r_globals.empty_lgl; } r_obj* out = KEEP(r_alloc_logical(size)); r_lgl_fill(out, r_globals.na_lgl, size); FREE(1); return out; } static r_obj* vec_ptype_finalise_dispatch(r_obj* x) { return vctrs_dispatch1( syms_vec_ptype_finalise_dispatch, fns_vec_ptype_finalise_dispatch, syms_x, x ); } r_obj* vec_ptype_final(r_obj* x) { r_obj* out = KEEP(vec_ptype(x, vec_args.x, vec_ptype_final_lazy_call)); out = vec_ptype_finalise(out); FREE(1); return out; } void vctrs_init_ptype(r_obj* ns) { syms_vec_ptype = r_sym("vec_ptype"); syms_vec_ptype_finalise_dispatch = r_sym("vec_ptype_finalise_dispatch"); fns_vec_ptype_finalise_dispatch = r_eval(syms_vec_ptype_finalise_dispatch, ns); vec_ptype_final_call = r_parse("vec_ptype_final()"); r_preserve_global(vec_ptype_final_call); vec_ptype_final_lazy_call = (struct r_lazy) { .x = vec_ptype_final_call, .env = r_null }; } static r_obj* syms_vec_ptype = NULL; static r_obj* syms_vec_ptype_finalise_dispatch = NULL; static r_obj* fns_vec_ptype_finalise_dispatch = NULL; static r_obj* vec_ptype_final_call = NULL; static struct r_lazy vec_ptype_final_lazy_call = { 0 }; vctrs/src/type-data-frame.c0000644000176200001440000006050714511320527015336 0ustar liggesusers#include "utils-dispatch.h" #include "vctrs.h" #include "type-data-frame.h" #include "decl/type-data-frame-decl.h" bool is_data_frame(r_obj* x) { return r_typeof(x) == R_TYPE_list && class_type_is_data_frame(class_type(x)); } bool is_native_df(r_obj* x) { enum vctrs_class_type type = class_type(x); return type == VCTRS_CLASS_bare_data_frame || type == VCTRS_CLASS_bare_tibble; } bool is_bare_data_frame(r_obj* x) { return class_type(x) == VCTRS_CLASS_bare_data_frame; } bool is_bare_tibble(r_obj* x) { return class_type(x) == VCTRS_CLASS_bare_tibble; } r_obj* new_data_frame(r_obj* x, r_ssize n) { x = KEEP(r_clone_referenced(x)); init_data_frame(x, n); FREE(1); return x; } // [[ register() ]] r_obj* ffi_new_data_frame(r_obj* args) { args = r_node_cdr(args); r_obj* x = r_node_car(args); args = r_node_cdr(args); r_obj* n = r_node_car(args); args = r_node_cdr(args); r_obj* cls = r_node_car(args); args = r_node_cdr(args); r_obj* attrib = args; r_keep_loc pi; KEEP_HERE(attrib, &pi); if (r_typeof(x) != R_TYPE_list) { r_abort_call(r_null, "`x` must be a list"); } bool has_names = false; bool has_rownames = false; r_obj* out = KEEP(r_clone_referenced(x)); for (r_obj* node = attrib; node != r_null; node = r_node_cdr(node)) { r_obj* tag = r_node_tag(node); // We might add dynamic dots later on if (tag == r_syms.class_) { r_stop_internal("Can't supply `class` in `...`."); } if (tag == r_syms.names) { has_names = true; continue; } if (tag == r_syms.row_names) { // We used to validate a user supplied `n` against a user supplied // `row.names`, but that requires extracting out the `rownames_size()`, // which can materialize ALTREP row name objects and is prohibitively // expensive (tidyverse/dplyr#6596). So instead we say that user supplied // `row.names` overrides both the implied size of `x` and a user supplied // `n`, even if they are incompatible. has_rownames = true; continue; } } // Take names from `x` if `attrib` doesn't have any if (!has_names) { r_obj* nms = r_globals.empty_chr; if (r_length(out)) { nms = r_names(out); } KEEP(nms); if (nms != r_null) { attrib = r_new_node(nms, attrib); r_node_poke_tag(attrib, r_syms.names); KEEP_AT(attrib, pi); } FREE(1); } if (!has_rownames) { // Data frame size is determined in the following order: // - By `row.names`, if provided, which will already be in `attrib` // - By `n`, if provided (this is fully overriden by `row.names`) // - By `x`, if neither `n` nor `row.names` is provided, where `x` could be // a data frame with its own row names attribute or a bare list const r_ssize size = n != r_null ? df_size_from_n(n) : df_raw_size(x); r_obj* rn = KEEP(new_compact_rownames(size)); attrib = r_new_node(rn, attrib); r_node_poke_tag(attrib, r_syms.row_names); FREE(1); KEEP_AT(attrib, pi); } if (cls == r_null) { cls = classes_data_frame; } else { cls = c_data_frame_class(cls); } KEEP(cls); attrib = r_new_node(cls, attrib); r_node_poke_tag(attrib, r_syms.class_); FREE(1); KEEP_AT(attrib, pi); r_poke_attrib(out, attrib); r_mark_object(out); FREE(2); return out; } static r_ssize df_size_from_n(r_obj* n) { if (r_typeof(n) != R_TYPE_integer || r_length(n) != 1) { r_abort("`n` must be an integer of size 1."); } r_ssize out = r_int_get(n, 0); if (out == r_globals.na_int) { r_abort("`n` can't be missing."); } if (out < 0) { r_abort("`n` can't be negative."); } return out; } static r_obj* c_data_frame_class(r_obj* cls) { if (r_typeof(cls) != R_TYPE_character) { r_abort_call(r_null, "`class` must be NULL or a character vector"); } return chr_c(cls, classes_data_frame); } // [[ register() ]] r_obj* ffi_data_frame(r_obj* x, r_obj* size, r_obj* name_repair, r_obj* frame) { struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, lazy_args.dot_name_repair, false, error_call); KEEP(name_repair_opts.shelter); r_ssize c_size = 0; if (size == r_null) { c_size = vec_check_size_common(x, 0, vec_args.empty, error_call); } else { c_size = vec_as_short_length(size, vec_args.dot_size, error_call); } r_obj* out = data_frame(x, c_size, &name_repair_opts, error_call); FREE(1); return out; } static r_obj* data_frame(r_obj* x, r_ssize size, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call) { const bool unpack = true; r_obj* out = KEEP(df_list(x, size, unpack, p_name_repair_opts, error_call)); out = new_data_frame(out, size); FREE(1); return out; } // [[ register() ]] r_obj* ffi_df_list(r_obj* x, r_obj* size, r_obj* unpack, r_obj* name_repair, r_obj* frame) { struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, lazy_args.dot_name_repair, false, error_call); KEEP(name_repair_opts.shelter); r_ssize c_size = 0; if (size == r_null) { c_size = vec_check_size_common(x, 0, vec_args.empty, error_call); } else { c_size = vec_as_short_length(size, vec_args.dot_size, error_call); } const bool c_unpack = r_arg_as_bool(unpack, ".unpack"); r_obj* out = df_list(x, c_size, c_unpack, &name_repair_opts, error_call); FREE(1); return out; } static r_obj* df_list(r_obj* x, r_ssize size, bool unpack, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call) { if (r_typeof(x) != R_TYPE_list) { r_stop_internal("`x` must be a list."); } x = KEEP(vec_check_recycle_common(x, size, vec_args.empty, error_call)); r_ssize n_cols = r_length(x); // Unnamed columns are auto-named with `""` if (r_names(x) == r_null) { r_obj* names = KEEP(r_new_character(n_cols)); r_attrib_poke_names(x, names); FREE(1); } x = KEEP(df_list_drop_null(x)); if (unpack) { x = df_list_unpack(x); } KEEP(x); r_obj* names = KEEP(r_names(x)); names = KEEP(vec_as_names(names, p_name_repair_opts)); r_attrib_poke_names(x, names); FREE(5); return x; } static r_obj* df_list_drop_null(r_obj* x) { r_ssize n_cols = r_length(x); r_ssize count = 0; for (r_ssize i = 0; i < n_cols; ++i) { count += r_list_get(x, i) == r_null; } if (count == 0) { return x; } r_obj* names = KEEP(r_names(x)); r_obj* const * p_names = r_chr_cbegin(names); r_ssize n_out = n_cols - count; r_obj* out = KEEP(r_alloc_list(n_out)); r_obj* out_names = KEEP(r_alloc_character(n_out)); r_ssize out_i = 0; for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = r_list_get(x, i); if (col != r_null) { r_list_poke(out, out_i, col); r_chr_poke(out_names, out_i, p_names[i]); ++out_i; } } r_attrib_poke_names(out, out_names); FREE(3); return out; } static r_obj* df_list_unpack(r_obj* x) { r_obj* names = KEEP(r_names(x)); r_obj* const * p_names = r_chr_cbegin(names); bool any_needs_unpack = false; r_ssize n_cols = r_length(x); r_ssize i = 0; for (; i < n_cols; ++i) { // Only unpack unnamed data frames if (p_names[i] != strings_empty) { continue; } r_obj* col = r_list_get(x, i); if (is_data_frame(col)) { any_needs_unpack = true; break; } } if (!any_needs_unpack) { FREE(1); return x; } r_obj* unpack = KEEP(r_new_logical(n_cols)); int* p_unpack = LOGICAL(unpack); for (r_ssize j = 0; j < n_cols; ++j) { p_unpack[j] = 0; } r_ssize width = i; for (; i < n_cols; ++i) { // Only unpack unnamed data frames if (p_names[i] != strings_empty) { ++width; continue; } r_obj* col = r_list_get(x, i); if (is_data_frame(col)) { width += r_length(col); p_unpack[i] = 1; } else { ++width; } } r_obj* out = KEEP(r_new_list(width)); r_obj* out_names = KEEP(r_new_character(width)); r_ssize loc = 0; // Unpack loop for (r_ssize i = 0; i < n_cols; ++i) { if (!p_unpack[i]) { r_list_poke(out, loc, r_list_get(x, i)); r_chr_poke(out_names, loc, p_names[i]); ++loc; continue; } r_obj* col = r_list_get(x, i); r_obj* col_names = KEEP(r_names(col)); if (r_typeof(col_names) != R_TYPE_character) { r_stop_internal( "Encountered corrupt data frame. " "Data frames must have character column names." ); } r_obj* const * p_col_names = r_chr_cbegin(col_names); r_ssize col_i = 0; r_ssize stop = loc + r_length(col); for (; loc < stop; ++loc, ++col_i) { r_list_poke(out, loc, r_list_get(col, col_i)); r_chr_poke(out_names, loc, p_col_names[col_i]); } loc = stop; FREE(1); } r_attrib_poke_names(out, out_names); FREE(4); return out; } enum rownames_type rownames_type(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_character: return ROWNAMES_TYPE_identifiers; case R_TYPE_integer: if (r_length(x) == 2 && r_int_begin(x)[0] == r_globals.na_int) { return ROWNAMES_TYPE_automatic_compact; } else { return ROWNAMES_TYPE_automatic; } default: r_stop_internal("Unexpected type `%s`.", Rf_type2char(r_typeof(x))); } } static r_ssize compact_rownames_length(r_obj* x) { return abs(r_int_get(x, 1)); } // [[ include("type-data-frame.h") ]] r_ssize rownames_size(r_obj* rn) { switch (rownames_type(rn)) { case ROWNAMES_TYPE_identifiers: case ROWNAMES_TYPE_automatic: return r_length(rn); case ROWNAMES_TYPE_automatic_compact: return compact_rownames_length(rn); } never_reached("rownames_size"); } // [[ include("type-data-frame.h") ]] void init_data_frame(r_obj* x, r_ssize n) { r_attrib_poke(x, r_syms.class_, classes_data_frame); init_bare_data_frame(x, n); } // [[ include("type-data-frame.h") ]] void init_tibble(r_obj* x, r_ssize n) { r_attrib_poke(x, r_syms.class_, classes_tibble); init_bare_data_frame(x, n); } static void init_bare_data_frame(r_obj* x, r_ssize n) { if (r_length(x) == 0) { r_attrib_poke(x, r_syms.names, r_globals.empty_chr); } init_compact_rownames(x, n); } // [[ include("type-data-frame.h") ]] void init_compact_rownames(r_obj* x, r_ssize n) { r_obj* rn = KEEP(new_compact_rownames(n)); r_attrib_poke(x, r_syms.row_names, rn); FREE(1); } static r_obj* new_compact_rownames(r_ssize n) { if (n <= 0) { return r_globals.empty_int; } r_obj* out = r_alloc_integer(2); int* out_data = r_int_begin(out); out_data[0] = r_globals.na_int; out_data[1] = -n; return out; } // vctrs type methods ------------------------------------------------ // [[ register() ]] r_obj* ffi_df_ptype2_opts(r_obj* x, r_obj* y, r_obj* opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &x_arg, &y_arg, call, opts); return df_ptype2(&c_opts); } r_obj* df_ptype2(const struct ptype2_opts* opts) { r_obj* x_names = KEEP(r_names(opts->x)); r_obj* y_names = KEEP(r_names(opts->y)); r_obj* out = r_null; if (equal_object(x_names, y_names)) { out = df_ptype2_loop(opts, x_names); } else { out = df_ptype2_match(opts, x_names, y_names); } FREE(2); return out; } r_obj* df_ptype2_match(const struct ptype2_opts* opts, r_obj* x_names, r_obj* y_names) { r_obj* x = opts->x; r_obj* y = opts->y; r_obj* x_dups_pos = KEEP(vec_match(x_names, y_names)); r_obj* y_dups_pos = KEEP(vec_match(y_names, x_names)); int* x_dups_pos_data = r_int_begin(x_dups_pos); int* y_dups_pos_data = r_int_begin(y_dups_pos); r_ssize x_len = r_length(x_names); r_ssize y_len = r_length(y_names); // Count columns that are only in `y` r_ssize rest_len = 0; for (r_ssize i = 0; i < y_len; ++i) { if (y_dups_pos_data[i] == r_globals.na_int) { ++rest_len; } } r_ssize out_len = x_len + rest_len; r_obj* out = KEEP(r_alloc_list(out_len)); r_obj* nms = KEEP(r_alloc_character(out_len)); r_attrib_poke(out, r_syms.names, nms); r_ssize i = 0; r_ssize y_arg_loc = 0; struct vctrs_arg* named_x_arg = new_subscript_arg(opts->p_x_arg, x_names, x_len, &i); KEEP(named_x_arg->shelter); struct vctrs_arg* named_y_arg = new_subscript_arg(opts->p_y_arg, y_names, y_len, &y_arg_loc); KEEP(named_y_arg->shelter); // Fill in prototypes of all the columns that are in `x`, in order for (; i < x_len; ++i) { r_ssize dup = x_dups_pos_data[i]; r_obj* col = r_list_get(x, i); struct ptype2_opts col_opts = *opts; col_opts.x = col; col_opts.p_x_arg = named_x_arg; r_obj* type; if (dup == r_globals.na_int) { col_opts.y = vctrs_shared_empty_uns; col_opts.p_y_arg = NULL; type = vec_ptype2_from_unspecified(&col_opts, vec_typeof(col), col, named_x_arg); } else { // 1-based index --dup; y_arg_loc = dup; col_opts.y = r_list_get(y, dup); col_opts.p_y_arg = named_y_arg; int _left; type = vec_ptype2_opts(&col_opts, &_left); } r_list_poke(out, i, type); r_chr_poke(nms, i, r_chr_get(x_names, i)); } // Fill in prototypes of the columns that are only in `y` for (r_ssize j = 0; i < out_len; ++j) { r_ssize dup = y_dups_pos_data[j]; if (dup == r_globals.na_int) { r_obj* col = r_list_get(y, j); y_arg_loc = j; struct ptype2_opts col_opts = *opts; col_opts.y = col; col_opts.p_y_arg = named_y_arg; col_opts.x = vctrs_shared_empty_uns; col_opts.p_x_arg = NULL; r_obj* type = vec_ptype2_from_unspecified(&col_opts, vec_typeof(col), col, named_y_arg); r_list_poke(out, i, type); r_chr_poke(nms, i, r_chr_get(y_names, j)); ++i; } } init_data_frame(out, 0); FREE(6); return out; } static r_obj* df_ptype2_loop(const struct ptype2_opts* opts, r_obj* names) { r_obj* x = opts->x; r_obj* y = opts->y; r_ssize len = r_length(names); r_obj* out = KEEP(r_alloc_list(len)); r_attrib_poke(out, r_syms.names, names); r_ssize i = 0; struct vctrs_arg* named_x_arg = new_subscript_arg_vec(opts->p_x_arg, out, &i); KEEP(named_x_arg->shelter); struct vctrs_arg* named_y_arg = new_subscript_arg_vec(opts->p_y_arg, out, &i); KEEP(named_y_arg->shelter); for (; i < len; ++i) { struct ptype2_opts col_opts = *opts; col_opts.x = r_list_get(x, i); col_opts.y = r_list_get(y, i); col_opts.p_x_arg = named_x_arg; col_opts.p_y_arg = named_y_arg; int _left; r_obj* type = vec_ptype2_opts(&col_opts, &_left); r_list_poke(out, i, type); } init_data_frame(out, 0); FREE(3); return out; } // [[ register() ]] r_obj* ffi_df_cast_opts(r_obj* x, r_obj* to, r_obj* opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy to_arg_ = { .x = syms.to_arg, .env = frame }; struct vctrs_arg to_arg = new_lazy_arg(&to_arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; struct cast_opts c_opts = new_cast_opts(x, to, &x_arg, &to_arg, call, opts); return df_cast_opts(&c_opts); } // Take all columns of `to` and preserve the order. Common columns are // cast to their types in `to`. Extra `x` columns are dropped and // cause a lossy cast. Extra `to` columns are filled with missing // values. // [[ include("cast.h") ]] r_obj* df_cast_opts(const struct cast_opts* opts) { r_obj* x_names = KEEP(r_names(opts->x)); r_obj* to_names = KEEP(r_names(opts->to)); if (x_names == r_null || to_names == r_null) { r_stop_internal("Data frame must have names."); } r_obj* out = r_null; if (equal_object(x_names, to_names)) { out = df_cast_loop(opts, x_names); } else { out = df_cast_match(opts, x_names, to_names); } FREE(2); return out; } static r_obj* df_cast_match(const struct cast_opts* opts, r_obj* x_names, r_obj* to_names) { r_obj* x = opts->x; r_obj* to = opts->to; r_obj* to_dups_pos = KEEP(vec_match(to_names, x_names)); int* to_dups_pos_data = r_int_begin(to_dups_pos); r_ssize to_len = r_length(to_dups_pos); r_obj* out = KEEP(r_alloc_list(to_len)); r_attrib_poke(out, r_syms.names, to_names); r_ssize size = df_size(x); r_ssize common_len = 0; r_ssize i = 0; r_ssize x_arg_loc = 0; struct vctrs_arg* named_x_arg = new_subscript_arg(opts->p_x_arg, x_names, r_length(x_names), &x_arg_loc); KEEP(named_x_arg->shelter); struct vctrs_arg* named_to_arg = new_subscript_arg(opts->p_to_arg, to_names, to_len, &i); KEEP(named_to_arg->shelter); for (; i < to_len; ++i) { r_ssize pos = to_dups_pos_data[i]; r_obj* col; if (pos == r_globals.na_int) { r_obj* to_col = r_list_get(to, i); col = vec_init(to_col, size); // FIXME: Need to initialise the vector because we currently use // `vec_assign()` in `vec_rbind()` before falling back. Attach // an attribute to recognise unspecified vectors in // `base_c_invoke()`. if (opts->fallback.s3 && vec_is_common_class_fallback(to_col)) { KEEP(col); r_attrib_poke(col, r_sym("vctrs:::unspecified"), r_true); FREE(1); } } else { --pos; // 1-based index ++common_len; x_arg_loc = pos; struct cast_opts col_opts = { .x = r_list_get(x, pos), .to = r_list_get(to, i), .p_x_arg = named_x_arg, .p_to_arg = named_to_arg, .call = opts->call, .fallback = opts->fallback }; col = vec_cast_opts(&col_opts); } r_list_poke(out, i, col); } // Restore data frame size before calling `vec_restore()`. `x` and // `to` might not have any columns to compute the original size. init_data_frame(out, size); r_attrib_poke(out, r_syms.row_names, df_rownames(x)); r_ssize extra_len = r_length(x) - common_len; if (extra_len) { r_obj* ffi_x_arg = KEEP(vctrs_arg(opts->p_x_arg)); r_obj* ffi_to_arg = KEEP(vctrs_arg(opts->p_to_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(opts->call)); out = vctrs_dispatch6(syms_df_lossy_cast, fns_df_lossy_cast, syms_out, out, syms_x, x, syms_to, to, syms_x_arg, ffi_x_arg, syms_to_arg, ffi_to_arg, syms_call, ffi_call); FREE(3); } FREE(4); return out; } static r_obj* df_cast_loop(const struct cast_opts* opts, r_obj* names) { r_obj* x = opts->x; r_obj* to = opts->to; r_ssize len = r_length(names); r_obj* out = KEEP(r_alloc_list(len)); r_attrib_poke(out, r_syms.names, names); r_ssize size = df_size(x); r_ssize i = 0; struct vctrs_arg* named_x_arg = new_subscript_arg(opts->p_x_arg, names, len, &i); KEEP(named_x_arg->shelter); struct vctrs_arg* named_to_arg = new_subscript_arg(opts->p_to_arg, names, len, &i); KEEP(named_to_arg->shelter); for (; i < len; ++i) { struct cast_opts col_opts = { .x = r_list_get(x, i), .to = r_list_get(to, i), .p_x_arg = named_x_arg, .p_to_arg = named_to_arg, .call = opts->call, .fallback = opts->fallback }; r_obj* col = vec_cast_opts(&col_opts); r_list_poke(out, i, col); } // Restore data frame size before calling `vec_restore()`. `x` and // `to` might not have any columns to compute the original size. init_data_frame(out, size); r_attrib_poke(out, r_syms.row_names, df_rownames(x)); FREE(3); return out; } // If negative index, value is appended r_obj* df_poke(r_obj* x, r_ssize i, r_obj* value) { if (i >= 0) { r_list_poke(x, i, value); return x; } r_ssize ncol = r_length(x); r_obj* tmp = KEEP(r_resize(x, ncol + 1)); Rf_copyMostAttrib(x, tmp); x = tmp; r_list_poke(x, ncol, value); FREE(1); return x; } r_obj* df_poke_at(r_obj* x, r_obj* name, r_obj* value) { r_obj* names = KEEP(r_names(x)); r_ssize i = r_chr_find(names, name); FREE(1); x = KEEP(df_poke(x, i, value)); if (i < 0) { r_obj* names = KEEP(r_names(x)); r_chr_poke(names, r_length(x) - 1, name); FREE(1); } FREE(1); return x; } static inline r_ssize df_flat_width(r_obj* x) { r_ssize n = r_length(x); r_ssize out = n; r_obj* const * v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n; ++i) { r_obj* col = v_x[i]; if (is_data_frame(col)) { out = out + df_flat_width(col) - 1; } } return out; } struct flatten_info { bool flatten; r_ssize width; }; static inline struct flatten_info df_flatten_info(r_obj* x) { bool flatten = false; r_ssize n = r_length(x); r_ssize width = n; r_obj* const * v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n; ++i) { r_obj* col = v_x[i]; if (is_data_frame(col)) { flatten = true; width = width + df_flat_width(col) - 1; } } return (struct flatten_info){flatten, width}; } // [[ register() ]] r_obj* ffi_df_flatten_info(r_obj* x) { struct flatten_info info = df_flatten_info(x); r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, r_lgl(info.flatten)); r_list_poke(out, 1, r_int(info.width)); FREE(1); return out; } // Might return duplicate names. Currently only used for equality // proxy so this doesn't matter. A less bare bone version would repair // names. // // [[ register() ]] r_obj* df_flatten(r_obj* x) { struct flatten_info info = df_flatten_info(x); if (!info.flatten) { return x; } r_obj* out = KEEP(r_alloc_list(info.width)); r_obj* out_names = KEEP(r_alloc_character(info.width)); r_attrib_poke_names(out, out_names); df_flatten_loop(x, out, out_names, 0); init_data_frame(out, df_size(x)); FREE(2); return out; } static r_ssize df_flatten_loop(r_obj* x, r_obj* out, r_obj* out_names, r_ssize counter) { r_ssize n = r_length(x); r_obj* x_names = KEEP(r_names(x)); for (r_ssize i = 0; i < n; ++i) { r_obj* col = r_list_get(x, i); if (is_data_frame(col)) { counter = df_flatten_loop(col, out, out_names, counter); } else { r_list_poke(out, counter, col); r_chr_poke(out_names, counter, r_chr_get(x_names, i)); ++counter; } } FREE(1); return counter; } r_obj* df_repair_names(r_obj* x, struct name_repair_opts* name_repair) { r_obj* nms = KEEP(r_names(x)); r_obj* repaired = KEEP(vec_as_names(nms, name_repair)); // Should this go through proxy and restore so that classes can // update metadata and check invariants when special columns are // renamed? if (nms != repaired) { x = KEEP(r_clone_referenced(x)); r_attrib_poke_names(x, repaired); FREE(1); } FREE(2); return x; } void vctrs_init_type_data_frame(r_obj* ns) { syms_df_lossy_cast = r_sym("df_lossy_cast"); fns_df_lossy_cast = r_eval(syms_df_lossy_cast, ns); } static r_obj* syms_df_lossy_cast = NULL; static r_obj* fns_df_lossy_cast = NULL; vctrs/src/vctrs.h0000644000176200001440000002003414402367170013517 0ustar liggesusers#ifndef VCTRS_H #define VCTRS_H #include "vctrs-core.h" // Vector types ------------------------------------------------- SEXP vec_unspecified(R_len_t n); bool vec_is_unspecified(SEXP x); #include "type-info.h" #include "arg-counter.h" #include "arg.h" #include "assert.h" #include "c.h" #include "cast-bare.h" #include "cast-dispatch.h" #include "cast.h" #include "compare.h" #include "complete.h" #include "conditions.h" #include "dictionary.h" #include "dim.h" #include "equal.h" #include "expand.h" #include "hash.h" #include "lazy.h" #include "match-compare.h" #include "match-joint.h" #include "missing.h" #include "names.h" #include "order-collate.h" #include "order-groups.h" #include "order-sortedness.h" #include "order-truelength.h" #include "order.h" #include "owned.h" #include "poly-op.h" #include "proxy.h" #include "proxy-restore.h" #include "ptype-common.h" #include "ptype.h" #include "ptype2-dispatch.h" #include "ptype2.h" #include "rep.h" #include "runs.h" #include "set.h" #include "shape.h" #include "size-common.h" #include "size.h" #include "slice-assign.h" #include "slice.h" #include "slice-chop.h" #include "strides.h" #include "subscript-loc.h" #include "subscript.h" #include "translate.h" #include "typeof2.h" #include "typeof2-s3.h" #include "utils-dispatch.h" #include "utils.h" // Vector methods ------------------------------------------------ enum vctrs_proxy_kind { VCTRS_PROXY_KIND_equal = 0, VCTRS_PROXY_KIND_compare, VCTRS_PROXY_KIND_order }; SEXP vec_proxy(SEXP x); SEXP vec_proxy_equal(SEXP x); SEXP vec_proxy_compare(SEXP x); SEXP vec_proxy_order(SEXP x); SEXP vec_proxy_unwrap(SEXP x); SEXP vec_slice_shaped(enum vctrs_type type, SEXP x, SEXP index); bool vec_requires_fallback(SEXP x, struct vctrs_proxy_info info); r_obj* vec_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call); SEXP vec_ptype_finalise(SEXP x); bool vec_is_unspecified(SEXP x); SEXP vec_names(SEXP x); SEXP vec_proxy_names(SEXP x); SEXP vec_group_loc(SEXP x); SEXP vec_match_params(SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline SEXP vec_match(SEXP needles, SEXP haystack) { return vec_match_params(needles, haystack, true, NULL, NULL, r_lazy_null); } bool is_data_frame(SEXP x); uint32_t hash_object(SEXP x); void hash_fill(uint32_t* p, R_len_t n, SEXP x, bool na_equal); SEXP vec_unique(SEXP x); bool duplicated_any(SEXP names); // Data frame column iteration ---------------------------------- // Used in functions that treat data frames as vectors of rows, but // iterate over columns. Examples are `vec_equal()` and // `vec_compare()`. /** * @member row_known A boolean array of size `n_row`. Allocated on the R heap. * Initially, all values are initialized to `false`. As we iterate along the * columns, we flip the corresponding row's `row_known` value to `true` if we * can determine the `out` value for that row from the current columns. * Once a row's `row_known` value is `true`, we never check that row again * as we continue through the columns. * @member p_row_known A pointer to the boolean array stored in `row_known`. * Initialized with `(bool*) RAW(info.row_known)`. * @member remaining The number of `row_known` values that are still `false`. * If this hits `0` before we traverse the entire data frame, we can exit * immediately because all `out` values are already known. * @member size The number of rows in the data frame. */ struct df_short_circuit_info { SEXP row_known; bool* p_row_known; PROTECT_INDEX row_known_pi; R_len_t remaining; R_len_t size; }; #define PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, p_n) do { \ PROTECT_WITH_INDEX((p_info)->row_known, &(p_info)->row_known_pi); \ *(p_n) += 1; \ } while (0) static inline struct df_short_circuit_info new_df_short_circuit_info(R_len_t size, bool lazy) { SEXP row_known; bool* p_row_known; if (lazy) { row_known = PROTECT(R_NilValue); p_row_known = NULL; } else { row_known = PROTECT(Rf_allocVector(RAWSXP, size * sizeof(bool))); p_row_known = (bool*) RAW(row_known); // To begin with, no rows have a known comparison value memset(p_row_known, false, size * sizeof(bool)); } struct df_short_circuit_info info = { .row_known = row_known, .p_row_known = p_row_known, .remaining = size, .size = size }; UNPROTECT(1); return info; } static inline void init_lazy_df_short_circuit_info(struct df_short_circuit_info* p_info) { if (p_info->row_known != R_NilValue) { return; } p_info->row_known = Rf_allocVector(RAWSXP, p_info->size * sizeof(bool)); REPROTECT(p_info->row_known, p_info->row_known_pi); p_info->p_row_known = (bool*) RAW(p_info->row_known); } // Factor methods ----------------------------------------------- SEXP chr_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg); SEXP chr_as_ordered(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg); SEXP fct_as_character(SEXP x, struct vctrs_arg* x_arg); SEXP fct_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg); SEXP ord_as_character(SEXP x, struct vctrs_arg* x_arg); // Datetime methods --------------------------------------------- SEXP date_as_date(SEXP x); SEXP date_as_posixct(SEXP x, SEXP to); SEXP date_as_posixlt(SEXP x, SEXP to); SEXP posixct_as_date(SEXP x, bool* lossy); SEXP posixlt_as_date(SEXP x, bool* lossy); SEXP posixct_as_posixct(SEXP x, SEXP to); SEXP posixlt_as_posixct(SEXP x, SEXP to); SEXP posixct_as_posixlt(SEXP x, SEXP to); SEXP posixlt_as_posixlt(SEXP x, SEXP to); SEXP vec_date_restore(SEXP x, SEXP to, const enum vctrs_owned owned); SEXP vec_posixct_restore(SEXP x, SEXP to, const enum vctrs_owned owned); SEXP vec_posixlt_restore(SEXP x, SEXP to, const enum vctrs_owned owned); SEXP date_datetime_ptype2(SEXP x, SEXP y); SEXP datetime_datetime_ptype2(SEXP x, SEXP y); // Growable vector ---------------------------------------------- struct growable { SEXP x; SEXPTYPE type; void* array; PROTECT_INDEX idx; int n; int capacity; }; struct growable new_growable(SEXPTYPE type, int capacity); SEXP growable_values(struct growable* g); static inline void growable_push_int(struct growable* g, int i) { if (g->n == g->capacity) { g->capacity *= 2; g->x = Rf_lengthgets(g->x, g->capacity); REPROTECT(g->x, g->idx); g->array = INTEGER(g->x); } int* p = (int*) g->array; p[g->n] = i; ++(g->n); } #define PROTECT_GROWABLE(g, n) do { \ PROTECT_WITH_INDEX((g)->x, &((g)->idx)); \ *n += 1; \ } while(0) #define UNPROTECT_GROWABLE(g) do { UNPROTECT(1);} while(0) // Conditions --------------------------------------------------- r_no_return void stop_scalar_type(SEXP x, struct vctrs_arg* arg, struct r_lazy call); __attribute__((noreturn)) void stop_assert_size(r_ssize actual, r_ssize required, struct vctrs_arg* arg, struct r_lazy call); __attribute__((noreturn)) void stop_incompatible_type(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, bool cast); __attribute__((noreturn)) void stop_recycle_incompatible_size(r_ssize x_size, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call); __attribute__((noreturn)) void stop_incompatible_shape(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, int axis, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); void stop_corrupt_factor_levels(SEXP x, struct vctrs_arg* arg) __attribute__((noreturn)); void stop_corrupt_ordered_levels(SEXP x, struct vctrs_arg* arg) __attribute__((noreturn)); #endif vctrs/src/rlang-dev.h0000644000176200001440000000051514315060310014224 0ustar liggesusers#ifndef VCTRS_RLANG_DEV_H #define VCTRS_RLANG_DEV_H #include static inline const char* r_c_str_format_error_arg(const char* x) { r_obj* ffi_x = KEEP(r_chr(x)); const char* out = r_format_error_arg(ffi_x); FREE(1); return out; } // vmax-protected result const char* r_obj_type_friendly_length(r_obj* x); #endif vctrs/src/match-compare.h0000644000176200001440000001261314350124306015074 0ustar liggesusers#ifndef VCTRS_MATCH_COMPARE_H #define VCTRS_MATCH_COMPARE_H #include "vctrs-core.h" #include "poly-op.h" #include "type-complex.h" /* * These comparison operators are designed to match the comparison order * returned from: * `vec_order(x, direction = "asc", na_value = "smallest", nan_distinct = nan_distinct)` * * They are intended for internal use in `vec_joint_xtfrm()`, which uses that * exact setup to call `vec_order_info()`. * * In particular, double and complex types match the ordering results from * using `nan_distinct`. If `false`, they are treated equally. If `true`, * since this is ascending order and `NA` values are the smallest value, it * places `NA` before `NaN` followed by real numbers to match `vec_order()`. */ // ----------------------------------------------------------------------------- static inline int lgl_order_compare_na_equal(int x, int y, bool nan_distinct) { return lgl_compare_na_equal(x, y); } static inline int int_order_compare_na_equal(int x, int y, bool nan_distinct) { return int_compare_na_equal(x, y); } static inline int dbl_order_compare_na_equal(double x, double y, bool nan_distinct) { enum vctrs_dbl x_class = dbl_classify(x); enum vctrs_dbl y_class = dbl_classify(y); switch (x_class) { case VCTRS_DBL_number: { switch (y_class) { case VCTRS_DBL_number: return dbl_compare_scalar(x, y); case VCTRS_DBL_missing: return 1; case VCTRS_DBL_nan: return 1; } } case VCTRS_DBL_missing: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return 0; case VCTRS_DBL_nan: return nan_distinct ? -1 : 0; } } case VCTRS_DBL_nan: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return nan_distinct ? 1 : 0; case VCTRS_DBL_nan: return 0; } } } r_stop_unreachable(); } static inline int cpl_order_compare_na_equal(r_complex x, r_complex y, bool nan_distinct) { x = cpl_normalise_missing(x); y = cpl_normalise_missing(y); const int cmp = dbl_order_compare_na_equal(x.r, y.r, nan_distinct); if (cmp == 0) { return dbl_order_compare_na_equal(x.i, y.i, nan_distinct); } else { return cmp; } } static inline int chr_order_compare_na_equal(r_obj* x, r_obj* y, bool nan_distinct) { return chr_compare_na_equal(x, y); } // ----------------------------------------------------------------------------- #define P_ORDER_COMPARE_NA_EQUAL(CTYPE, ORDER_COMPARE_NA_EQUAL) do { \ return ORDER_COMPARE_NA_EQUAL(((CTYPE const*) p_x)[i], ((CTYPE const*) p_y)[j], nan_distinct); \ } while (0) static inline int p_lgl_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(int, lgl_order_compare_na_equal); } static inline int p_int_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(int, int_order_compare_na_equal); } static inline int p_dbl_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(double, dbl_order_compare_na_equal); } static inline int p_cpl_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(r_complex, cpl_order_compare_na_equal); } static inline int p_chr_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(r_obj*, chr_order_compare_na_equal); } #undef P_ORDER_COMPARE_NA_EQUAL static inline int p_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_integer: return p_int_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_double: return p_dbl_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_complex: return p_cpl_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_character: return p_chr_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); default: stop_unimplemented_vctrs_type("p_order_compare_na_equal", type); } } static inline int p_df_order_compare_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, bool nan_distinct) { struct poly_df_data* x_data = (struct poly_df_data*) x; struct poly_df_data* y_data = (struct poly_df_data*) y; r_ssize n_col = x_data->n_col; enum vctrs_type* v_col_type = x_data->v_col_type; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { int cmp = p_order_compare_na_equal( v_x_col_ptr[col], i, v_y_col_ptr[col], j, nan_distinct, v_col_type[col] ); if (cmp == 0) { // Equal values for this column continue; } // Difference detected return cmp; } // All columns were equal return 0; } // ----------------------------------------------------------------------------- #endif vctrs/src/altrep.c0000644000176200001440000000017414315060310013630 0ustar liggesusers#include #include "altrep.h" // [[ register() ]] r_obj* vctrs_is_altrep(r_obj* x) { return r_lgl(ALTREP(x)); } vctrs/src/equal.c0000644000176200001440000003422014315060310013447 0ustar liggesusers#include "vctrs.h" #include // ----------------------------------------------------------------------------- static SEXP vec_equal(SEXP x, SEXP y, bool na_equal); // [[ register() ]] SEXP vctrs_equal(SEXP x, SEXP y, SEXP na_equal) { bool c_na_equal = r_bool_as_int(na_equal); return vec_equal(x, y, c_na_equal); } static inline SEXP lgl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static inline SEXP int_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static inline SEXP dbl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static inline SEXP cpl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static inline SEXP chr_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static inline SEXP raw_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static inline SEXP list_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); static inline SEXP df_equal(SEXP x, SEXP y, R_len_t size, bool na_equal); /* * Recycling and casting is done at the R level */ static SEXP vec_equal(SEXP x, SEXP y, bool na_equal) { SEXP x_proxy = PROTECT(vec_proxy_equal(x)); SEXP y_proxy = PROTECT(vec_proxy_equal(y)); x_proxy = PROTECT(vec_normalize_encoding(x_proxy)); y_proxy = PROTECT(vec_normalize_encoding(y_proxy)); R_len_t size = vec_size(x_proxy); enum vctrs_type type = vec_proxy_typeof(x_proxy); if (type != vec_proxy_typeof(y_proxy) || size != vec_size(y_proxy)) { Rf_errorcall(R_NilValue, "`x` and `y` must have same types and lengths."); } SEXP out; switch (type) { case VCTRS_TYPE_logical: out = lgl_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_integer: out = int_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_double: out = dbl_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_complex: out = cpl_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_character: out = chr_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_raw: out = raw_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_list: out = list_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_dataframe: out = df_equal(x_proxy, y_proxy, size, na_equal); break; case VCTRS_TYPE_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", type); } UNPROTECT(4); return out; } // ----------------------------------------------------------------------------- #define EQUAL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE) \ SEXP out = PROTECT(r_new_logical(size)); \ int* p_out = LOGICAL(out); \ \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ if (na_equal) { \ for (R_len_t i = 0; i < size; ++i) { \ p_out[i] = EQUAL_NA_EQUAL(p_x[i], p_y[i]); \ } \ } else { \ for (R_len_t i = 0; i < size; ++i) { \ p_out[i] = EQUAL_NA_PROPAGATE(p_x[i], p_y[i]); \ } \ } \ \ UNPROTECT(1); \ return out; static inline SEXP lgl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(int, LOGICAL_RO, lgl_equal_na_equal, lgl_equal_na_propagate); } static inline SEXP int_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(int, INTEGER_RO, int_equal_na_equal, int_equal_na_propagate); } static inline SEXP dbl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(double, REAL_RO, dbl_equal_na_equal, dbl_equal_na_propagate); } static inline SEXP cpl_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal, cpl_equal_na_propagate); } static inline SEXP chr_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(SEXP, STRING_PTR_RO, chr_equal_na_equal, chr_equal_na_propagate); } static inline SEXP raw_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(Rbyte, RAW_RO, raw_equal_na_equal, raw_equal_na_propagate); } static inline SEXP list_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { EQUAL(SEXP, VECTOR_PTR_RO, list_equal_na_equal, list_equal_na_propagate); } #undef EQUAL // ----------------------------------------------------------------------------- static void vec_equal_col_na_equal(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info); static void vec_equal_col_na_propagate(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info); static SEXP df_equal(SEXP x, SEXP y, R_len_t size, bool na_equal) { int nprot = 0; SEXP out = PROTECT_N(r_new_logical(size), &nprot); int* p_out = LOGICAL(out); // Initialize to "equality" value // and only change if we learn that it differs for (R_len_t i = 0; i < size; ++i) { p_out[i] = 1; } struct df_short_circuit_info info = new_df_short_circuit_info(size, false); struct df_short_circuit_info* p_info = &info; PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, &nprot); R_len_t n_col = Rf_length(x); if (n_col != Rf_length(y)) { Rf_errorcall(R_NilValue, "`x` and `y` must have the same number of columns"); } void (*vec_equal_col)(SEXP, SEXP, int*, struct df_short_circuit_info*); if (na_equal) { vec_equal_col = vec_equal_col_na_equal; } else { vec_equal_col = vec_equal_col_na_propagate; } const SEXP* p_x = VECTOR_PTR_RO(x); const SEXP* p_y = VECTOR_PTR_RO(y); for (R_len_t i = 0; i < n_col; ++i) { vec_equal_col(p_x[i], p_y[i], p_out, p_info); if (p_info->remaining == 0) { break; } } UNPROTECT(nprot); return out; } // ----------------------------------------------------------------------------- #define EQUAL_COL(CTYPE, CONST_DEREF, EQUAL) do { \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ for (R_len_t i = 0; i < p_info->size; ++i) { \ if (p_info->p_row_known[i]) { \ continue; \ } \ \ int eq = EQUAL(p_x[i], p_y[i]); \ \ if (eq <= 0) { \ p_out[i] = eq; \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ } \ } while (0) static void vec_equal_col_na_equal(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: EQUAL_COL(int, LOGICAL_RO, lgl_equal_na_equal); break; case VCTRS_TYPE_integer: EQUAL_COL(int, INTEGER_RO, int_equal_na_equal); break; case VCTRS_TYPE_double: EQUAL_COL(double, REAL_RO, dbl_equal_na_equal); break; case VCTRS_TYPE_complex: EQUAL_COL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal); break; case VCTRS_TYPE_character: EQUAL_COL(SEXP, STRING_PTR_RO, chr_equal_na_equal); break; case VCTRS_TYPE_raw: EQUAL_COL(Rbyte, RAW_RO, raw_equal_na_equal); break; case VCTRS_TYPE_list: EQUAL_COL(SEXP, VECTOR_PTR_RO, list_equal_na_equal); break; case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should be flattened already."); case VCTRS_TYPE_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", vec_proxy_typeof(x)); } } static void vec_equal_col_na_propagate(SEXP x, SEXP y, int* p_out, struct df_short_circuit_info* p_info) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: EQUAL_COL(int, LOGICAL_RO, lgl_equal_na_propagate); break; case VCTRS_TYPE_integer: EQUAL_COL(int, INTEGER_RO, int_equal_na_propagate); break; case VCTRS_TYPE_double: EQUAL_COL(double, REAL_RO, dbl_equal_na_propagate); break; case VCTRS_TYPE_complex: EQUAL_COL(Rcomplex, COMPLEX_RO, cpl_equal_na_propagate); break; case VCTRS_TYPE_character: EQUAL_COL(SEXP, STRING_PTR_RO, chr_equal_na_propagate); break; case VCTRS_TYPE_raw: EQUAL_COL(Rbyte, RAW_RO, raw_equal_na_propagate); break; case VCTRS_TYPE_list: EQUAL_COL(SEXP, VECTOR_PTR_RO, list_equal_na_propagate); break; case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should be flattened already."); case VCTRS_TYPE_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", vec_proxy_typeof(x)); } } #undef EQUAL_COL // ----------------------------------------------------------------------------- // Missingness is never propagated through objects, // so `na_equal` is always `true` in these macros #define EQUAL_ALL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL) \ do { \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ for (R_len_t i = 0; i < n; ++i) { \ if (!EQUAL_NA_EQUAL(p_x[i], p_y[i])) { \ return false; \ } \ } \ return true; \ } \ while (0) static inline bool vec_equal_attrib(SEXP x, SEXP y); // [[ include("vctrs.h") ]] bool equal_object(SEXP x, SEXP y) { x = PROTECT(vec_normalize_encoding(x)); y = PROTECT(vec_normalize_encoding(y)); bool out = equal_object_normalized(x, y); UNPROTECT(2); return out; } // Assumes `vec_normalize_encoding()` has already been called // [[ include("vctrs.h") ]] bool equal_object_normalized(SEXP x, SEXP y) { SEXPTYPE type = TYPEOF(x); if (type != TYPEOF(y)) { return false; } // Pointer comparison is all that is required for these types switch (type) { case NILSXP: case SYMSXP: case SPECIALSXP: case BUILTINSXP: case CHARSXP: case ENVSXP: case EXTPTRSXP: return x == y; } // For other types, try a pointer comparison first before // performing an in depth equality check if (x == y) { return true; } switch (type) { // Handled below case LGLSXP: case INTSXP: case REALSXP: case STRSXP: case RAWSXP: case CPLXSXP: case EXPRSXP: case VECSXP: break; case DOTSXP: case LANGSXP: case LISTSXP: case BCODESXP: { if (!equal_object_normalized(ATTRIB(x), ATTRIB(y))) { return false; } if (!equal_object_normalized(CAR(x), CAR(y))) { return false; } x = CDR(x); y = CDR(y); if (!equal_object_normalized(x, y)) { return false; } return true; } case CLOSXP: if (!equal_object_normalized(ATTRIB(x), ATTRIB(y))) { return false; } if (!equal_object_normalized(BODY(x), BODY(y))) { return false; } if (!equal_object_normalized(CLOENV(x), CLOENV(y))) { return false; } if (!equal_object_normalized(FORMALS(x), FORMALS(y))) { return false; } return true; case NILSXP: case SYMSXP: case SPECIALSXP: case BUILTINSXP: case CHARSXP: case ENVSXP: case EXTPTRSXP: // These are handled above with pointer comparison r_stop_internal("Unexpected reference type."); default: stop_unimplemented_type("equal_object_normalized", TYPEOF(x)); } R_len_t n = Rf_length(x); if (n != Rf_length(y)) { return false; } if (!vec_equal_attrib(x, y)) { return false; } switch (type) { case LGLSXP: EQUAL_ALL(int, LOGICAL_RO, lgl_equal_na_equal); case INTSXP: EQUAL_ALL(int, INTEGER_RO, int_equal_na_equal); case REALSXP: EQUAL_ALL(double, REAL_RO, dbl_equal_na_equal); case STRSXP: EQUAL_ALL(SEXP, STRING_PTR_RO, chr_equal_na_equal); case RAWSXP: EQUAL_ALL(Rbyte, RAW_RO, raw_equal_na_equal); case CPLXSXP: EQUAL_ALL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal); case EXPRSXP: case VECSXP: EQUAL_ALL(SEXP, VECTOR_PTR_RO, list_equal_na_equal); default: stop_unimplemented_type("equal_object", type); } } #undef EQUAL_ALL // [[ register() ]] SEXP vctrs_equal_object(SEXP x, SEXP y) { return Rf_ScalarLogical(equal_object(x, y)); } // TODO: Sort attributes by tag before comparison static inline bool vec_equal_attrib(SEXP x, SEXP y) { SEXP x_attrs = ATTRIB(x); SEXP y_attrs = ATTRIB(y); while (x_attrs != R_NilValue) { if (y_attrs == R_NilValue) { return false; } SEXP x_tag = TAG(x_attrs); SEXP y_tag = TAG(x_attrs); if (x_tag != y_tag) { return false; } if (!equal_object_normalized(CAR(x_attrs), CAR(y_attrs))) { return false; } x_attrs = CDR(x_attrs); y_attrs = CDR(y_attrs); } return true; } // [[ include("vctrs.h") ]] bool equal_names(SEXP x, SEXP y) { SEXP x_names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); SEXP y_names = PROTECT(Rf_getAttrib(y, R_NamesSymbol)); bool out = equal_object(x_names, y_names); UNPROTECT(2); return out; } vctrs/src/type-complex.h0000644000176200001440000000270214511320527015002 0ustar liggesusers#ifndef VCTRS_TYPE_COMPLEX_H #define VCTRS_TYPE_COMPLEX_H #include "vctrs.h" /* * Normalises a complex value so that if one side is missing, both are. This * ensures that all missing complex values are grouped together, no matter * what type of missingness it is. NA and NaN can still be separated by * `nan_distinct`, resulting in 4 different combinations of missingness. These * 4 groups of missingness will still all be grouped together, either before * or after any non-missing values have appeared. * See issue #1403 for more information. */ static inline r_complex cpl_normalise_missing(r_complex x) { const double na = r_globals.na_dbl; const double nan = R_NaN; const enum vctrs_dbl r_type = dbl_classify(x.r); const enum vctrs_dbl i_type = dbl_classify(x.i); switch (r_type) { case VCTRS_DBL_number: switch (i_type) { case VCTRS_DBL_number: return x; case VCTRS_DBL_missing: return (r_complex) { .r = na, .i = na}; case VCTRS_DBL_nan: return (r_complex) { .r = nan, .i = nan}; } case VCTRS_DBL_missing: switch (i_type) { case VCTRS_DBL_number: return (r_complex) { .r = na, .i = na}; case VCTRS_DBL_missing: return x; case VCTRS_DBL_nan: return x; } case VCTRS_DBL_nan: switch (i_type) { case VCTRS_DBL_number: return (r_complex) { .r = nan, .i = nan}; case VCTRS_DBL_missing: return x; case VCTRS_DBL_nan: return x; } } never_reached("cpl_normalise_missing"); } #endif vctrs/src/type-tibble.h0000644000176200001440000000031614315060310014564 0ustar liggesusers#ifndef VCTRS_TYPE_TIBBLE_H #define VCTRS_TYPE_TIBBLE_H #include "vctrs-core.h" #include "ptype2.h" SEXP tib_ptype2(const struct ptype2_opts* opts); SEXP tib_cast(const struct cast_opts* opts); #endif vctrs/src/arg.h0000644000176200001440000000263214315060310013120 0ustar liggesusers#ifndef VCTRS_ARG_H #define VCTRS_ARG_H #include "vctrs-core.h" // Materialise an argument tag as a CHARSXP. r_obj* vctrs_arg(struct vctrs_arg* arg); // Materialise an argument tag as a vmax-protected C string. const char* vec_arg_format(struct vctrs_arg* p_arg); // Simple wrapper around a string struct vctrs_arg new_wrapper_arg(struct vctrs_arg* parent, const char* arg); struct vctrs_arg new_lazy_arg(struct r_lazy* data); // Wrapper around a counter representing the current position of the // argument struct arg_data_counter { struct vctrs_arg* p_parent; r_ssize* i; r_obj** names; r_ssize* names_i; }; struct vctrs_arg new_counter_arg(struct vctrs_arg* parent, struct arg_data_counter* data); struct arg_data_counter new_counter_arg_data(struct vctrs_arg* p_parent, r_ssize* i, r_obj** names, r_ssize* names_i); struct vctrs_arg* new_subscript_arg_vec(struct vctrs_arg* parent, r_obj* x, r_ssize* p_i); struct vctrs_arg* new_subscript_arg(struct vctrs_arg* parent, r_obj* names, r_ssize n, r_ssize* p_i); #endif vctrs/src/rlang/0000755000176200001440000000000014532470452013313 5ustar liggesusersvctrs/src/rlang/parse.h0000644000176200001440000000021214465445266014603 0ustar liggesusers#ifndef RLANG_PARSE_H #define RLANG_PARSE_H r_obj* r_parse(const char* str); r_obj* r_parse_eval(const char* str, r_obj* env); #endif vctrs/src/rlang/stack.h0000644000176200001440000000047014465445266014604 0ustar liggesusers#ifndef RLANG_STACK_H #define RLANG_STACK_H void r_on_exit(r_obj* expr, r_obj* frame); r_obj* r_peek_frame(void); r_obj* r_caller_env(r_obj* n); r_obj* r_sys_frame(int n, r_obj* frame); r_obj* r_sys_call(int n, r_obj* frame); static inline void r_yield_interrupt(void) { R_CheckUserInterrupt(); } #endif vctrs/src/rlang/rlang-types.h0000644000176200001440000000446214465445266015751 0ustar liggesusers#ifndef RLANG_RLANG_TYPES_H #define RLANG_RLANG_TYPES_H #define R_NO_REMAP #include #include // Use `r_visible` to mark your init function. Then users can compile // with `-fvisibility=hidden -DHAVE_VISIBILITY_ATTRIBUTE` to link to // your library (as opposed to dynamically loading it) without risking // symbol clashes. #define r_visible attribute_visible extern #ifdef __GNUC__ # define r_unused __attribute__ ((unused)) #else # define r_unused #endif #define r_no_return __attribute__ ((noreturn)) typedef struct SEXPREC r_obj; typedef Rcomplex r_complex; typedef R_xlen_t r_ssize; #define R_SSIZE_MAX R_XLEN_T_MAX #define R_SSIZE_MIN (-R_XLEN_T_MAX) #ifdef LONG_VECTOR_SUPPORT # define R_PRI_SSIZE "td" #else # define R_PRI_SSIZE "d" #endif enum r_type { R_TYPE_null = 0, R_TYPE_symbol = 1, R_TYPE_pairlist = 2, R_TYPE_closure = 3, R_TYPE_environment = 4, R_TYPE_promise = 5, R_TYPE_call = 6, R_TYPE_special = 7, R_TYPE_builtin = 8, R_TYPE_string = 9, R_TYPE_logical = 10, R_TYPE_integer = 13, R_TYPE_double = 14, R_TYPE_complex = 15, R_TYPE_character = 16, R_TYPE_dots = 17, R_TYPE_any = 18, R_TYPE_list = 19, R_TYPE_expression = 20, R_TYPE_bytecode = 21, R_TYPE_pointer = 22, R_TYPE_weakref = 23, R_TYPE_raw = 24, R_TYPE_s4 = 25, R_TYPE_new = 30, R_TYPE_free = 31, R_TYPE_function = 99 }; #define r_null R_NilValue struct r_pair { r_obj* x; r_obj* y; }; struct r_triple { r_obj* x; r_obj* y; r_obj* z; }; struct r_pair_ptr_ssize { void* ptr; r_ssize size; }; struct r_pair_callback { r_obj* (*fn)(void* data); void* data; }; struct r_lazy { r_obj* x; r_obj* env; }; #define KEEP PROTECT #define FREE UNPROTECT #define KEEP2(x, y) (KEEP(x), KEEP(y)) #define KEEP_N(x, n) (++(*n), KEEP(x)) #define r_keep_loc PROTECT_INDEX #define KEEP_AT REPROTECT #define KEEP_HERE PROTECT_WITH_INDEX #define KEEP_WHILE(X, EXPR) do { \ KEEP(X); \ EXPR; \ FREE(1); \ } while (0) #define RLANG_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) #endif vctrs/src/rlang/state.h0000644000176200001440000000063114465445266014616 0ustar liggesusers#ifndef RLANG_STATE_H #define RLANG_STATE_H static inline r_obj* r_peek_option(const char* name) { return Rf_GetOption1(Rf_install(name)); } static inline void r_poke_option(const char* name, r_obj* value) { r_obj* args = KEEP(r_new_node(value, r_null)); r_node_poke_tag(args, r_sym(name)); r_obj* call = KEEP(r_new_call(r_syms.options, args)); r_eval(call, r_envs.base); FREE(2); } #endif vctrs/src/rlang/env-binding.c0000644000176200001440000000462514465445266015700 0ustar liggesusers#include "rlang.h" #include "env.h" bool r_env_binding_is_promise(r_obj* env, r_obj* sym) { r_obj* obj = r_env_find(env, sym); return r_typeof(obj) == R_TYPE_promise && PRVALUE(obj) == r_syms.unbound; } bool r_env_binding_is_active(r_obj* env, r_obj* sym) { return R_BindingIsActive(sym, env); } static r_obj* new_binding_types(r_ssize n) { r_obj* types = r_alloc_integer(n); int* types_ptr = r_int_begin(types); memset(types_ptr, 0, n * sizeof *types_ptr); return types; } static enum r_env_binding_type which_env_binding(r_obj* env, r_obj* sym) { if (r_env_binding_is_active(env, sym)) { // Check for active bindings first, since promise detection triggers // active bindings through `r_env_find()` (#1376) return R_ENV_BINDING_TYPE_active; } if (r_env_binding_is_promise(env, sym)) { return R_ENV_BINDING_TYPE_promise; } return R_ENV_BINDING_TYPE_value; } static inline r_obj* binding_as_sym(bool list, r_obj* bindings, r_ssize i) { if (list) { r_obj* out = r_list_get(bindings, i); if (r_typeof(out) != R_TYPE_symbol) { r_abort("Binding must be a symbol."); } return out; } else { return r_str_as_symbol(r_chr_get(bindings, i)); } } static r_ssize detect_special_binding(r_obj* env, r_obj* bindings, bool symbols) { r_ssize n = r_length(bindings); for (r_ssize i = 0; i < n; ++i) { r_obj* sym = binding_as_sym(symbols, bindings, i); if (which_env_binding(env, sym)) { return i; } } return -1; } // Returns NULL if all values to spare an alloc r_obj* r_env_binding_types(r_obj* env, r_obj* bindings) { if (r_typeof(env) != R_TYPE_environment) { r_abort("Expected environment in promise binding predicate."); } bool symbols; switch (r_typeof(bindings)) { case R_TYPE_list: symbols = true; break; case R_TYPE_character: symbols = false; break; default: r_abort("Internal error: Unexpected `bindings` type in `r_env_binding_types()`"); } r_ssize i = detect_special_binding(env, bindings, symbols); if (i < 0) { return r_null; } r_ssize n = r_length(bindings); r_obj* types = KEEP(new_binding_types(n)); int* types_ptr = r_int_begin(types) + i; while (i < n) { r_obj* sym = binding_as_sym(symbols, bindings, i); *types_ptr = which_env_binding(env, sym); ++i; ++types_ptr; } FREE(1); return types; } vctrs/src/rlang/attrib.c0000644000176200001440000000733014465445266014761 0ustar liggesusers#include "rlang.h" r_obj* r_attrib_push(r_obj* x, r_obj* tag, r_obj* value) { r_obj* attrs = r_new_node(value, r_attrib(x)); r_node_poke_tag(attrs, tag); r_poke_attrib(x, attrs); return attrs; } /** * - If `sentinel` is found in the first node: `parent_out` is `r_null` * - If `sentinel` is not found: both return value and `parent_out` * are `r_null` * - If `sentinel` is `r_null`, this is like a full shallow duplication * but returns tail node */ r_obj* r_pairlist_clone_until(r_obj* node, r_obj* sentinel, r_obj** parent_out) { r_obj* parent = r_null; r_obj* cur = node; int n_kept = 0; while (true) { if (cur == sentinel) { FREE(n_kept); *parent_out = parent; return node; } // Return NULL if sentinel is not found if (cur == r_null) { FREE(n_kept); *parent_out = r_null; return r_null; } r_obj* tag = r_node_tag(cur); cur = r_new_node(r_node_car(cur), r_node_cdr(cur)); r_node_poke_tag(cur, tag); if (parent == r_null) { KEEP_N(cur, &n_kept); node = cur; } else { r_node_poke_cdr(parent, cur); } parent = cur; cur = r_node_cdr(cur); } r_stop_unreachable(); } r_obj* r_attrs_set_at(r_obj* attrs, r_obj* node, r_obj* value) { r_obj* sentinel = r_node_cdr(node); r_obj* new_node = r_null; attrs = KEEP(r_pairlist_clone_until(attrs, sentinel, &new_node)); r_node_poke_car(new_node, value); FREE(1); return attrs; } r_obj* r_attrs_zap_at(r_obj* attrs, r_obj* node, r_obj* value) { r_obj* sentinel = node; r_obj* new_node = r_null; attrs = KEEP(r_pairlist_clone_until(attrs, sentinel, &new_node)); if (new_node == r_null) { // `node` is the first node of `attrs` attrs = r_node_cdr(attrs); } else { r_node_poke_cdr(new_node, r_node_cdr(node)); } FREE(1); return attrs; } r_obj* r_clone2(r_obj* x) { r_obj* attrs = KEEP(r_attrib(x)); // Prevent attributes from being cloned r_poke_attrib(x, r_null); r_obj* out = r_clone(x); r_poke_attrib(x, attrs); r_poke_attrib(out, attrs); FREE(1); return out; } r_obj* r_attrib_set(r_obj* x, r_obj* tag, r_obj* value) { r_obj* attrs = r_attrib(x); r_obj* out = KEEP(r_clone2(x)); r_obj* node = attrs; while (node != r_null) { if (r_node_tag(node) == tag) { if (value == r_null) { attrs = r_attrs_zap_at(attrs, node, value); } else { attrs = r_attrs_set_at(attrs, node, value); } r_poke_attrib(out, attrs); FREE(1); return out; } node = r_node_cdr(node); } if (value != r_null) { // Just add to the front if attribute does not exist yet attrs = KEEP(r_new_node(out, attrs)); r_node_poke_tag(attrs, tag); r_node_poke_car(attrs, value); r_poke_attrib(out, attrs); FREE(1); } FREE(1); return out; } /** * With push_ prefix, assumes there is no `class` attribute in the * node list merge. This is for low-level construction of objects. */ // Caller must poke the object bit static r_obj* node_push_classes(r_obj* node, const char** tags, r_ssize n) { r_obj* tags_chr = KEEP(r_chr_n(tags, n)); r_obj* attrs = r_new_node(tags_chr, node); r_node_poke_tag(attrs, r_syms.class_); FREE(1); return attrs; } void r_attrib_push_classes(r_obj* x, const char** tags, r_ssize n) { r_obj* attrs = r_attrib(x); attrs = node_push_classes(attrs, tags, n); SET_ATTRIB(x, attrs); SET_OBJECT(x, 1); } void r_attrib_push_class(r_obj* x, const char* tag) { static const char* tags[1] = { "" }; tags[0] = tag; r_attrib_push_classes(x, tags, 1); } bool r_is_named(r_obj* x) { r_obj* nms = r_names(x); if (r_typeof(nms) != R_TYPE_character) { return false; } if (r_chr_has(nms, "")) { return false; } return true; } vctrs/src/rlang/fn.h0000644000176200001440000000171714465445266014107 0ustar liggesusers#ifndef RLANG_FN_H #define RLANG_FN_H static inline r_obj* r_fn_body(r_obj* fn) { return BODY_EXPR(fn); } static inline void r_fn_poke_body(r_obj* fn, r_obj* body) { SET_BODY(fn, body); } static inline r_obj* r_fn_env(r_obj* fn) { return CLOENV(fn); } static inline void r_fn_poke_env(r_obj* fn, r_obj* env) { SET_CLOENV(fn, env); } static inline r_obj* r_new_function(r_obj* formals, r_obj* body, r_obj* env) { SEXP fn = Rf_allocSExp(R_TYPE_closure); SET_FORMALS(fn, formals); SET_BODY(fn, body); SET_CLOENV(fn, env); return fn; } r_obj* r_as_function(r_obj* x, const char* arg); static inline bool r_is_function(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_closure: case R_TYPE_builtin: case R_TYPE_special: return true; default: return false; } } static inline bool r_is_primitive(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_builtin: case R_TYPE_special: return true; default: return false; } } #endif vctrs/src/rlang/vendor.h0000644000176200001440000000016014465445266014770 0ustar liggesusers#ifndef RLANG_VENDOR_H #define RLANG_VENDOR_H extern uint64_t (*r_xxh3_64bits)(const void*, size_t); #endif vctrs/src/rlang/dict.c0000644000176200001440000002024314465445266014415 0ustar liggesusers#include #include "dict.h" #define DICT_LOAD_THRESHOLD 0.75 #define DICT_GROWTH_FACTOR 2 static size_t size_round_power_2(size_t size); #include "decl/dict-decl.h" #define DICT_DEREF(D) r_list_cbegin(D) #define DICT_KEY(V) r_list_get(V, 0) #define DICT_VALUE(V) r_list_get(V, 1) #define DICT_CDR(V) r_list_get(V, 2) #define DICT_POKE_KEY(D, K) r_list_poke(D, 0, K) #define DICT_POKE_VALUE(D, V) r_list_poke(D, 1, V) #define DICT_POKE_CDR(D, N) r_list_poke(D, 2, N) #define V_DICT_KEY(V) (V)[0] #define V_DICT_VALUE(V) (V)[1] #define V_DICT_CDR(V) (V)[2] static r_obj* new_dict_node(r_obj* key, r_obj* value) { r_obj* bucket = r_alloc_list(3); DICT_POKE_KEY(bucket, key); DICT_POKE_VALUE(bucket, value); return bucket; } struct r_dict* r_new_dict(r_ssize size) { if (size <= 0) { r_abort("`size` of dictionary must be positive."); } size = size_round_power_2(size); r_obj* shelter = KEEP(r_alloc_list(2)); r_obj* dict_raw = r_alloc_raw0(sizeof(struct r_dict)); r_list_poke(shelter, 0, dict_raw); struct r_dict* p_dict = r_raw_begin(dict_raw); p_dict->shelter = shelter; p_dict->buckets = r_alloc_list(size); r_list_poke(shelter, 1, p_dict->buckets); p_dict->p_buckets = r_list_cbegin(p_dict->buckets); p_dict->n_buckets = size; r_attrib_poke(shelter, r_syms.class_, r_chr("rlang_dict")); FREE(1); return p_dict; } void r_dict_resize(struct r_dict* p_dict, r_ssize size) { if (size < 0) { size = p_dict->n_buckets * DICT_GROWTH_FACTOR; } struct r_dict* p_new_dict = r_new_dict(size); KEEP(p_new_dict->shelter); r_ssize n = r_length(p_dict->buckets); r_obj* const * p_buckets = p_dict->p_buckets; for (r_ssize i = 0; i < n; ++i) { r_obj* bucket = p_buckets[i]; while (bucket != r_null) { r_obj* const * v_bucket = DICT_DEREF(bucket); r_obj* key = V_DICT_KEY(v_bucket); r_obj* value = V_DICT_VALUE(v_bucket); r_dict_put(p_new_dict, key, value); bucket = V_DICT_CDR(v_bucket); } } // Update all data in place except the shelter and the raw sexp // which must stay validly protected by the callers r_obj* old_shelter = p_dict->shelter; r_list_poke(old_shelter, 1, r_list_get(p_new_dict->shelter, 1)); memcpy(p_dict, p_new_dict, sizeof(*p_dict)); p_dict->shelter = old_shelter; FREE(1); } static size_t size_round_power_2(size_t size) { size_t out = 1; while (out < size) { out <<= 1; } return out; } static r_ssize dict_hash(const struct r_dict* p_dict, r_obj* key) { uint64_t hash = r_xxh3_64bits(&key, sizeof(r_obj*)); return hash % p_dict->n_buckets; } // Returns previous value of `key` if it existed or a C `NULL` r_obj* r_dict_poke(struct r_dict* p_dict, r_obj* key, r_obj* value) { r_ssize hash; r_obj* parent; r_obj* node = dict_find_node_info(p_dict, key, &hash, &parent); if (node != r_null) { r_obj* old = DICT_VALUE(node); DICT_POKE_VALUE(node, value); return old; } else { dict_push(p_dict, hash, parent, key, value); return NULL; } } // Returns `false` if `key` already exists in the dictionary, `true` // otherwise bool r_dict_put(struct r_dict* p_dict, r_obj* key, r_obj* value) { r_ssize hash; r_obj* parent; r_obj* node = dict_find_node_info(p_dict, key, &hash, &parent); if (node != r_null) { return false; } else { dict_push(p_dict, hash, parent, key, value); return true; } } static void dict_push(struct r_dict* p_dict, r_ssize hash, r_obj* parent, r_obj* key, r_obj* value) { r_obj* node = KEEP(new_dict_node(key, value)); if (parent == r_null) { // Empty bucket r_list_poke(p_dict->buckets, hash, node); } else { DICT_POKE_CDR(parent, node); } ++p_dict->n_entries; float load = (float) p_dict->n_entries / (float) p_dict->n_buckets; if (!p_dict->prevent_resize && load > DICT_LOAD_THRESHOLD) { r_dict_resize(p_dict, -1); } FREE(1); } // Returns `true` if key existed and was deleted. Returns `false` if // the key could not be deleted because it did not exist in the dict. bool r_dict_del(struct r_dict* p_dict, r_obj* key) { r_ssize hash; r_obj* parent; r_obj* node = dict_find_node_info(p_dict, key, &hash, &parent); if (node == r_null) { return false; } r_obj* node_cdr = DICT_CDR(node); if (parent == r_null) { r_list_poke(p_dict->buckets, hash, node_cdr); } else { DICT_POKE_CDR(parent, node_cdr); } return true; } bool r_dict_has(struct r_dict* p_dict, r_obj* key) { return dict_find_node(p_dict, key) != r_null; } r_obj* r_dict_get(struct r_dict* p_dict, r_obj* key) { r_obj* out = r_dict_get0(p_dict, key); if (!out) { r_abort("Can't find key in dictionary."); } return out; } /* The 0-suffixed variant returns a C `NULL` if the object doesn't exist. The regular variant throws an error in that case. */ r_obj* r_dict_get0(struct r_dict* p_dict, r_obj* key) { r_obj* node = dict_find_node(p_dict, key); if (node == r_null) { return NULL; } else { return DICT_VALUE(node); } } static r_obj* dict_find_node(struct r_dict* p_dict, r_obj* key) { r_ssize i = dict_hash(p_dict, key); r_obj* bucket = p_dict->p_buckets[i]; while (bucket != r_null) { r_obj* const * v_bucket = DICT_DEREF(bucket); if (V_DICT_KEY(v_bucket) == key) { return bucket; } bucket = V_DICT_CDR(v_bucket); } return r_null; } // Also returns hash and parent node if any static r_obj* dict_find_node_info(struct r_dict* p_dict, r_obj* key, r_ssize* hash, r_obj** parent) { r_ssize i = dict_hash(p_dict, key); *hash = i; r_obj* bucket = p_dict->p_buckets[i]; *parent = r_null; while (bucket != r_null) { r_obj* const * v_bucket = DICT_DEREF(bucket); if (V_DICT_KEY(v_bucket) == key) { return bucket; } *parent = bucket; bucket = V_DICT_CDR(v_bucket); } return r_null; } struct r_dict_iterator* r_new_dict_iterator(struct r_dict* p_dict) { r_obj* shelter = r_alloc_raw(sizeof(struct r_dict_iterator)); struct r_dict_iterator* p_it = r_raw_begin(shelter); p_it->shelter = shelter; p_it->key = r_null; p_it->value = r_null; p_it->i = 0; p_it->n = p_dict->n_buckets; p_it->v_buckets = p_dict->p_buckets; if (p_it->n == 0) { r_stop_internal("Empty dictionary."); } p_it->node = p_it->v_buckets[0]; return p_it; } bool r_dict_next(struct r_dict_iterator* p_it) { if (p_it->v_buckets == NULL) { return false; } r_obj* node = p_it->node; while (node == r_null) { r_ssize i = ++p_it->i; if (i >= p_it->n) { p_it->v_buckets = NULL; return false; } node = p_it->v_buckets[i]; p_it->node = node; } r_obj* const * v_node = DICT_DEREF(node); p_it->key = V_DICT_KEY(v_node); p_it->value = V_DICT_VALUE(v_node); p_it->node = V_DICT_CDR(v_node); return true; } static const char* v_dict_it_df_names_c_strings[] = { "key", "value" }; static const enum r_type v_dict_it_df_types[] = { R_TYPE_list, R_TYPE_list }; enum dict_it_df_locs { DICT_IT_DF_LOCS_key, DICT_IT_DF_LOCS_value }; #define DICT_IT_DF_SIZE R_ARR_SIZEOF(v_dict_it_df_types) r_obj* r_dict_as_df_list(struct r_dict* p_dict) { r_obj* nms = KEEP(r_chr_n(v_dict_it_df_names_c_strings, DICT_IT_DF_SIZE)); r_obj* out = KEEP(r_alloc_df_list(p_dict->n_entries, nms, v_dict_it_df_types, DICT_IT_DF_SIZE)); r_obj* key = r_list_get(out, DICT_IT_DF_LOCS_key); r_obj* value = r_list_get(out, DICT_IT_DF_LOCS_value); struct r_dict_iterator* p_it = r_new_dict_iterator(p_dict); KEEP(p_it->shelter); for (r_ssize i = 0; r_dict_next(p_it); ++i) { r_list_poke(key, i, p_it->key); r_list_poke(value, i, p_it->value); } FREE(3); return out; } r_obj* r_dict_as_list(struct r_dict* p_dict) { r_obj* out = KEEP(r_alloc_list(p_dict->n_entries)); struct r_dict_iterator* p_it = r_new_dict_iterator(p_dict); KEEP(p_it->shelter); for (r_ssize i = 0; r_dict_next(p_it); ++i) { r_list_poke(out, i, p_it->value); } FREE(2); return out; } vctrs/src/rlang/obj.c0000644000176200001440000000575714465445266014261 0ustar liggesusers#include "rlang.h" #define PRECIOUS_DICT_INIT_SIZE 256 static struct r_dict* p_precious_dict = NULL; #include "decl/obj-decl.h" r_obj* r_vec_clone(r_obj* x) { r_obj* out = KEEP(r_clone(x)); r_obj* names = r_names(x); if (names != r_null) { r_attrib_poke_names(out, r_clone(names)); } FREE(1); return out; } r_obj* r_vec_clone_shared(r_obj* x) { if (r_is_shared(x)) { return r_vec_clone(x); } r_obj* names = r_names(x); if (names != r_null && r_is_shared(names)) { r_attrib_poke_names(x, r_clone(names)); return x; } return x; } void (_r_preserve)(r_obj* x) { if (!_r_use_local_precious_list) { return; } r_obj* stack = r_dict_get0(p_precious_dict, x); if (!stack) { stack = KEEP(new_precious_stack(x)); r_dict_put(p_precious_dict, x, stack); FREE(1); } push_precious(stack); } void (_r_unpreserve)(r_obj* x) { if (!_r_use_local_precious_list) { return; } r_obj* stack = r_dict_get0(p_precious_dict, x); if (!stack) { r_abort("Can't unpreserve `x` because it was not being preserved."); } int n = pop_precious(stack); if (n < 0) { r_stop_internal("`n` unexpectedly < 0."); } if (n == 0) { r_dict_del(p_precious_dict, x); } } static r_obj* new_precious_stack(r_obj* x) { r_obj* stack = KEEP(r_alloc_list(2)); // Store (0) protection count and (1) element to protect r_list_poke(stack, 0, r_int(0)); r_list_poke(stack, 1, x); FREE(1); return stack; } static int push_precious(r_obj* stack) { r_obj* n = r_list_get(stack, 0); int* p_n = r_int_begin(n); return ++(*p_n); } static int pop_precious(r_obj* stack) { r_obj* n = r_list_get(stack, 0); int* p_n = r_int_begin(n); return --(*p_n); } // For unit tests struct r_dict* rlang__precious_dict(void) { return p_precious_dict; } enum r_type r_chr_as_r_type(r_obj* type) { if (!r_is_string(type)) { r_abort("`type` must be a character string."); } return r_c_str_as_r_type(r_chr_get_c_string(type, 0)); } const char* obj_address_formatter = "%p"; r_obj* r_obj_address(r_obj* x) { static char buf[1000]; snprintf(buf, 1000, obj_address_formatter, (void*) x); return Rf_mkChar(buf); } r_obj* (*r_obj_encode_utf8)(r_obj* x) = NULL; r_obj* r_as_label(r_obj* x) { return r_eval_with_x(as_label_call, x, r_ns_env("rlang")); } void r_init_library_obj(r_obj* ns) { p_precious_dict = r_new_dict(PRECIOUS_DICT_INIT_SIZE); KEEP(p_precious_dict->shelter); r_env_poke(ns, r_sym(".__rlang_lib_precious_dict__."), p_precious_dict->shelter); FREE(1); // The Microsoft C library doesn't implement the hexadecimal // formatter correctly const char* null_addr = r_str_c_string(r_obj_address(r_null)); if (null_addr[0] != '0' || null_addr[1] != 'x') { obj_address_formatter = "0x%p"; } r_obj_encode_utf8 = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_obj_encode_utf8"); as_label_call = r_parse("as_label(x)"); r_preserve_global(as_label_call); } static r_obj* as_label_call = NULL; vctrs/src/rlang/quo.h0000644000176200001440000000037614465445266014310 0ustar liggesusers#ifndef RLANG_QUO_H #define RLANG_QUO_H extern r_obj* (*r_quo_get_expr)(r_obj* quo); extern r_obj* (*r_quo_set_expr)(r_obj* quo, r_obj* expr); extern r_obj* (*r_quo_get_env)(r_obj* quo); extern r_obj* (*r_quo_set_env)(r_obj* quo, r_obj* env); #endif vctrs/src/rlang/c-utils.h0000644000176200001440000000664614465445266015072 0ustar liggesusers#ifndef RLANG_C_UTILS_H #define RLANG_C_UTILS_H #include #include #include "cnd.h" #define R_ARR_SIZEOF(X) sizeof(X) / sizeof(X[0]) #define R_MIN(a, b) ((a) < (b) ? (a) : (b)) #define R_MAX(a, b) ((a) > (b) ? (a) : (b)) // Like `memset()` with support for multi-byte types #define R_MEM_SET(TYPE, PTR, VALUE, N) do { \ TYPE* v = (PTR); \ TYPE value = (VALUE); \ size_t n = (N); \ for (size_t i = 0; i < n; ++i) { \ v[i] = value; \ } \ } while(0) void* r_shelter_deref(r_obj* x); // Allow integers up to 2^52, same as R_XLEN_T_MAX when long vector // support is enabled #define RLANG_MAX_DOUBLE_INT 4503599627370496 #define RLANG_MIN_DOUBLE_INT -4503599627370496 static inline bool r_dbl_is_whole(double x) { if (x > RLANG_MAX_DOUBLE_INT || x < RLANG_MIN_DOUBLE_INT) { return false; } // C99 guarantees existence of the int_least_N_t types, even on // machines that don't support arithmetic on width N: if (x != (int_least64_t) x) { return false; } return true; } // Adapted from CERT C coding standards static inline intmax_t r__intmax_add(intmax_t x, intmax_t y) { if ((y > 0 && x > (INTMAX_MAX - y)) || (y < 0 && x < (INTMAX_MIN - y))) { r_stop_internal("Values too large to be added."); } return x + y; } static inline intmax_t r__intmax_subtract(intmax_t x, intmax_t y) { if ((y > 0 && x < (INTMAX_MIN + y)) || (y < 0 && x > (INTMAX_MAX + y))) { r_stop_internal("Subtraction resulted in overflow or underflow."); } return x - y; } static inline r_ssize r_ssize_add(r_ssize x, r_ssize y) { intmax_t out = r__intmax_add(x, y); if (out > R_SSIZE_MAX) { r_stop_internal("Result too large for an `r_ssize`."); } return (r_ssize) out; } static inline r_ssize r_ssize_mult(r_ssize x, r_ssize y) { if (x > 0) { if (y > 0) { if (x > (R_SSIZE_MAX / y)) { goto error; } } else { if (y < (R_SSIZE_MIN / x)) { goto error; } } } else { if (y > 0) { if (x < (R_SSIZE_MIN / y)) { goto error; } } else { if ( (x != 0) && (y < (R_SSIZE_MAX / x))) { goto error; } } } return x * y; error: r_stop_internal("Result too large for an `r_ssize`."); } static inline int r_int_min(int x, int y) { return (y < x) ? y : x; } static inline int r_int_max(int x, int y) { return (y < x) ? x : y; } static inline r_ssize r_ssize_min(r_ssize x, r_ssize y) { return (y < x) ? y : x; } static inline r_ssize r_ssize_max(r_ssize x, r_ssize y) { return (y < x) ? x : y; } static inline int r_ssize_as_integer(r_ssize x) { if (x > INT_MAX || x < INT_MIN) { r_stop_internal("Result can't be represented as `int`."); } return (int) x; } static inline double r_ssize_as_double(r_ssize x) { if (x > DBL_MAX || x < -DBL_MAX) { r_stop_internal("Result can't be represented as `double`."); } return (double) x; } static inline r_ssize r_double_as_ssize(double x) { if (x > R_SSIZE_MAX || x < R_SSIZE_MIN) { r_stop_internal("Result can't be represented as `r_ssize`."); } return (r_ssize) x; } static inline double r_double_mult(double x, double y) { double out = x * y; if (!isfinite(out)) { r_stop_internal("Can't multiply double values."); } return out; } #endif vctrs/src/rlang/walk.h0000644000176200001440000000733014465445266014437 0ustar liggesusers#ifndef RLANG_INTERNAL_WALK_H #define RLANG_INTERNAL_WALK_H /** * Direction of iteration * * Each non-leaf node of the sexp tree is visited twice: First before * visiting the children, and again after the children have been * visited. See * about * this iteration process. * * There are three directions: * - Incoming: The first time a non-leaf node is visited. * - Leaf: After reaching a leaf node, the direction changes from * incoming to outgoing. * - Outgoing: The second time a non-leaf node is visited on the way back. */ enum r_sexp_it_direction { R_SEXP_IT_DIRECTION_leaf = 0, R_SEXP_IT_DIRECTION_incoming, R_SEXP_IT_DIRECTION_outgoing }; enum r_sexp_it_relation { R_SEXP_IT_RELATION_none = -1, R_SEXP_IT_RELATION_root = 0, R_SEXP_IT_RELATION_attrib, // Nodes R_SEXP_IT_RELATION_node_car, R_SEXP_IT_RELATION_node_cdr, R_SEXP_IT_RELATION_node_tag, R_SEXP_IT_RELATION_symbol_string, R_SEXP_IT_RELATION_symbol_value, R_SEXP_IT_RELATION_symbol_internal, R_SEXP_IT_RELATION_function_fmls, R_SEXP_IT_RELATION_function_body, R_SEXP_IT_RELATION_function_env, R_SEXP_IT_RELATION_environment_frame, R_SEXP_IT_RELATION_environment_enclos, R_SEXP_IT_RELATION_environment_hashtab, R_SEXP_IT_RELATION_promise_value, R_SEXP_IT_RELATION_promise_expr, R_SEXP_IT_RELATION_promise_env, R_SEXP_IT_RELATION_pointer_prot, R_SEXP_IT_RELATION_pointer_tag, // Vectors R_SEXP_IT_RELATION_list_elt, R_SEXP_IT_RELATION_character_elt, R_SEXP_IT_RELATION_expression_elt }; enum r_sexp_it_raw_relation { R_SEXP_IT_RAW_RELATION_root = 0, R_SEXP_IT_RAW_RELATION_attrib, R_SEXP_IT_RAW_RELATION_node_tag, R_SEXP_IT_RAW_RELATION_node_car, R_SEXP_IT_RAW_RELATION_node_cdr, R_SEXP_IT_RAW_RELATION_vector_elt }; struct r_sexp_iterator { r_obj* shelter; bool skip_incoming; r_obj* x; enum r_type type; int depth; r_obj* parent; enum r_sexp_it_relation rel; r_ssize i; enum r_sexp_it_direction dir; /* private: */ struct r_dyn_array* p_stack; }; struct r_sexp_iterator* r_new_sexp_iterator(r_obj* root); bool r_sexp_next(struct r_sexp_iterator* p_it); bool r_sexp_skip(struct r_sexp_iterator* p_it); static inline enum r_sexp_it_raw_relation r_sexp_it_raw_relation(enum r_sexp_it_relation rel) { switch (rel) { case R_SEXP_IT_RELATION_root: return R_SEXP_IT_RAW_RELATION_root; case R_SEXP_IT_RELATION_attrib: return R_SEXP_IT_RAW_RELATION_attrib; case R_SEXP_IT_RELATION_node_car: case R_SEXP_IT_RELATION_symbol_string: case R_SEXP_IT_RELATION_environment_frame: case R_SEXP_IT_RELATION_function_fmls: case R_SEXP_IT_RELATION_promise_value: return R_SEXP_IT_RAW_RELATION_node_car; case R_SEXP_IT_RELATION_node_cdr: case R_SEXP_IT_RELATION_symbol_value: case R_SEXP_IT_RELATION_environment_enclos: case R_SEXP_IT_RELATION_function_body: case R_SEXP_IT_RELATION_promise_expr: case R_SEXP_IT_RELATION_pointer_prot: return R_SEXP_IT_RAW_RELATION_node_cdr; case R_SEXP_IT_RELATION_node_tag: case R_SEXP_IT_RELATION_symbol_internal: case R_SEXP_IT_RELATION_environment_hashtab: case R_SEXP_IT_RELATION_function_env: case R_SEXP_IT_RELATION_promise_env: case R_SEXP_IT_RELATION_pointer_tag: return R_SEXP_IT_RAW_RELATION_node_tag; case R_SEXP_IT_RELATION_list_elt: case R_SEXP_IT_RELATION_character_elt: case R_SEXP_IT_RELATION_expression_elt: return R_SEXP_IT_RAW_RELATION_vector_elt; default: r_abort("Unimplemented type."); } } const char* r_sexp_it_direction_as_c_string(enum r_sexp_it_direction dir); const char* r_sexp_it_relation_as_c_string(enum r_sexp_it_relation rel); const char* r_sexp_it_raw_relation_as_c_string(enum r_sexp_it_raw_relation rel); #endif vctrs/src/rlang/df.h0000644000176200001440000000047514465445266014075 0ustar liggesusers#ifndef RLANG_DF_H #define RLANG_DF_H r_obj* r_alloc_df_list(r_ssize n_rows, r_obj* names, const enum r_type* v_types, r_ssize types_size); void r_init_data_frame(r_obj* x, r_ssize n_nows); void r_init_tibble(r_obj* x, r_ssize n_rows); #endif vctrs/src/rlang/cnd.c0000644000176200001440000001136114465445266014237 0ustar liggesusers#include "rlang.h" #include "decl/cnd-decl.h" #define BUFSIZE 8192 #define INTERP(BUF, FMT, DOTS) \ { \ va_list dots; \ va_start(dots, FMT); \ vsnprintf(BUF, BUFSIZE, FMT, dots); \ va_end(dots); \ \ BUF[BUFSIZE - 1] = '\0'; \ } static r_obj* msg_call = NULL; void r_inform(const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_eval_with_x(msg_call, KEEP(r_chr(buf)), r_envs.ns); FREE(1); } static r_obj* wng_call = NULL; void r_warn(const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_eval_with_x(wng_call, KEEP(r_chr(buf)), r_envs.ns); FREE(1); } static r_obj* err_call = NULL; void r_abort(const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_obj* message = KEEP(r_chr(buf)); // Evaluate in a mask but forward error call to the current frame r_obj* frame = KEEP(r_peek_frame()); r_obj* mask = KEEP(r_alloc_environment(2, frame)); r_env_poke(mask, r_syms.error_call_flag, frame); struct r_pair args[] = { { r_syms.message, message } }; r_exec_n(r_null, r_syms.abort, args, R_ARR_SIZEOF(args), mask); while (1); // No return } r_no_return void r_abort_n(const struct r_pair* args, int n) { r_exec_mask_n(r_null, r_syms.abort, args, n, r_peek_frame()); r_stop_unreachable(); } r_no_return void r_abort_call(r_obj* call, const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_obj* message = KEEP(r_chr(buf)); struct r_pair args[] = { { r_syms.message, message }, { r_syms.call, call } }; r_obj* frame = KEEP(r_peek_frame()); r_exec_mask_n(r_null, r_syms.abort, args, R_ARR_SIZEOF(args), frame); r_stop_unreachable(); } void r_cnd_signal(r_obj* cnd) { r_eval_with_x(cnd_signal_call, cnd, r_envs.base); } // For `R_interrupts_suspended` #include #include #ifdef _WIN32 #include void r_interrupt(void) { UserBreak = 1; R_CheckUserInterrupt(); } #else #include void r_interrupt(void) { Rf_onintr(); } #endif enum r_cnd_type r_cnd_type(r_obj* cnd) { r_obj* classes = r_class(cnd); if (r_typeof(cnd) != R_TYPE_list || r_typeof(classes) != R_TYPE_character) { goto error; } r_obj* const * v_classes = r_chr_cbegin(classes); r_ssize n_classes = r_length(classes); for (r_ssize i = n_classes - 2; i >= 0; --i) { r_obj* class_str = v_classes[i]; if (class_str == r_strs.error) { return R_CND_TYPE_error; } if (class_str == r_strs.warning) { return R_CND_TYPE_warning; } if (class_str == r_strs.message) { return R_CND_TYPE_message; } if (class_str == r_strs.interrupt) { return R_CND_TYPE_interrupt; } } if (r_inherits(cnd, "condition")) { return R_CND_TYPE_condition; } error: r_abort("`cnd` is not a condition object."); } void r_init_library_cnd(void) { msg_call = r_parse("message(x)"); r_preserve(msg_call); wng_call = r_parse("warning(x, call. = FALSE)"); r_preserve(wng_call); err_call = r_parse("rlang::abort(x)"); r_preserve(err_call); cnd_signal_call = r_parse("rlang::cnd_signal(x)"); r_preserve(cnd_signal_call); // Silence "'noreturn' attribute does not apply to types warning". // It seems like GCC doesn't handle attributes in casts so we need // to cast through a typedef. // https://stackoverflow.com/questions/9441262/function-pointer-to-attribute-const-function typedef r_no_return void (*r_stop_internal_t)(const char*, int, r_obj*, const char* fmt, ...); r_stop_internal = (r_stop_internal_t) R_GetCCallable("rlang", "rlang_stop_internal2"); r_format_error_arg = (const char* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_format_error_arg"); r_obj_type_friendly_full = (const char* (*)(r_obj*, bool, bool)) r_peek_c_callable("rlang", "rlang_obj_type_friendly_full"); } r_no_return void (*r_stop_internal)(const char* file, int line, r_obj* call, const char* fmt, ...) = NULL; static r_obj* cnd_signal_call = NULL; const char* (*r_format_error_arg)(r_obj* arg) = NULL; const char* (*r_obj_type_friendly_full)(r_obj* x, bool value, bool length) = NULL; const char* r_format_lazy_error_arg(struct r_lazy arg) { r_obj* ffi_arg = KEEP(r_lazy_eval(arg)); const char* out = r_format_error_arg(ffi_arg); FREE(1); return out; } vctrs/src/rlang/formula.h0000644000176200001440000000033014465445266015137 0ustar liggesusers#ifndef RLANG_FORMULA_H #define RLANG_FORMULA_H bool r_is_formula(r_obj* x, int scoped, int lhs); r_obj* r_f_rhs(r_obj* f); r_obj* r_f_lhs(r_obj* f); r_obj* r_f_env(r_obj* f); bool r_f_has_env(r_obj* f); #endif vctrs/src/rlang/parse.c0000644000176200001440000000132514465445266014604 0ustar liggesusers#include "rlang.h" #include static void abort_parse(r_obj* code, const char* why) { if (r_peek_option("rlang__verbose_errors") != r_null) { r_obj_print(code); } r_abort("Internal error: %s", why); } r_obj* r_parse(const char* str) { r_obj* str_ = KEEP(r_chr(str)); ParseStatus status; r_obj* out = KEEP(R_ParseVector(str_, -1, &status, r_null)); if (status != PARSE_OK) { abort_parse(str_, "Parsing failed"); } if (r_length(out) != 1) { abort_parse(str_, "Expected a single expression"); } out = r_list_get(out, 0); FREE(2); return out; } r_obj* r_parse_eval(const char* str, r_obj* env) { r_obj* out = r_eval(KEEP(r_parse(str)), env); FREE(1); return out; } vctrs/src/rlang/vec.c0000644000176200001440000002016514465445266014252 0ustar liggesusers#include "rlang.h" #include #include r_obj* r_chr_n(const char* const * strings, r_ssize n) { r_obj* out = KEEP(r_alloc_character(n)); for (r_ssize i = 0; i < n; ++i) { r_chr_poke(out, i, r_str(strings[i])); } FREE(1); return out; } #if R_VERSION >= R_Version(3, 4, 0) #define HAS_VIRTUAL_SIZE 1 #else #define HAS_VIRTUAL_SIZE 0 #endif #define RESIZE(R_TYPE, C_TYPE, CONST_DEREF, DEREF) \ do { \ r_ssize x_size = r_length(x); \ if (x_size == size) { \ return x; \ } \ if (!ALTREP(x) && size < x_size && HAS_VIRTUAL_SIZE) { \ SETLENGTH(x, size); \ SET_TRUELENGTH(x, x_size); \ SET_GROWABLE_BIT(x); \ return x; \ } \ \ const C_TYPE* p_x = CONST_DEREF(x); \ r_obj* out = KEEP(r_alloc_vector(R_TYPE, size)); \ C_TYPE* p_out = DEREF(out); \ \ r_ssize cpy_size = (size > x_size) ? x_size : size; \ memcpy(p_out, p_x, cpy_size * sizeof(C_TYPE)); \ \ FREE(1); \ return out; \ } while (0) #define RESIZE_BARRIER(R_TYPE, CONST_DEREF, SET) \ do { \ r_ssize x_size = r_length(x); \ if (x_size == size) { \ return x; \ } \ if (!ALTREP(x) && size < x_size && HAS_VIRTUAL_SIZE) { \ SETLENGTH(x, size); \ SET_TRUELENGTH(x, x_size); \ SET_GROWABLE_BIT(x); \ return x; \ } \ \ r_obj* const * p_x = CONST_DEREF(x); \ r_obj* out = KEEP(r_alloc_vector(R_TYPE, size)); \ \ r_ssize cpy_size = (size > x_size) ? x_size : size; \ for (r_ssize i = 0; i < cpy_size; ++i) { \ SET(out, i, p_x[i]); \ } \ \ FREE(1); \ return out; \ } while (0) // Compared to `Rf_xlengthgets()` this does not initialise the new // extended locations with `NA` r_obj* r_lgl_resize(r_obj* x, r_ssize size) { RESIZE(R_TYPE_logical, int, r_lgl_cbegin, r_lgl_begin); } r_obj* r_int_resize(r_obj* x, r_ssize size) { RESIZE(R_TYPE_integer, int, r_int_cbegin, r_int_begin); } r_obj* r_dbl_resize(r_obj* x, r_ssize size) { RESIZE(R_TYPE_double, double, r_dbl_cbegin, r_dbl_begin); } r_obj* r_cpl_resize(r_obj* x, r_ssize size) { RESIZE(R_TYPE_complex, r_complex, r_cpl_cbegin, r_cpl_begin); } r_obj* r_raw_resize(r_obj* x, r_ssize size) { RESIZE(R_TYPE_raw, unsigned char, r_raw_cbegin, r_raw_begin); } r_obj* r_chr_resize(r_obj* x, r_ssize size) { RESIZE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke); } r_obj* r_list_resize(r_obj* x, r_ssize size) { RESIZE_BARRIER(R_TYPE_list, r_list_cbegin, r_list_poke); } #undef RESIZE #undef RESIZE_BARRIER r_obj* r_list_compact(r_obj* x) { r_ssize n = r_length(x); r_obj* inc = KEEP(r_alloc_logical(n)); int* v_inc = r_int_begin(inc); r_obj* const * v_x = r_list_cbegin(x); r_ssize new_n = 0; for (r_ssize i = 0; i < n; ++i) { v_inc[i] = v_x[i] != r_null; new_n += v_inc[i]; } r_obj* out = KEEP(r_alloc_list(new_n)); for (r_ssize i = 0, count = 0; i < n; ++i) { if (v_inc[i]) { r_list_poke(out, count, v_x[i]); ++count; } } FREE(2); return out; } r_obj* r_list_of_as_ptr_ssize(r_obj* xs, enum r_type type, struct r_pair_ptr_ssize** p_v_out) { if (r_typeof(xs) != R_TYPE_list) { r_abort("`xs` must be a list."); } r_ssize n = r_length(xs); r_obj* shelter = KEEP(r_alloc_raw(sizeof(struct r_pair_ptr_ssize) * n)); struct r_pair_ptr_ssize* v_out = r_raw_begin(shelter); r_obj* const * v_xs = r_list_cbegin(xs); for (r_ssize i = 0; i < n; ++i) { r_obj* x = v_xs[i]; if (r_typeof(x) != type) { r_abort("`xs` must be a list of vectors of type `%s`.", r_type_as_c_string(type)); } v_out[i] = (struct r_pair_ptr_ssize) { .ptr = r_int_begin(x), .size = r_length(x) }; } FREE(1); *p_v_out = v_out; return shelter; } // FIXME: Does this have a place in the library? void r_vec_poke_n(r_obj* x, r_ssize offset, r_obj* y, r_ssize from, r_ssize n) { if ((r_length(x) - offset) < n) { r_abort("Can't copy data to `x` because it is too small"); } if ((r_length(y) - from) < n) { r_abort("Can't copy data from `y` because it is too small"); } switch (r_typeof(x)) { case R_TYPE_logical: { int* src_data = r_lgl_begin(y); int* dest_data = r_lgl_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_integer: { int* src_data = r_int_begin(y); int* dest_data = r_int_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_double: { double* src_data = r_dbl_begin(y); double* dest_data = r_dbl_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_complex: { r_complex* src_data = r_cpl_begin(y); r_complex* dest_data = r_cpl_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_raw: { unsigned char* src_data = RAW(y); unsigned char* dest_data = RAW(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_character: { r_obj* elt; for (r_ssize i = 0; i != n; ++i) { elt = r_chr_get(y, i + from); r_chr_poke(x, i + offset, elt); } break; } case R_TYPE_list: { r_obj* elt; for (r_ssize i = 0; i != n; ++i) { elt = r_list_get(y, i + from); r_list_poke(x, i + offset, elt); } break; } default: r_abort("Copy requires vectors"); } } void r_vec_poke_range(r_obj* x, r_ssize offset, r_obj* y, r_ssize from, r_ssize to) { r_vec_poke_n(x, offset, y, from, to - from + 1); } bool _r_is_finite(r_obj* x) { r_ssize n = r_length(x); switch(r_typeof(x)) { case R_TYPE_integer: { const int* p_x = r_int_cbegin(x); for (r_ssize i = 0; i < n; ++i) { if (p_x[i] == r_globals.na_int) { return false; } } break; } case R_TYPE_double: { const double* p_x = r_dbl_cbegin(x); for (r_ssize i = 0; i < n; ++i) { if (!isfinite(p_x[i])) { return false; } } break; } case R_TYPE_complex: { const r_complex* p_x = r_cpl_cbegin(x); for (r_ssize i = 0; i < n; ++i) { if (!isfinite(p_x[i].r) || !isfinite(p_x[i].i)) { return false; } } break; } default: r_abort("Internal error: expected a numeric vector"); } return true; } vctrs/src/rlang/node.h0000644000176200001440000000360314465445266014425 0ustar liggesusers#ifndef RLANG_NODE_H #define RLANG_NODE_H static inline r_obj* r_node_car(r_obj* x) { return CAR(x); } static inline r_obj* r_node_cdr(r_obj* x) { return CDR(x); } static inline r_obj* r_node_tag(r_obj* x) { return TAG(x); } static inline r_obj* r_node_caar(r_obj* x) { return CAAR(x); } static inline r_obj* r_node_cadr(r_obj* x) { return CADR(x); } static inline r_obj* r_node_cdar(r_obj* x) { return CDAR(x); } static inline r_obj* r_node_cddr(r_obj* x) { return CDDR(x); } static inline void r_node_poke_car(r_obj* x, r_obj* newcar) { SETCAR(x, newcar); } static inline void r_node_poke_cdr(r_obj* x, r_obj* newcdr) { SETCDR(x, newcdr); } static inline void r_node_poke_tag(r_obj* x, r_obj* tag) { SET_TAG(x, tag); } static inline void r_node_poke_caar(r_obj* x, r_obj* newcaar) { SETCAR(CAR(x), newcaar); } static inline void r_node_poke_cadr(r_obj* x, r_obj* newcar) { SETCADR(x, newcar); } static inline void r_node_poke_cdar(r_obj* x, r_obj* newcdar) { SETCDR(CAR(x), newcdar); } static inline void r_node_poke_cddr(r_obj* x, r_obj* newcdr) { SETCDR(CDR(x), newcdr); } static inline r_obj* r_new_node(r_obj* car, r_obj* cdr) { return Rf_cons(car, cdr); } static inline r_obj* r_new_node3(r_obj* car, r_obj* cdr, r_obj* tag) { r_obj* out = Rf_cons(car, cdr); SET_TAG(out, tag); return out; } r_obj* r_new_pairlist(const struct r_pair* args, int n, r_obj** tail); #define r_pairlist Rf_list1 #define r_pairlist2 Rf_list2 #define r_pairlist3 Rf_list3 #define r_pairlist4 Rf_list4 #define r_pairlist5 Rf_list5 r_obj* r_pairlist_find(r_obj* node, r_obj* tag); r_obj* r_pairlist_rev(r_obj* node); static inline r_obj* r_pairlist_get(r_obj* node, r_obj* tag) { return r_node_car(r_pairlist_find(node, tag)); } static inline r_obj* r_pairlist_tail(r_obj* x) { r_obj* cdr = r_null; while ((cdr = r_node_cdr(x)) != r_null) { x = cdr; } return x; } r_obj* r_node_tree_clone(r_obj* x); #endif vctrs/src/rlang/env-binding.h0000644000176200001440000000054114465445266015676 0ustar liggesusers#ifndef RLANG_ENV_BINDING_H #define RLANG_ENV_BINDING_H enum r_env_binding_type { R_ENV_BINDING_TYPE_value = 0, R_ENV_BINDING_TYPE_promise, R_ENV_BINDING_TYPE_active }; bool r_env_binding_is_promise(r_obj* env, r_obj* sym); bool r_env_binding_is_active(r_obj* env, r_obj* sym); r_obj* r_env_binding_types(r_obj* env, r_obj* bindings); #endif vctrs/src/rlang/formula.c0000644000176200001440000000354114465445266015141 0ustar liggesusers#include "rlang.h" r_obj* r_f_rhs(r_obj* f) { if (r_typeof(f) != LANGSXP) { r_abort("`x` must be a formula"); } switch (r_length(f)) { case 2: return r_node_cadr(f); case 3: return CADDR(f); default: r_abort("Invalid formula"); } } r_obj* r_f_lhs(r_obj* f) { if (r_typeof(f) != LANGSXP) { r_abort("`x` must be a formula"); } switch (r_length(f)) { case 2: return r_null; case 3: return r_node_cadr(f); default: r_abort("Invalid formula"); } } r_obj* r_f_env(r_obj* f) { return r_attrib_get(f, r_sym(".Environment")); } bool r_f_has_env(r_obj* f) { return r_is_environment(r_f_env(f)); } bool r_is_formula(r_obj* x, int scoped, int lhs) { if (r_typeof(x) != R_TYPE_call) { return false; } if (r_node_car(x) != r_syms.tilde) { return false; } if (scoped >= 0) { bool has_env = r_typeof(r_f_env(x)) == R_TYPE_environment; bool has_class = r_inherits(x, "formula"); if (scoped != (has_env && has_class)) { return false; } } if (lhs >= 0) { int has_lhs = r_length(x) > 2; if (lhs != has_lhs) { return false; } } return true; } r_obj* new_raw_formula(r_obj* lhs, r_obj* rhs, r_obj* env) { static r_obj* tilde_sym = NULL; if (!tilde_sym) { tilde_sym = r_sym("~"); } if (!r_is_environment(env) && env != r_null) { r_abort("`env` must be an environment"); } r_obj* f; r_obj* args; if (lhs == r_null) { args = KEEP(r_pairlist(rhs)); } else { args = KEEP(r_pairlist2(lhs, rhs)); } f = KEEP(r_new_call(tilde_sym, args)); r_obj* attrs = KEEP(r_new_node(env, r_null)); r_node_poke_tag(attrs, r_sym(".Environment")); r_poke_attrib(f, attrs); FREE(3); return f; } r_obj* r_new_formula(r_obj* lhs, r_obj* rhs, r_obj* env) { r_obj* f = KEEP(new_raw_formula(lhs, rhs, env)); r_attrib_push_class(f, "formula"); FREE(1); return f; } vctrs/src/rlang/vec-lgl.c0000644000176200001440000000556214465445266015032 0ustar liggesusers#include "rlang.h" #include r_ssize r_lgl_sum(r_obj* x, bool na_true) { if (r_typeof(x) != R_TYPE_logical) { r_abort("Internal error: Excepted logical vector in `r_lgl_sum()`"); } const r_ssize n = r_length(x); const int* v_x = r_lgl_cbegin(x); // This can't overflow since `sum` is necessarily smaller or equal // to the vector length expressed in `r_ssize` r_ssize sum = 0; if (na_true) { for (r_ssize i = 0; i < n; ++i) { sum += (bool) v_x[i]; } } else { for (r_ssize i = 0; i < n; ++i) { sum += (v_x[i] == 1); } } return sum; } r_obj* r_lgl_which(r_obj* x, bool na_propagate) { const enum r_type type = r_typeof(x); if (type != R_TYPE_logical) { r_stop_unexpected_type(type); } const r_ssize n = r_length(x); const int* v_x = r_lgl_cbegin(x); const r_ssize out_n = r_lgl_sum(x, na_propagate); if (out_n > INT_MAX) { r_stop_internal("Can't fit result in an integer vector."); } r_obj* out = KEEP(r_alloc_integer(out_n)); int* v_out = r_int_begin(out); r_obj* names = r_names(x); const bool has_names = (names != r_null); if (na_propagate) { if (has_names) { // Mark `NA` locations with negative location for extracting names later for (r_ssize i = 0, j = 0; i < n && j < out_n; ++i) { const int x_elt = v_x[i]; const bool missing = x_elt == r_globals.na_lgl; const int elt = missing * (-i - 1) + !missing * x_elt * (i + 1); v_out[j] = elt; j += (bool) elt; } } else { for (r_ssize i = 0, j = 0; i < n && j < out_n; ++i) { const int x_elt = v_x[i]; const bool missing = x_elt == r_globals.na_lgl; const int elt = missing * r_globals.na_int + !missing * x_elt * (i + 1); v_out[j] = elt; j += (bool) elt; } } } else { for (r_ssize i = 0, j = 0; i < n && j < out_n; ++i) { const int x_elt = v_x[i]; v_out[j] = i + 1; j += (x_elt == 1); } } if (has_names) { r_obj* const* v_names = r_chr_cbegin(names); r_obj* out_names = r_alloc_character(out_n); r_attrib_poke_names(out, out_names); if (na_propagate) { // `v_out` contains negative locations which tells you the location of the // name to extract while also serving as a signal of where `NA`s should go // in the finalized output for (r_ssize i = 0; i < out_n; ++i) { const int loc = v_out[i]; const int abs_loc = abs(loc); const bool same = (loc == abs_loc); v_out[i] = same * loc + !same * r_globals.na_int; r_chr_poke(out_names, i, v_names[abs_loc - 1]); } } else { // `v_out` doesn't contain `NA`, so we can use the locations directly for (r_ssize i = 0; i < out_n; ++i) { const int loc = v_out[i] - 1; r_chr_poke(out_names, i, v_names[loc]); } } } FREE(1); return out; } vctrs/src/rlang/attrib.h0000644000176200001440000000367614465445266014777 0ustar liggesusers#ifndef RLANG_ATTRIB_H #define RLANG_ATTRIB_H #include "node.h" #include "sym.h" static inline r_obj* r_attrib(r_obj* x) { return ATTRIB(x); } static inline r_obj* r_poke_attrib(r_obj* x, r_obj* attrs) { SET_ATTRIB(x, attrs); return x; } // Unlike Rf_getAttrib(), this never allocates. This also doesn't bump // refcounts or namedness. static inline r_obj* r_attrib_get(r_obj* x, r_obj* tag) { return r_pairlist_get(r_attrib(x), tag); } static inline void r_attrib_poke(r_obj* x, r_obj* sym, r_obj* value) { Rf_setAttrib(x, sym, value); } r_obj* r_attrib_push(r_obj* x, r_obj* tag, r_obj* value); r_obj* r_attrib_set(r_obj* x, r_obj* tag, r_obj* value); static inline r_obj* r_class(r_obj* x) { return r_attrib_get(x, r_syms.class_); } static inline void r_attrib_poke_class(r_obj* x, r_obj* classes) { r_attrib_poke(x, r_syms.class_, classes); } void r_attrib_push_class(r_obj* x, const char* tag); void r_attrib_push_classes(r_obj* x, const char** tags, r_ssize n); static inline r_obj* r_dim(r_obj* x) { return r_attrib_get(x, r_syms.dim); } static inline void r_attrib_poke_dim(r_obj* x, r_obj* dim) { r_attrib_poke(x, r_syms.dim, dim); } static inline r_obj* r_dim_names(r_obj* x) { return r_attrib_get(x, r_syms.dim_names); } static inline void r_attrib_poke_dim_names(r_obj* x, r_obj* dim_names) { r_attrib_poke(x, r_syms.dim_names, dim_names); } static inline r_obj* r_names(r_obj* x) { return r_attrib_get(x, r_syms.names); } static inline void r_attrib_poke_names(r_obj* x, r_obj* nms) { r_attrib_poke(x, r_syms.names, nms); } bool r_is_named(r_obj* x); #define r_attrib_poke(X, SYM, VALUE) Rf_setAttrib(X, SYM, VALUE) #define r_attrib_poke_class(X, VALUE) Rf_setAttrib(X, r_syms.class_, VALUE) #define r_attrib_poke_dim(X, VALUE) Rf_setAttrib(X, r_syms.dim, VALUE) #define r_attrib_poke_dim_names(X, VALUE) Rf_setAttrib(X, r_syms.dim_names, VALUE) #define r_attrib_poke_names(X, VALUE) Rf_setAttrib(X, r_syms.names, VALUE) #endif vctrs/src/rlang/stack.c0000644000176200001440000000504514465445266014602 0ustar liggesusers#include "rlang.h" #include "decl/stack-decl.h" void r_on_exit(r_obj* expr, r_obj* frame) { static r_obj* on_exit_prim = NULL; if (!on_exit_prim) { on_exit_prim = r_base_ns_get("on.exit"); } r_obj* args = r_pairlist2(expr, r_lgl(1)); r_obj* lang = KEEP(r_new_call(on_exit_prim, args)); r_eval(lang, frame); FREE(1); } r_obj* r_peek_frame(void) { return r_eval(peek_frame_call, r_envs.base); } r_obj* r_caller_env(r_obj* n) { if (r_typeof(n) != R_TYPE_environment) { r_stop_internal("`n` must be an environment."); } return r_eval(caller_env_call, n); } static r_obj* sys_frame_call = NULL; static r_obj* sys_call_call = NULL; static int* sys_frame_n_addr = NULL; static int* sys_call_n_addr = NULL; r_obj* r_sys_frame(int n, r_obj* frame) { int n_kept = 0; if (!frame) { frame = r_peek_frame(); KEEP_N(frame, &n_kept); } *sys_frame_n_addr = n; SEXP value = r_eval(sys_frame_call, frame); FREE(n_kept); return value; } r_obj* r_sys_call(int n, r_obj* frame) { int n_kept = 0; if (!frame) { frame = r_peek_frame(); KEEP_N(frame, &n_kept); } *sys_call_n_addr = n; SEXP value = r_eval(sys_call_call, frame); FREE(n_kept); return value; } static r_obj* generate_sys_call(const char* name, int** n_addr) { r_obj* sys_n = KEEP(r_int(0)); *n_addr = r_int_begin(sys_n); r_obj* sys_args = KEEP(r_new_node(sys_n, r_null)); r_obj* sys_call = KEEP(r_new_call(r_base_ns_get(name), sys_args)); r_preserve(sys_call); FREE(3); return sys_call; } void r_init_library_stack(void) { // `sys.frame(sys.nframe())` doesn't work because `sys.nframe()` // returns the number of the frame in which evaluation occurs. It // doesn't return the number of frames on the stack. So we'd need // to evaluate it in the last frame on the stack which is what we // are looking for to begin with. We use instead this workaround: // Call `sys.frame()` from a closure to push a new frame on the // stack, and use negative indexing to get the previous frame. r_obj* current_frame_body = KEEP(r_parse("sys.frame(-1)")); r_obj* current_frame_fn = KEEP(r_new_function(r_null, current_frame_body, r_envs.base)); peek_frame_call = r_new_call(current_frame_fn, r_null); r_preserve(peek_frame_call); FREE(2); sys_frame_call = generate_sys_call("sys.frame", &sys_frame_n_addr); sys_call_call = generate_sys_call("sys.call", &sys_call_n_addr); caller_env_call = r_parse("parent.frame()"); r_preserve_global(caller_env_call); } static r_obj* peek_frame_call = NULL; static r_obj* caller_env_call = NULL; vctrs/src/rlang/eval.c0000644000176200001440000001100714465445266014417 0ustar liggesusers#include "rlang.h" r_obj* r_eval_with_x(r_obj* call, r_obj* x, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(1, parent)); r_env_poke(env, r_syms.x, x); r_obj* out = r_eval(call, env); FREE(1); return out; } r_obj* r_eval_with_xy(r_obj* call, r_obj* x, r_obj* y, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(1, parent)); r_env_poke(env, r_syms.x, x); r_env_poke(env, r_syms.y, y); r_obj* out = r_eval(call, env); FREE(1); return out; } r_obj* r_eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(1, parent)); r_env_poke(env, r_syms.x, x); r_env_poke(env, r_syms.y, y); r_env_poke(env, r_syms.z, z); r_obj* out = r_eval(call, env); FREE(1); return out; } r_obj* r_eval_with_wxyz(r_obj* call, r_obj* w, r_obj* x, r_obj* y, r_obj* z, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(1, parent)); r_env_poke(env, r_syms.w, w); r_env_poke(env, r_syms.x, x); r_env_poke(env, r_syms.y, y); r_env_poke(env, r_syms.z, z); r_obj* out = r_eval(call, env); FREE(1); return out; } // Evaluate call with a preallocated environment containing a single // `x` binding and inheriting from base env. // // Since this has side effects, it should not be used when there is a // chance of recursing into the C library. It should only be used to // evaluate pure R calls or functions from other packages, such as the // base package. static r_obj* shared_x_env; static r_obj* shared_xy_env; static r_obj* shared_xyz_env; r_obj* eval_with_x(r_obj* call, r_obj* x) { r_env_poke(shared_x_env, r_syms.x, x); r_obj* out = KEEP(r_eval(call, shared_x_env)); // Release for gc r_env_poke(shared_x_env, r_syms.x, r_null); FREE(1); return out; } r_obj* eval_with_xy(r_obj* call, r_obj* x, r_obj* y) { r_env_poke(shared_xy_env, r_syms.x, x); r_env_poke(shared_xy_env, r_syms.y, y); r_obj* out = KEEP(r_eval(call, shared_xy_env)); // Release for gc r_env_poke(shared_xy_env, r_syms.x, r_null); r_env_poke(shared_xy_env, r_syms.y, r_null); FREE(1); return out; } r_obj* eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z) { r_env_poke(shared_xyz_env, r_syms.x, x); r_env_poke(shared_xyz_env, r_syms.y, y); r_env_poke(shared_xyz_env, r_syms.z, z); r_obj* out = KEEP(r_eval(call, shared_xyz_env)); // Release for gc r_env_poke(shared_xyz_env, r_syms.x, r_null); r_env_poke(shared_xyz_env, r_syms.y, r_null); r_env_poke(shared_xyz_env, r_syms.z, r_null); FREE(1); return out; } r_obj* r_exec_mask_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* parent) { r_obj* mask = KEEP(r_alloc_environment(n + 1, parent)); r_obj* call = KEEP(r_exec_mask_n_call_poke(fn_sym, fn, args, n, mask)); r_obj* out = r_eval(call, mask); FREE(2); return out; } r_obj* r_exec_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env) { r_obj* call = KEEP(r_exec_mask_n_call_poke(fn_sym, fn, args, n, env)); r_obj* out = r_eval(call, env); FREE(1); return out; } // Create a call from arguments and poke elements with a non-NULL // symbol in `env`. Symbolic arguments are protected from evaluation // with `quote()`. r_obj* r_exec_mask_n_call_poke(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env) { if (fn_sym != r_null) { r_env_poke(env, fn_sym, fn); fn = fn_sym; } r_obj* list = KEEP(r_new_pairlist(args, n, NULL)); r_obj* node = list; while (node != r_null) { r_obj* car = r_node_car(node); r_obj* tag = r_node_tag(node); if (tag == r_null) { // If symbol is not supplied, protect symbolic arguments from // evaluation. If supplied this is not needed because of the // masking. r_node_poke_car(node, r_expr_protect(car)); } else { // If symbol is supplied, assign the value in the environment and // use the symbol instead of the value in the list of arguments r_env_poke(env, tag, car); r_node_poke_car(node, tag); } node = r_node_cdr(node); } r_obj* call = r_new_call(fn, list); FREE(1); return call; } void r_init_library_eval(void) { r_lazy_missing_arg = (struct r_lazy) { .x = r_missing_arg, .env = r_null }; } struct r_lazy r_lazy_null = { 0 }; struct r_lazy r_lazy_missing_arg = { 0 }; vctrs/src/rlang/dyn-array.c0000644000176200001440000000610214465445266015376 0ustar liggesusers#include #include "dyn-array.h" #define R_DYN_ARRAY_GROWTH_FACTOR 2 static r_obj* attribs_dyn_array = NULL; struct r_dyn_array* r_new_dyn_vector(enum r_type type, r_ssize capacity) { r_obj* shelter = KEEP(r_alloc_list(2)); r_poke_attrib(shelter, attribs_dyn_array); r_mark_object(shelter); r_obj* vec_raw = r_alloc_raw(sizeof(struct r_dyn_array)); r_list_poke(shelter, 0, vec_raw); r_obj* vec_data = r_alloc_vector(type, capacity); r_list_poke(shelter, 1, vec_data); struct r_dyn_array* p_vec = r_raw_begin(vec_raw); p_vec->shelter = shelter; p_vec->count = 0; p_vec->capacity = capacity; p_vec->growth_factor = R_DYN_ARRAY_GROWTH_FACTOR; p_vec->type = type; p_vec->elt_byte_size = r_vec_elt_sizeof0(type); p_vec->data = vec_data; switch (type) { case R_TYPE_character: p_vec->v_data = NULL; p_vec->barrier_set = &r_chr_poke; break; case R_TYPE_list: p_vec->v_data = NULL; p_vec->barrier_set = &r_list_poke; break; default: p_vec->barrier_set = NULL; p_vec->v_data = r_vec_begin0(type, vec_data); break; } p_vec->v_data_const = r_vec_cbegin0(type, vec_data); FREE(1); return p_vec; } r_obj* r_dyn_unwrap(struct r_dyn_array* p_arr) { if (p_arr->type == R_TYPE_raw) { return r_raw_resize(p_arr->data, p_arr->count * p_arr->elt_byte_size); } else { return r_vec_resize0(p_arr->type, p_arr->data, p_arr->count); } } struct r_dyn_array* r_new_dyn_array(r_ssize elt_byte_size, r_ssize capacity) { r_ssize arr_byte_size = r_ssize_mult(capacity, elt_byte_size); struct r_dyn_array* p_arr = r_new_dyn_vector(R_TYPE_raw, arr_byte_size); p_arr->capacity = capacity; p_arr->elt_byte_size = elt_byte_size; return p_arr; } void r_dyn_push_back(struct r_dyn_array* p_arr, const void* p_elt) { r_ssize loc = r__dyn_increment(p_arr); if (p_arr->barrier_set) { r_obj* value = *((r_obj* const *) p_elt); p_arr->barrier_set(p_arr->data, loc, value); } else if (p_elt) { memcpy(r_dyn_last(p_arr), p_elt, p_arr->elt_byte_size); } else { memset(r_dyn_last(p_arr), 0, p_arr->elt_byte_size); } } void r_dyn_resize(struct r_dyn_array* p_arr, r_ssize capacity) { enum r_type type = p_arr->type; r_ssize capacity_multiplier = p_arr->type == R_TYPE_raw ? r_ssize_mult(p_arr->elt_byte_size, capacity) : capacity; r_obj* data = r_vec_resize0(type, r_list_get(p_arr->shelter, 1), capacity_multiplier); r_list_poke(p_arr->shelter, 1, data); p_arr->count = r_ssize_min(p_arr->count, capacity); p_arr->capacity = capacity; p_arr->data = data; switch (type) { case R_TYPE_character: case R_TYPE_list: break; default: p_arr->v_data = r_vec_begin0(type, data); break; } p_arr->v_data_const = r_vec_cbegin0(type, data); } void r_init_library_dyn_array(void) { r_preserve_global(attribs_dyn_array = r_pairlist(r_chr("rlang_dyn_array"))); r_node_poke_tag(attribs_dyn_array, r_syms.class_); } vctrs/src/rlang/eval.h0000644000176200001440000001131314465445266014424 0ustar liggesusers#ifndef RLANG_EVAL_H #define RLANG_EVAL_H static inline r_obj* r_eval(r_obj* expr, r_obj* env) { return Rf_eval(expr, env); } r_obj* r_eval_with_x(r_obj* call, r_obj* x, r_obj* parent); r_obj* r_eval_with_xy(r_obj* call, r_obj* x, r_obj* y, r_obj* parent); r_obj* r_eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z, r_obj* parent); r_obj* r_eval_with_wxyz(r_obj* call, r_obj* w, r_obj* x, r_obj* y, r_obj* z, r_obj* parent); r_obj* r_exec_mask_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* parent); r_obj* r_exec_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env); r_obj* r_exec_mask_n_call_poke(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env); static inline r_obj* r_exec_mask1(r_obj* fn_sym, r_obj* fn, r_obj* x_sym, r_obj* x, r_obj* env) { struct r_pair args[] = { { x_sym, x } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask2(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask3(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask4(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask5(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 }, { x5_sym, x5 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask6(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 }, { x5_sym, x5 }, { x6_sym, x6 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask7(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 }, { x5_sym, x5 }, { x6_sym, x6 }, { x7_sym, x7 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_lazy_eval(struct r_lazy lazy) { if (!lazy.env) { // Unitialised lazy variable return r_null; } else if (lazy.env == r_null) { // Forced lazy variable return lazy.x; } else { return r_eval(lazy.x, lazy.env); } } extern struct r_lazy r_lazy_null; extern struct r_lazy r_lazy_missing_arg; static inline r_obj* r_lazy_eval_protect(struct r_lazy lazy) { r_obj* out = KEEP(r_lazy_eval(lazy)); out = r_expr_protect(out); FREE(1); return out; } static inline bool r_lazy_is_null(struct r_lazy call) { return !call.x && !call.env; } #endif vctrs/src/rlang/env.h0000644000176200001440000000554014465445266014272 0ustar liggesusers#ifndef RLANG_ENV_H #define RLANG_ENV_H #include #include extern r_obj* r_methods_ns_env; static inline r_obj* r_env_names(r_obj* env) { return R_lsInternal3(env, TRUE, FALSE); } static inline r_ssize r_env_length(r_obj* env) { if (r_typeof(env) != R_TYPE_environment) { r_abort("Expected an environment"); } return Rf_xlength(env); } static inline r_obj* r_env_parent(r_obj* env) { if (env == r_envs.empty) { r_stop_internal("Can't take the parent of the empty environment."); } return ENCLOS(env); } static inline void r_env_poke_parent(r_obj* env, r_obj* new_parent) { SET_ENCLOS(env, new_parent); } static inline bool r_is_environment(r_obj* x) { return TYPEOF(x) == ENVSXP; } static inline bool r_is_namespace(r_obj* x) { return R_IsNamespaceEnv(x); } static inline r_obj* r_env_find(r_obj* env, r_obj* sym) { return Rf_findVarInFrame3(env, sym, FALSE); } static inline r_obj* r_env_find_anywhere(r_obj* env, r_obj* sym) { return Rf_findVar(sym, env); } r_obj* r_env_find_until(r_obj* env, r_obj* sym, r_obj* last); // TODO: Enable `R_existsVarInFrame()` when R 4.2 is out #define RLANG_USE_R_EXISTS (1 || R_VERSION < R_Version(4, 2, 0)) static inline bool r_env_has(r_obj* env, r_obj* sym) { #if RLANG_USE_R_EXISTS bool r__env_has(r_obj*, r_obj*); return r__env_has(env, sym); #else return R_existsVarInFrame(env, sym); #endif } static inline bool r_env_has_anywhere(r_obj* env, r_obj* sym) { #if RLANG_USE_R_EXISTS bool r__env_has_anywhere(r_obj*, r_obj*); return r__env_has_anywhere(env, sym); #else return TODO(); #endif } r_obj* r_ns_env(const char* pkg); r_obj* r_base_ns_get(const char* name); r_obj* r_alloc_environment(r_ssize size, r_obj* parent); static inline r_obj* r_alloc_empty_environment(r_obj* parent) { // Non-hashed environment. // Very fast and useful when you aren't getting/setting from the result. r_obj* env = Rf_allocSExp(R_TYPE_environment); r_env_poke_parent(env, parent); return env; } r_obj* r_env_as_list(r_obj* x); r_obj* r_list_as_environment(r_obj* x, r_obj* parent); r_obj* r_env_clone(r_obj* env, r_obj* parent); void r_env_coalesce(r_obj* env, r_obj* from); // Silently ignores bindings that are not defined in `env`. static inline void r_env_unbind(r_obj* env, r_obj* sym) { #if (R_VERSION < R_Version(4, 0, 0)) void r__env_unbind(r_obj*, r_obj*); r__env_unbind(env, sym); #else R_removeVarFromFrame(sym, env); #endif } static inline void r_env_poke(r_obj* env, r_obj* sym, r_obj* value) { KEEP(value); Rf_defineVar(sym, value, env); FREE(1); } void r_env_poke_lazy(r_obj* env, r_obj* sym, r_obj* expr, r_obj* eval_env); static inline void r_env_poke_active(r_obj* env, r_obj* sym, r_obj* fn) { KEEP(fn); r_env_unbind(env, sym); R_MakeActiveBinding(sym, fn, env); FREE(1); } bool r_env_inherits(r_obj* env, r_obj* ancestor, r_obj* top); #endif vctrs/src/rlang/cnd.h0000644000176200001440000000427714465445266014254 0ustar liggesusers#ifndef RLANG_CND_H #define RLANG_CND_H #include void r_inform(const char* fmt, ...); void r_warn(const char* fmt, ...); void r_interrupt(void); void r_no_return r_abort(const char* fmt, ...); void r_no_return r_abort_n(const struct r_pair* args, int n); void r_no_return r_abort_call(r_obj* call, const char* fmt, ...); // Formats input as an argument, using cli if available. Returns a // vmax-protected string. extern const char* (*r_format_error_arg)(r_obj* arg); const char* r_format_lazy_error_arg(struct r_lazy arg); // Return vmax-protected strings extern const char* (*r_obj_type_friendly_full)(r_obj* x, bool value, bool length); static inline const char* r_obj_type_friendly(r_obj* x) { return r_obj_type_friendly_full(x, true, false); } extern r_no_return void (*r_stop_internal)(const char* file, int line, r_obj* call, const char* fmt, ...); r_obj* r_peek_frame(void); #define r_stop_internal(...) \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ __VA_ARGS__) #define r_stop_unreachable() \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ "Reached the unreachable") #define r_stop_unimplemented_type(TYPE) \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ "Unimplemented type `%s`.", Rf_type2char(TYPE)) #define r_stop_unexpected_type(TYPE) \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ "Unexpected type `%s`.", Rf_type2char(TYPE)) static inline bool r_is_condition(r_obj* x) { return r_typeof(x) == R_TYPE_list && r_inherits(x, "condition"); } void r_cnd_signal(r_obj* cnd); void r_cnd_inform(r_obj* cnd, bool mufflable); void r_cnd_warn(r_obj* cnd, bool mufflable); void r_cnd_abort(r_obj* cnd, bool mufflable); enum r_cnd_type { R_CND_TYPE_condition = 0, R_CND_TYPE_message = 1, R_CND_TYPE_warning = 2, R_CND_TYPE_error = 3, R_CND_TYPE_interrupt = 4 }; enum r_cnd_type r_cnd_type(r_obj* cnd); #endif vctrs/src/rlang/cpp/0000755000176200001440000000000014465445266014107 5ustar liggesusersvctrs/src/rlang/cpp/vec.cpp0000644000176200001440000000073114465445266015371 0ustar liggesusers#include #include extern "C" { int* r_int_unique0(int* v_data, r_ssize size) { try { return std::unique(v_data, v_data + size); } catch (...) { rcc_abort("r_int_unique0"); } } bool r_list_all_of0(r_obj* const * v_first, r_ssize size, bool (*predicate)(r_obj* x)) { try { return std::all_of(v_first, v_first + size, predicate); } catch (...) { rcc_abort("r_list_all_of"); } } } vctrs/src/rlang/cpp/rlang.cpp0000644000176200001440000000002314465445266015711 0ustar liggesusers#include "vec.cpp" vctrs/src/rlang/quo.c0000644000176200001440000000113214465445266014272 0ustar liggesusers#include "rlang.h" r_obj* (*r_quo_get_expr)(r_obj* quo); r_obj* (*r_quo_set_expr)(r_obj* quo, r_obj* expr); r_obj* (*r_quo_get_env)(r_obj* quo); r_obj* (*r_quo_set_env)(r_obj* quo, r_obj* env); void r_init_library_quo(void) { r_quo_get_expr = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_quo_get_expr"); r_quo_set_expr = (r_obj* (*)(r_obj*, r_obj*)) r_peek_c_callable("rlang", "rlang_quo_set_expr"); r_quo_get_env = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_quo_get_env"); r_quo_set_env = (r_obj* (*)(r_obj*, r_obj*)) r_peek_c_callable("rlang", "rlang_quo_set_env"); } vctrs/src/rlang/debug.c0000644000176200001440000000120414465445266014554 0ustar liggesusers#include "rlang.h" void r_sexp_inspect(r_obj* x) { r_obj* call = KEEP(r_parse(".Internal(inspect(x))")); r_eval_with_x(call, x, r_envs.base); FREE(1); } void r_browse(r_obj* x) { r_env_poke(r_envs.global, r_sym(".debug"), x); r_printf("Object saved in `.debug`:\n"); r_obj_print(x); r_obj* frame = KEEP(r_peek_frame()); r_browse_at(frame); FREE(1); } void r_browse_at(r_obj* env) { // The NULL expression is needed because of a limitation in ESS r_parse_eval("{ browser(); NULL }", env); } void r_dbg_str(r_obj* x) { r_obj* call = KEEP(r_parse("str(x)")); r_eval_with_x(call, x, r_ns_env("utils")); FREE(1); } vctrs/src/rlang/rlang.hpp0000644000176200001440000000065514465445266015147 0ustar liggesusers#ifndef RLANG_RLANG_HPP #define RLANG_RLANG_HPP #include #define R_NO_REMAP #include extern "C" { #include } static inline r_no_return void rcc_abort(const char* fn) { try { throw; } catch (const std::exception& err) { r_abort(err.what()); } catch (...) { r_obj* call = KEEP(r_call(r_sym(fn))); (r_stop_internal)("", -1, call, "Caught unknown C++ exception."); } } #endif vctrs/src/rlang/sym.h0000644000176200001440000000114614465445266014310 0ustar liggesusers#ifndef RLANG_SYM_H #define RLANG_SYM_H // The results of `r_sym_as_` functions must be protected extern r_obj* (*r_sym_as_utf8_character)(r_obj* x); extern r_obj* (*r_sym_as_utf8_string)(r_obj* x); r_obj* r_new_symbol(r_obj* x, int* err); static inline r_obj* r_sym(const char* c_string) { return Rf_install(c_string); } static inline r_obj* r_sym_string(r_obj* sym) { return PRINTNAME(sym); } static inline const char* r_sym_c_string(r_obj* sym) { return CHAR(PRINTNAME(sym)); } bool r_is_symbol(r_obj* sym, const char* string); bool r_is_symbol_any(r_obj* x, const char** strings, int n); #endif vctrs/src/rlang/vec-chr.h0000644000176200001440000000344314465445266015031 0ustar liggesusers#ifndef RLANG_VECTOR_CHR_H #define RLANG_VECTOR_CHR_H #include static inline const char* r_str_c_string(r_obj* str) { return CHAR(str); } bool r_chr_has(r_obj* chr, const char* c_string); bool r_chr_has_any(r_obj* chr, const char** c_strings); r_ssize r_chr_detect_index(r_obj* chr, const char* c_string); void r_chr_fill(r_obj* chr, r_obj* value, r_ssize n); static inline r_obj* r_str_as_character(r_obj* x) { return Rf_ScalarString(x); } /* * A symbol is always in the native encoding. This means that UTF-8 * data frame names undergo a lossy translation when they are * transformed to symbols to create a data mask. To deal with this, we * translate all serialised unicode tags back to UTF-8. This way the * UTF-8 -> native -> UTF-8 translation that occurs during the * character -> symbol -> character conversion fundamental for data * masking is transparent and lossless for the end user. * * Starting from R 4.0, `installChar()` warns when translation to * native encoding is lossy. This warning is disruptive for us since * we correctly translate strings behind the scene. To work around * this, we call `translateChar()` which doesn't warn (at least * currently). If the pointers are the same, no translation is * needed and we can call `installChar()`, which preserves the * current encoding of the string. Otherwise we intern the symbol * with `install()` without encoding. */ static inline r_obj* r_str_as_symbol(r_obj* str) { const char* str_native = Rf_translateChar(str); if (str_native == CHAR(str)) { return Rf_installChar(str); } else { return Rf_install(str_native); } } static inline bool r_str_is_name(r_obj* str) { if (str == r_globals.na_str) { return false; } if (str == r_strs.empty) { return false; } return true; } #endif vctrs/src/rlang/dyn-list-of.h0000644000176200001440000000312214465445266015641 0ustar liggesusers#ifndef RLANG_DYN_LIST_OF_H #define RLANG_DYN_LIST_OF_H struct r_dyn_list_of { r_obj* shelter; r_ssize count; r_ssize capacity; int growth_factor; // Contains the addresses and sizes of each element of the // list-of. If you copy that pointer, consider it invalid after a // push because it might have moved in memory due to a resize. struct r_pair_ptr_ssize* v_data; // private: r_ssize width; enum r_type type; r_ssize elt_byte_size; r_obj* reserve; void* v_reserve; struct r_dyn_array* p_moved_arr; struct r_dyn_array* p_moved_shelter_arr; r_obj* arr_locs; r_ssize* v_arr_locs; struct r_dyn_array* p_arrays; }; struct r_dyn_list_of* r_new_dyn_list_of(enum r_type type, r_ssize capacity, r_ssize width); r_obj* r_lof_unwrap(struct r_dyn_list_of* p_lof); void r_lof_push_back(struct r_dyn_list_of* p_lof); void r_lof_arr_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt); static inline void* r_lof_arr_ptr(struct r_dyn_list_of* p_lof, r_ssize i, r_ssize j) { r_ssize offset = j * p_lof->elt_byte_size; struct r_pair_ptr_ssize* v_arrays = (struct r_pair_ptr_ssize*) r_dyn_pointer(p_lof->p_arrays, i); return ((unsigned char*) v_arrays->ptr) + offset; } static inline void* r_lof_arr_ptr_front(struct r_dyn_list_of* p_lof, r_ssize i) { return r_lof_arr_ptr(p_lof, i, 0); } static inline void* r_lof_arr_ptr_back(struct r_dyn_list_of* p_lof, r_ssize i) { return r_lof_arr_ptr(p_lof, i, p_lof->count - 1); } #endif vctrs/src/rlang/dyn-list-of.c0000644000176200001440000001562014465445266015642 0ustar liggesusers#include #include "decl/dyn-list-of-decl.h" #define R_DYN_LOF_GROWTH_FACTOR 2 #define R_DYN_LOF_INIT_SIZE 32 enum shelter_dyn_list_of { SHELTER_DYN_LOF_raw, SHELTER_DYN_LOF_reserve, SHELTER_DYN_LOF_arr_locs, SHELTER_DYN_LOF_extra_array, SHELTER_DYN_LOF_extra_shelter_array, SHELTER_DYN_LOF_moved_arr, SHELTER_DYN_LOF_moved_shelter_arr, SHELTER_DYN_LOF_arrays, SHELTER_DYN_LOF_SIZE }; struct r_dyn_list_of* r_new_dyn_list_of(enum r_type type, r_ssize capacity, r_ssize width) { switch (type) { case R_TYPE_character: case R_TYPE_list: r_abort("Can't create a dynamic list of barrier vectors."); default: break; } r_obj* shelter = KEEP(r_alloc_list(SHELTER_DYN_LOF_SIZE)); r_obj* lof_raw = r_alloc_raw(sizeof(struct r_dyn_list_of)); r_list_poke(shelter, SHELTER_DYN_LOF_raw, lof_raw); struct r_dyn_array* p_moved_arr = r_new_dyn_array(sizeof(struct r_dyn_array*), R_DYN_LOF_INIT_SIZE); r_list_poke(shelter, SHELTER_DYN_LOF_moved_arr, p_moved_arr->shelter); struct r_dyn_array* p_moved_shelter_arr = r_new_dyn_vector(R_TYPE_list, R_DYN_LOF_INIT_SIZE); r_list_poke(shelter, SHELTER_DYN_LOF_moved_shelter_arr, p_moved_shelter_arr->shelter); r_obj* reserve = r_alloc_vector(type, r_ssize_mult(capacity, width)); r_list_poke(shelter, SHELTER_DYN_LOF_reserve, reserve); void* v_reserve = r_vec_begin(reserve); r_obj* arr_locs = r_alloc_raw(sizeof(r_ssize) * capacity); r_list_poke(shelter, SHELTER_DYN_LOF_arr_locs, arr_locs); r_ssize* v_arr_locs = r_raw_begin(arr_locs); R_MEM_SET(r_ssize, v_arr_locs, -1, capacity); struct r_dyn_array* p_arrays = r_new_dyn_array(sizeof(struct r_pair_ptr_ssize), capacity); r_list_poke(shelter, SHELTER_DYN_LOF_arrays, p_arrays->shelter); struct r_dyn_list_of* p_lof = r_raw_begin(lof_raw); *p_lof = (struct r_dyn_list_of) { .shelter = shelter, .count = 0, .capacity = capacity, .growth_factor = R_DYN_LOF_GROWTH_FACTOR, .v_data = r_dyn_begin(p_arrays), // private: .width = width, .type = type, .elt_byte_size = r_vec_elt_sizeof0(type), .reserve = reserve, .v_reserve = v_reserve, .p_moved_arr = p_moved_arr, .p_moved_shelter_arr = p_moved_shelter_arr, .arr_locs = arr_locs, .v_arr_locs = v_arr_locs, .p_arrays = p_arrays, }; FREE(1); return p_lof; } r_obj* r_lof_unwrap(struct r_dyn_list_of* p_lof) { r_obj* out = KEEP(r_alloc_list(p_lof->count)); enum r_type type = p_lof->type; r_ssize n = p_lof->count; struct r_pair_ptr_ssize* v_arrays = r_dyn_begin(p_lof->p_arrays); for (r_ssize i = 0; i < n; ++i) { struct r_pair_ptr_ssize array = v_arrays[i]; r_list_poke(out, i, r_vec_n(type, array.ptr, array.size)); } FREE(1); return out; } static void r_lof_resize(struct r_dyn_list_of* p_lof, r_ssize capacity) { r_ssize count = p_lof->count; // Resize reserve r_obj* reserve = r_vec_resize0(p_lof->type, p_lof->reserve, r_ssize_mult(capacity, p_lof->width)); r_list_poke(p_lof->shelter, SHELTER_DYN_LOF_reserve, reserve); p_lof->reserve = reserve; p_lof->v_reserve = r_vec_begin0(p_lof->type, reserve); p_lof->capacity = capacity; // Resize array indirections r_obj* arr_locs = r_raw_resize(p_lof->arr_locs, r_ssize_mult(sizeof(r_ssize), capacity)); r_list_poke(p_lof->shelter, SHELTER_DYN_LOF_arr_locs, arr_locs); r_ssize* v_arr_locs = r_raw_begin(arr_locs); r_ssize n_new = capacity - count; R_MEM_SET(r_ssize, v_arr_locs + count, -1, n_new); p_lof->arr_locs = arr_locs; p_lof->v_arr_locs = v_arr_locs; // Resize addresses and update them to point to the new memory r_dyn_resize(p_lof->p_arrays, capacity); struct r_pair_ptr_ssize* v_data = r_dyn_begin(p_lof->p_arrays); p_lof->v_data = v_data; unsigned char* v_reserve_u = (unsigned char*) p_lof->v_reserve; r_ssize bytes = p_lof->width * p_lof->elt_byte_size; for (r_ssize i = 0; i < count; ++i) { // Preserve addresses of moved arrays if (v_arr_locs[i] < 0) { r_ssize offset = i * bytes; v_data[i].ptr = v_reserve_u + offset; } } } void r_lof_push_back(struct r_dyn_list_of* p_lof) { r_ssize count = p_lof->count + 1; if (count > p_lof->capacity) { r_ssize new_size = r_ssize_mult(p_lof->capacity, R_DYN_LOF_GROWTH_FACTOR); r_lof_resize(p_lof, new_size); } p_lof->count = count; unsigned char* v_reserve_u = (unsigned char*) p_lof->v_reserve; r_ssize offset = (count - 1) * p_lof->width * p_lof->elt_byte_size; struct r_pair_ptr_ssize info = { .ptr = v_reserve_u + offset, .size = 0 }; r_dyn_push_back(p_lof->p_arrays, &info); } void r_lof_arr_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt) { if (i >= p_lof->count) { r_stop_internal("Location %d does not exist.", i); } if (reserve_push_back(p_lof, i, p_elt)) { return; } struct r_dyn_array* p_arr = p_lof->p_moved_arr; r_ssize arr_i = p_lof->v_arr_locs[i]; if (arr_i >= p_arr->count) { r_stop_internal("Location %d does not exist in the extra array", arr_i); } struct r_dyn_array* p_inner_arr = R_DYN_GET(struct r_dyn_array*, p_arr, arr_i); r_dyn_push_back(p_inner_arr, p_elt); // Also update pointer in case of resize R_DYN_POKE(struct r_pair_ptr_ssize, p_lof->p_arrays, i, ((struct r_pair_ptr_ssize) { .ptr = r_dyn_begin(p_inner_arr), .size = p_inner_arr->count })); } static bool reserve_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt) { if (p_lof->v_arr_locs[i] >= 0) { return false; } struct r_pair_ptr_ssize* p_arr_info = r_dyn_pointer(p_lof->p_arrays, i); if (p_arr_info->size >= p_lof->width) { // Inner array is getting too big for the reserve. Move it to a // dynamic array. reserve_move(p_lof, i, p_elt); return false; } r_ssize count = ++p_arr_info->size; r_ssize offset = (i * p_lof->width + count - 1) * p_lof->elt_byte_size; void* p = ((unsigned char*) p_lof->v_reserve) + offset; if (p_elt) { memcpy(p, p_elt, p_lof->elt_byte_size); } else { memset(p, 0, p_lof->elt_byte_size); } return true; } static void reserve_move(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt) { struct r_dyn_array* p_moved_arr = p_lof->p_moved_arr; r_ssize n = p_lof->width; struct r_dyn_array* p_new = r_new_dyn_vector(p_lof->type, p_lof->width); r_dyn_list_push_back(p_lof->p_moved_shelter_arr, p_new->shelter); r_dyn_push_back(p_moved_arr, &p_new); void* v_new = r_dyn_begin(p_new); void* v_old = R_DYN_GET(struct r_pair_ptr_ssize, p_lof->p_arrays, i).ptr; memcpy(v_new, v_old, r_ssize_mult(n, p_lof->elt_byte_size)); p_new->count = n; R_DYN_POKE(struct r_pair_ptr_ssize, p_lof->p_arrays, i, ((struct r_pair_ptr_ssize) { .ptr = v_new, .size = n })); p_lof->v_arr_locs[i] = p_moved_arr->count - 1; } vctrs/src/rlang/vec.h0000644000176200001440000002653314465445266014264 0ustar liggesusers#ifndef RLANG_VECTOR_H #define RLANG_VECTOR_H #include static inline int* r_lgl_begin(r_obj* x) { return LOGICAL(x); } static inline int* r_int_begin(r_obj* x) { return INTEGER(x); } static inline double* r_dbl_begin(r_obj* x) { return REAL(x); } static inline r_complex* r_cpl_begin(r_obj* x) { return COMPLEX(x); } static inline void* r_raw_begin(r_obj* x) { return RAW(x); } static inline const int* r_int_cbegin(r_obj* x) { return (const int*) INTEGER(x); } static inline const int* r_lgl_cbegin(r_obj* x) { return (const int*) LOGICAL(x); } static inline const double* r_dbl_cbegin(r_obj* x) { return (const double*) REAL(x); } static inline const r_complex* r_cpl_cbegin(r_obj* x) { return (const r_complex*) COMPLEX(x); } static inline const void* r_raw_cbegin(r_obj* x) { return (const void*) RAW(x); } static inline r_obj* const * r_chr_cbegin(r_obj* x) { return (r_obj* const *) STRING_PTR(x); } static inline r_obj* const * r_list_cbegin(r_obj* x) { #if (R_VERSION < R_Version(3, 5, 0)) return ((r_obj* const *) STRING_PTR(x)); #else return ((r_obj* const *) DATAPTR_RO(x)); #endif } static inline void* r_vec_begin0(enum r_type type, r_obj* x) { switch (type) { case R_TYPE_logical: return r_lgl_begin(x); case R_TYPE_integer: return r_int_begin(x); case R_TYPE_double: return r_dbl_begin(x); case R_TYPE_complex: return r_cpl_begin(x); case R_TYPE_raw: return r_raw_begin(x); default: r_stop_unimplemented_type(type); } } static inline void* r_vec_begin(r_obj* x) { return r_vec_begin0(r_typeof(x), x); } static inline const void* r_vec_cbegin0(enum r_type type, r_obj* x) { switch (type) { case R_TYPE_logical: return r_lgl_cbegin(x); case R_TYPE_integer: return r_int_cbegin(x); case R_TYPE_double: return r_dbl_cbegin(x); case R_TYPE_complex: return r_cpl_cbegin(x); case R_TYPE_raw: return r_raw_cbegin(x); case R_TYPE_character: return r_chr_cbegin(x); case R_TYPE_list: return r_list_cbegin(x); default: r_stop_unimplemented_type(type); } } static inline const void* r_vec_cbegin(r_obj* x) { return r_vec_cbegin0(r_typeof(x), x); } static inline int r_vec_elt_sizeof0(enum r_type type) { switch (type) { case R_TYPE_logical: return sizeof(int); case R_TYPE_integer: return sizeof(int); case R_TYPE_double: return sizeof(double); case R_TYPE_complex: return sizeof(r_complex); case R_TYPE_raw: return sizeof(char); case R_TYPE_character: return sizeof(r_obj*); case R_TYPE_list: return sizeof(r_obj*); default: r_stop_unimplemented_type(type); } } static inline int r_vec_elt_sizeof(r_obj* x) { return r_vec_elt_sizeof0(r_typeof(x)); } static inline int r_lgl_get(r_obj* x, r_ssize i) { return LOGICAL(x)[i]; } static inline int r_int_get(r_obj* x, r_ssize i) { return INTEGER(x)[i]; } static inline double r_dbl_get(r_obj* x, r_ssize i) { return REAL(x)[i]; } static inline r_complex r_cpl_get(r_obj* x, r_ssize i) { return COMPLEX(x)[i]; } static inline char r_raw_get(r_obj* x, r_ssize i) { return RAW(x)[i]; } static inline r_obj* r_chr_get(r_obj* x, r_ssize i) { return STRING_ELT(x, i); } static inline const char* r_chr_get_c_string(r_obj* x, r_ssize i) { return CHAR(r_chr_get(x, i)); } static inline r_obj* r_list_get(r_obj* x, r_ssize i) { return VECTOR_ELT(x, i); } static inline void r_lgl_poke(r_obj* x, r_ssize i, int y) { LOGICAL(x)[i] = y; } static inline void r_int_poke(r_obj* x, r_ssize i, int y) { INTEGER(x)[i] = y; } static inline void r_dbl_poke(r_obj* x, r_ssize i, double y) { REAL(x)[i] = y; } static inline void r_cpl_poke(r_obj* x, r_ssize i, r_complex y) { COMPLEX(x)[i] = y; } static inline void r_raw_poke(r_obj* x, r_ssize i, char y) { RAW(x)[i] = y; } static inline void r_chr_poke(r_obj* x, r_ssize i, r_obj* y) { SET_STRING_ELT(x, i, y); } static inline void r_list_poke(r_obj* x, r_ssize i, r_obj* y) { SET_VECTOR_ELT(x, i, y); } #define r_chr_poke(X, I, Y) SET_STRING_ELT(X, I, Y) #define r_list_poke(X, I, Y) SET_VECTOR_ELT(X, I, Y) static inline r_obj* r_alloc_vector(enum r_type type, r_ssize n) { return Rf_allocVector(type, n); } static inline r_obj* r_alloc_logical(r_ssize n) { return Rf_allocVector(R_TYPE_logical, n); } static inline r_obj* r_alloc_integer(r_ssize n) { return Rf_allocVector(R_TYPE_integer, n); } static inline r_obj* r_alloc_double(r_ssize n) { return Rf_allocVector(R_TYPE_double, n); } static inline r_obj* r_alloc_complex(r_ssize n) { return Rf_allocVector(R_TYPE_complex, n); } static inline r_obj* r_alloc_raw(r_ssize n) { return Rf_allocVector(R_TYPE_raw, n); } static inline r_obj* r_alloc_character(r_ssize n) { return Rf_allocVector(R_TYPE_character, n); } static inline r_obj* r_alloc_list(r_ssize n) { return Rf_allocVector(R_TYPE_list, n); } static inline r_obj* r_alloc_raw0(r_ssize n) { r_obj* out = r_alloc_raw(n); unsigned char* p_out = (unsigned char*) r_raw_begin(out); memset(p_out, 0, n); return out; } static inline r_obj* r_lgl(bool x) { return Rf_ScalarLogical(x); } static inline r_obj* r_int(int x) { return Rf_ScalarInteger(x); } static inline r_obj* r_dbl(double x) { return Rf_ScalarReal(x); } static inline r_obj* r_cpl(r_complex x) { return Rf_ScalarComplex(x); } static inline r_obj* r_raw(char x) { return Rf_ScalarRaw(x); } static inline r_obj* r_str(const char* c_string) { return Rf_mkCharCE(c_string, CE_UTF8); } static inline r_obj* r_chr(const char* c_string) { r_obj* out = KEEP(r_alloc_character(1)); r_chr_poke(out, 0, r_str(c_string)); FREE(1); return out; } static inline r_obj* r_list(r_obj* x) { r_obj* out = r_alloc_list(1); r_list_poke(out, 0, x); return out; } r_obj* r_chr_n(const char* const * strings, r_ssize n); static inline r_obj* r_len(r_ssize x) { if (x > INT_MAX) { return r_dbl(x); } else { return r_int(x); } } // FIXME: Redundant with `r_lgl()` static inline r_obj* r_shared_lgl(bool x) { if (x) { return r_true; } else { return r_false; } } static inline bool _r_has_correct_length(r_obj* x, r_ssize n) { return n < 0 || r_length(x) == n; } extern bool _r_is_finite(r_obj* x); static inline bool _r_is_double(r_obj* x, r_ssize n, int finite) { if (r_typeof(x) != R_TYPE_double || !_r_has_correct_length(x, n)) { return false; } if (finite >= 0 && (bool) finite != _r_is_finite(x)) { return false; } return true; } static inline bool _r_is_complex(r_obj* x, r_ssize n, int finite) { if (r_typeof(x) != R_TYPE_complex || !_r_has_correct_length(x, n)) { return false; } if (finite >= 0 && (bool) finite != _r_is_finite(x)) { return false; } return true; } static inline bool r_is_bool(r_obj* x) { return r_typeof(x) == R_TYPE_logical && r_length(x) == 1 && r_lgl_get(x, 0) != r_globals.na_lgl; } static inline bool r_is_int(r_obj* x) { return r_typeof(x) == R_TYPE_integer && r_length(x) == 1 && r_int_get(x, 0) != r_globals.na_int; } static inline bool r_is_true(r_obj* x) { return r_is_bool(x) && r_lgl_get(x, 0); } static inline bool r_is_false(r_obj* x) { return r_is_bool(x) && !r_lgl_get(x, 0); } static inline bool r_is_string(r_obj* x) { return r_typeof(x) == R_TYPE_character && r_length(x) == 1 && r_chr_get(x, 0) != R_NaString; } static inline bool r_arg_as_bool(r_obj* x, const char* arg) { if (!r_is_bool(x)) { r_abort("`%s` must be `TRUE` or `FALSE`.", arg); } return r_lgl_get(x, 0); } static inline bool r_as_bool(r_obj* x) { return r_arg_as_bool(x, "x"); } static inline int r_arg_as_int(r_obj* x, const char* arg) { if (!r_is_int(x)) { r_abort("`%s` must be a single integer value.", arg); } return r_int_get(x, 0); } static inline int r_as_int(r_obj* x) { return r_arg_as_int(x, "x"); } static inline double r_arg_as_double(r_obj* x, const char* arg) { // TODO: Coercion of int and lgl values if (!_r_is_double(x, 1, -1)) { r_abort("`%s` must be a single double value.", arg); } return r_dbl_get(x, 0); } static inline double r_as_double(r_obj* x) { return r_arg_as_double(x, "x"); } static inline r_complex r_arg_as_complex(r_obj* x, const char* arg) { if (!_r_is_complex(x, 1, 1)) { r_abort("`%s` must be a single complex value.", arg); } return r_cpl_get(x, 0); } static inline r_complex r_as_complex(r_obj* x) { return r_arg_as_complex(x, "x"); } static inline char r_arg_as_char(r_obj* x, const char* arg) { if (r_typeof(x) != R_TYPE_raw && r_length(x) != 1) { r_abort("`%s` must be a single raw value.", arg); } return r_raw_get(x, 0); } static inline char r_as_char(r_obj* x) { return r_arg_as_char(x, "x"); } r_obj* r_lgl_resize(r_obj* x, r_ssize size); r_obj* r_int_resize(r_obj* x, r_ssize size); r_obj* r_dbl_resize(r_obj* x, r_ssize size); r_obj* r_cpl_resize(r_obj* x, r_ssize size); r_obj* r_raw_resize(r_obj* x, r_ssize size); r_obj* r_chr_resize(r_obj* x, r_ssize size); r_obj* r_list_resize(r_obj* x, r_ssize size); static inline r_obj* r_vec_resize0(enum r_type type, r_obj* x, r_ssize size) { switch (type) { case R_TYPE_logical: return r_lgl_resize(x, size); case R_TYPE_integer: return r_int_resize(x, size); case R_TYPE_double: return r_dbl_resize(x, size); case R_TYPE_complex: return r_cpl_resize(x, size); case R_TYPE_raw: return r_raw_resize(x, size); case R_TYPE_character: return r_chr_resize(x, size); case R_TYPE_list: return r_list_resize(x, size); default: r_stop_unimplemented_type(type); } } static inline r_obj* r_vec_resize(r_obj* x, r_ssize size) { return r_vec_resize0(r_typeof(x), x, size); } static inline r_obj* r_vec_n(enum r_type type, void* v_src, r_ssize n) { switch (type) { case R_TYPE_logical: case R_TYPE_integer: case R_TYPE_double: case R_TYPE_complex: case R_TYPE_raw: { r_obj* out = r_alloc_vector(type, n); memcpy(r_vec_begin(out), v_src, n * r_vec_elt_sizeof0(type)); return out; } case R_TYPE_character: case R_TYPE_list: r_abort("TODO: barrier types in `r_vec_n()`"); default: r_stop_unimplemented_type(type); } } static inline r_obj* r_lgl_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_logical, v_src, n); } static inline r_obj* r_int_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_integer, v_src, n); } static inline r_obj* r_dbl_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_double, v_src, n); } static inline r_obj* r_cpl_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_complex, v_src, n); } static inline r_obj* r_raw_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_raw, v_src, n); } static inline r_obj* r_copy_in_raw(const void* src, size_t size) { r_obj* out = r_alloc_raw(size); memcpy(r_raw_begin(out), src, size); return out; } static inline void r_int_fill_iota0(int* p_x, int start, r_ssize n) { for (r_ssize i = 0; i < n; ++i) { p_x[i] = start++; } } static inline void r_int_fill_iota(r_obj* x) { r_int_fill_iota0(r_int_begin(x), 0, r_length(x)); } r_obj* r_list_compact(r_obj* x); r_obj* r_list_of_as_ptr_ssize(r_obj* xs, enum r_type type, struct r_pair_ptr_ssize** p_v_out); // From cpp/vec.cpp int* r_int_unique0(int* v_data, r_ssize size); bool r_list_all_of0(r_obj* const * v_first, r_ssize size, bool (*predicate)(r_obj* x)); static inline int* r_int_unique(r_obj* x) { return r_int_unique0(r_int_begin(x), r_length(x)); } static inline bool r_list_all_of(r_obj* x, bool (*predicate)(r_obj* x)) { return r_list_all_of0(r_list_cbegin(x), r_length(x), predicate); } #endif vctrs/src/rlang/export.h0000644000176200001440000000122714465445266015021 0ustar liggesusers#ifndef RLANG_EXPORT_H #define RLANG_EXPORT_H #include #include #if (defined(R_VERSION) && R_VERSION < R_Version(3, 4, 0)) typedef union {void* p; DL_FUNC fn;} fn_ptr; r_obj* R_MakeExternalPtrFn(DL_FUNC p, r_obj* tag, r_obj* prot); DL_FUNC R_ExternalPtrAddrFn(r_obj* s); #endif typedef DL_FUNC r_void_fn; static inline r_void_fn r_peek_c_callable(const char* pkg, const char* callable) { return R_GetCCallable(pkg, callable); } static inline r_obj* r_new_fn_ptr(r_void_fn p) { return R_MakeExternalPtrFn(p, r_null, r_null); } static inline r_void_fn r_fn_ptr_addr(r_obj* p) { return R_ExternalPtrAddrFn(p); } #endif vctrs/src/rlang/env.c0000644000176200001440000002040514465445266014262 0ustar liggesusers#include "rlang.h" #include "decl/env-decl.h" r_obj* rlang_ns_env; r_obj* r_ns_env(const char* pkg) { r_obj* ns = r_env_find(R_NamespaceRegistry, r_sym(pkg)); if (ns == r_syms.unbound) { r_abort("Can't find namespace `%s`", pkg); } return ns; } static r_obj* ns_env_get(r_obj* env, const char* name) { r_obj* obj = KEEP(r_env_find(env, r_sym(name))); // Can be a promise to a lazyLoadDBfetch() call if (r_typeof(obj) == R_TYPE_promise) { obj = r_eval(obj, r_envs.empty); } if (obj != r_syms.unbound) { FREE(1); return obj; } // Trigger object not found error r_eval(r_sym(name), env); r_stop_unreachable(); } r_obj* r_base_ns_get(const char* name) { return ns_env_get(r_envs.base, name); } r_obj* rlang_ns_get(const char* name) { return ns_env_get(rlang_ns_env, name); } r_obj* r_alloc_environment(r_ssize size, r_obj* parent) { #if R_VERSION < R_Version(4, 1, 0) parent = parent ? parent : r_envs.empty; r_node_poke_car(new_env__parent_node, parent); size = size ? size : 29; r_node_poke_car(new_env__size_node, r_int(size)); r_obj* env = r_eval(new_env_call, r_envs.base); // Free for gc r_node_poke_car(new_env__parent_node, r_null); return env; #else const int hash = 1; return R_NewEnv(parent, hash, size); #endif } r_obj* r_env_as_list(r_obj* env) { r_obj* out = KEEP(eval_with_x(env2list_call, env)); #if R_VERSION < R_Version(4, 0, 0) out = env_as_list_compat(env, out); #endif FREE(1); return out; } // On R < 4.0, the active binding function is returned instead of // its value. We invoke the active bindings here to get consistent // behaviour in all supported R versions. #if R_VERSION < R_Version(4, 0, 0) r_obj* env_as_list_compat(r_obj* env, r_obj* out) { r_obj* nms = KEEP(r_env_names(env)); r_obj* types = KEEP(r_env_binding_types(env, nms)); if (types == r_null) { FREE(2); return out; } r_ssize n = r_length(nms); r_obj* const * p_nms = r_chr_cbegin(nms); const int* p_types = r_int_cbegin(types); for (r_ssize i = 0; i < n; ++i) { enum r_env_binding_type type = p_types[i]; if (type == R_ENV_BINDING_TYPE_active) { r_ssize fn_idx = r_chr_detect_index(nms, r_str_c_string(p_nms[i])); if (fn_idx < 0) { r_abort("Internal error: Can't find active binding in list"); } r_obj* fn = r_list_get(out, fn_idx); r_obj* value = r_eval(KEEP(r_call(fn)), r_envs.empty); r_list_poke(out, fn_idx, value); FREE(1); } } FREE(2); return out; } #endif r_obj* r_env_clone(r_obj* env, r_obj* parent) { if (parent == NULL) { parent = r_env_parent(env); } // This better reproduces the behaviour of `list2env()` which in // turn affects how bindings are stored in the hash table and the // default sort of the character vector generated by `names()`. size_t size = R_MAX(r_length(env), 29); r_obj* out = KEEP(r_alloc_environment(size, parent)); r_env_coalesce(out, env); FREE(1); return out; } void r_env_coalesce(r_obj* env, r_obj* from) { r_obj* nms = KEEP(r_env_names(from)); r_obj* types = KEEP(r_env_binding_types(from, nms)); if (types == r_null) { env_coalesce_plain(env, from, nms); FREE(2); return; } // In older R versions there is no way of accessing the function of // an active binding except through env2list. This makes it // impossible to preserve active bindings without forcing promises. #if R_VERSION < R_Version(4, 0, 0) r_obj* from_list = KEEP(eval_with_x(env2list_call, from)); #else KEEP(r_null); #endif r_ssize n = r_length(nms); r_obj* const * v_nms = r_chr_cbegin(nms); enum r_env_binding_type* v_types = (enum r_env_binding_type*) r_int_begin(types); for (r_ssize i = 0; i < n; ++i) { r_obj* sym = r_str_as_symbol(v_nms[i]); if (r_env_has(env, sym)) { continue; } switch (v_types[i]) { case R_ENV_BINDING_TYPE_value: case R_ENV_BINDING_TYPE_promise: r_env_poke(env, sym, r_env_find(from, sym)); break; case R_ENV_BINDING_TYPE_active: { #if R_VERSION < R_Version(4, 0, 0) r_ssize fn_idx = r_chr_detect_index(nms, r_sym_c_string(sym)); if (fn_idx < 0) { r_stop_internal("Can't find active binding in temporary list."); } r_obj* fn = r_list_get(from_list, fn_idx); #else r_obj* fn = R_ActiveBindingFunction(sym, from); #endif r_env_poke_active(env, sym, fn); break; }} } FREE(3); return; } static void env_coalesce_plain(r_obj* env, r_obj* from, r_obj* nms) { r_ssize n = r_length(nms); r_obj* const * v_nms = r_chr_cbegin(nms); for (r_ssize i = 0; i < n; ++i) { r_obj* sym = r_str_as_symbol(v_nms[i]); if (r_env_has(env, sym)) { continue; } r_env_poke(env, sym, r_env_find(from, sym)); } return; } r_obj* r_list_as_environment(r_obj* x, r_obj* parent) { parent = parent ? parent : r_envs.empty; return eval_with_xy(list2env_call, x, parent); } void r_env_poke_lazy(r_obj* env, r_obj* sym, r_obj* expr, r_obj* eval_env) { KEEP(expr); r_obj* name = KEEP(r_sym_as_utf8_character(sym)); r_node_poke_car(poke_lazy_value_node, expr); r_eval_with_xyz(poke_lazy_call, name, env, eval_env, rlang_ns_env); r_node_poke_car(poke_lazy_value_node, r_null); FREE(2); } #if RLANG_USE_R_EXISTS bool r__env_has(r_obj* env, r_obj* sym) { r_obj* nm = KEEP(r_sym_as_utf8_character(sym)); r_obj* out = eval_with_xyz(exists_call, env, nm, r_false); FREE(1); return r_as_bool(out); } bool r__env_has_anywhere(r_obj* env, r_obj* sym) { r_obj* nm = KEEP(r_sym_as_utf8_character(sym)); r_obj* out = eval_with_xyz(exists_call, env, nm, r_true); FREE(1); return r_as_bool(out); } #endif #if (R_VERSION < R_Version(4, 0, 0)) void r__env_unbind(r_obj* env, r_obj* sym) { // Check if binding exists to avoid `rm()` warning if (r_env_has(env, sym)) { r_obj* nm = KEEP(r_sym_as_utf8_character(sym)); eval_with_xyz(remove_call, env, nm, r_false); FREE(1); } } #endif bool r_env_inherits(r_obj* env, r_obj* ancestor, r_obj* top) { top = top ? top : r_envs.empty; if (r_typeof(env) != R_TYPE_environment) { r_abort("`env` must be an environment"); } if (r_typeof(ancestor) != R_TYPE_environment) { r_abort("`ancestor` must be an environment"); } if (r_typeof(top) != R_TYPE_environment) { r_abort("`top` must be an environment"); } if (env == r_envs.empty) { return false; } while (env != top && env != r_envs.empty) { if (env == ancestor) { return true; } env = r_env_parent(env);; } return env == ancestor; } r_obj* r_env_find_until(r_obj* env, r_obj* sym, r_obj* last) { r_obj* stop = r_envs.empty; if (last != r_envs.empty) { stop = r_env_parent(last); } r_obj* out = r_syms.unbound; while (out == r_syms.unbound && env != r_envs.empty && env != stop) { out = r_env_find(env, sym); env = r_env_parent(env); } return out; } void r_init_rlang_ns_env(void) { rlang_ns_env = r_ns_env("rlang"); } void r_init_library_env(void) { #if R_VERSION < R_Version(4, 1, 0) new_env_call = r_parse_eval("as.call(list(new.env, TRUE, NULL, NULL))", r_envs.base); r_preserve(new_env_call); new_env__parent_node = r_node_cddr(new_env_call); new_env__size_node = r_node_cdr(new_env__parent_node); #endif env2list_call = r_parse("as.list.environment(x, all.names = TRUE)"); r_preserve(env2list_call); list2env_call = r_parse("list2env(x, envir = NULL, parent = y, hash = TRUE)"); r_preserve(list2env_call); poke_lazy_call = r_parse("delayedAssign(x, value = NULL, assign.env = y, eval.env = z)"); r_preserve(poke_lazy_call); poke_lazy_value_node = r_node_cddr(poke_lazy_call); exists_call = r_parse("exists(y, envir = x, inherits = z)"); r_preserve(exists_call); remove_call = r_parse("remove(list = y, envir = x, inherits = z)"); r_preserve(remove_call); r_methods_ns_env = r_parse_eval("asNamespace('methods')", r_envs.base); } r_obj* rlang_ns_env = NULL; r_obj* r_methods_ns_env = NULL; #if R_VERSION < R_Version(4, 1, 0) static r_obj* new_env_call = NULL; static r_obj* new_env__parent_node = NULL; static r_obj* new_env__size_node = NULL; #endif static r_obj* exists_call = NULL; static r_obj* remove_call = NULL; static r_obj* poke_lazy_call = NULL; static r_obj* poke_lazy_value_node = NULL; static r_obj* env2list_call = NULL; static r_obj* list2env_call = NULL; vctrs/src/rlang/globals.c0000644000176200001440000000642114511320527015077 0ustar liggesusers#include "rlang-types.h" #include "sym.h" struct r_globals r_globals; struct r_globals_chrs r_chrs; struct r_globals_classes r_classes; struct r_globals_strs r_strs; struct r_globals_syms r_syms; struct r_globals_envs r_envs; r_obj* r_true = NULL; r_obj* r_false = NULL; void r_init_library_globals(r_obj* ns) { r_preserve_global(r_classes.data_frame = r_chr("data.frame")); const char* v_tibble_class[] = { "tbl_df", "tbl", "data.frame" }; r_preserve_global(r_globals.empty_lgl = r_alloc_logical(0)); r_preserve_global(r_globals.empty_int = r_alloc_integer(0)); r_preserve_global(r_globals.empty_dbl = r_alloc_double(0)); r_preserve_global(r_globals.empty_cpl = r_alloc_complex(0)); r_preserve_global(r_globals.empty_raw = r_alloc_raw(0)); r_preserve_global(r_globals.empty_chr = r_alloc_character(0)); r_preserve_global(r_globals.empty_list = r_alloc_list(0)); r_globals.na_lgl = NA_LOGICAL; r_globals.na_int = NA_INTEGER; r_globals.na_dbl = NA_REAL; r_globals.na_cpl = (r_complex) { .r = NA_REAL, .i = NA_REAL }; r_globals.na_str = NA_STRING; r_preserve_global(r_chrs.empty_string = r_chr("")); r_preserve_global(r_chrs.full = r_chr("full")); r_classes.tibble = r_chr_n(v_tibble_class, R_ARR_SIZEOF(v_tibble_class)); r_preserve_global(r_classes.tibble); r_strs.dots = r_sym_string(r_syms.dots); r_strs.condition = r_sym_string(r_syms.condition); r_strs.empty = r_chr_get(r_chrs.empty_string, 0); r_strs.error = r_sym_string(r_syms.error); r_strs.interrupt = r_sym_string(r_syms.interrupt); r_strs.na = r_globals.na_str; r_strs.message = r_sym_string(r_syms.message); r_strs.warning = r_sym_string(r_syms.warning); r_preserve_global(r_false = r_lgl(0)); r_preserve_global(r_true = r_lgl(1)); r_envs.empty = R_EmptyEnv; r_envs.base = R_BaseEnv; r_envs.global = R_GlobalEnv; r_envs.ns = ns; } void r_init_library_globals_syms(void) { r_syms.abort = r_sym("abort"); r_syms.arg = r_sym("arg"); r_syms.brace = R_BraceSymbol; r_syms.brackets = R_BracketSymbol; r_syms.brackets2 = R_Bracket2Symbol; r_syms.call = r_sym("call"); r_syms.class_ = R_ClassSymbol; r_syms.colon2 = R_DoubleColonSymbol; r_syms.colon3 = R_TripleColonSymbol; r_syms.condition = r_sym("condition"); r_syms.dots = R_DotsSymbol; r_syms.error = r_sym("error"); r_syms.error_arg = r_sym("error_arg"); r_syms.error_call = r_sym("error_call"); r_syms.error_call_flag = r_sym(".__error_call__."); r_syms.expr = r_sym("expr"); r_syms.interrupt = r_sym("interrupt"); r_syms.missing = R_MissingArg; r_syms.message = r_sym("message"); r_syms.names = R_NamesSymbol; r_syms.options = r_sym("options"); r_syms.dim = R_DimSymbol; r_syms.dim_names = R_DimNamesSymbol; r_syms.row_names = R_RowNamesSymbol; r_syms.stack_overflow_error = r_sym("stackOverflowError"); r_syms.unbound = R_UnboundValue; r_syms.warning = r_sym("warning"); r_syms.dot_environment = r_sym(".Environment"); r_syms.dot_fn = r_sym(".fn"); r_syms.dot_x = r_sym(".x"); r_syms.dot_y = r_sym(".y"); r_syms.function = r_sym("function"); r_syms.srcfile = r_sym("srcfile"); r_syms.srcref = r_sym("srcref"); r_syms.tilde = r_sym("~"); r_syms.w = r_sym("w"); r_syms.wholeSrcref = r_sym("wholeSrcref"); r_syms.x = r_sym("x"); r_syms.y = r_sym("y"); r_syms.z = r_sym("z"); } vctrs/src/rlang/session.c0000644000176200001440000000276014465445266015161 0ustar liggesusers#include "rlang.h" r_obj* eval_with_x(r_obj* call, r_obj* x); static r_obj* is_installed_call = NULL; bool r_is_installed(const char* pkg) { r_obj* installed = eval_with_x(is_installed_call, KEEP(r_chr(pkg))); bool out = *r_lgl_begin(installed); FREE(1); return out; } static r_obj* has_colour_call = NULL; bool r_has_colour(void) { if (!r_is_installed("crayon")) { return false; } return *r_lgl_begin(r_eval(has_colour_call, r_envs.base)); } void r_init_library_session(void) { is_installed_call = r_parse("requireNamespace(x, quietly = TRUE)"); r_preserve(is_installed_call); has_colour_call = r_parse("crayon::has_color()"); r_preserve(has_colour_call); } #ifdef _WIN32 # include # include r_obj* r_getppid(void) { DWORD pid = GetCurrentProcessId(); HANDLE handle = NULL; PROCESSENTRY32W pe = { 0 }; pe.dwSize = sizeof(PROCESSENTRY32W); handle = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); if (handle == INVALID_HANDLE_VALUE) { r_abort("Can't query parent pid."); } if (Process32FirstW(handle, &pe)) { do { if (pe.th32ProcessID == pid) { DWORD ppid = pe.th32ParentProcessID; CloseHandle(handle); return r_int(ppid); } } while (Process32NextW(handle, &pe)); } /* Should not get here */ CloseHandle(handle); r_stop_internal("Can't find my own process."); return r_null; } #else # include r_obj* r_getppid(void) { return r_int(getppid()); } #endif vctrs/src/rlang/node.c0000644000176200001440000000305414465445266014420 0ustar liggesusers#include "rlang.h" r_obj* r_new_pairlist(const struct r_pair* args, int n, r_obj** tail) { r_obj* shelter = KEEP(r_new_node(R_NilValue, R_NilValue)); r_obj* node = shelter; for (int i = 0; i < n; ++i) { struct r_pair arg = args[i]; r_obj* tag = arg.x; r_obj* car = arg.y; r_obj* cdr = r_new_node(car, r_null); r_node_poke_tag(cdr, tag); r_node_poke_cdr(node, cdr); node = cdr; } if (n && tail) { *tail = node; } FREE(1); return r_node_cdr(shelter); } // Shallow copy of a node tree. Other objects are not cloned. r_obj* r_node_tree_clone(r_obj* x) { enum r_type type = r_typeof(x); if (type != R_TYPE_pairlist && type != R_TYPE_call) { return x; } x = KEEP(r_clone(x)); r_obj* rest = x; while (rest != r_null) { r_obj* head = r_node_car(rest); enum r_type head_type = r_typeof(head); if (head_type == R_TYPE_pairlist || head_type == R_TYPE_call) { r_node_poke_car(rest, r_node_tree_clone(head)); } rest = r_node_cdr(rest); } FREE(1); return x; } r_obj* r_pairlist_find(r_obj* node, r_obj* tag) { while (node != r_null) { if (r_node_tag(node) == tag) { return node; } node = r_node_cdr(node); } return r_null; } r_obj* r_pairlist_rev(r_obj* node) { if (node == r_null) { return node; } r_obj* prev = r_null; r_obj* tail = node; r_obj* next; while (tail != r_null) { next = r_node_cdr(tail); r_node_poke_cdr(tail, prev); prev = tail; tail = next; } return prev; } vctrs/src/rlang/rlang.c0000644000176200001440000000600714465445266014577 0ustar liggesusers#include #include #include "arg.c" #include "attrib.c" #include "call.c" #include "cnd.c" #include "c-utils.c" #include "debug.c" #include "dict.c" #include "df.c" #include "dyn-array.c" #include "dyn-list-of.c" #include "env.c" #include "env-binding.c" #include "eval.c" #include "export.c" #include "fn.c" #include "formula.c" #include "globals.c" #include "node.c" #include "obj.c" #include "parse.c" #include "quo.c" #include "session.c" #include "stack.c" #include "sym.c" #include "vec.c" #include "vec-chr.c" #include "vec-lgl.c" #include "vendor.c" #include "walk.c" // Allows long vectors to be indexed with doubles r_ssize r_arg_as_ssize(r_obj* n, const char* arg) { switch (r_typeof(n)) { case R_TYPE_double: { if (r_length(n) != 1) { goto invalid; } double out = r_dbl_get(n, 0); if (out > R_SSIZE_MAX) { r_abort("`%s` is too large a number.", arg); } if (out != (int_least64_t) out) { r_abort("`%s` must be a whole number, not a decimal number.", arg); } return (r_ssize) floor(out); } case R_TYPE_integer: { if (r_length(n) != 1) { goto invalid; } return (r_ssize) r_int_get(n, 0); } invalid: default: r_abort("`%s` must be a scalar integer or double.", arg); } } static r_obj* shared_x_env; static r_obj* shared_xy_env; static r_obj* shared_xyz_env; // This *must* be called before making any calls to the functions // provided in the library. Register this function in your init file // and `.Call()` it from your `.onLoad()` hook. r_obj* r_init_library(r_obj* ns) { if (!R_IsNamespaceEnv(ns)) { Rf_errorcall(r_null, "Can't initialise rlang library.\n" "x `ns` must be a namespace environment."); } // Local precious lists are disabled by default because rchk // requires the base precious list and we don't want to // double-preserve. Still enable it on CI to get that part of the // code tested. _r_use_local_precious_list = getenv("RLIB_USE_LOCAL_PRECIOUS_LIST") || getenv("CI"); // Need to be first r_init_library_vendor(); // Needed for xxh used in `r_preserve()` r_init_library_globals_syms(); r_init_library_obj(ns); r_init_library_globals(ns); r_init_rlang_ns_env(); r_init_library_arg(); r_init_library_call(); r_init_library_cnd(); r_init_library_dyn_array(); r_init_library_env(); r_init_library_eval(); r_init_library_fn(); r_init_library_quo(); r_init_library_session(); r_init_library_sym(); r_init_library_stack(); shared_x_env = r_parse_eval("new.env(hash = FALSE, parent = baseenv(), size = 1L)", r_envs.base); r_preserve(shared_x_env); shared_xy_env = r_parse_eval("new.env(hash = FALSE, parent = baseenv(), size = 1L)", r_envs.base); r_preserve(shared_xy_env); shared_xyz_env = r_parse_eval("new.env(hash = FALSE, parent = baseenv(), size = 1L)", r_envs.base); r_preserve(shared_xyz_env); // Return a SEXP so the init function can be called from R return r_null; } bool _r_use_local_precious_list = false; vctrs/src/rlang/debug.h0000644000176200001440000000030014465445266014555 0ustar liggesusers#ifndef RLANG_DEBUG_H #define RLANG_DEBUG_H #define r_printf Rprintf void r_sexp_inspect(r_obj* x); void r_browse(r_obj* x); void r_browse_at(r_obj* env); void r_dbg_str(r_obj* x); #endif vctrs/src/rlang/decl/0000755000176200001440000000000014465445266014234 5ustar liggesusersvctrs/src/rlang/decl/env-decl.h0000644000176200001440000000122514465445266016102 0ustar liggesusersr_obj* eval_with_x(r_obj* call, r_obj* x); r_obj* eval_with_xy(r_obj* call, r_obj* x, r_obj* y); r_obj* eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z); #if R_VERSION < R_Version(4, 1, 0) static r_obj* new_env_call; static r_obj* new_env__parent_node; static r_obj* new_env__size_node; #endif static r_obj* exists_call; static r_obj* remove_call; static r_obj* poke_lazy_call; static r_obj* poke_lazy_value_node; static r_obj* env2list_call; static r_obj* list2env_call; #if R_VERSION < R_Version(4, 0, 0) static r_obj* env_as_list_compat(r_obj* env, r_obj* out); #endif static void env_coalesce_plain(r_obj* env, r_obj* from, r_obj* nms); vctrs/src/rlang/decl/dict-decl.h0000644000176200001440000000062614465445266016241 0ustar liggesusersstatic r_obj* dict_find_node_info(struct r_dict* dict, r_obj* key, r_ssize* hash, r_obj** parent); static r_obj* dict_find_node(struct r_dict* dict, r_obj* key); static void dict_push(struct r_dict* p_dict, r_ssize hash, r_obj* parent, r_obj* key, r_obj* value); vctrs/src/rlang/decl/df-decl.h0000644000176200001440000000016214465445266015702 0ustar liggesusersstatic void init_compact_rownames(r_obj* x, r_ssize n_rows); static r_obj* new_compact_rownames(r_ssize n_rows); vctrs/src/rlang/decl/walk-decl.h0000644000176200001440000000163114465445266016251 0ustar liggesusersstatic inline enum sexp_iterator_type sexp_iterator_type(enum r_type type, r_obj* x); static inline r_obj* sexp_node_attrib(enum r_type type, r_obj* x); static inline r_obj* sexp_node_car(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel); static inline r_obj* sexp_node_cdr(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel); static inline r_obj* sexp_node_tag(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel); static inline void init_incoming_stack_info(struct sexp_stack_info* p_info, enum sexp_iterator_type it_type, bool has_attrib); static bool sexp_next_incoming(struct r_sexp_iterator* p_it, struct sexp_stack_info* p_info); vctrs/src/rlang/decl/obj-decl.h0000644000176200001440000000023314465445266016062 0ustar liggesusersstatic r_obj* new_precious_stack(r_obj* x); static int push_precious(r_obj* stack); static int pop_precious(r_obj* stack); static r_obj* as_label_call; vctrs/src/rlang/decl/stack-decl.h0000644000176200001440000000016414465445266016420 0ustar liggesusers// From env.c r_obj* rlang_ns_get(const char* name); static r_obj* peek_frame_call; static r_obj* caller_env_call; vctrs/src/rlang/decl/dyn-list-of-decl.h0000644000176200001440000000024414465445266017457 0ustar liggesusersstatic bool reserve_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt); static void reserve_move(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt); vctrs/src/rlang/decl/cnd-decl.h0000644000176200001440000000003714465445266016056 0ustar liggesusersstatic r_obj* cnd_signal_call; vctrs/src/rlang/walk.c0000644000176200001440000003036514465445266014436 0ustar liggesusers#include #include "walk.h" #define SEXP_STACK_INIT_SIZE 256 enum sexp_iterator_type { SEXP_ITERATOR_TYPE_node, SEXP_ITERATOR_TYPE_pointer, SEXP_ITERATOR_TYPE_vector, SEXP_ITERATOR_TYPE_atomic }; enum sexp_iterator_state { SEXP_ITERATOR_STATE_done, SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_tag, SEXP_ITERATOR_STATE_car, SEXP_ITERATOR_STATE_cdr, SEXP_ITERATOR_STATE_elt }; struct sexp_stack_info { r_obj* x; enum r_type type; const enum sexp_iterator_state* p_state; r_obj* const * v_arr; r_obj* const * v_arr_end; int depth; r_obj* parent; enum r_sexp_it_relation rel; enum r_sexp_it_direction dir; }; #include "decl/walk-decl.h" static const enum sexp_iterator_state node_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_tag, SEXP_ITERATOR_STATE_car, SEXP_ITERATOR_STATE_cdr, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state pointer_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_tag, SEXP_ITERATOR_STATE_cdr, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state vector_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_elt, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state structure_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state done_state[] = { SEXP_ITERATOR_STATE_done }; struct r_sexp_iterator* r_new_sexp_iterator(r_obj* root) { r_obj* shelter = KEEP(r_alloc_list(2)); r_obj* it = r_alloc_raw(sizeof(struct r_sexp_iterator)); r_list_poke(shelter, 0, it); struct r_sexp_iterator* p_it = r_raw_begin(it); struct r_dyn_array* p_stack = r_new_dyn_array(sizeof(struct sexp_stack_info), SEXP_STACK_INIT_SIZE); r_list_poke(shelter, 1, p_stack->shelter); enum r_type type = r_typeof(root); enum sexp_iterator_type it_type = sexp_iterator_type(type, root); bool has_attrib = sexp_node_attrib(type, root) != r_null; struct sexp_stack_info root_info = { .x = root, .type = type, .depth = -1, .parent = r_null, .rel = R_SEXP_IT_RELATION_root }; if (it_type == SEXP_ITERATOR_TYPE_atomic && !has_attrib) { root_info.p_state = NULL; root_info.dir = R_SEXP_IT_DIRECTION_leaf; } else { init_incoming_stack_info(&root_info, it_type, has_attrib); } r_dyn_push_back(p_stack, &root_info); *p_it = (struct r_sexp_iterator) { .shelter = shelter, .p_stack = p_stack, .x = r_null, .parent = r_null, }; FREE(1); return p_it; } /* * An incoming node has a state indicating which edge we're at. An * outgoing node just need to be visited again and then popped. A * leaf node is just visited once and then popped. */ bool r_sexp_next(struct r_sexp_iterator* p_it) { struct r_dyn_array* p_stack = p_it->p_stack; if (!p_stack->count) { return false; } struct sexp_stack_info* p_info = (struct sexp_stack_info*) r_dyn_last(p_stack); if (p_it->skip_incoming) { p_it->skip_incoming = false; if (p_it->dir == R_SEXP_IT_DIRECTION_incoming) { r_dyn_pop_back(p_stack); return r_sexp_next(p_it); } } // In the normal case, if we push an "incoming" node on the stack it // means that we have already visited it and we are now visiting its // children. The root node is signalled with a depth of -1 so it can // be visited first before being visited as an incoming node. bool root = (p_info->depth == -1); if (!root && p_info->dir == R_SEXP_IT_DIRECTION_incoming) { return sexp_next_incoming(p_it, p_info); } r_ssize i = -1; if (p_info->v_arr) { i = p_info->v_arr_end - p_info->v_arr; } p_it->x = p_info->x; p_it->type = p_info->type; p_it->depth = p_info->depth; p_it->parent = p_info->parent; p_it->rel = p_info->rel; p_it->i = i; p_it->dir = p_info->dir; if (root) { ++p_it->depth; ++p_info->depth; // Incoming visit for the root node if (p_it->dir == R_SEXP_IT_DIRECTION_incoming) { return true; } } r_dyn_pop_back(p_stack); return true; } static bool sexp_next_incoming(struct r_sexp_iterator* p_it, struct sexp_stack_info* p_info) { enum sexp_iterator_state state = *p_info->p_state; r_obj* x = p_info->x; enum r_type type = p_info->type; struct sexp_stack_info child = { 0 }; child.parent = x; child.depth = p_info->depth + 1; switch (state) { case SEXP_ITERATOR_STATE_attrib: child.x = r_attrib(x); child.rel = R_SEXP_IT_RELATION_attrib; break; case SEXP_ITERATOR_STATE_elt: child.x = *p_info->v_arr; child.rel = R_SEXP_IT_RELATION_list_elt; break; case SEXP_ITERATOR_STATE_tag: child.x = sexp_node_tag(type, x, &child.rel); break; case SEXP_ITERATOR_STATE_car: child.x = sexp_node_car(type, x, &child.rel); break; case SEXP_ITERATOR_STATE_cdr: child.x = sexp_node_cdr(type, x, &child.rel); break; case SEXP_ITERATOR_STATE_done: r_stop_unreachable(); } child.type = r_typeof(child.x); bool has_attrib = sexp_node_attrib(child.type, child.x) != r_null; enum sexp_iterator_type it_type = sexp_iterator_type(child.type, child.x); if (it_type == SEXP_ITERATOR_TYPE_atomic && !has_attrib) { child.p_state = NULL; child.dir = R_SEXP_IT_DIRECTION_leaf; } else { init_incoming_stack_info(&child, it_type, has_attrib); // Push incoming node on the stack so it can be visited again, // either to descend its children or to visit it again on the // outgoing trip r_dyn_push_back(p_it->p_stack, &child); } // Bump state for next iteration if (state == SEXP_ITERATOR_STATE_elt) { ++p_info->v_arr; if (p_info->v_arr == p_info->v_arr_end) { p_info->p_state = done_state; } } else { ++p_info->p_state; } // Flip incoming to outgoing if we're done visiting children after // this iteration. We don't leave a done node on the stack because // that would break the invariant that there are remaining nodes to // visit when `n > 0` and that the stack can be popped. if (*p_info->p_state == SEXP_ITERATOR_STATE_done) { p_info->dir = R_SEXP_IT_DIRECTION_outgoing; } r_ssize i = -1; if (child.v_arr) { i = child.v_arr_end - child.v_arr; } p_it->x = child.x; p_it->type = child.type; p_it->depth = child.depth; p_it->parent = child.parent; p_it->rel = child.rel; p_it->i = i; p_it->dir = child.dir; return true; } static inline void init_incoming_stack_info(struct sexp_stack_info* p_info, enum sexp_iterator_type it_type, bool has_attrib) { p_info->dir = R_SEXP_IT_DIRECTION_incoming; switch (it_type) { case SEXP_ITERATOR_TYPE_atomic: p_info->p_state = structure_states; break; case SEXP_ITERATOR_TYPE_node: p_info->p_state = node_states + !has_attrib; break; case SEXP_ITERATOR_TYPE_pointer: p_info->p_state = pointer_states + !has_attrib; break; case SEXP_ITERATOR_TYPE_vector: p_info->v_arr = r_vec_cbegin(p_info->x); p_info->v_arr_end = p_info->v_arr + r_length(p_info->x); p_info->p_state = vector_states + !has_attrib; break; } } static inline enum sexp_iterator_type sexp_iterator_type(enum r_type type, r_obj* x) { switch (type) { case R_TYPE_closure: case R_TYPE_environment: case R_TYPE_promise: case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: return SEXP_ITERATOR_TYPE_node; case R_TYPE_pointer: return SEXP_ITERATOR_TYPE_pointer; case R_TYPE_list: case R_TYPE_expression: case R_TYPE_character: if (r_length(x)) { return SEXP_ITERATOR_TYPE_vector; } else { return SEXP_ITERATOR_TYPE_atomic; } default: return SEXP_ITERATOR_TYPE_atomic; } } static inline r_obj* sexp_node_attrib(enum r_type type, r_obj* x) { // Strings have private data stored in attributes if (type == R_TYPE_string) { return r_null; } else { return ATTRIB(x); } } static inline r_obj* sexp_node_car(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel) { switch (type) { case R_TYPE_closure: *p_rel = R_SEXP_IT_RELATION_function_fmls; return FORMALS(x); case R_TYPE_environment: *p_rel = R_SEXP_IT_RELATION_environment_frame; return FRAME(x); case R_TYPE_promise: *p_rel = R_SEXP_IT_RELATION_promise_value; return PRVALUE(x); case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: *p_rel = R_SEXP_IT_RELATION_node_car; return CAR(x); case R_TYPE_pointer: default: *p_rel = -1; return r_null; } } static inline r_obj* sexp_node_cdr(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel) { switch (type) { case R_TYPE_closure: *p_rel = R_SEXP_IT_RELATION_function_body; return BODY(x); case R_TYPE_environment: *p_rel = R_SEXP_IT_RELATION_environment_enclos; return ENCLOS(x); case R_TYPE_promise: *p_rel = R_SEXP_IT_RELATION_promise_expr; return PREXPR(x); case R_TYPE_pointer: *p_rel = R_SEXP_IT_RELATION_pointer_prot; return EXTPTR_PROT(x); case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: *p_rel = R_SEXP_IT_RELATION_node_cdr; return CDR(x); default: *p_rel = -1; return r_null; } } static inline r_obj* sexp_node_tag(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel) { switch (type) { case R_TYPE_closure: *p_rel = R_SEXP_IT_RELATION_function_env; return CLOENV(x); case R_TYPE_environment: *p_rel = R_SEXP_IT_RELATION_environment_hashtab; return HASHTAB(x); case R_TYPE_promise: *p_rel = R_SEXP_IT_RELATION_promise_env; return PRENV(x); case R_TYPE_pointer: *p_rel = R_SEXP_IT_RELATION_pointer_tag; return EXTPTR_TAG(x); case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: *p_rel = R_SEXP_IT_RELATION_node_tag; return TAG(x); default: *p_rel = -1; return r_null; } } const char* r_sexp_it_direction_as_c_string(enum r_sexp_it_direction dir) { switch (dir) { case R_SEXP_IT_DIRECTION_leaf: return "leaf"; case R_SEXP_IT_DIRECTION_incoming: return "incoming"; case R_SEXP_IT_DIRECTION_outgoing: return "outgoing"; default: r_stop_unreachable(); } } const char* r_sexp_it_relation_as_c_string(enum r_sexp_it_relation rel) { switch (rel) { case R_SEXP_IT_RELATION_root: return "root"; case R_SEXP_IT_RELATION_attrib: return "attrib"; case R_SEXP_IT_RELATION_node_car: return "node_car"; case R_SEXP_IT_RELATION_node_cdr: return "node_cdr"; case R_SEXP_IT_RELATION_node_tag: return "node_tag"; case R_SEXP_IT_RELATION_symbol_string: return "symbol_string"; case R_SEXP_IT_RELATION_symbol_value: return "symbol_value"; case R_SEXP_IT_RELATION_symbol_internal: return "symbol_internal"; case R_SEXP_IT_RELATION_function_fmls: return "function_fmls"; case R_SEXP_IT_RELATION_function_body: return "function_body"; case R_SEXP_IT_RELATION_function_env: return "function_env"; case R_SEXP_IT_RELATION_environment_frame: return "environment_frame"; case R_SEXP_IT_RELATION_environment_enclos: return "environment_enclos"; case R_SEXP_IT_RELATION_environment_hashtab: return "environment_hashtab"; case R_SEXP_IT_RELATION_promise_value: return "promise_value"; case R_SEXP_IT_RELATION_promise_expr: return "promise_expr"; case R_SEXP_IT_RELATION_promise_env: return "promise_env"; case R_SEXP_IT_RELATION_pointer_prot: return "pointer_prot"; case R_SEXP_IT_RELATION_pointer_tag: return "pointer_tag"; case R_SEXP_IT_RELATION_list_elt: return "list_elt"; case R_SEXP_IT_RELATION_character_elt: return "character_elt"; case R_SEXP_IT_RELATION_expression_elt: return "expression_elt"; case R_SEXP_IT_RELATION_none: r_stop_internal("r_sexp_it_relation_as_c_string", "Found `R_SEXP_IT_RELATION_none`."); default: r_stop_unreachable(); } } const char* r_sexp_it_raw_relation_as_c_string(enum r_sexp_it_raw_relation rel) { switch (rel) { case R_SEXP_IT_RAW_RELATION_root: return "root"; case R_SEXP_IT_RAW_RELATION_attrib: return "attrib"; case R_SEXP_IT_RAW_RELATION_node_car: return "node_car"; case R_SEXP_IT_RAW_RELATION_node_cdr: return "node_cdr"; case R_SEXP_IT_RAW_RELATION_node_tag: return "node_tag"; case R_SEXP_IT_RAW_RELATION_vector_elt: return "vector_elt"; default: r_stop_unreachable(); } } vctrs/src/rlang/fn.c0000644000176200001440000000161414465445266014076 0ustar liggesusers#include "rlang.h" r_obj* rlang_formula_formals = NULL; r_obj* r_as_function(r_obj* x, const char* arg) { switch (r_typeof(x)) { case R_TYPE_closure: case R_TYPE_builtin: case R_TYPE_special: return x; case R_TYPE_call: if (r_node_car(x) == r_syms.tilde && r_node_cddr(x) == r_null) { r_obj* env = r_attrib_get(x, r_syms.dot_environment); if (env == r_null) { r_abort("Can't transform formula to function because it doesn't have an environment."); } return r_new_function(rlang_formula_formals, r_node_cadr(x), env); } // else fallthrough; default: r_abort("Can't convert `%s` to a function", arg); } } void r_init_library_fn(void) { const char* formals_code = "formals(function(..., .x = ..1, .y = ..2, . = ..1) NULL)"; rlang_formula_formals = r_parse_eval(formals_code, r_envs.base); r_preserve_global(rlang_formula_formals); } vctrs/src/rlang/call.h0000644000176200001440000000060714465445266014414 0ustar liggesusers#ifndef RLANG_LANG_H #define RLANG_LANG_H #include "node.h" #define r_new_call Rf_lcons #define r_call Rf_lang1 #define r_call2 Rf_lang2 #define r_call3 Rf_lang3 #define r_call4 Rf_lang4 #define r_call5 Rf_lang5 bool r_is_call(r_obj* x, const char* name); bool r_is_call_any(r_obj* x, const char** names, int n); r_obj* r_expr_protect(r_obj* x); r_obj* r_call_clone(r_obj* x); #endif vctrs/src/rlang/session.h0000644000176200001440000000022214465445266015155 0ustar liggesusers#ifndef RLANG_SESSION_H #define RLANG_SESSION_H bool r_is_installed(const char* pkg); bool r_has_colour(void); r_obj* r_getppid(void); #endif vctrs/src/rlang/arg.h0000644000176200001440000000035214465445266014247 0ustar liggesusers#ifndef RLANG_ARG_H #define RLANG_ARG_H extern int (*r_arg_match)(r_obj* arg, r_obj* values, struct r_lazy error_arg, struct r_lazy error_call); #endif vctrs/src/rlang/globals.h0000644000176200001440000000353414465445266015126 0ustar liggesusers#ifndef RLANG_GLOBALS_H #define RLANG_GLOBALS_H struct r_globals { r_obj* empty_lgl; r_obj* empty_int; r_obj* empty_dbl; r_obj* empty_cpl; r_obj* empty_raw; r_obj* empty_chr; r_obj* empty_list; int na_lgl; int na_int; double na_dbl; r_complex na_cpl; r_obj* na_str; }; struct r_globals_chrs { r_obj* empty_string; r_obj* full; }; struct r_globals_classes { r_obj* data_frame; r_obj* tibble; }; struct r_globals_strs { r_obj* dots; r_obj* condition; r_obj* empty; r_obj* error; r_obj* interrupt; r_obj* message; r_obj* na; r_obj* warning; }; struct r_globals_syms { r_obj* abort; r_obj* arg; r_obj* brace; r_obj* brackets; r_obj* brackets2; r_obj* call; // `_` is required to avoid conflicts with the C++ keyword `class`. // See https://github.com/r-lib/rlang/pull/1359 for details. r_obj* class_; r_obj* condition; r_obj* dots; r_obj* dot_environment; r_obj* dot_fn; r_obj* dot_x; r_obj* dot_y; r_obj* error; r_obj* error_arg; r_obj* error_call; r_obj* error_call_flag; r_obj* expr; r_obj* function; r_obj* interrupt; r_obj* message; r_obj* missing; r_obj* names; r_obj* options; r_obj* colon2; r_obj* colon3; r_obj* srcfile; r_obj* srcref; r_obj* dim; r_obj* dim_names; r_obj* row_names; r_obj* stack_overflow_error; r_obj* tilde; r_obj* unbound; r_obj* w; r_obj* warning; r_obj* wholeSrcref; r_obj* x; r_obj* y; r_obj* z; }; struct r_globals_envs { r_obj* empty; r_obj* base; r_obj* global; r_obj* ns; // The namespace of the embedding package }; extern struct r_globals r_globals; extern struct r_globals_chrs r_chrs; extern struct r_globals_classes r_classes; extern struct r_globals_strs r_strs; extern struct r_globals_syms r_syms; extern struct r_globals_envs r_envs; extern r_obj* r_true; extern r_obj* r_false; #endif vctrs/src/rlang/dyn-array.h0000644000176200001440000001177514465445266015417 0ustar liggesusers#ifndef RLANG_DYN_ARRAY_H #define RLANG_DYN_ARRAY_H #include "vec.h" struct r_dyn_array { r_obj* shelter; r_ssize count; r_ssize capacity; int growth_factor; r_obj* data; void* v_data; const void* v_data_const; // private: enum r_type type; r_ssize elt_byte_size; void (*barrier_set)(r_obj* x, r_ssize i, r_obj* value); }; struct r_dyn_array* r_new_dyn_vector(enum r_type type, r_ssize capacity); struct r_dyn_array* r_new_dyn_array(r_ssize elt_byte_size, r_ssize capacity); void r_dyn_resize(struct r_dyn_array* p_arr, r_ssize capacity); void r_dyn_push_back(struct r_dyn_array* p_arr, const void* p_elt); r_obj* r_dyn_unwrap(struct r_dyn_array* p_arr); static inline void* r_dyn_pointer(struct r_dyn_array* p_arr, r_ssize i) { if (p_arr->barrier_set) { r_abort("Can't take mutable pointer of barrier vector."); } r_ssize offset = i * p_arr->elt_byte_size; return ((unsigned char*) p_arr->v_data) + offset; } static inline void* r_dyn_begin(struct r_dyn_array* p_arr) { return r_dyn_pointer(p_arr, 0); } static inline void* r_dyn_last(struct r_dyn_array* p_arr) { return r_dyn_pointer(p_arr, p_arr->count - 1); } static inline void* r_dyn_end(struct r_dyn_array* p_arr) { return r_dyn_pointer(p_arr, p_arr->count); } static inline const void* r_dyn_cpointer(struct r_dyn_array* p_arr, r_ssize i) { r_ssize offset = i * p_arr->elt_byte_size; return ((const unsigned char*) p_arr->v_data_const) + offset; } static inline const void* r_dyn_cbegin(struct r_dyn_array* p_arr) { return r_dyn_cpointer(p_arr, 0); } static inline const void* r_dyn_clast(struct r_dyn_array* p_arr) { return r_dyn_cpointer(p_arr, p_arr->count - 1); } static inline const void* r_dyn_cend(struct r_dyn_array* p_arr) { return r_dyn_cpointer(p_arr, p_arr->count); } #define R_DYN_GET(TYPE, X, I) (*((TYPE*) r_dyn_pointer((X), (I)))) #define R_DYN_POKE(TYPE, X, I, VAL) (*((TYPE*) r_dyn_pointer((X), (I))) = (VAL)) static inline int r_dyn_lgl_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const int*) p_vec->v_data_const)[i]; } static inline int r_dyn_int_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const int*) p_vec->v_data_const)[i]; } static inline double r_dyn_dbl_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const double*) p_vec->v_data_const)[i]; } static inline r_complex r_dyn_cpl_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const r_complex*) p_vec->v_data_const)[i]; } static inline char r_dyn_raw_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const char*) p_vec->v_data_const)[i]; } static inline r_obj* r_dyn_chr_get(struct r_dyn_array* p_vec, r_ssize i) { return ((r_obj* const *) p_vec->v_data_const)[i]; } static inline r_obj* r_dyn_list_get(struct r_dyn_array* p_vec, r_ssize i) { return ((r_obj* const *) p_vec->v_data_const)[i]; } static inline void r_dyn_lgl_poke(struct r_dyn_array* p_vec, r_ssize i, int value) { ((int*) p_vec->v_data)[i] = value; } static inline void r_dyn_int_poke(struct r_dyn_array* p_vec, r_ssize i, int value) { ((int*) p_vec->v_data)[i] = value; } static inline void r_dyn_dbl_poke(struct r_dyn_array* p_vec, r_ssize i, double value) { ((double*) p_vec->v_data)[i] = value; } static inline void r_dyn_cpl_poke(struct r_dyn_array* p_vec, r_ssize i, r_complex value) { ((r_complex*) p_vec->v_data)[i] = value; } static inline void r_dyn_raw_poke(struct r_dyn_array* p_vec, r_ssize i, char value) { ((char*) p_vec->v_data)[i] = value; } static inline void r_dyn_chr_poke(struct r_dyn_array* p_vec, r_ssize i, r_obj* value) { r_chr_poke(p_vec->data, i, value); } static inline void r_dyn_list_poke(struct r_dyn_array* p_vec, r_ssize i, r_obj* value) { r_list_poke(p_vec->data, i, value); } static inline void* const * r_dyn_pop_back(struct r_dyn_array* p_arr) { void* const * out = (void* const *) r_dyn_clast(p_arr); --p_arr->count; return out; } static inline r_ssize r__dyn_increment(struct r_dyn_array* p_arr) { r_ssize loc = p_arr->count++; if (p_arr->count > p_arr->capacity) { r_ssize new_capacity = r_ssize_mult(p_arr->capacity, p_arr->growth_factor); r_dyn_resize(p_arr, new_capacity); } return loc; } static inline void r_dyn_lgl_push_back(struct r_dyn_array* p_vec, int elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_lgl_poke(p_vec, loc, elt); } static inline void r_dyn_int_push_back(struct r_dyn_array* p_vec, int elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_int_poke(p_vec, loc, elt); } static inline void r_dyn_dbl_push_back(struct r_dyn_array* p_vec, double elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_dbl_poke(p_vec, loc, elt); } static inline void r_dyn_cpl_push_back(struct r_dyn_array* p_vec, r_complex elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_cpl_poke(p_vec, loc, elt); } static inline void r_dyn_list_push_back(struct r_dyn_array* p_vec, r_obj* elt) { KEEP(elt); r_ssize loc = r__dyn_increment(p_vec); r_dyn_list_poke(p_vec, loc, elt); FREE(1); } #endif vctrs/src/rlang/df.c0000644000176200001440000000272514465445266014070 0ustar liggesusers#include "rlang.h" #include "decl/df-decl.h" r_obj* r_alloc_df_list(r_ssize n_rows, r_obj* names, const enum r_type* v_types, r_ssize types_size) { r_obj* out = KEEP(r_alloc_list(types_size)); if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector."); } if (r_length(names) != types_size) { r_abort("`names` must match the number of columns."); } r_attrib_push(out, r_syms.names, names); for (r_ssize i = 0; i < types_size; ++i) { // A nil type stands for no column allocation enum r_type type = v_types[i]; if (type != R_TYPE_null) { r_obj* col = r_alloc_vector(type, n_rows); r_list_poke(out, i, col); } } FREE(1); return out; } void r_init_data_frame(r_obj* x, r_ssize n_rows) { init_compact_rownames(x, n_rows); r_attrib_poke(x, r_syms.class_, r_classes.data_frame); } void r_init_tibble(r_obj* x, r_ssize n_rows) { r_init_data_frame(x, n_rows); r_attrib_poke(x, r_syms.class_, r_classes.tibble); } static void init_compact_rownames(r_obj* x, r_ssize n_rows) { r_obj* rn = KEEP(new_compact_rownames(n_rows)); r_attrib_poke(x, r_syms.row_names, rn); FREE(1); } static r_obj* new_compact_rownames(r_ssize n_rows) { if (n_rows <= 0) { return r_globals.empty_int; } r_obj* out = r_alloc_integer(2); int* p_out = r_int_begin(out); p_out[0] = r_globals.na_int; p_out[1] = -n_rows; return out; } vctrs/src/rlang/dict.h0000644000176200001440000000235514465445266014426 0ustar liggesusers#ifndef RLANG_DICT_H #define RLANG_DICT_H /** * This is a simple hash table of `r_obj*`. It is structured like R * environments and uses xxhash for hashing. */ struct r_dict { r_obj* shelter; /* private: */ r_obj* buckets; r_obj* const * p_buckets; r_ssize n_buckets; r_ssize n_entries; // For testing collisions bool prevent_resize; }; struct r_dict* r_new_dict(r_ssize size); r_obj* r_dict_poke(struct r_dict* p_dict, r_obj* key, r_obj* value); bool r_dict_put(struct r_dict* p_dict, r_obj* key, r_obj* value); bool r_dict_del(struct r_dict* p_dict, r_obj* key); bool r_dict_has(struct r_dict* p_dict, r_obj* key); r_obj* r_dict_get(struct r_dict* p_dict, r_obj* key); r_obj* r_dict_get0(struct r_dict* p_dict, r_obj* key); // Pass a negative size to resize by the default growth factor void r_dict_resize(struct r_dict* p_dict, r_ssize size); r_obj* r_dict_as_df_list(struct r_dict* p_dict); r_obj* r_dict_as_list(struct r_dict* p_dict); struct r_dict_iterator { r_obj* shelter; r_obj* key; r_obj* value; /* private: */ r_ssize i; r_ssize n; r_obj* const * v_buckets; r_obj* node; }; struct r_dict_iterator* r_new_dict_iterator(struct r_dict* p_dict); bool r_dict_next(struct r_dict_iterator* p_it); #endif vctrs/src/rlang/export.c0000644000176200001440000000114514465445266015013 0ustar liggesusers#include "rlang.h" #include "export.h" #include #if (defined(R_VERSION) && R_VERSION < R_Version(3, 4, 0)) r_obj* R_MakeExternalPtrFn(DL_FUNC p, r_obj* tag, r_obj* prot) { fn_ptr ptr; ptr.fn = p; return R_MakeExternalPtr(ptr.p, tag, prot); } DL_FUNC R_ExternalPtrAddrFn(r_obj* s) { fn_ptr ptr; ptr.p = EXTPTR_PTR(s); return ptr.fn; } #endif r_obj* rlang_namespace(const char* ns) { r_obj* ns_string = KEEP(Rf_mkString(ns)); r_obj* call = KEEP(r_sym("getNamespace")); call = KEEP(Rf_lang2(call, ns_string)); r_obj* ns_env = r_eval(call, R_BaseEnv); FREE(3); return ns_env; } vctrs/src/rlang/c-utils.c0000644000176200001440000000103114465445266015044 0ustar liggesusers#include void* r_shelter_deref(r_obj* x) { enum r_type type = r_typeof(x); switch (type) { case R_TYPE_list: if (r_length(x) < 1) { r_abort("Shelter must have at least one element"); } x = r_list_get(x, 0); type = r_typeof(x); break; case R_TYPE_pairlist: x = r_node_car(x); type = r_typeof(x); break; case R_TYPE_raw: break; default: r_stop_unimplemented_type(type); } if (type != R_TYPE_raw) { r_stop_unexpected_type(type); } return r_raw_begin(x); } vctrs/src/rlang/vendor.c0000644000176200001440000000032114465445266014762 0ustar liggesusers#include "rlang.h" uint64_t (*r_xxh3_64bits)(const void*, size_t); void r_init_library_vendor(void) { r_xxh3_64bits = (uint64_t (*)(const void*, size_t)) r_peek_c_callable("rlang", "rlang_xxh3_64bits"); } vctrs/src/rlang/rlang.h0000644000176200001440000000362214465445266014604 0ustar liggesusers#ifndef RLANG_RLANG_H #define RLANG_RLANG_H /* * `_ISOC99_SOURCE` is defined to avoid warnings on Windows UCRT builds where * usage of `PRIx64` in Microsoft's `printf()` can generate the warnings shown * below. Defining this before including `` forces usage of MinGW's * custom `printf()`, which is C99 compliant. * warning: unknown conversion type character 'l' in format [-Wformat] * warning: too many arguments for format [-Wformat-extra-args] * * The conventional define for this is `__USE_MINGW_ANSI_STDIO`, but according * to the thread below it is recommended to instead use a feature test macro * (such as `_ISOC99_SOURCE`) which will indirectly define the internal * `__USE_MINGW_ANSI_STDIO` macro for us. * https://osdn.net/projects/mingw/lists/archive/users/2019-January/000199.html */ #ifndef _ISOC99_SOURCE #define _ISOC99_SOURCE #endif #include #define R_NO_REMAP #include #include #include #include #include "rlang-types.h" r_obj* r_init_library(r_obj* ns); r_ssize r_arg_as_ssize(r_obj* n, const char* arg); static inline r_ssize r_as_ssize(r_obj* n) { return r_arg_as_ssize(n, "n"); } extern bool _r_use_local_precious_list; #include "obj.h" #include "globals.h" #include "altrep.h" #include "arg.h" #include "attrib.h" #include "debug.h" #include "c-utils.h" #include "call.h" #include "cnd.h" #include "dict.h" #include "df.h" #include "dyn-array.h" #include "dyn-list-of.h" #include "env.h" #include "env-binding.h" #include "eval.h" #include "export.h" #include "fn.h" #include "formula.h" #include "node.h" #include "parse.h" #include "quo.h" #include "session.h" #include "stack.h" #include "state.h" #include "sym.h" #include "vec.h" #include "vec-chr.h" #include "vec-lgl.h" #include "vendor.h" #include "walk.h" #define r_abort_lazy_call(LAZY, ...) \ r_abort_call(KEEP(r_lazy_eval(LAZY)), __VA_ARGS__) #endif vctrs/src/rlang/vec-lgl.h0000644000176200001440000000023514465445266015027 0ustar liggesusers#ifndef RLANG_VECTOR_LGL_H #define RLANG_VECTOR_LGL_H r_ssize r_lgl_sum(r_obj* x, bool na_true); r_obj* r_lgl_which(r_obj* x, bool na_propagate); #endif vctrs/src/rlang/vec-chr.c0000644000176200001440000000356014465445266015024 0ustar liggesusers#include #include "rlang.h" r_ssize r_chr_detect_index(r_obj* chr, const char* c_string) { r_ssize n = r_length(chr); for (r_ssize i = 0; i != n; ++i) { const char* cur = CHAR(r_chr_get(chr, i)); if (strcmp(cur, c_string) == 0) { return i; } } return -1; } bool r_chr_has(r_obj* chr, const char* c_string) { r_ssize idx = r_chr_detect_index(chr, c_string); return idx >= 0; } bool r_chr_has_any(r_obj* chr, const char** c_strings) { r_ssize n = r_length(chr); for (r_ssize i = 0; i != n; ++i) { const char* cur = CHAR(r_chr_get(chr, i)); while (*c_strings) { if (strcmp(cur, *c_strings) == 0) { return true; } ++c_strings; } } return false; } void r_chr_fill(r_obj* chr, r_obj* value, r_ssize n) { for (r_ssize i = 0; i < n; ++i) { r_chr_poke(chr, i, value); } } static void validate_chr_setter(r_obj* chr, r_obj* r_string) { if (r_typeof(chr) != R_TYPE_character) { r_abort("`chr` must be a character vector"); } if (r_typeof(r_string) != R_TYPE_string) { r_abort("`r_string` must be an internal R string"); } } // From rlang/vec.c void r_vec_poke_n(r_obj* x, r_ssize offset, r_obj* y, r_ssize from, r_ssize n); r_obj* chr_prepend(r_obj* chr, r_obj* r_string) { if (chr == r_null) { return r_str_as_character(r_string); } else { validate_chr_setter(chr, r_string); } int n = r_length(chr); r_obj* out = KEEP(r_alloc_character(n + 1)); r_vec_poke_n(out, 1, chr, 0, n); r_chr_poke(out, 0, r_string); FREE(1); return out; } r_obj* chr_append(r_obj* chr, r_obj* r_str) { if (chr == r_null) { return r_str_as_character(r_str); } validate_chr_setter(chr, r_str); int n = r_length(chr); r_obj* out = KEEP(r_alloc_character(n + 1)); r_vec_poke_n(out, 0, chr, 0, n); r_chr_poke(out, n, r_str); FREE(1); return out; } vctrs/src/rlang/arg.c0000644000176200001440000000051714465445266014245 0ustar liggesusers#include "rlang.h" int (*r_arg_match)(r_obj* arg, r_obj* values, struct r_lazy error_arg, struct r_lazy error_call); void r_init_library_arg(void) { r_arg_match = (int (*)(r_obj*, r_obj*, struct r_lazy, struct r_lazy)) r_peek_c_callable("rlang", "rlang_arg_match_2"); } vctrs/src/rlang/obj.h0000644000176200001440000000537114465445266014256 0ustar liggesusers#ifndef RLANG_OBJ_H #define RLANG_OBJ_H #define r_missing_arg R_MissingArg static inline r_ssize r_length(r_obj* x) { return Rf_xlength(x); } static inline enum r_type r_typeof(r_obj* x) { return (enum r_type) TYPEOF(x); } void _r_preserve(r_obj* x); void _r_unpreserve(r_obj* x); static r_unused r_obj* _r_placeholder = NULL; #define r_preserve(X) \ (R_PreserveObject(_r_placeholder = X), \ (_r_preserve)(_r_placeholder), \ (void) NULL) #define r_unpreserve(X) \ (R_ReleaseObject(_r_placeholder = X), \ (_r_unpreserve)(_r_placeholder), \ (void) NULL) static inline void r_mark_shared(r_obj* x) { MARK_NOT_MUTABLE(x); } static inline bool r_is_shared(r_obj* x) { return MAYBE_REFERENCED(x); } static inline void _r_preserve_global(r_obj* x) { (_r_preserve)(x); r_mark_shared(x); } #define r_preserve_global(X) \ (R_PreserveObject(_r_placeholder = X), \ (_r_preserve_global)(_r_placeholder), \ (void) NULL) static inline void r_mark_object(r_obj* x) { SET_OBJECT(x, 1); } static inline void r_unmark_object(r_obj* x) { SET_OBJECT(x, 0); } static inline bool r_is_object(r_obj* x) { return OBJECT(x); } static inline bool r_inherits(r_obj* x, const char* tag) { return Rf_inherits(x, tag); } static inline r_obj* r_copy(r_obj* x) { return Rf_duplicate(x); } static inline r_obj* r_clone(r_obj* x) { return Rf_shallow_duplicate(x); } static inline r_obj* r_clone_shared(r_obj* x) { return r_is_shared(x) ? r_clone(x) : x; } // These also clone names r_obj* r_vec_clone(r_obj* x); r_obj* r_vec_clone_shared(r_obj* x); static inline r_obj* r_poke_type(r_obj* x, enum r_type type) { SET_TYPEOF(x, type); return x; } static inline r_obj* r_type_as_string(enum r_type type) { return Rf_type2str(type); } static inline r_obj* r_type_as_character(enum r_type type) { r_obj* str = KEEP(r_type_as_string(type)); r_obj* out = Rf_ScalarString(str); return FREE(1), out; } static inline const char* r_type_as_c_string(enum r_type type) { return CHAR(Rf_type2str(type)); } static inline enum r_type r_c_str_as_r_type(const char* type) { return (enum r_type) Rf_str2type(type); } enum r_type r_chr_as_r_type(r_obj* type); static inline bool r_is_symbolic(r_obj* x) { return r_typeof(x) == LANGSXP || r_typeof(x) == SYMSXP; } static inline void r_obj_print(r_obj* x) { Rf_PrintValue(x); } static inline bool r_is_identical(r_obj* x, r_obj* y) { // 16 corresponds to base::identical()'s defaults // Do we need less conservative versions? return R_compute_identical(x, y, 16); } r_obj* r_obj_address(r_obj* x); extern r_obj* (*r_obj_encode_utf8)(r_obj* x); r_obj* r_as_label(r_obj* x); #endif vctrs/src/rlang/altrep.h0000644000176200001440000000043214465445266014764 0ustar liggesusers#ifndef RLANG_ALTREP_H #define RLANG_ALTREP_H #if (R_VERSION < R_Version(3, 5, 0)) || \ (defined(_WIN32) && R_VERSION == R_Version(3, 5, 0)) # define R_HAS_ALTREP 0 #else # define R_HAS_ALTREP 1 #endif #if !R_HAS_ALTREP # define ALTREP(x) false #endif #endif vctrs/src/rlang/call.c0000644000176200001440000000223214465445266014403 0ustar liggesusers#include "rlang.h" static r_obj* quote_prim = NULL; bool r_is_call(r_obj* x, const char* name) { if (r_typeof(x) != LANGSXP) { return false; } else { return name == NULL || r_is_symbol(r_node_car(x), name); } } bool r_is_call_any(r_obj* x, const char** names, int n) { if (r_typeof(x) != LANGSXP) { return false; } else { return r_is_symbol_any(r_node_car(x), names, n); } } r_obj* r_expr_protect(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_symbol: case R_TYPE_call: case R_TYPE_promise: return r_call2(quote_prim, x); default: return x; } } static inline bool is_node(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_call: case R_TYPE_pairlist: return true; default: return false; } } r_obj* r_call_clone(r_obj* x) { if (!is_node(x)) { r_abort("Input must be a call."); } x = KEEP(r_clone(x)); r_obj* rest = x; while (rest != r_null) { r_obj* head = r_node_car(rest); if (is_node(head)) { r_node_poke_car(rest, r_call_clone(head)); } rest = r_node_cdr(rest); } FREE(1); return x; } void r_init_library_call(void) { quote_prim = r_base_ns_get("quote"); } vctrs/src/rlang/sym.c0000644000176200001440000000263114465445266014303 0ustar liggesusers#include #include "rlang.h" // In old R versions `as.name()` does not translate to native which // loses the encoding. This symbol constructor always translates. r_obj* r_new_symbol(r_obj* x, int* err) { switch (r_typeof(x)) { case SYMSXP: return x; case R_TYPE_character: if (r_length(x) == 1) { const char* string = Rf_translateChar(r_chr_get(x, 0)); return r_sym(string); } // else fallthrough default: { if (err) { *err = -1; return r_null; } else { const char* type = r_type_as_c_string(r_typeof(x)); r_abort("Can't create a symbol with a %s", type); } }} } bool r_is_symbol(r_obj* x, const char* string) { if (r_typeof(x) != SYMSXP) { return false; } else { return strcmp(CHAR(PRINTNAME(x)), string) == 0; } } bool r_is_symbol_any(r_obj* x, const char** strings, int n) { if (r_typeof(x) != SYMSXP) { return false; } const char* name = CHAR(PRINTNAME(x)); for (int i = 0; i < n; ++i) { if (strcmp(name, strings[i]) == 0) { return true; } } return false; } r_obj* (*r_sym_as_utf8_character)(r_obj* x) = NULL; r_obj* (*r_sym_as_utf8_string)(r_obj* x) = NULL; void r_init_library_sym(void) { r_sym_as_utf8_character = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_sym_as_character"); r_sym_as_utf8_string = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_sym_as_string"); } vctrs/src/subscript.h0000644000176200001440000000327214362266120014377 0ustar liggesusers#ifndef VCTRS_SUBSCRIPT_H #define VCTRS_SUBSCRIPT_H #include "vctrs-core.h" #include "utils.h" enum subscript_action { SUBSCRIPT_ACTION_DEFAULT = 0, SUBSCRIPT_ACTION_SUBSET, SUBSCRIPT_ACTION_EXTRACT, SUBSCRIPT_ACTION_ASSIGN, SUBSCRIPT_ACTION_RENAME, SUBSCRIPT_ACTION_REMOVE, SUBSCRIPT_ACTION_NEGATE }; enum subscript_type_action { SUBSCRIPT_TYPE_ACTION_CAST = 0, SUBSCRIPT_TYPE_ACTION_ERROR }; struct subscript_opts { enum subscript_action action; enum subscript_type_action logical; enum subscript_type_action numeric; enum subscript_type_action character; struct vctrs_arg* subscript_arg; struct r_lazy call; }; static inline struct subscript_opts new_subscript_opts_assign(void) { return (struct subscript_opts) { .action = SUBSCRIPT_ACTION_ASSIGN }; } SEXP vec_as_subscript_opts(SEXP subscript, const struct subscript_opts* opts, ERR* err); static inline SEXP subscript_type_action_chr(enum subscript_type_action action) { switch (action) { case SUBSCRIPT_TYPE_ACTION_CAST: return chrs_cast; case SUBSCRIPT_TYPE_ACTION_ERROR: return chrs_error; } never_reached("subscript_type_action_chr"); } static inline SEXP get_opts_action(const struct subscript_opts* opts) { switch (opts->action) { case SUBSCRIPT_ACTION_DEFAULT: return R_NilValue; case SUBSCRIPT_ACTION_SUBSET: return chrs_subset; case SUBSCRIPT_ACTION_EXTRACT: return chrs_extract; case SUBSCRIPT_ACTION_ASSIGN: return chrs_assign; case SUBSCRIPT_ACTION_RENAME: return chrs_rename; case SUBSCRIPT_ACTION_REMOVE: return chrs_remove; case SUBSCRIPT_ACTION_NEGATE: return chrs_negate; } never_reached("get_opts_action"); } #endif vctrs/src/match-joint.h0000644000176200001440000000050614350637775014612 0ustar liggesusers#ifndef VCTRS_MATCH_JOINT_H #define VCTRS_MATCH_JOINT_H #include "vctrs-core.h" r_obj* vec_joint_xtfrm(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, bool nan_distinct, r_obj* chr_proxy_collate); #endif vctrs/src/translate.h0000644000176200001440000000171314315060310014343 0ustar liggesusers#ifndef VCTRS_TRANSLATE_H #define VCTRS_TRANSLATE_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- // Vector translation SEXP vec_normalize_encoding(SEXP x); // ----------------------------------------------------------------------------- // Low-level string translation #define MASK_ASCII 8 #define MASK_UTF8 64 // The first 128 values are ASCII, and are the same regardless of the encoding. // Otherwise we enforce UTF-8. static inline bool string_is_ascii_or_utf8(SEXP x) { const int levels = LEVELS(x); return (levels & MASK_ASCII) || (levels & MASK_UTF8); } #undef MASK_ASCII #undef MASK_UTF8 static inline SEXP string_normalize(SEXP x) { return Rf_mkCharCE(Rf_translateCharUTF8(x), CE_UTF8); } static inline bool string_is_normalized(SEXP x) { return string_is_ascii_or_utf8(x) || (x == NA_STRING); } // ----------------------------------------------------------------------------- #endif vctrs/src/poly-op.c0000644000176200001440000002022014315060310013732 0ustar liggesusers#include "vctrs.h" #include "decl/poly-op-decl.h" poly_binary_int_fn* poly_p_equal_na_equal(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_equal_na_equal; case VCTRS_TYPE_logical: return p_lgl_equal_na_equal; case VCTRS_TYPE_integer: return p_int_equal_na_equal; case VCTRS_TYPE_double: return p_dbl_equal_na_equal; case VCTRS_TYPE_complex: return p_cpl_equal_na_equal; case VCTRS_TYPE_character: return p_chr_equal_na_equal; case VCTRS_TYPE_raw: return p_raw_equal_na_equal; case VCTRS_TYPE_list: return p_list_equal_na_equal; case VCTRS_TYPE_dataframe: return p_df_equal_na_equal; default: stop_unimplemented_vctrs_type("poly_p_equal_na_equal", type); } } static int p_df_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; struct poly_df_data* p_y_data = (struct poly_df_data*) p_y; r_ssize n_col = p_x_data->n_col; if (n_col != p_y_data->n_col) { r_stop_internal("`x` and `y` must have the same number of columns."); } enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_x_col_ptr = p_x_data->v_col_ptr; const void** v_y_col_ptr = p_y_data->v_col_ptr; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { if (!p_equal_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j, v_col_type[col])) { return false; } } return true; } poly_binary_int_fn* poly_p_compare_na_equal(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_compare_na_equal; case VCTRS_TYPE_logical: return p_lgl_compare_na_equal; case VCTRS_TYPE_integer: return p_int_compare_na_equal; case VCTRS_TYPE_double: return p_dbl_compare_na_equal; case VCTRS_TYPE_complex: return p_cpl_compare_na_equal; case VCTRS_TYPE_character: return p_chr_compare_na_equal; case VCTRS_TYPE_raw: return p_raw_compare_na_equal; case VCTRS_TYPE_list: return p_list_compare_na_equal; case VCTRS_TYPE_dataframe: return p_df_compare_na_equal; default: stop_unimplemented_vctrs_type("poly_p_compare_na_equal", type); } } static int p_df_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; struct poly_df_data* p_y_data = (struct poly_df_data*) p_y; r_ssize n_col = p_x_data->n_col; if (n_col != p_y_data->n_col) { r_stop_internal("`x` and `y` must have the same number of columns."); } enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_x_col_ptr = p_x_data->v_col_ptr; const void** v_y_col_ptr = p_y_data->v_col_ptr; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { const int cmp = p_compare_na_equal( v_x_col_ptr[col], i, v_y_col_ptr[col], j, v_col_type[col] ); if (cmp != 0) { return cmp; } } return 0; } poly_unary_bool_fn* poly_p_is_missing(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_is_missing; case VCTRS_TYPE_logical: return p_lgl_is_missing; case VCTRS_TYPE_integer: return p_int_is_missing; case VCTRS_TYPE_double: return p_dbl_is_missing; case VCTRS_TYPE_complex: return p_cpl_is_missing; case VCTRS_TYPE_character: return p_chr_is_missing; case VCTRS_TYPE_raw: return p_raw_is_missing; case VCTRS_TYPE_list: return p_list_is_missing; case VCTRS_TYPE_dataframe: return p_df_is_missing; default: stop_unimplemented_vctrs_type("poly_p_is_missing", type); } } static bool p_df_is_missing(const void* p_x, r_ssize i) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_col_ptr = p_x_data->v_col_ptr; r_ssize n_col = p_x_data->n_col; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { if (!p_is_missing(v_col_ptr[col], i, v_col_type[col])) { return false; } } return true; } poly_unary_bool_fn* poly_p_is_incomplete(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_is_missing; case VCTRS_TYPE_logical: return p_lgl_is_missing; case VCTRS_TYPE_integer: return p_int_is_missing; case VCTRS_TYPE_double: return p_dbl_is_missing; case VCTRS_TYPE_complex: return p_cpl_is_missing; case VCTRS_TYPE_character: return p_chr_is_missing; case VCTRS_TYPE_raw: return p_raw_is_missing; case VCTRS_TYPE_list: return p_list_is_missing; case VCTRS_TYPE_dataframe: return p_df_is_incomplete; default: stop_unimplemented_vctrs_type("poly_p_is_incomplete", type); } } static bool p_df_is_incomplete(const void* p_x, r_ssize i) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_col_ptr = p_x_data->v_col_ptr; r_ssize n_col = p_x_data->n_col; // df-cols should already be flattened, // so we only need missingness of each column, not completeness for (r_ssize col = 0; col < n_col; ++col) { if (p_is_missing(v_col_ptr[col], i, v_col_type[col])) { return true; } } return false; } struct poly_vec* new_poly_vec(r_obj* proxy, enum vctrs_type type) { r_obj* shelter = KEEP(r_alloc_list(2)); r_obj* self = r_alloc_raw(sizeof(struct poly_vec)); r_list_poke(shelter, 0, self); r_list_poke(shelter, 1, proxy); struct poly_vec* p_poly_vec = r_raw_begin(self); p_poly_vec->shelter = shelter; p_poly_vec->vec = proxy; switch (type) { case VCTRS_TYPE_null: init_nil_poly_vec(p_poly_vec); break; case VCTRS_TYPE_logical: init_lgl_poly_vec(p_poly_vec); break; case VCTRS_TYPE_integer: init_int_poly_vec(p_poly_vec); break; case VCTRS_TYPE_double: init_dbl_poly_vec(p_poly_vec); break; case VCTRS_TYPE_complex: init_cpl_poly_vec(p_poly_vec); break; case VCTRS_TYPE_character: init_chr_poly_vec(p_poly_vec); break; case VCTRS_TYPE_raw: init_raw_poly_vec(p_poly_vec); break; case VCTRS_TYPE_list: init_list_poly_vec(p_poly_vec); break; case VCTRS_TYPE_dataframe: init_df_poly_vec(p_poly_vec); break; default: stop_unimplemented_vctrs_type("new_poly_vec", type); } FREE(1); return p_poly_vec; } static void init_nil_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = NULL; } static void init_lgl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_lgl_cbegin(p_poly_vec->vec); } static void init_int_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_int_cbegin(p_poly_vec->vec); } static void init_dbl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_dbl_cbegin(p_poly_vec->vec); } static void init_cpl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_cpl_cbegin(p_poly_vec->vec); } static void init_chr_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_chr_cbegin(p_poly_vec->vec); } static void init_raw_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_raw_cbegin(p_poly_vec->vec); } static void init_list_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_list_cbegin(p_poly_vec->vec); } static void init_df_poly_vec(struct poly_vec* p_poly_vec) { r_obj* df = p_poly_vec->vec; r_ssize n_col = r_length(df); r_obj* shelter = KEEP(r_alloc_list(4)); r_list_poke(shelter, 0, p_poly_vec->shelter); p_poly_vec->shelter = shelter; r_obj* data_handle = KEEP(r_alloc_raw(sizeof(struct poly_df_data))); struct poly_df_data* data = (struct poly_df_data*) r_raw_begin(data_handle); r_list_poke(shelter, 1, data_handle); r_obj* col_type_handle = KEEP(r_alloc_raw(n_col * sizeof(enum vctrs_type))); enum vctrs_type* v_col_type = (enum vctrs_type*) r_raw_begin(col_type_handle); r_list_poke(shelter, 2, col_type_handle); r_obj* col_ptr_handle = KEEP(r_alloc_raw(n_col * sizeof(void*))); const void** v_col_ptr = (const void**) r_raw_begin(col_ptr_handle); r_list_poke(shelter, 3, col_ptr_handle); for (r_ssize i = 0; i < n_col; ++i) { r_obj* col = r_list_get(df, i); v_col_type[i] = vec_proxy_typeof(col); v_col_ptr[i] = r_vec_cbegin(col); } data->v_col_type = v_col_type; data->v_col_ptr = v_col_ptr; data->n_col = n_col; p_poly_vec->p_vec = (void*) data; FREE(4); } vctrs/src/dictionary.h0000644000176200001440000000351514350144500014520 0ustar liggesusers#ifndef VCTRS_DICTIONARY_H #define VCTRS_DICTIONARY_H #include "vctrs-core.h" #include "poly-op.h" #define DICT_EMPTY -1 // The dictionary structure is a little peculiar since R has no notion of // a scalar, so the `key`s are indexes into vector `x`. This means we can // only store values from a single vector, but we can still lookup using // another vector, provided that they're of the same type (which is ensured // at the R-level). struct dictionary { SEXP protect; poly_binary_int_fn* p_equal_na_equal; poly_unary_bool_fn* p_is_incomplete; struct poly_vec* p_poly_vec; uint32_t* hash; R_len_t* key; uint32_t size; uint32_t used; }; /** * Initialise a dictionary * * - `new_dictionary()` creates a dictionary and precaches the hashes for * each element of `x`. * * - `new_dictionary_partial()` creates a dictionary with precached hashes * as well, but does not allocate an array of keys. This is useful * for finding a key in another dictionary with `dict_hash_with()`. */ struct dictionary_opts { bool partial; bool na_equal; }; struct dictionary* new_dictionary(SEXP x); struct dictionary* new_dictionary_partial(SEXP x); #define PROTECT_DICT(d, n) do { \ struct dictionary* d_ = (d); \ KEEP(d_->p_poly_vec->shelter); \ KEEP(d_->protect); \ *(n) += 2; \ } while(0) /** * Find key hash for a vector element * * - `dict_hash_scalar()` returns the key hash for element `i`. * * - `dict_hash_with()` finds the hash for indexing into `d` with * element `i` of `x`. */ uint32_t dict_hash_scalar(struct dictionary* d, R_len_t i); uint32_t dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i); bool dict_is_incomplete(struct dictionary* d, R_len_t i); void dict_put(struct dictionary* d, uint32_t k, R_len_t i); #endif vctrs/src/typeof2-s3.h0000644000176200001440000001051614315060310014262 0ustar liggesusers#ifndef VCTRS_TYPEOF2_S3_H #define VCTRS_TYPEOF2_S3_H #include "vctrs-core.h" enum vctrs_type2_s3 { VCTRS_TYPE2_S3_null_bare_factor, VCTRS_TYPE2_S3_null_bare_ordered, VCTRS_TYPE2_S3_null_bare_date, VCTRS_TYPE2_S3_null_bare_posixct, VCTRS_TYPE2_S3_null_bare_posixlt, VCTRS_TYPE2_S3_null_bare_tibble, VCTRS_TYPE2_S3_null_unknown, VCTRS_TYPE2_S3_unspecified_bare_factor, VCTRS_TYPE2_S3_unspecified_bare_ordered, VCTRS_TYPE2_S3_unspecified_bare_date, VCTRS_TYPE2_S3_unspecified_bare_posixct, VCTRS_TYPE2_S3_unspecified_bare_posixlt, VCTRS_TYPE2_S3_unspecified_bare_tibble, VCTRS_TYPE2_S3_unspecified_unknown, VCTRS_TYPE2_S3_logical_bare_factor, VCTRS_TYPE2_S3_logical_bare_ordered, VCTRS_TYPE2_S3_logical_bare_date, VCTRS_TYPE2_S3_logical_bare_posixct, VCTRS_TYPE2_S3_logical_bare_posixlt, VCTRS_TYPE2_S3_logical_bare_tibble, VCTRS_TYPE2_S3_logical_unknown, VCTRS_TYPE2_S3_integer_bare_factor, VCTRS_TYPE2_S3_integer_bare_ordered, VCTRS_TYPE2_S3_integer_bare_date, VCTRS_TYPE2_S3_integer_bare_posixct, VCTRS_TYPE2_S3_integer_bare_posixlt, VCTRS_TYPE2_S3_integer_bare_tibble, VCTRS_TYPE2_S3_integer_unknown, VCTRS_TYPE2_S3_double_bare_factor, VCTRS_TYPE2_S3_double_bare_ordered, VCTRS_TYPE2_S3_double_bare_date, VCTRS_TYPE2_S3_double_bare_posixct, VCTRS_TYPE2_S3_double_bare_posixlt, VCTRS_TYPE2_S3_double_bare_tibble, VCTRS_TYPE2_S3_double_unknown, VCTRS_TYPE2_S3_complex_bare_factor, VCTRS_TYPE2_S3_complex_bare_ordered, VCTRS_TYPE2_S3_complex_bare_date, VCTRS_TYPE2_S3_complex_bare_posixct, VCTRS_TYPE2_S3_complex_bare_posixlt, VCTRS_TYPE2_S3_complex_bare_tibble, VCTRS_TYPE2_S3_complex_unknown, VCTRS_TYPE2_S3_character_bare_factor, VCTRS_TYPE2_S3_character_bare_ordered, VCTRS_TYPE2_S3_character_bare_date, VCTRS_TYPE2_S3_character_bare_posixct, VCTRS_TYPE2_S3_character_bare_posixlt, VCTRS_TYPE2_S3_character_bare_tibble, VCTRS_TYPE2_S3_character_unknown, VCTRS_TYPE2_S3_raw_bare_factor, VCTRS_TYPE2_S3_raw_bare_ordered, VCTRS_TYPE2_S3_raw_bare_date, VCTRS_TYPE2_S3_raw_bare_posixct, VCTRS_TYPE2_S3_raw_bare_posixlt, VCTRS_TYPE2_S3_raw_bare_tibble, VCTRS_TYPE2_S3_raw_unknown, VCTRS_TYPE2_S3_list_bare_factor, VCTRS_TYPE2_S3_list_bare_ordered, VCTRS_TYPE2_S3_list_bare_date, VCTRS_TYPE2_S3_list_bare_posixct, VCTRS_TYPE2_S3_list_bare_posixlt, VCTRS_TYPE2_S3_list_bare_tibble, VCTRS_TYPE2_S3_list_unknown, VCTRS_TYPE2_S3_dataframe_bare_factor, VCTRS_TYPE2_S3_dataframe_bare_ordered, VCTRS_TYPE2_S3_dataframe_bare_date, VCTRS_TYPE2_S3_dataframe_bare_posixct, VCTRS_TYPE2_S3_dataframe_bare_posixlt, VCTRS_TYPE2_S3_dataframe_bare_tibble, VCTRS_TYPE2_S3_dataframe_unknown, VCTRS_TYPE2_S3_scalar_bare_factor, VCTRS_TYPE2_S3_scalar_bare_ordered, VCTRS_TYPE2_S3_scalar_bare_date, VCTRS_TYPE2_S3_scalar_bare_posixct, VCTRS_TYPE2_S3_scalar_bare_posixlt, VCTRS_TYPE2_S3_scalar_bare_tibble, VCTRS_TYPE2_S3_scalar_unknown, VCTRS_TYPE2_S3_bare_factor_bare_factor, VCTRS_TYPE2_S3_bare_factor_bare_ordered, VCTRS_TYPE2_S3_bare_factor_bare_date, VCTRS_TYPE2_S3_bare_factor_bare_posixct, VCTRS_TYPE2_S3_bare_factor_bare_posixlt, VCTRS_TYPE2_S3_bare_factor_bare_tibble, VCTRS_TYPE2_S3_bare_factor_unknown, VCTRS_TYPE2_S3_bare_ordered_bare_ordered, VCTRS_TYPE2_S3_bare_ordered_bare_date, VCTRS_TYPE2_S3_bare_ordered_bare_posixct, VCTRS_TYPE2_S3_bare_ordered_bare_posixlt, VCTRS_TYPE2_S3_bare_ordered_bare_tibble, VCTRS_TYPE2_S3_bare_ordered_unknown, VCTRS_TYPE2_S3_bare_date_bare_date, VCTRS_TYPE2_S3_bare_date_bare_posixct, VCTRS_TYPE2_S3_bare_date_bare_posixlt, VCTRS_TYPE2_S3_bare_date_bare_tibble, VCTRS_TYPE2_S3_bare_date_unknown, VCTRS_TYPE2_S3_bare_posixct_bare_posixct, VCTRS_TYPE2_S3_bare_posixct_bare_posixlt, VCTRS_TYPE2_S3_bare_posixct_bare_tibble, VCTRS_TYPE2_S3_bare_posixct_unknown, VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt, VCTRS_TYPE2_S3_bare_posixlt_bare_tibble, VCTRS_TYPE2_S3_bare_posixlt_unknown, VCTRS_TYPE2_S3_bare_tibble_bare_tibble, VCTRS_TYPE2_S3_bare_tibble_unknown, VCTRS_TYPE2_S3_unknown_unknown }; enum vctrs_type2_s3 vec_typeof2_s3_impl(r_obj* x, r_obj* y, enum vctrs_type type_x, enum vctrs_type type_y, int* left); #endif vctrs/src/runs.c0000644000176200001440000003351614373202700013343 0ustar liggesusers#include "vctrs.h" #include "vec-bool.h" enum vctrs_run_bound { VCTRS_RUN_BOUND_start = 0, VCTRS_RUN_BOUND_end = 1 }; #include "decl/runs-decl.h" // ----------------------------------------------------------------------------- r_obj* ffi_vec_detect_run_bounds(r_obj* x, r_obj* ffi_start, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; const enum vctrs_run_bound which = as_run_bound(ffi_start, error_call); return vec_detect_run_bounds(x, which, error_call); } static r_obj* vec_detect_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call) { struct r_vector_bool* p_where = vec_detect_run_bounds_bool(x, which, error_call); KEEP(p_where->shelter); const bool* v_where = r_vector_bool_cbegin(p_where); const r_ssize size = r_vector_bool_length(p_where); r_obj* out = KEEP(r_alloc_logical(size)); int* v_out = r_lgl_begin(out); for (r_ssize i = 0; i < size; ++i) { v_out[i] = v_where[i]; } FREE(2); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_locate_run_bounds(r_obj* x, r_obj* ffi_start, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; const enum vctrs_run_bound which = as_run_bound(ffi_start, error_call); return vec_locate_run_bounds(x, which, error_call); } static r_obj* vec_locate_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call) { struct r_vector_bool* p_where = vec_detect_run_bounds_bool(x, which, error_call); KEEP(p_where->shelter); const bool* v_where = r_vector_bool_cbegin(p_where); const r_ssize size = r_vector_bool_length(p_where); r_ssize n = 0; for (r_ssize i = 0; i < size; ++i) { n += v_where[i]; } r_obj* out = KEEP(r_alloc_integer(n)); int* v_out = r_int_begin(out); r_ssize j = compute_iter_loc(n, which); r_ssize loc = compute_iter_loc(size, which); const r_ssize step = compute_iter_step(which); // First/last value are always the final bound locations // (depending on `which`), so `j` won't ever write to OOB locations for (r_ssize i = 0; i < size; ++i) { v_out[j] = loc + 1; j += step * v_where[loc]; loc += step; } FREE(2); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_identify_runs(r_obj* x, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; return vec_identify_runs(x, error_call); } r_obj* vec_identify_runs(r_obj* x, struct r_lazy error_call) { struct r_vector_bool* p_starts = vec_detect_run_bounds_bool(x, VCTRS_RUN_BOUND_start, error_call); KEEP(p_starts->shelter); const bool* v_starts = r_vector_bool_cbegin(p_starts); const r_ssize size = r_vector_bool_length(p_starts); r_obj* out = KEEP(r_alloc_integer(size)); int* v_out = r_int_begin(out); int n = 0; for (r_ssize i = 0; i < size; ++i) { n += v_starts[i]; v_out[i] = n; } r_obj* ffi_n = r_int(n); r_attrib_poke(out, syms_n, ffi_n); FREE(2); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_run_sizes(r_obj* x, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; return vec_run_sizes(x, error_call); } r_obj* vec_run_sizes(r_obj* x, struct r_lazy error_call) { struct r_vector_bool* p_ends = vec_detect_run_bounds_bool(x, VCTRS_RUN_BOUND_end, error_call); KEEP(p_ends->shelter); const bool* v_ends = r_vector_bool_cbegin(p_ends); const r_ssize size = r_vector_bool_length(p_ends); r_ssize n = 0; for (r_ssize i = 0; i < size; ++i) { n += v_ends[i]; } r_obj* out = KEEP(r_alloc_integer(n)); int* v_out = r_int_begin(out); r_ssize j = 0; int count = 1; for (r_ssize i = 0; i < size; ++i) { const bool end = v_ends[i]; v_out[j] = count; j += end; count = !end * count + 1; } FREE(2); return out; } // ----------------------------------------------------------------------------- /* * Like `vec_detect_run_bounds()`, but returns a less memory intensive * boolean array as an `r_vector_bool`. */ static struct r_vector_bool* vec_detect_run_bounds_bool(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call) { obj_check_vector(x, vec_args.x, error_call); r_obj* proxy = KEEP(vec_proxy_equal(x)); proxy = KEEP(vec_normalize_encoding(proxy)); const r_ssize size = vec_size(proxy); struct r_vector_bool* p_out = r_new_vector_bool(size); KEEP(p_out->shelter); bool* v_out = r_vector_bool_begin(p_out); const enum vctrs_type type = vec_proxy_typeof(proxy); switch (type) { case VCTRS_TYPE_logical: lgl_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_integer: int_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_double: dbl_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_complex: cpl_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_character: chr_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_raw: raw_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_list: list_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_dataframe: df_detect_run_bounds_bool(proxy, size, which, v_out); break; default: stop_unimplemented_vctrs_type("vec_detect_run_bounds_bool", type); } FREE(3); return p_out; } // ----------------------------------------------------------------------------- // Algorithm for "ends" is same as "starts", we just iterate in reverse #define VEC_DETECT_RUN_BOUNDS_BOOL(CTYPE, CBEGIN, EQUAL_NA_EQUAL) { \ if (size == 0) { \ /* Algorithm requires at least 1 value */ \ return; \ } \ \ CTYPE const* v_x = CBEGIN(x); \ \ r_ssize loc = compute_iter_loc(size, which); \ const r_ssize step = compute_iter_step(which); \ \ /* Handle first/last value */ \ CTYPE ref = v_x[loc]; \ v_out[loc] = true; \ loc += step; \ \ for (r_ssize i = 1; i < size; ++i) { \ CTYPE const elt = v_x[loc]; \ v_out[loc] = !EQUAL_NA_EQUAL(elt, ref); \ ref = elt; \ loc += step; \ } \ } static inline void lgl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(int, r_lgl_cbegin, lgl_equal_na_equal); } static inline void int_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(int, r_int_cbegin, int_equal_na_equal); } static inline void dbl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(double, r_dbl_cbegin, dbl_equal_na_equal); } static inline void cpl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(Rcomplex, r_cpl_cbegin, cpl_equal_na_equal); } static inline void chr_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_chr_cbegin, chr_equal_na_equal); } static inline void raw_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(Rbyte, r_raw_cbegin, raw_equal_na_equal); } static inline void list_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_list_cbegin, list_equal_na_equal); } #undef VEC_DETECT_RUN_BOUNDS_BOOL // ----------------------------------------------------------------------------- static inline void df_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { if (size == 0) { // Algorithm requires at least 1 value return; } const r_ssize n_col = r_length(x); r_obj* const* v_x = r_list_cbegin(x); r_ssize loc = compute_iter_loc(size, which); const r_ssize step = compute_iter_step(which); // `v_out` will eventually be `true` if we are in a run // continuation, and `false` if we are starting a new run. v_out[loc] = false; loc += step; for (r_ssize i = 1; i < size; ++i) { v_out[loc] = true; loc += step; } for (r_ssize i = 0; i < n_col; ++i) { col_detect_run_bounds_bool(v_x[i], size, which, v_out); } // Now invert to detect the bounds for (r_ssize i = 0; i < size; ++i) { v_out[i] = !v_out[i]; } } static inline void col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_integer: int_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_double: dbl_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_complex: cpl_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_character: chr_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_raw: raw_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_list: list_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should be flattened."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); default: r_abort("Unimplemented type."); } } #define VEC_COL_DETECT_RUN_BOUNDS_BOOL(CTYPE, CBEGIN, EQUAL_NA_EQUAL) { \ CTYPE const* v_x = CBEGIN(x); \ \ r_ssize loc = compute_iter_loc(size, which); \ const r_ssize step = compute_iter_step(which); \ \ CTYPE ref = v_x[loc]; \ loc += step; \ \ for (r_ssize i = 1; i < size; ++i) { \ CTYPE const elt = v_x[loc]; \ v_out[loc] = v_out[loc] && EQUAL_NA_EQUAL(ref, elt); \ ref = elt; \ loc += step; \ } \ } static inline void lgl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(int, r_lgl_cbegin, lgl_equal_na_equal); } static inline void int_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(int, r_int_cbegin, int_equal_na_equal); } static inline void dbl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(double, r_dbl_cbegin, dbl_equal_na_equal); } static inline void cpl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(Rcomplex, r_cpl_cbegin, cpl_equal_na_equal); } static inline void chr_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_chr_cbegin, chr_equal_na_equal); } static inline void raw_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(Rbyte, r_raw_cbegin, raw_equal_na_equal); } static inline void list_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_list_cbegin, list_equal_na_equal); } #undef VEC_COL_DETECT_RUN_BOUNDS_BOOL // ----------------------------------------------------------------------------- static inline r_ssize compute_iter_loc(r_ssize size, enum vctrs_run_bound which) { switch (which) { case VCTRS_RUN_BOUND_start: return 0; case VCTRS_RUN_BOUND_end: return size - 1; default: r_stop_internal("Unknown `which` value."); } } static inline r_ssize compute_iter_step(enum vctrs_run_bound which) { switch (which) { case VCTRS_RUN_BOUND_start: return 1; case VCTRS_RUN_BOUND_end: return -1; default: r_stop_internal("Unknown `which` value."); } } static inline enum vctrs_run_bound as_run_bound(r_obj* which, struct r_lazy error_call) { struct r_lazy error_arg = { .x = chrs_which, .env = r_null }; r_obj* values = KEEP(r_alloc_character(2)); r_chr_poke(values, 0, r_str("start")); r_chr_poke(values, 1, r_str("end")); const int match = r_arg_match(which, values, error_arg, error_call); enum vctrs_run_bound out; switch (match) { case 0: out = VCTRS_RUN_BOUND_start; break; case 1: out = VCTRS_RUN_BOUND_end; break; default: r_stop_internal("Unknown `which` value."); } FREE(1); return out; } vctrs/src/globals.h0000644000176200001440000000366614402367170014015 0ustar liggesusers#ifndef VCTRS_GLOBALS_H #define VCTRS_GLOBALS_H #include #include "rlang-dev.h" struct syms { r_obj* arg; r_obj* dot_arg; r_obj* dot_call; r_obj* dot_error_arg; r_obj* dot_error_call; r_obj* haystack_arg; r_obj* needles_arg; r_obj* recurse; r_obj* repair_arg; r_obj* times_arg; r_obj* to_arg; r_obj* value_arg; r_obj* vec_default_cast; r_obj* vec_slice_dispatch_integer64; r_obj* vec_slice_fallback; r_obj* vec_slice_fallback_integer64; r_obj* x_arg; r_obj* y_arg; }; // These structs must be in sync as their elements are defined // together by the `INIT_STRING()` macro struct strings { r_obj* AsIs; r_obj* repair; }; struct chrs { r_obj* AsIs; r_obj* repair; }; struct fns { r_obj* vec_slice_dispatch_integer64; r_obj* vec_slice_fallback; r_obj* vec_slice_fallback_integer64; }; struct vec_args { struct vctrs_arg* dot_name_repair; struct vctrs_arg* dot_ptype; struct vctrs_arg* dot_size; struct vctrs_arg* empty; struct vctrs_arg* i; struct vctrs_arg* max_fill; struct vctrs_arg* n; struct vctrs_arg* value; struct vctrs_arg* x; struct vctrs_arg* indices; struct vctrs_arg* sizes; }; struct lazy_args { struct r_lazy dot_name_repair; }; struct lazy_calls { struct r_lazy vec_assign; struct r_lazy vec_assign_params; struct r_lazy vec_assign_seq; struct r_lazy vec_init; struct r_lazy vec_ptype_finalise; struct r_lazy vec_recycle; struct r_lazy vec_recycle_common; struct r_lazy vec_size; struct r_lazy vec_size_common; struct r_lazy list_all_size; }; extern struct syms syms; extern struct strings strings; extern struct chrs chrs; extern struct fns fns; extern struct vec_args vec_args; extern struct lazy_args lazy_args; extern struct lazy_calls lazy_calls; extern r_obj* vctrs_shared_empty_date; extern r_obj* vctrs_shared_empty_uns; extern Rcomplex vctrs_shared_na_cpl; extern r_obj* vctrs_shared_na_lgl; extern r_obj* vctrs_shared_na_list; #endif vctrs/src/altrep-rle.c0000644000176200001440000001115014532373444014424 0ustar liggesusers#include "vctrs.h" #include "altrep-rle.h" #include "altrep.h" #if (!HAS_ALTREP) #include void vctrs_init_altrep_rle(DllInfo* dll) { } SEXP altrep_rle_is_materialized(SEXP x) { Rf_error("Need R 3.5+ for Altrep support."); return R_NilValue; } SEXP altrep_rle_Make(SEXP input) { Rf_error("Need R 3.5+ for Altrep support."); return R_NilValue; } #else // Initialised at load time R_altrep_class_t altrep_rle_class; SEXP altrep_rle_is_materialized(SEXP x) { return Rf_ScalarLogical(R_altrep_data2(x) != R_NilValue); } SEXP altrep_rle_Make(SEXP input) { SEXP res = R_new_altrep(altrep_rle_class, input, R_NilValue); MARK_NOT_MUTABLE(res); return res; } // ALTREP methods ------------------- // The length of the object inline R_xlen_t altrep_rle_Length(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return Rf_xlength(data2); } R_xlen_t sz = 0; SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); for (R_xlen_t i = 0; i < Rf_xlength(rle); ++i) { sz += rle_p[i]; } return sz; } // What gets printed when .Internal(inspect()) is used Rboolean altrep_rle_Inspect(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)) { Rprintf("vctrs_altrep_rle (len=%" R_PRIdXLEN_T ", materialized=%s)\n", altrep_rle_Length(x), R_altrep_data2(x) != R_NilValue ? "T" : "F"); return TRUE; } // ALTSTRING methods ----------------- // the element at the index `i` SEXP altrep_rle_string_Elt(SEXP vec, R_xlen_t i) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return STRING_ELT(data2, i); } SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); SEXP nms = Rf_getAttrib(rle, Rf_install("names")); R_xlen_t idx = 0; while (i >= 0 && idx < Rf_xlength(rle)) { i -= rle_p[idx++]; } return STRING_ELT(nms, idx - 1); } R_xlen_t find_rle_index(int* rle_data, R_xlen_t i, R_xlen_t size) { R_xlen_t idx = 0; while (i >= 0 && idx < size) { i -= rle_data[idx++]; } return idx - 1; } // This is a simple implementation, a more complex one would produce a // altrep_rle object as well SEXP altrep_rle_Extract_subset(SEXP x, SEXP indx, SEXP call) { SEXP data2 = R_altrep_data2(x); // If the vector is already materialized, just fall back to the default // implementation if (data2 != R_NilValue) { return NULL; } SEXP data1 = R_altrep_data1(x); int* index_data = INTEGER(indx); R_xlen_t index_n = Rf_length(indx); int* rle_data = INTEGER(data1); R_xlen_t rle_n = Rf_length(data1); SEXP nms = PROTECT(Rf_getAttrib(data1, Rf_install("names"))); SEXP out = PROTECT(Rf_allocVector(STRSXP, index_n)); for (R_len_t i = 0; i < index_n; ++i) { int index_elt = index_data[i]; if (index_elt == NA_INTEGER) { SET_STRING_ELT(out, i, NA_STRING); continue; } --index_elt; R_xlen_t rle_idx = find_rle_index(rle_data, index_elt, rle_n); SET_STRING_ELT(out, i, STRING_ELT(nms, rle_idx)); } UNPROTECT(2); return out; } // --- Altvec SEXP altrep_rle_string_Materialize(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return data2; } R_xlen_t sz = altrep_rle_Length(vec); SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); SEXP out = PROTECT(Rf_allocVector(STRSXP, sz)); R_xlen_t idx = 0; SEXP nms = Rf_getAttrib(rle, Rf_install("names")); for (R_xlen_t i = 0; i < Rf_xlength(rle); ++i) { for (R_xlen_t j = 0; j < rle_p[i]; ++j) { SET_STRING_ELT(out, idx++, STRING_ELT(nms, i)); } } UNPROTECT(1); R_set_altrep_data2(vec, out); return out; } void* altrep_rle_Dataptr(SEXP vec, Rboolean writeable) { return STDVEC_DATAPTR(altrep_rle_string_Materialize(vec)); } const void* altrep_rle_Dataptr_or_null(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 == R_NilValue) return NULL; return STDVEC_DATAPTR(data2); } void vctrs_init_altrep_rle(DllInfo* dll) { altrep_rle_class = R_make_altstring_class("altrep_rle", "vctrs", dll); // altrep R_set_altrep_Length_method(altrep_rle_class, altrep_rle_Length); R_set_altrep_Inspect_method(altrep_rle_class, altrep_rle_Inspect); // altvec R_set_altvec_Dataptr_method(altrep_rle_class, altrep_rle_Dataptr); R_set_altvec_Dataptr_or_null_method(altrep_rle_class, altrep_rle_Dataptr_or_null); R_set_altvec_Extract_subset_method(altrep_rle_class, altrep_rle_Extract_subset); // altstring R_set_altstring_Elt_method(altrep_rle_class, altrep_rle_string_Elt); } #endif // R version >= 3.5.0 vctrs/src/type-factor.c0000644000176200001440000002277714362266120014624 0ustar liggesusers#include "vctrs.h" static SEXP levels_union(SEXP x, SEXP y); // [[ include("type-factor.h") ]] SEXP fct_ptype2(const struct ptype2_opts* opts) { SEXP x = opts->x; SEXP y = opts->y; SEXP x_levels = Rf_getAttrib(x, R_LevelsSymbol); SEXP y_levels = Rf_getAttrib(y, R_LevelsSymbol); if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_factor_levels(x, opts->p_x_arg); } if (TYPEOF(y_levels) != STRSXP) { stop_corrupt_factor_levels(y, opts->p_y_arg); } // Quick early exit for identical levels pointing to the same SEXP if (x_levels == y_levels) { return new_empty_factor(x_levels); } SEXP levels = PROTECT(levels_union(x_levels, y_levels)); SEXP out = new_empty_factor(levels); UNPROTECT(1); return out; } static bool ord_ptype2_validate(r_obj* x_levels, r_obj* y_levels, const struct ptype2_opts* p_opts) { if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_ordered_levels(p_opts->x, p_opts->p_x_arg); } if (TYPEOF(y_levels) != STRSXP) { stop_corrupt_ordered_levels(p_opts->y, p_opts->p_y_arg); } return equal_object(x_levels, y_levels); } // [[ include("type-factor.h") ]] r_obj* ord_ptype2(const struct ptype2_opts* p_opts) { r_obj* x_levels = r_attrib_get(p_opts->x, R_LevelsSymbol); r_obj* y_levels = r_attrib_get(p_opts->y, R_LevelsSymbol); if (ord_ptype2_validate(x_levels, y_levels, p_opts)) { return new_empty_ordered(x_levels); } else { return vec_ptype2_default(p_opts->x, p_opts->y, p_opts->p_x_arg, p_opts->p_y_arg, r_lazy_null, &p_opts->fallback); } } static SEXP levels_union(SEXP x, SEXP y) { SEXP args = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(args, 0, x); SET_VECTOR_ELT(args, 1, y); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = R_NilValue }; // Combine with known ptype // No name repair because this is just combining factor levels SEXP xy = PROTECT(vec_c( args, r_globals.empty_chr, R_NilValue, &name_repair_opts, vec_args.empty, r_lazy_null )); SEXP out = vec_unique(xy); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- static void init_factor(SEXP x, SEXP levels); static void init_ordered(SEXP x, SEXP levels); // [[ include("vctrs.h") ]] SEXP fct_as_character(SEXP x, struct vctrs_arg* x_arg) { SEXP levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_factor_levels(x, x_arg); } UNPROTECT(1); return Rf_asCharacterFactor(x); } // [[ include("vctrs.h") ]] SEXP ord_as_character(SEXP x, struct vctrs_arg* x_arg) { return fct_as_character(x, x_arg); } static SEXP chr_as_factor_from_self(SEXP x, bool ordered); static SEXP chr_as_factor_impl(SEXP x, SEXP levels, bool* lossy, bool ordered); // [[ include("vctrs.h") ]] SEXP chr_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg) { SEXP levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_factor_levels(to, to_arg); } SEXP out; // When `to` has no levels, it is treated as a template and the // levels come from `x` if (vec_size(levels) == 0) { out = chr_as_factor_from_self(x, false); } else { out = chr_as_factor_impl(x, levels, lossy, false); } UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP chr_as_ordered(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg) { SEXP levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_ordered_levels(to, to_arg); } SEXP out; // When `to` has no levels, it is treated as a template and the // levels come from `x` if (vec_size(levels) == 0) { out = chr_as_factor_from_self(x, true); } else { out = chr_as_factor_impl(x, levels, lossy, true); } UNPROTECT(1); return out; } static SEXP chr_as_factor_impl(SEXP x, SEXP levels, bool* lossy, bool ordered) { SEXP out = PROTECT(vec_match(x, levels)); const int* p_out = INTEGER(out); R_len_t size = vec_size(x); const SEXP* p_x = STRING_PTR_RO(x); // Detect lossy no-matches, but allow `NA` values from `x` for (R_len_t i = 0; i < size; ++i) { if (p_out[i] == NA_INTEGER && p_x[i] != NA_STRING) { *lossy = true; UNPROTECT(1); return R_NilValue; } } if (ordered) { init_ordered(out, levels); } else { init_factor(out, levels); } UNPROTECT(1); return out; } static SEXP remove_na_levels(SEXP levels); // Factor levels are added in order of appearance. // `NA` values in `x` are not considered factor levels. static SEXP chr_as_factor_from_self(SEXP x, bool ordered) { SEXP levels = PROTECT(vec_unique(x)); levels = PROTECT(remove_na_levels(levels)); // `NA` values in `x` correctly become `NA` values in the result SEXP out = PROTECT(vec_match(x, levels)); if (ordered) { init_ordered(out, levels); } else { init_factor(out, levels); } UNPROTECT(3); return out; } static SEXP remove_na_levels(SEXP levels) { R_len_t size = vec_size(levels); const SEXP* p_levels = STRING_PTR_RO(levels); // There might only ever be 1 `NA` level. // Remove it if it exists. for (R_len_t i = 0; i < size; ++i) { if (p_levels[i] == NA_STRING) { int na_loc = (i + 1) * -1; SEXP na_loc_obj = PROTECT(r_int(na_loc)); SEXP out = vec_slice(levels, na_loc_obj); UNPROTECT(1); return out; } } return levels; } static SEXP fct_as_factor_impl(SEXP x, SEXP x_levels, SEXP to_levels, bool* lossy, bool ordered); // [[ include("vctrs.h") ]] SEXP fct_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg) { SEXP x_levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); SEXP to_levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_factor_levels(x, x_arg); } if (TYPEOF(to_levels) != STRSXP) { stop_corrupt_factor_levels(to, to_arg); } SEXP out = fct_as_factor_impl(x, x_levels, to_levels, lossy, false); UNPROTECT(2); return out; } // [[ include("factor.h") ]] SEXP ord_as_ordered(const struct cast_opts* p_opts) { r_obj* x_levels = r_attrib_get(p_opts->x, R_LevelsSymbol); r_obj* y_levels = r_attrib_get(p_opts->to, R_LevelsSymbol); struct ptype2_opts ptype2_opts = cast_opts_as_ptype2_opts(p_opts); if (ord_ptype2_validate(x_levels, y_levels, &ptype2_opts)) { return p_opts->x; } else { return vec_cast_default(p_opts->x, p_opts->to, p_opts->p_x_arg, p_opts->p_to_arg, p_opts->call, &p_opts->fallback); } } static SEXP fct_as_factor_impl(SEXP x, SEXP x_levels, SEXP to_levels, bool* lossy, bool ordered) { // Early exit if levels are identical if (x_levels == to_levels) { return x; } R_len_t x_levels_size = vec_size(x_levels); R_len_t to_levels_size = vec_size(to_levels); // Early exit if `to` has no levels. In this case it is being used as // a template if (to_levels_size == 0) { return x; } // Always lossy if there are more levels in `x` than in `to` if (x_levels_size > to_levels_size) { *lossy = true; return R_NilValue; } R_len_t x_size = vec_size(x); const SEXP* p_x_levels = STRING_PTR_RO(x_levels); const SEXP* p_to_levels = STRING_PTR_RO(to_levels); bool is_contiguous_subset = true; for (R_len_t i = 0; i < x_levels_size; ++i) { if (p_x_levels[i] != p_to_levels[i]) { is_contiguous_subset = false; break; } } // No recoding required if contiguous subset. // Duplicate, strip non-factor attributes, and re-initialize with new levels. // Using `r_clone_referenced()` avoids an immediate copy using ALTREP wrappers. if (is_contiguous_subset) { SEXP out = PROTECT(r_clone_referenced(x)); SET_ATTRIB(out, R_NilValue); if (ordered) { init_ordered(out, to_levels); } else { init_factor(out, to_levels); } UNPROTECT(1); return out; } const int* p_x = INTEGER_RO(x); SEXP out = PROTECT(Rf_allocVector(INTSXP, x_size)); int* p_out = INTEGER(out); if (ordered) { init_ordered(out, to_levels); } else { init_factor(out, to_levels); } SEXP recode = PROTECT(vec_match(x_levels, to_levels)); const int* p_recode = INTEGER_RO(recode); // Detect if there are any levels in `x` that aren't in `to` for (R_len_t i = 0; i < x_levels_size; ++i) { if (p_recode[i] == NA_INTEGER) { *lossy = true; UNPROTECT(2); return R_NilValue; } } // Recode `x` int values into `to` level ordering for (R_len_t i = 0; i < x_size; ++i) { const int elt = p_x[i]; if (elt == NA_INTEGER) { p_out[i] = NA_INTEGER; continue; } p_out[i] = p_recode[elt - 1]; } UNPROTECT(2); return out; } static void init_factor(SEXP x, SEXP levels) { if (TYPEOF(x) != INTSXP) { r_stop_internal("Only integers can be made into factors."); } Rf_setAttrib(x, R_LevelsSymbol, levels); Rf_setAttrib(x, R_ClassSymbol, classes_factor); } static void init_ordered(SEXP x, SEXP levels) { if (TYPEOF(x) != INTSXP) { r_stop_internal("Only integers can be made into ordered factors."); } Rf_setAttrib(x, R_LevelsSymbol, levels); Rf_setAttrib(x, R_ClassSymbol, classes_ordered); } vctrs/src/order.h0000644000176200001440000000507114341667017013502 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_H #define VCTRS_ORDER_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- SEXP vec_order(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate); SEXP vec_order_info(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, bool chr_ordered); // ----------------------------------------------------------------------------- /* * `order` is an integer vector intended to hold the ordering vector * in `vec_order()`. It is allocated eagerly, but the initialization of its * values is done lazily. Typically, it is initialized to a 1-based sequential * ordering which is rearranged by the internal algorithm. However, for the * counting order, the initialization is not required for the first integer * column, which can result in a nice performance improvement. */ struct order { SEXP self; SEXP data; int* p_data; r_ssize size; bool initialized; }; #define PROTECT_ORDER(p_order, p_n) do { \ PROTECT((p_order)->self); \ PROTECT((p_order)->data); \ *(p_n) += 2; \ } while (0) static inline struct order* new_order(r_ssize size) { SEXP self = PROTECT(r_new_raw(sizeof(struct order))); struct order* p_order = (struct order*) RAW(self); SEXP data = PROTECT(Rf_allocVector(INTSXP, size)); int* p_data = INTEGER(data); p_order->self = self; p_order->data = data; p_order->p_data = p_data; p_order->size = size; p_order->initialized = false; UNPROTECT(2); return p_order; } static inline int* init_order(struct order* p_order) { if (p_order->initialized) { return p_order->p_data; } // Initialize `x` with sequential 1-based ordering for (r_ssize i = 0; i < p_order->size; ++i) { p_order->p_data[i] = i + 1; } p_order->initialized = true; return p_order->p_data; } // ----------------------------------------------------------------------------- #endif vctrs/src/type-tibble.c0000644000176200001440000000264514315060310014566 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" // [[ include("vctrs.h") ]] SEXP tib_ptype2(const struct ptype2_opts* opts) { SEXP out = PROTECT(df_ptype2(opts)); Rf_setAttrib(out, R_ClassSymbol, classes_tibble); UNPROTECT(1); return out; } // [[ register() ]] r_obj* ffi_tib_ptype2(r_obj* x, r_obj* y, r_obj* ffi_x_arg_, r_obj* ffi_y_arg_, r_obj* frame) { struct vctrs_arg x_arg = vec_as_arg(ffi_x_arg_); struct vctrs_arg y_arg = vec_as_arg(ffi_y_arg_); const struct ptype2_opts opts = { .x = x, .y = y, .p_x_arg = &x_arg, .p_y_arg = &y_arg, .call = { .x = r_syms.call, .env = frame } }; return tib_ptype2(&opts); } // [[ include("type-tibble.h") ]] SEXP tib_cast(const struct cast_opts* opts) { SEXP out = PROTECT(df_cast_opts(opts)); Rf_setAttrib(out, R_ClassSymbol, classes_tibble); UNPROTECT(1); return out; } // [[ register() ]] r_obj* ffi_tib_cast(r_obj* x, r_obj* to, r_obj* ffi_x_arg, r_obj* ffi_to_arg, r_obj* frame) { struct vctrs_arg x_arg = vec_as_arg(ffi_x_arg); struct vctrs_arg to_arg = vec_as_arg(ffi_to_arg); const struct cast_opts opts = { .x = x, .to = to, .p_x_arg = &x_arg, .p_to_arg = &to_arg, .call = { .x = r_syms.call, .env = frame } }; return tib_cast(&opts); } vctrs/src/altrep-rle.h0000644000176200001440000000121414276722575014441 0ustar liggesusers#ifndef ALTREP_RLE_H #define ALTREP_RLE_H #include "altrep.h" #if (HAS_ALTREP) SEXP altrep_rle_Make(SEXP input); R_xlen_t altrep_rle_Length(SEXP vec); Rboolean altrep_rle_Inspect( SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)); SEXP altrep_rle_string_Elt(SEXP vec, R_xlen_t i); SEXP altrep_rle_Extract_subset(SEXP x, SEXP indx, SEXP call); SEXP altrep_rle_string_Materialize(SEXP vec); void* altrep_rle_Dataptr(SEXP vec, Rboolean writeable); const void* altrep_rle_Dataptr_or_null(SEXP vec); void vctrs_init_altrep_rle(DllInfo* dll); extern R_altrep_class_t altrep_rle_class; #endif #endif vctrs/src/match.c0000644000176200001440000027500214511320527013450 0ustar liggesusers#include "vctrs.h" // ----------------------------------------------------------------------------- enum vctrs_multiple { VCTRS_MULTIPLE_all = 0, VCTRS_MULTIPLE_any = 1, VCTRS_MULTIPLE_first = 2, VCTRS_MULTIPLE_last = 3, // Deprecated in favor of `relationship` VCTRS_MULTIPLE_warning = 4, VCTRS_MULTIPLE_error = 5 }; enum vctrs_relationship { VCTRS_RELATIONSHIP_none = 0, VCTRS_RELATIONSHIP_one_to_one = 1, VCTRS_RELATIONSHIP_one_to_many = 2, VCTRS_RELATIONSHIP_many_to_one = 3, VCTRS_RELATIONSHIP_many_to_many = 4, VCTRS_RELATIONSHIP_warn_many_to_many = 5 }; enum vctrs_filter { VCTRS_FILTER_none = 0, VCTRS_FILTER_min = 1, VCTRS_FILTER_max = 2 }; enum vctrs_ops { VCTRS_OPS_eq = 0, VCTRS_OPS_gt = 1, VCTRS_OPS_gte = 2, VCTRS_OPS_lt = 3, VCTRS_OPS_lte = 4 }; enum vctrs_incomplete_action { VCTRS_INCOMPLETE_ACTION_compare = 0, VCTRS_INCOMPLETE_ACTION_match = 1, VCTRS_INCOMPLETE_ACTION_value = 2, VCTRS_INCOMPLETE_ACTION_drop = 3, VCTRS_INCOMPLETE_ACTION_error = 4 }; struct vctrs_incomplete { enum vctrs_incomplete_action action; int value; }; enum vctrs_no_match_action { VCTRS_NO_MATCH_ACTION_drop = 0, VCTRS_NO_MATCH_ACTION_error = 1, VCTRS_NO_MATCH_ACTION_value = 2 }; struct vctrs_no_match { enum vctrs_no_match_action action; int value; }; enum vctrs_remaining_action { VCTRS_REMAINING_ACTION_drop = 0, VCTRS_REMAINING_ACTION_error = 1, VCTRS_REMAINING_ACTION_value = 2 }; struct vctrs_remaining { enum vctrs_remaining_action action; int value; }; struct vctrs_match_bounds { r_ssize lower; r_ssize upper; }; #define SIGNAL_NO_MATCH r_globals.na_int #define SIGNAL_INCOMPLETE -1 // ----------------------------------------------------------------------------- #include "decl/match-decl.h" // ----------------------------------------------------------------------------- // [[ register() ]] r_obj* ffi_locate_matches(r_obj* needles, r_obj* haystack, r_obj* condition, r_obj* filter, r_obj* incomplete, r_obj* no_match, r_obj* remaining, r_obj* multiple, r_obj* relationship, r_obj* nan_distinct, r_obj* chr_proxy_collate, r_obj* needles_arg, r_obj* haystack_arg, r_obj* frame) { struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct r_lazy internal_call = { .x = frame, .env = r_null }; const struct vctrs_incomplete c_incomplete = parse_incomplete(incomplete, internal_call); const struct vctrs_no_match c_no_match = parse_no_match(no_match, internal_call); const struct vctrs_remaining c_remaining = parse_remaining(remaining, internal_call); const enum vctrs_multiple c_multiple = parse_multiple(multiple, internal_call); const enum vctrs_relationship c_relationship = parse_relationship(relationship, internal_call); const bool c_nan_distinct = r_arg_as_bool(nan_distinct, "nan_distinct"); struct vctrs_arg c_needles_arg = vec_as_arg(needles_arg); struct vctrs_arg c_haystack_arg = vec_as_arg(haystack_arg); return vec_locate_matches( needles, haystack, condition, filter, &c_incomplete, &c_no_match, &c_remaining, c_multiple, c_relationship, c_nan_distinct, chr_proxy_collate, &c_needles_arg, &c_haystack_arg, error_call ); } static r_obj* vec_locate_matches(r_obj* needles, r_obj* haystack, r_obj* condition, r_obj* filter, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool nan_distinct, r_obj* chr_proxy_collate, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call) { int n_prot = 0; int _; r_obj* ptype = KEEP_N(vec_ptype2_params( needles, haystack, needles_arg, haystack_arg, error_call, &_ ), &n_prot); needles = KEEP_N(vec_cast_params( needles, ptype, needles_arg, vec_args.empty, error_call, S3_FALLBACK_false ), &n_prot); haystack = KEEP_N(vec_cast_params( haystack, ptype, haystack_arg, vec_args.empty, error_call, S3_FALLBACK_false ), &n_prot); r_ssize size_needles = vec_size(needles); r_ssize size_haystack = vec_size(haystack); // Support non-data frame types by wrapping them in a 1-col data frame if (!is_data_frame(needles)) { needles = KEEP_N(r_list(needles), &n_prot); haystack = KEEP_N(r_list(haystack), &n_prot); r_obj* names = KEEP_N(r_chr("x"), &n_prot); r_attrib_poke_names(needles, names); r_attrib_poke_names(haystack, names); r_init_data_frame(needles, size_needles); r_init_data_frame(haystack, size_haystack); } r_ssize n_cols = r_length(needles); enum vctrs_ops* v_ops = (enum vctrs_ops*) R_alloc(n_cols, sizeof(enum vctrs_ops)); parse_condition(condition, n_cols, v_ops); enum vctrs_filter* v_filters = (enum vctrs_filter*) R_alloc(n_cols, sizeof(enum vctrs_filter)); parse_filter(filter, n_cols, v_filters); bool any_filters = false; for (r_ssize i = 0; i < n_cols; ++i) { if (v_filters[i] != VCTRS_FILTER_none) { any_filters = true; break; } } if (n_cols == 0) { // If there are no columns, this operation isn't well defined. r_abort_lazy_call(error_call, "Must have at least 1 column to match on."); } // Compute the locations of incomplete values per column since computing // joint ranks per column is going to replace the incomplete values with // integer ranks r_obj* needles_complete = df_detect_complete_by_col(needles, size_needles, n_cols); KEEP_N(needles_complete, &n_prot); r_obj* haystack_complete = df_detect_complete_by_col(haystack, size_haystack, n_cols); KEEP_N(haystack_complete, &n_prot); // Compute joint xtfrm to simplify each column down to an integer vector r_obj* args = KEEP_N(df_joint_xtfrm_by_col( needles, haystack, size_needles, size_haystack, n_cols, nan_distinct, chr_proxy_collate ), &n_prot); needles = r_list_get(args, 0); haystack = r_list_get(args, 1); r_obj* out = df_locate_matches( needles, haystack, needles_complete, haystack_complete, size_needles, size_haystack, incomplete, no_match, remaining, multiple, relationship, any_filters, v_filters, v_ops, needles_arg, haystack_arg, error_call ); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static r_obj* df_locate_matches(r_obj* needles, r_obj* haystack, r_obj* needles_complete, r_obj* haystack_complete, r_ssize size_needles, r_ssize size_haystack, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call) { int n_prot = 0; r_obj* o_needles = KEEP_N(vec_order( needles, chrs_asc, chrs_smallest, true, r_null ), &n_prot); const int* v_o_needles = r_int_cbegin(o_needles); r_obj* container_info = KEEP_N(compute_nesting_container_info( haystack, size_haystack, v_ops ), &n_prot); r_obj* o_haystack = r_list_get(container_info, 0); const int* v_o_haystack = r_int_cbegin(o_haystack); // Will be `integer()` if no container ids are required. // In that case, `n_containers == 1`. r_obj* container_ids = r_list_get(container_info, 1); const int* v_container_ids = r_int_cbegin(container_ids); const int n_containers = r_as_int(r_list_get(container_info, 2)); const bool any_non_equi = r_as_bool(r_list_get(container_info, 3)); // In the case of possible multiple matches that fall in separate // nesting containers, allocate ~20% extra room const r_ssize initial_capacity = (n_containers == 1) ? size_needles : r_double_as_ssize(r_ssize_as_double(size_needles) * 1.2); struct r_dyn_array* p_loc_first_match_o_haystack = r_new_dyn_vector(R_TYPE_integer, initial_capacity); KEEP_N(p_loc_first_match_o_haystack->shelter, &n_prot); { // Temporary unstable pointer int* v_loc_first_match_o_haystack = (int*) r_dyn_begin(p_loc_first_match_o_haystack); for (r_ssize i = 0; i < size_needles; ++i) { // Initialize to no match everywhere, no need to initialize extra buffer v_loc_first_match_o_haystack[i] = SIGNAL_NO_MATCH; } p_loc_first_match_o_haystack->count = size_needles; } // If we can skip, `size_match` will always be `1` const bool skip_size_match = (multiple == VCTRS_MULTIPLE_any); struct r_dyn_array* p_size_match = NULL; if (!skip_size_match) { p_size_match = r_new_dyn_vector(R_TYPE_integer, initial_capacity); KEEP_N(p_size_match->shelter, &n_prot); int* v_size_match = (int*) r_dyn_begin(p_size_match); for (r_ssize i = 0; i < size_needles; ++i) { // No need to initialize extra buffer v_size_match[i] = 1; } p_size_match->count = size_needles; } // If we can skip, `loc_needles` will always be an increasing sequence of values const bool skip_loc_needles = (multiple == VCTRS_MULTIPLE_any); struct r_dyn_array* p_loc_needles = NULL; if (!skip_loc_needles) { p_loc_needles = r_new_dyn_vector(R_TYPE_integer, initial_capacity); KEEP_N(p_loc_needles->shelter, &n_prot); int* v_loc_needles = (int*) r_dyn_begin(p_loc_needles); for (r_ssize i = 0; i < size_needles; ++i) { // No need to initialize extra buffer v_loc_needles[i] = i; } p_loc_needles->count = size_needles; } // When filtering, we find the filtered match for a particular needle in each // nesting container of the haystack. `v_loc_filter_match_o_haystack` // keeps track of the overall filtered match loc for a needle across all // nesting containers in the haystack. const bool has_loc_filter_match_o_haystack = any_filters && (multiple == VCTRS_MULTIPLE_all || multiple == VCTRS_MULTIPLE_warning || multiple == VCTRS_MULTIPLE_error || multiple == VCTRS_MULTIPLE_first || multiple == VCTRS_MULTIPLE_last); int* v_loc_filter_match_o_haystack = NULL; if (has_loc_filter_match_o_haystack) { r_obj* loc_filter_match_o_haystack = KEEP_N(r_alloc_integer(size_needles), &n_prot); v_loc_filter_match_o_haystack = r_int_begin(loc_filter_match_o_haystack); } struct poly_vec* p_poly_needles = new_poly_vec(needles, VCTRS_TYPE_dataframe); KEEP_N(p_poly_needles->shelter, &n_prot); const struct poly_df_data* p_needles = (const struct poly_df_data*) p_poly_needles->p_vec; struct poly_vec* p_poly_haystack = new_poly_vec(haystack, VCTRS_TYPE_dataframe); KEEP_N(p_poly_haystack->shelter, &n_prot); const struct poly_df_data* p_haystack = (const struct poly_df_data*) p_poly_haystack->p_vec; const struct poly_vec* p_poly_needles_complete = new_poly_vec(needles_complete, VCTRS_TYPE_dataframe); KEEP_N(p_poly_needles_complete->shelter, &n_prot); const struct poly_df_data* p_needles_complete = (const struct poly_df_data*) p_poly_needles_complete->p_vec; struct poly_vec* p_poly_haystack_complete = new_poly_vec(haystack_complete, VCTRS_TYPE_dataframe); KEEP_N(p_poly_haystack_complete->shelter, &n_prot); const struct poly_df_data* p_haystack_complete = (const struct poly_df_data*) p_poly_haystack_complete->p_vec; if (size_needles > 0) { // Recursion requires at least 1 row in needles. // In the case of size 0 needles, there is nothing to do, but this avoids // a segfault. const r_ssize col = 0; const r_ssize loc_lower_bound_o_needles = 0; const r_ssize loc_upper_bound_o_needles = size_needles - 1; const r_ssize loc_lower_bound_o_haystack = 0; const r_ssize loc_upper_bound_o_haystack = size_haystack - 1; if (n_containers == 1) { df_locate_matches_recurse( col, loc_lower_bound_o_needles, loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } else { df_locate_matches_with_containers( n_containers, v_container_ids, col, loc_lower_bound_o_needles, loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } } r_obj* out = KEEP_N(expand_compact_indices( v_o_haystack, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, skip_size_match, skip_loc_needles, incomplete, no_match, remaining, multiple, relationship, size_needles, size_haystack, any_non_equi, has_loc_filter_match_o_haystack, v_filters, v_loc_filter_match_o_haystack, p_haystack, needles_arg, haystack_arg, error_call ), &n_prot); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static void df_locate_matches_recurse(r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack) { const enum vctrs_ops op = v_ops[col]; const enum vctrs_filter filter = v_filters[col]; const r_ssize n_col = p_needles->n_col; const int* v_needles = (const int*) p_needles->v_col_ptr[col]; const int* v_needles_complete = (const int*) p_needles_complete->v_col_ptr[col]; const int* v_haystack = (const int*) p_haystack->v_col_ptr[col]; const int* v_haystack_complete = (const int*) p_haystack_complete->v_col_ptr[col]; const r_ssize loc_mid_bound_o_needles = midpoint(loc_lower_bound_o_needles, loc_upper_bound_o_needles); const r_ssize loc_mid_bound_needles = v_o_needles[loc_mid_bound_o_needles] - 1; const int val_needle = v_needles[loc_mid_bound_needles]; const bool needle_is_complete = v_needles_complete[loc_mid_bound_needles]; // Find lower and upper duplicate location for this needle const r_ssize loc_lower_duplicate_o_needles = int_locate_lower_duplicate( val_needle, v_needles, v_o_needles, loc_lower_bound_o_needles, loc_mid_bound_o_needles ); const r_ssize loc_upper_duplicate_o_needles = int_locate_upper_duplicate( val_needle, v_needles, v_o_needles, loc_mid_bound_o_needles, loc_upper_bound_o_needles ); if (!needle_is_complete && (incomplete->action == VCTRS_INCOMPLETE_ACTION_value || incomplete->action == VCTRS_INCOMPLETE_ACTION_drop || incomplete->action == VCTRS_INCOMPLETE_ACTION_error)) { // Signal incomplete needle, don't recursive into further columns. // Early return at the end of this branch. for (r_ssize i = loc_lower_duplicate_o_needles; i <= loc_upper_duplicate_o_needles; ++i) { // Will always be the first and only time the output is touched for this // needle, so we can poke directly into it const int loc_needles = v_o_needles[i] - 1; r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, SIGNAL_INCOMPLETE); } // Learned nothing about haystack, so just update lhs/rhs bounds for // `o_needles` as needed and continue on bool do_lhs = loc_lower_duplicate_o_needles > loc_lower_bound_o_needles; bool do_rhs = loc_upper_duplicate_o_needles < loc_upper_bound_o_needles; if (do_lhs) { const r_ssize lhs_loc_lower_bound_o_needles = loc_lower_bound_o_needles; const r_ssize lhs_loc_upper_bound_o_needles = loc_lower_duplicate_o_needles - 1; df_locate_matches_recurse( col, lhs_loc_lower_bound_o_needles, lhs_loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } if (do_rhs) { const r_ssize rhs_loc_lower_bound_o_needles = loc_upper_duplicate_o_needles + 1; const r_ssize rhs_loc_upper_bound_o_needles = loc_upper_bound_o_needles; df_locate_matches_recurse( col, rhs_loc_lower_bound_o_needles, rhs_loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } return; } const struct vctrs_match_bounds bounds = int_locate_match( val_needle, v_haystack, v_o_haystack, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack ); r_ssize loc_lower_match_o_haystack = bounds.lower; r_ssize loc_upper_match_o_haystack = bounds.upper; // Adjust bounds based on non-equi condition. // If needle is NA and we are doing an exact match, then we treat it like an // equi condition here. Otherwise if needle is NA, then we are careful to // never extend the bounds to capture values past it. const enum vctrs_ops bounds_op = (!needle_is_complete && incomplete->action == VCTRS_INCOMPLETE_ACTION_match) ? VCTRS_OPS_eq : op; switch (bounds_op) { case VCTRS_OPS_lt: { // Exclude found needle loc_lower_match_o_haystack = loc_upper_match_o_haystack + 1; if (needle_is_complete) { loc_upper_match_o_haystack = loc_upper_bound_o_haystack; } break; } case VCTRS_OPS_lte: { if (needle_is_complete) { loc_upper_match_o_haystack = loc_upper_bound_o_haystack; } break; } case VCTRS_OPS_gt: { // Exclude found needle loc_upper_match_o_haystack = loc_lower_match_o_haystack - 1; if (needle_is_complete) { loc_lower_match_o_haystack = loc_lower_bound_o_haystack; } break; } case VCTRS_OPS_gte: { if (needle_is_complete) { loc_lower_match_o_haystack = loc_lower_bound_o_haystack; } break; } case VCTRS_OPS_eq: { break; } } if (needle_is_complete && (op == VCTRS_OPS_gt || op == VCTRS_OPS_gte) && (loc_lower_match_o_haystack <= loc_upper_match_o_haystack)) { // In this specific case, a non-NA needle may match an NA in the haystack // after applying the non-equi adjustments above because NA values are // always ordered as the "smallest" values, and we set // `loc_lower_match_o_haystack` to be `loc_lower_bound_o_haystack`, which // may capture NAs at the lower bound. If there is an NA on the lower bound, // we avoid it by finding the last NA and then going 1 location beyond it. const r_ssize loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const bool lower_match_haystack_is_complete = v_haystack_complete[loc_lower_match_haystack]; if (!lower_match_haystack_is_complete) { // Find the last incomplete value loc_lower_match_o_haystack = int_locate_upper_incomplete( v_haystack_complete, v_o_haystack, loc_lower_match_o_haystack, loc_upper_match_o_haystack ); // Exclude it and all before it ++loc_lower_match_o_haystack; } } if (loc_lower_match_o_haystack <= loc_upper_match_o_haystack) { // Hit! switch (filter) { case VCTRS_FILTER_max: { if (!needle_is_complete || op == VCTRS_OPS_eq) { // Lower match value will already equal upper match value break; } // We want the max values of this group. That's the upper match of the // haystack and its corresponding lower duplicate. const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int loc_upper_match_haystack = v_o_haystack[loc_upper_match_o_haystack] - 1; const int val_lower_match_haystack = v_haystack[loc_lower_match_haystack]; const int val_upper_match_haystack = v_haystack[loc_upper_match_haystack]; if (val_lower_match_haystack != val_upper_match_haystack) { loc_lower_match_o_haystack = int_locate_lower_duplicate( val_upper_match_haystack, v_haystack, v_o_haystack, loc_lower_match_o_haystack, loc_upper_match_o_haystack ); } break; } case VCTRS_FILTER_min: { if (!needle_is_complete || op == VCTRS_OPS_eq) { // Lower match value will already equal upper match value break; } // We want the min values of this group. That's the lower match of the // haystack and its corresponding upper duplicate. const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int loc_upper_match_haystack = v_o_haystack[loc_upper_match_o_haystack] - 1; const int val_lower_match_haystack = v_haystack[loc_lower_match_haystack]; const int val_upper_match_haystack = v_haystack[loc_upper_match_haystack]; if (val_lower_match_haystack != val_upper_match_haystack) { loc_upper_match_o_haystack = int_locate_upper_duplicate( val_lower_match_haystack, v_haystack, v_o_haystack, loc_lower_match_o_haystack, loc_upper_match_o_haystack ); } break; } case VCTRS_FILTER_none: { break; } } if (col < n_col - 1) { // For this column, we've bounded the needles locations to the upper/lower // duplicates of the current needle, and the haystack locations to the // upper/lower matches of that needle. Now recurse into the next column // to further refine the boundaries. df_locate_matches_recurse( col + 1, loc_lower_duplicate_o_needles, loc_upper_duplicate_o_needles, loc_lower_match_o_haystack, loc_upper_match_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } else { // We just finished locating matches for the last column, // and we still have at least 1 hit, so record it for (r_ssize i = loc_lower_duplicate_o_needles; i <= loc_upper_duplicate_o_needles; ++i) { const int loc_needles = v_o_needles[i] - 1; const int loc_first_match_o_haystack = r_dyn_int_get(p_loc_first_match_o_haystack, loc_needles); const bool first_touch = loc_first_match_o_haystack == r_globals.na_int; switch (multiple) { case VCTRS_MULTIPLE_any: { if (first_touch) { // Arbitrarily record the lower match r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, loc_lower_match_o_haystack); break; } if (any_filters) { const int loc_first_match_haystack = v_o_haystack[loc_first_match_o_haystack] - 1; const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int cmp = p_matches_df_compare_na_equal( p_haystack, loc_lower_match_haystack, p_haystack, loc_first_match_haystack, v_filters ); // -1 = New haystack value "loses", nothing to update // 1 = New haystack value "wins", it becomes new match // 0 = Equal values, nothing to update if (cmp == 1) { r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, loc_lower_match_o_haystack); } } break; } case VCTRS_MULTIPLE_all: case VCTRS_MULTIPLE_error: case VCTRS_MULTIPLE_warning: case VCTRS_MULTIPLE_first: case VCTRS_MULTIPLE_last: { const int size_match = loc_upper_match_o_haystack - loc_lower_match_o_haystack + 1; if (first_touch) { r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, loc_lower_match_o_haystack); r_dyn_int_poke(p_size_match, loc_needles, size_match); if (any_filters) { v_loc_filter_match_o_haystack[loc_needles] = loc_lower_match_o_haystack; } break; } if (any_filters) { const int loc_filter_match_o_haystack = v_loc_filter_match_o_haystack[loc_needles]; const int loc_filter_match_haystack = v_o_haystack[loc_filter_match_o_haystack] - 1; const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int cmp = p_matches_df_compare_na_equal( p_haystack, loc_lower_match_haystack, p_haystack, loc_filter_match_haystack, v_filters ); // -1 = New haystack value "loses", nothing to update // 1 = New haystack value "wins", it becomes new filter match // 0 = Equal values, fall through and append this set of matches // Note that in the 1 case, we have no way to invalidate the old // match at this point in time. Instead, we record all matches and // in `expand_compact_indices()` we skip the ones that aren't // equivalent to the filter match. if (cmp == -1) { break; } else if (cmp == 1) { v_loc_filter_match_o_haystack[loc_needles] = loc_lower_match_o_haystack; } } r_dyn_push_back(p_loc_first_match_o_haystack, &loc_lower_match_o_haystack); r_dyn_push_back(p_size_match, &size_match); r_dyn_push_back(p_loc_needles, &loc_needles); break; } } } } } else if (col < n_col - 1 && (incomplete->action == VCTRS_INCOMPLETE_ACTION_value || incomplete->action == VCTRS_INCOMPLETE_ACTION_drop || incomplete->action == VCTRS_INCOMPLETE_ACTION_error)) { // This branch occurs if there is no match in `haystack` for this needle, // but we aren't on the last column and we are tracking incomplete needles. // Before we move on from this needle, we check its future columns for // incomplete values. If the current `val_needles` was incomplete, it would // have already been caught above, so we only look at future columns. for (r_ssize i = loc_lower_duplicate_o_needles; i <= loc_upper_duplicate_o_needles; ++i) { const r_ssize loc_needles = v_o_needles[i] - 1; for (r_ssize j = col + 1; j < n_col; ++j) { const int* v_future_needles_complete = (const int*) p_needles_complete->v_col_ptr[j]; const bool future_needle_is_complete = v_future_needles_complete[loc_needles]; if (!future_needle_is_complete) { r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, SIGNAL_INCOMPLETE); break; } } } } // At this point we have finished recording matches for the current needle in // this column, and we need to move on to other needles on the LHS and RHS // of the current needle (remember the current needle is the midpoint). For // the `==` op case we can also limit the haystack bounds we search in for // needles on the LHS/RHS, since those needles won't ever match the current // haystack values. bool do_lhs = false; bool do_rhs = false; // Default to current bounds r_ssize lhs_loc_lower_bound_o_needles = loc_lower_bound_o_needles; r_ssize lhs_loc_upper_bound_o_needles = loc_upper_bound_o_needles; r_ssize lhs_loc_lower_bound_o_haystack = loc_lower_bound_o_haystack; r_ssize lhs_loc_upper_bound_o_haystack = loc_upper_bound_o_haystack; r_ssize rhs_loc_lower_bound_o_needles = loc_lower_bound_o_needles; r_ssize rhs_loc_upper_bound_o_needles = loc_upper_bound_o_needles; r_ssize rhs_loc_lower_bound_o_haystack = loc_lower_bound_o_haystack; r_ssize rhs_loc_upper_bound_o_haystack = loc_upper_bound_o_haystack; switch (op) { case VCTRS_OPS_eq: { do_lhs = (loc_lower_duplicate_o_needles > loc_lower_bound_o_needles) && (loc_lower_match_o_haystack > loc_lower_bound_o_haystack); do_rhs = (loc_upper_duplicate_o_needles < loc_upper_bound_o_needles) && (loc_upper_match_o_haystack < loc_upper_bound_o_haystack); // Limit bounds of both needle and haystack using existing info if (do_lhs) { lhs_loc_upper_bound_o_needles = loc_lower_duplicate_o_needles - 1; lhs_loc_upper_bound_o_haystack = loc_lower_match_o_haystack - 1; } if (do_rhs) { rhs_loc_lower_bound_o_needles = loc_upper_duplicate_o_needles + 1; rhs_loc_lower_bound_o_haystack = loc_upper_match_o_haystack + 1; } break; } case VCTRS_OPS_lt: case VCTRS_OPS_lte: case VCTRS_OPS_gt: case VCTRS_OPS_gte: { // Can't update haystack here, as nesting containers make this impossible do_lhs = loc_lower_duplicate_o_needles > loc_lower_bound_o_needles; do_rhs = loc_upper_duplicate_o_needles < loc_upper_bound_o_needles; if (do_lhs) { lhs_loc_upper_bound_o_needles = loc_lower_duplicate_o_needles - 1; } if (do_rhs) { rhs_loc_lower_bound_o_needles = loc_upper_duplicate_o_needles + 1; } break; } } if (do_lhs) { df_locate_matches_recurse( col, lhs_loc_lower_bound_o_needles, lhs_loc_upper_bound_o_needles, lhs_loc_lower_bound_o_haystack, lhs_loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } if (do_rhs) { df_locate_matches_recurse( col, rhs_loc_lower_bound_o_needles, rhs_loc_upper_bound_o_needles, rhs_loc_lower_bound_o_haystack, rhs_loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } } // ----------------------------------------------------------------------------- static void df_locate_matches_with_containers(int n_containers, const int* v_container_ids, r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack) { const int* v_haystack = v_container_ids; for (int i = 0; i < n_containers; ++i) { const int val_needle = i; const struct vctrs_match_bounds bounds = int_locate_match( val_needle, v_haystack, v_o_haystack, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack ); const r_ssize loc_lower_match_o_haystack = bounds.lower; const r_ssize loc_upper_match_o_haystack = bounds.upper; df_locate_matches_recurse( col, loc_lower_bound_o_needles, loc_upper_bound_o_needles, loc_lower_match_o_haystack, loc_upper_match_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); // Advance lower bound for next container loc_lower_bound_o_haystack = loc_upper_match_o_haystack + 1; } } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the last incomplete value static inline r_ssize int_locate_upper_incomplete(const int* v_haystack_complete, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int haystack_is_complete = v_haystack_complete[loc_mid_bound_haystack]; if (haystack_is_complete) { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } else { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } } return loc_upper_bound_o_haystack; } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the first occurrence of `val_needle` static inline r_ssize int_locate_lower_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int val_haystack = v_haystack[loc_mid_bound_haystack]; if (int_equal_na_equal(val_needle, val_haystack)) { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } else { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } } return loc_lower_bound_o_haystack; } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the last occurrence of `val_needle` static inline r_ssize int_locate_upper_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int val_haystack = v_haystack[loc_mid_bound_haystack]; if (int_equal_na_equal(val_needle, val_haystack)) { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } else { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } } return loc_upper_bound_o_haystack; } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the first and last occurrence of `val_needle` static inline struct vctrs_match_bounds int_locate_match(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int val_haystack = v_haystack[loc_mid_bound_haystack]; const int cmp = int_compare_na_equal(val_needle, val_haystack); if (cmp == 1) { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } else if (cmp == -1) { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } else { // Hit! // Find lower and upper duplicate bounds for the haystack value loc_lower_bound_o_haystack = int_locate_lower_duplicate( val_haystack, v_haystack, v_o_haystack, loc_lower_bound_o_haystack, loc_mid_bound_o_haystack ); loc_upper_bound_o_haystack = int_locate_upper_duplicate( val_haystack, v_haystack, v_o_haystack, loc_mid_bound_o_haystack, loc_upper_bound_o_haystack ); break; } } return (struct vctrs_match_bounds) { loc_lower_bound_o_haystack, loc_upper_bound_o_haystack }; } // ----------------------------------------------------------------------------- static r_obj* df_joint_xtfrm_by_col(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, r_ssize n_cols, bool nan_distinct, r_obj* chr_proxy_collate) { r_obj* out = KEEP(r_alloc_list(2)); x = r_clone(x); r_list_poke(out, 0, x); y = r_clone(y); r_list_poke(out, 1, y); r_obj* const* v_x = r_list_cbegin(x); r_obj* const* v_y = r_list_cbegin(y); for (r_ssize col = 0; col < n_cols; ++col) { r_obj* x_col = v_x[col]; r_obj* y_col = v_y[col]; r_obj* xtfrms = vec_joint_xtfrm(x_col, y_col, x_size, y_size, nan_distinct, chr_proxy_collate); r_list_poke(x, col, r_list_get(xtfrms, 0)); r_list_poke(y, col, r_list_get(xtfrms, 1)); } FREE(1); return out; } // ----------------------------------------------------------------------------- static r_obj* df_detect_complete_by_col(r_obj* x, r_ssize x_size, r_ssize n_cols) { r_obj* out = KEEP(r_alloc_list(n_cols)); r_attrib_poke_names(out, r_names(x)); r_init_data_frame(out, x_size); r_obj* const* v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = v_x[i]; // Use completeness to match `vec_rank()` and `vec_match()` r_obj* complete = vec_detect_complete(col); r_list_poke(out, i, complete); } FREE(1); return out; } // ----------------------------------------------------------------------------- static inline enum vctrs_ops parse_condition_one(const char* condition) { if (!strcmp(condition, "==")) { return VCTRS_OPS_eq; } if (!strcmp(condition, ">")) { return VCTRS_OPS_gt; } if (!strcmp(condition, ">=")) { return VCTRS_OPS_gte; } if (!strcmp(condition, "<")) { return VCTRS_OPS_lt; } if (!strcmp(condition, "<=")) { return VCTRS_OPS_lte; } r_abort("`condition` must only contain \"==\", \">\", \">=\", \"<\", or \"<=\"."); } static inline void parse_condition(r_obj* condition, r_ssize n_cols, enum vctrs_ops* v_ops) { if (r_typeof(condition) != R_TYPE_character) { r_abort("`condition` must be a character vector."); } r_obj* const* v_condition = r_chr_cbegin(condition); r_ssize size_condition = vec_size(condition); if (size_condition == 1) { const char* elt = r_str_c_string(v_condition[0]); enum vctrs_ops op = parse_condition_one(elt); for (r_ssize i = 0; i < n_cols; ++i) { v_ops[i] = op; } return; } if (size_condition == n_cols) { for (r_ssize i = 0; i < n_cols; ++i) { const char* elt = r_str_c_string(v_condition[i]); v_ops[i] = parse_condition_one(elt); } return; } r_abort( "`condition` must be length 1, or the same " "length as the number of columns of the input." ); } // ----------------------------------------------------------------------------- static inline struct vctrs_incomplete parse_incomplete(r_obj* incomplete, struct r_lazy call) { if (r_length(incomplete) != 1) { r_abort_lazy_call( call, "`incomplete` must be length 1, not length %i.", r_length(incomplete) ); } if (r_is_string(incomplete)) { const char* c_incomplete = r_chr_get_c_string(incomplete, 0); if (!strcmp(c_incomplete, "compare")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_compare, .value = -1 }; } if (!strcmp(c_incomplete, "match")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_match, .value = -1 }; } if (!strcmp(c_incomplete, "drop")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_drop, .value = -1 }; } if (!strcmp(c_incomplete, "error")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_error, .value = -1 }; } r_abort_lazy_call( call, "`incomplete` must be one of: \"compare\", \"match\", \"drop\", or \"error\"." ); } incomplete = vec_cast( incomplete, r_globals.empty_int, args_incomplete, vec_args.empty, call ); int c_incomplete = r_int_get(incomplete, 0); return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_value, .value = c_incomplete }; } // ----------------------------------------------------------------------------- static inline enum vctrs_multiple parse_multiple(r_obj* multiple, struct r_lazy call) { if (!r_is_string(multiple)) { r_abort_lazy_call(call, "`multiple` must be a string."); } const char* c_multiple = r_chr_get_c_string(multiple, 0); if (!strcmp(c_multiple, "all")) return VCTRS_MULTIPLE_all; if (!strcmp(c_multiple, "any")) return VCTRS_MULTIPLE_any; if (!strcmp(c_multiple, "first")) return VCTRS_MULTIPLE_first; if (!strcmp(c_multiple, "last")) return VCTRS_MULTIPLE_last; // TODO: Remove deprecated support for `multiple = "error"/"warning"` if (!strcmp(c_multiple, "warning")) return VCTRS_MULTIPLE_warning; if (!strcmp(c_multiple, "error")) return VCTRS_MULTIPLE_error; r_abort_lazy_call( call, "`multiple` must be one of \"all\", \"any\", \"first\", or \"last\"." ); } // ----------------------------------------------------------------------------- static inline enum vctrs_relationship parse_relationship(r_obj* relationship, struct r_lazy call) { if (!r_is_string(relationship)) { r_abort_lazy_call(call, "`relationship` must be a string."); } const char* c_relationship = r_chr_get_c_string(relationship, 0); if (!strcmp(c_relationship, "none")) return VCTRS_RELATIONSHIP_none; if (!strcmp(c_relationship, "one-to-one")) return VCTRS_RELATIONSHIP_one_to_one; if (!strcmp(c_relationship, "one-to-many")) return VCTRS_RELATIONSHIP_one_to_many; if (!strcmp(c_relationship, "many-to-one")) return VCTRS_RELATIONSHIP_many_to_one; if (!strcmp(c_relationship, "many-to-many")) return VCTRS_RELATIONSHIP_many_to_many; if (!strcmp(c_relationship, "warn-many-to-many")) return VCTRS_RELATIONSHIP_warn_many_to_many; r_abort_lazy_call( call, "`relationship` must be one of \"none\", \"one-to-one\", \"one-to-many\", \"many-to-one\", \"many-to-many\", or \"warn-many-to-many\"." ); } // ----------------------------------------------------------------------------- static inline enum vctrs_filter parse_filter_one(const char* filter) { if (!strcmp(filter, "none")) return VCTRS_FILTER_none; if (!strcmp(filter, "min")) return VCTRS_FILTER_min; if (!strcmp(filter, "max")) return VCTRS_FILTER_max; r_abort("`filter` must be one of \"none\", \"min\", or \"max\"."); } static inline void parse_filter(r_obj* filter, r_ssize n_cols, enum vctrs_filter* v_filters) { if (r_typeof(filter) != R_TYPE_character) { r_abort("`filter` must be a character vector."); } r_obj* const* v_filter = r_chr_cbegin(filter); r_ssize size_filter = vec_size(filter); if (size_filter == 1) { const char* elt = r_str_c_string(v_filter[0]); enum vctrs_filter elt_filter = parse_filter_one(elt); for (r_ssize i = 0; i < n_cols; ++i) { v_filters[i] = elt_filter; } return; } if (size_filter == n_cols) { for (r_ssize i = 0; i < n_cols; ++i) { const char* elt = r_str_c_string(v_filter[i]); v_filters[i] = parse_filter_one(elt); } return; } r_abort( "`filter` must be length 1, or the same " "length as the number of columns of the input." ); } // ----------------------------------------------------------------------------- static inline struct vctrs_no_match parse_no_match(r_obj* no_match, struct r_lazy call) { if (r_length(no_match) != 1) { r_abort_lazy_call( call, "`no_match` must be length 1, not length %i.", r_length(no_match) ); } if (r_is_string(no_match)) { const char* c_no_match = r_chr_get_c_string(no_match, 0); if (!strcmp(c_no_match, "error")) { return (struct vctrs_no_match) { .action = VCTRS_NO_MATCH_ACTION_error, .value = -1 }; } if (!strcmp(c_no_match, "drop")) { return (struct vctrs_no_match) { .action = VCTRS_NO_MATCH_ACTION_drop, .value = -1 }; } r_abort_lazy_call( call, "`no_match` must be either \"drop\" or \"error\"." ); } no_match = vec_cast( no_match, r_globals.empty_int, args_no_match, vec_args.empty, call ); int c_no_match = r_int_get(no_match, 0); return (struct vctrs_no_match) { .action = VCTRS_NO_MATCH_ACTION_value, .value = c_no_match }; } // ----------------------------------------------------------------------------- static inline struct vctrs_remaining parse_remaining(r_obj* remaining, struct r_lazy call) { if (r_length(remaining) != 1) { r_abort_lazy_call( call, "`remaining` must be length 1, not length %i.", r_length(remaining) ); } if (r_is_string(remaining)) { const char* c_remaining = r_chr_get_c_string(remaining, 0); if (!strcmp(c_remaining, "error")) { return (struct vctrs_remaining) { .action = VCTRS_REMAINING_ACTION_error, .value = -1 }; } if (!strcmp(c_remaining, "drop")) { return (struct vctrs_remaining) { .action = VCTRS_REMAINING_ACTION_drop, .value = -1 }; } r_abort_lazy_call( call, "`remaining` must be either \"drop\" or \"error\"." ); } remaining = vec_cast( remaining, r_globals.empty_int, args_remaining, vec_args.empty, call ); int c_remaining = r_int_get(remaining, 0); return (struct vctrs_remaining) { .action = VCTRS_REMAINING_ACTION_value, .value = c_remaining }; } // ----------------------------------------------------------------------------- static inline r_obj* new_matches_result(r_obj* needles, r_obj* haystack) { r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, needles); r_list_poke(out, 1, haystack); r_obj* names = r_alloc_character(2); r_attrib_poke_names(out, names); r_chr_poke(names, 0, strings_needles); r_chr_poke(names, 1, strings_haystack); r_init_data_frame(out, r_length(needles)); FREE(1); return out; } // ----------------------------------------------------------------------------- static r_obj* expand_compact_indices(const int* v_o_haystack, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, bool skip_size_match, bool skip_loc_needles, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, r_ssize size_needles, r_ssize size_haystack, bool any_non_equi, bool has_loc_filter_match_o_haystack, const enum vctrs_filter* v_filters, const int* v_loc_filter_match_o_haystack, const struct poly_df_data* p_haystack, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call) { int n_prot = 0; const r_ssize n_used = p_loc_first_match_o_haystack->count; const int* v_loc_first_match_o_haystack = (const int*) r_dyn_cbegin(p_loc_first_match_o_haystack); const int* v_size_match = skip_size_match ? NULL : (const int*) r_dyn_cbegin(p_size_match); const int* v_loc_needles = skip_loc_needles ? NULL : (const int*) r_dyn_cbegin(p_loc_needles); const bool one_match_per_needle = multiple == VCTRS_MULTIPLE_any || multiple == VCTRS_MULTIPLE_first || multiple == VCTRS_MULTIPLE_last; r_ssize size_out = 0; if (one_match_per_needle) { size_out = size_needles; } else { double dbl_size_out = 0; for (r_ssize i = 0; i < n_used; ++i) { // This could get extremely large with improperly specified non-equi joins. // May over-allocate in the case of `filters` with `multiple = "all"`, // or when `no_match = "drop"` or `incomplete = "drop"`. dbl_size_out += (double) v_size_match[i]; } if (dbl_size_out > R_LEN_T_MAX) { // TODO: Update this after a switch to long vector support stop_matches_overflow(dbl_size_out, error_call); } size_out = r_double_as_ssize(dbl_size_out); } r_keep_loc out_needles_pi; r_obj* out_needles = r_alloc_integer(size_out); KEEP_HERE(out_needles, &out_needles_pi); ++n_prot; r_keep_loc out_haystack_pi; r_obj* out_haystack = r_alloc_integer(size_out); KEEP_HERE(out_haystack, &out_haystack_pi); ++n_prot; int* v_out_needles = r_int_begin(out_needles); int* v_out_haystack = r_int_begin(out_haystack); const int* v_o_loc_needles = NULL; if (!skip_loc_needles) { // `loc_needles` is used to record the location of the needle that the // matches correspond to. The first `size_needles` elements will be in // sequential order, but locations after that correspond to the "extra" // matches gathered from different nesting containers. We need the order of // this `loc_needles` vector so we can process all the matches for needle 1, // then 2, then 3, etc, in that order, across all nesting containers. r_obj* loc_needles = KEEP_N(r_dyn_unwrap(p_loc_needles), &n_prot); r_obj* o_loc_needles = KEEP_N(vec_order(loc_needles, chrs_asc, chrs_smallest, true, r_null), &n_prot); v_o_loc_needles = r_int_cbegin(o_loc_needles); } bool any_multiple_needles = false; bool any_multiple_haystack = false; r_ssize loc_first_multiple_needles = -1; r_ssize loc_first_multiple_haystack = -1; // Check is always needed for `multiple = "all"`. // This also handles `relationship` options too, since if `multiple` is // `"any"`, `"first"`, or `"last"`, we can't invalidate a `relationship`. bool check_multiple_needles = multiple == VCTRS_MULTIPLE_all || // TODO: Remove deprecated support for `multiple = "error"/"warning"` multiple == VCTRS_MULTIPLE_error || multiple == VCTRS_MULTIPLE_warning; // Used to enforce `check_multiple_needles` r_ssize loc_needles_previous = r_globals.na_int; bool check_multiple_haystack = false; switch (relationship) { // Expecting `haystack` can match any number of `needles` case VCTRS_RELATIONSHIP_none: case VCTRS_RELATIONSHIP_many_to_one: case VCTRS_RELATIONSHIP_many_to_many: { check_multiple_haystack = false; break; } // Expecting `haystack` to match at most 1 `needles` case VCTRS_RELATIONSHIP_one_to_one: case VCTRS_RELATIONSHIP_one_to_many: { check_multiple_haystack = true; break; } // Only check for multiple matches in `haystack` if we are also checking // for them in `needles`. Otherwise we can't possibly have a many-to-many // issue so there is no need to check for one. case VCTRS_RELATIONSHIP_warn_many_to_many: { check_multiple_haystack = check_multiple_needles; break; } } const bool retain_remaining_haystack = remaining->action == VCTRS_REMAINING_ACTION_value || remaining->action == VCTRS_REMAINING_ACTION_error; bool track_matches_haystack = check_multiple_haystack || retain_remaining_haystack; bool* v_detect_matches_haystack = NULL; if (track_matches_haystack) { r_obj* detect_matches_haystack = KEEP_N(r_alloc_raw(size_haystack * sizeof(bool)), &n_prot); v_detect_matches_haystack = r_raw_begin(detect_matches_haystack); memset(v_detect_matches_haystack, 0, size_haystack * sizeof(bool)); } // For `multiple = "first" / "last"` r_ssize loc_haystack_overall = r_globals.na_int; r_ssize loc_out = 0; for (r_ssize i = 0; i < n_used; ++i) { const int loc = skip_loc_needles ? i : v_o_loc_needles[i] - 1; const int loc_first_match_o_haystack = v_loc_first_match_o_haystack[loc]; const int size_match = skip_size_match ? 1 : v_size_match[loc]; const int loc_needles = skip_loc_needles ? loc : v_loc_needles[loc]; if (loc_first_match_o_haystack == SIGNAL_INCOMPLETE) { if (size_match != 1) { r_stop_internal( "`size_match` should always be 1 in the case of incomplete values." ); } switch (incomplete->action) { case VCTRS_INCOMPLETE_ACTION_value: { v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = incomplete->value; ++loc_out; continue; } case VCTRS_INCOMPLETE_ACTION_drop: { // Do not increment `loc_out`, do not store locations continue; } case VCTRS_INCOMPLETE_ACTION_error: { stop_matches_incomplete(loc_needles, needles_arg, error_call); } case VCTRS_INCOMPLETE_ACTION_compare: case VCTRS_INCOMPLETE_ACTION_match: { r_stop_internal( "Needles should never be marked as `SIGNAL_INCOMPLETE`", "when `incomplete = 'compare'` or `incomplete = 'match'`." ); } default: { r_stop_internal("Unknown `incomplete->action`."); } } } if (loc_first_match_o_haystack == SIGNAL_NO_MATCH) { if (size_match != 1) { r_stop_internal( "`size_match` should always be 1 in the case of no matches." ); } switch (no_match->action) { case VCTRS_NO_MATCH_ACTION_value: { v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = no_match->value; ++loc_out; continue; } case VCTRS_NO_MATCH_ACTION_drop: { continue; } case VCTRS_NO_MATCH_ACTION_error: { stop_matches_nothing(loc_needles, needles_arg, haystack_arg, error_call); } default: { r_stop_internal("Unknown `no_match->action`."); } } } if (has_loc_filter_match_o_haystack) { // When recording matches, if we updated the filter match value for a // particular needle, then we weren't able to remove the old match from // `p_loc_first_match_o_haystack`. So we need to check that the current // match value in the haystack is the same as the recorded filter match // value for this needle. If it is the same, we continue, otherwise we // move on to the next value. const int loc_filter_match_o_haystack = v_loc_filter_match_o_haystack[loc_needles]; bool equal = false; if (loc_filter_match_o_haystack == loc_first_match_o_haystack) { equal = true; } else { const int loc_filter_match_haystack = v_o_haystack[loc_filter_match_o_haystack] - 1; const int loc_first_match_haystack = v_o_haystack[loc_first_match_o_haystack] - 1; equal = p_matches_df_equal_na_equal( p_haystack, loc_first_match_haystack, p_haystack, loc_filter_match_haystack, v_filters ); } if (!equal) { continue; } } if (check_multiple_needles) { if (size_match > 1) { // Easy, obvious, case. // This containment group had >1 matches for this `needle` so we // immediately handle multiple `needles` matches. any_multiple_needles = true; } else if (loc_needles == loc_needles_previous) { // We've recorded a match for this `needle` before. Remember that // `needles` are processed in increasing order across all containment // groups due to `v_o_loc_needles` so this simple tracking of the // previous `needle` works. any_multiple_needles = true; } else { // There was exactly 1 match for the `needle` in this containment group, // and we've never recorded a match for this `needle` before. // In that case we record that we've seen it for the next iteration. loc_needles_previous = loc_needles; } if (any_multiple_needles) { loc_first_multiple_needles = loc_needles; // TODO: Remove deprecated support for `multiple = "error"/"warning"` switch (multiple) { case VCTRS_MULTIPLE_all: break; case VCTRS_MULTIPLE_error: stop_matches_multiple( loc_first_multiple_needles, needles_arg, haystack_arg, error_call ); case VCTRS_MULTIPLE_warning: { warn_matches_multiple( loc_first_multiple_needles, needles_arg, haystack_arg, error_call ); break; } default: r_stop_internal("`check_multiple_needles` should have been false."); } switch (relationship) { case VCTRS_RELATIONSHIP_one_to_one: stop_matches_relationship_one_to_one( loc_first_multiple_needles, "needles", needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_many_to_one: stop_matches_relationship_many_to_one( loc_first_multiple_needles, needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_warn_many_to_many: { if (any_multiple_haystack) { warn_matches_relationship_many_to_many( loc_first_multiple_needles, loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); } break; } default: { switch (multiple) { case VCTRS_MULTIPLE_all: // We are tracking if there are multiple matches, but don't throw // any errors or warnings on them break; // TODO: Remove deprecated support for `multiple = "error"/"warning"` case VCTRS_MULTIPLE_error: r_stop_internal("`multiple = 'error'` should have thrown by now."); case VCTRS_MULTIPLE_warning: break; default: r_stop_internal("`check_multiple_needles` should have been false."); } } } // We know there are multiple and don't need to continue checking check_multiple_needles = false; } } int loc_o_haystack = loc_first_match_o_haystack; switch (multiple) { case VCTRS_MULTIPLE_first: case VCTRS_MULTIPLE_last: { if (skip_loc_needles) { // We use `v_loc_needles` unconditionally below because it should always // be available when finding the first/last match r_stop_internal( "`skip_loc_needles` should never be `true` with `multiple = 'first'/'last'`." ); } if (loc_haystack_overall == r_globals.na_int) { // Start of a new needle loc_haystack_overall = v_o_haystack[loc_o_haystack] - 1; } // Branching here seems to help a good bit when there are many matches if (multiple == VCTRS_MULTIPLE_first) { for (r_ssize j = 0; j < size_match; ++j) { const int loc_haystack = v_o_haystack[loc_o_haystack] - 1; if (loc_haystack_overall > loc_haystack) { loc_haystack_overall = loc_haystack; } ++loc_o_haystack; } } else if (multiple == VCTRS_MULTIPLE_last) { for (r_ssize j = 0; j < size_match; ++j) { const int loc_haystack = v_o_haystack[loc_o_haystack] - 1; if (loc_haystack_overall < loc_haystack) { loc_haystack_overall = loc_haystack; } ++loc_o_haystack; } } else { r_stop_internal( "`multiple` should only be 'first' or 'last' here." ); } const bool at_end_of_all_matches = (i == n_used - 1); // Check if we are at the end of the vector or if the next needle location // is different from this one, at which point we can record the match // corresponding to the first/last result bool at_end_of_needle_matches = true; if (!at_end_of_all_matches) { const int loc_next = v_o_loc_needles[i + 1] - 1; const int loc_needles_next = v_loc_needles[loc_next]; at_end_of_needle_matches = (loc_needles != loc_needles_next); } if (at_end_of_needle_matches) { v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = loc_haystack_overall + 1; if (track_matches_haystack) { if (check_multiple_haystack) { // `true` if a match already existed any_multiple_haystack = v_detect_matches_haystack[loc_haystack_overall]; if (any_multiple_haystack) { loc_first_multiple_haystack = loc_haystack_overall; switch (relationship) { case VCTRS_RELATIONSHIP_one_to_one: stop_matches_relationship_one_to_one( loc_first_multiple_haystack, "haystack", needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_one_to_many: stop_matches_relationship_one_to_many( loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_warn_many_to_many: r_stop_internal( "`relationship = 'warn-many-to-many'` with " "`multiple = 'first'/'last' should have resulted in " "`check_multiple_haystack = false`." ); default: r_stop_internal("`check_multiple_haystack` should have been false."); } } } // This haystack value was a match, so it isn't "remaining". v_detect_matches_haystack[loc_haystack_overall] = true; } ++loc_out; loc_haystack_overall = r_globals.na_int; } break; } case VCTRS_MULTIPLE_all: case VCTRS_MULTIPLE_error: case VCTRS_MULTIPLE_warning: case VCTRS_MULTIPLE_any: { for (r_ssize j = 0; j < size_match; ++j) { const int loc_haystack = v_o_haystack[loc_o_haystack] - 1; v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = loc_haystack + 1; if (track_matches_haystack) { if (check_multiple_haystack) { // `true` if a match already existed any_multiple_haystack = v_detect_matches_haystack[loc_haystack]; if (any_multiple_haystack) { loc_first_multiple_haystack = loc_haystack; switch (relationship) { case VCTRS_RELATIONSHIP_one_to_one: stop_matches_relationship_one_to_one( loc_first_multiple_haystack, "haystack", needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_one_to_many: stop_matches_relationship_one_to_many( loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_warn_many_to_many: { if (any_multiple_needles) { warn_matches_relationship_many_to_many( loc_first_multiple_needles, loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); } // We know there are multiple and don't need to continue checking check_multiple_haystack = false; // Only continue tracking if needed for `remaining` track_matches_haystack = retain_remaining_haystack; break; } default: r_stop_internal("`check_multiple_haystack` should have been false."); } } } // This haystack value was a match, so it isn't "remaining". v_detect_matches_haystack[loc_haystack] = true; } ++loc_out; ++loc_o_haystack; } break; } } } if (loc_out < size_out) { // Can happen with a `filter` and `multiple = "all"`, where it is possible // for potential matches coming from a different nesting container // to be skipped over in the `has_loc_filter_match_o_haystack` section. // Can also happen with `no_match = "drop"` or `incomplete = "drop"`. // This resize should be essentially free by setting truelength/growable. size_out = loc_out; out_needles = r_int_resize(out_needles, size_out); KEEP_AT(out_needles, out_needles_pi); v_out_needles = r_int_begin(out_needles); out_haystack = r_int_resize(out_haystack, size_out); KEEP_AT(out_haystack, out_haystack_pi); v_out_haystack = r_int_begin(out_haystack); } if (any_multiple_needles && any_non_equi) { // If we had multiple matches and we were doing a non-equi join, then // the needles column will be correct, but any group of multiple matches in // the haystack column will be ordered incorrectly within the needle group. // They will be ordered using the order of the original haystack values, // rather than by first appearance. Reordering the entire output data frame // orders them correctly, as within each needle group it will put the // haystack locations in ascending order (i.e. by first appearance). // This is expensive! `out` could have a huge number of matches. r_obj* both = KEEP(new_matches_result(out_needles, out_haystack)); r_obj* o_haystack_appearance = KEEP(vec_order(both, chrs_asc, chrs_smallest, true, r_null)); int* v_o_haystack_appearance = r_int_begin(o_haystack_appearance); // Avoid a second allocation by reusing the appearance order vector, // which has the same size and type as the output and we won't overwrite it r_obj* out_haystack_reordered = o_haystack_appearance; int* v_out_haystack_reordered = v_o_haystack_appearance; for (r_ssize i = 0; i < size_out; ++i) { v_out_haystack_reordered[i] = v_out_haystack[v_o_haystack_appearance[i] - 1]; } out_haystack = out_haystack_reordered; v_out_haystack = v_out_haystack_reordered; FREE(2); KEEP_AT(out_haystack, out_haystack_pi); } if (retain_remaining_haystack) { r_ssize n_remaining_haystack = 0; switch (remaining->action) { case VCTRS_REMAINING_ACTION_error: { for (r_ssize i = 0; i < size_haystack; ++i) { if (!v_detect_matches_haystack[i]) { stop_matches_remaining(i, needles_arg, haystack_arg, error_call); } } break; } case VCTRS_REMAINING_ACTION_value: { for (r_ssize i = 0; i < size_haystack; ++i) { n_remaining_haystack += !v_detect_matches_haystack[i]; } break; } case VCTRS_REMAINING_ACTION_drop: { r_stop_internal("`remaining` should never be 'drop' here."); } } if (n_remaining_haystack > 0) { // Resize to have enough room for "remaining" haystack values at the end r_ssize new_size_out = r_ssize_add(size_out, n_remaining_haystack); out_needles = r_int_resize(out_needles, new_size_out); KEEP_AT(out_needles, out_needles_pi); v_out_needles = r_int_begin(out_needles); out_haystack = r_int_resize(out_haystack, new_size_out); KEEP_AT(out_haystack, out_haystack_pi); v_out_haystack = r_int_begin(out_haystack); // Add in "remaining" values at the end of the output for (r_ssize i = size_out; i < new_size_out; ++i) { v_out_needles[i] = remaining->value; } r_ssize j = size_out; for (r_ssize i = 0; i < size_haystack; ++i) { if (!v_detect_matches_haystack[i]) { v_out_haystack[j] = i + 1; ++j; } } size_out = new_size_out; } } r_obj* out = new_matches_result(out_needles, out_haystack); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- // Registered for testing purposes // [[ register() ]] r_obj* ffi_compute_nesting_container_info(r_obj* haystack, r_obj* condition) { r_ssize n_cols = r_length(haystack); enum vctrs_ops* v_ops = (enum vctrs_ops*) R_alloc(n_cols, sizeof(enum vctrs_ops)); parse_condition(condition, n_cols, v_ops); const r_ssize size_haystack = vec_size(haystack); return compute_nesting_container_info(haystack, size_haystack, v_ops); } static r_obj* compute_nesting_container_info(r_obj* haystack, r_ssize size_haystack, const enum vctrs_ops* v_ops) { int n_prot = 0; const r_ssize n_cols = r_length(haystack); // Outputs: // - `haystack` order // - Container id vector // - Number of containers as a scalar // - Boolean for if there are any-non-equi conditions r_obj* out = KEEP_N(r_alloc_list(4), &n_prot); bool any_non_equi = false; int first_non_equi = 0; for (r_ssize i = 0; i < n_cols; ++i) { const enum vctrs_ops op = v_ops[i]; if (op != VCTRS_OPS_eq) { any_non_equi = true; first_non_equi = i; break; } } if (!any_non_equi) { // Container info isn't required for only `==` r_list_poke(out, 0, vec_order(haystack, chrs_asc, chrs_smallest, true, r_null)); r_list_poke(out, 1, r_globals.empty_int); r_list_poke(out, 2, r_int(1)); r_list_poke(out, 3, r_lgl(any_non_equi)); FREE(n_prot); return out; } r_obj* info = KEEP_N(vec_order_info( haystack, chrs_asc, chrs_smallest, true, r_null, true ), &n_prot); r_obj* o_haystack = r_list_get(info, 0); const int* v_o_haystack = r_int_cbegin(o_haystack); r_obj* group_sizes = r_list_get(info, 1); const int* v_group_sizes = r_int_cbegin(group_sizes); const r_ssize n_groups = r_length(group_sizes); // This is the haystack we compute container ids with. // This is initially the whole `haystack`, but will be adjusted to contain // fewer columns if there are `==` conditions before the first non-equi // condition. r_keep_loc haystack_container_pi; r_obj* haystack_container = haystack; KEEP_HERE(haystack_container, &haystack_container_pi); ++n_prot; // If there are `==` conditions before the first non-equi condition, // we separate those columns from the haystack and compute their group sizes, // which are used for computing the container ids. bool has_outer_group_sizes = false; const int* v_outer_group_sizes = NULL; if (first_non_equi != 0) { // We have equality comparisons before the first non-equi comparison. // In this case, we can skip nested containment ordering for the equality // comparisons before the first non-equi comparison if we pass on the // group sizes of the ordered equality columns as `v_outer_group_sizes`. r_obj* const* v_haystack = r_list_cbegin(haystack); r_obj* const* v_haystack_names = r_chr_cbegin(r_names(haystack)); // "Outer" data frame columns before the first non-equi condition r_obj* haystack_outer = KEEP_N(r_alloc_list(first_non_equi), &n_prot); r_obj* haystack_outer_names = r_alloc_character(first_non_equi); r_attrib_poke_names(haystack_outer, haystack_outer_names); r_init_data_frame(haystack_outer, size_haystack); for (r_ssize i = 0; i < first_non_equi; ++i) { r_list_poke(haystack_outer, i, v_haystack[i]); r_chr_poke(haystack_outer_names, i, v_haystack_names[i]); } // "Inner" data frame columns at and after the first non-equi condition r_obj* haystack_inner = KEEP_N(r_alloc_list(n_cols - first_non_equi), &n_prot); r_obj* haystack_inner_names = r_alloc_character(n_cols - first_non_equi); r_attrib_poke_names(haystack_inner, haystack_inner_names); r_init_data_frame(haystack_inner, size_haystack); for (r_ssize i = first_non_equi, j = 0; i < n_cols; ++i, ++j) { r_list_poke(haystack_inner, j, v_haystack[i]); r_chr_poke(haystack_inner_names, j, v_haystack_names[i]); } // Compute the order info of the outer columns, just to pluck off the // group sizes. These automatically create a set of groups that // "surround" the non-equi columns. r_obj* info = vec_order_info( haystack_outer, chrs_asc, chrs_smallest, true, r_null, true ); r_obj* outer_group_sizes = KEEP_N(r_list_get(info, 1), &n_prot); v_outer_group_sizes = r_int_cbegin(outer_group_sizes); has_outer_group_sizes = true; // Inner columns become the new container haystack haystack_container = haystack_inner; KEEP_AT(haystack_container, haystack_container_pi); } r_obj* container_ids_info = KEEP_N(compute_nesting_container_ids( haystack_container, v_o_haystack, v_group_sizes, v_outer_group_sizes, size_haystack, n_groups, has_outer_group_sizes ), &n_prot); const int n_containers = r_as_int(r_list_get(container_ids_info, 1)); if (n_containers == 1) { // If only a single container exists at this point, either there was // only 1 non-equi column which must already be in order, or we hit the // somewhat rare case of having a >1 col `haystack_container` data frame // that is already in nested containment order. In that case, original // haystack ordering is sufficient and we don't need the ids. r_list_poke(out, 0, o_haystack); r_list_poke(out, 1, r_globals.empty_int); r_list_poke(out, 2, r_int(1)); r_list_poke(out, 3, r_lgl(any_non_equi)); FREE(n_prot); return out; } // Otherwise, we need to recompute the haystack ordering accounting for // `container_ids`. One way to do this is to append `container_ids` to the // front of the `haystack` data frame and recompute the order, but since // we already have `o_haystack` and `group_sizes`, we can build a simpler // proxy for `haystack` that orders the exact same, but faster. So we end // up with a two column data frame of `container_ids` and `haystack_proxy` // to compute the new order for. r_obj* container_ids = r_list_get(container_ids_info, 0); r_obj* haystack_proxy = KEEP_N(r_alloc_integer(size_haystack), &n_prot); int* v_haystack_proxy = r_int_begin(haystack_proxy); r_ssize loc_o_haystack = 0; // Insert group number as the proxy value for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { v_haystack_proxy[v_o_haystack[loc_o_haystack] - 1] = i; ++loc_o_haystack; } } r_obj* df = KEEP_N(r_alloc_list(2), &n_prot); r_list_poke(df, 0, container_ids); r_list_poke(df, 1, haystack_proxy); r_obj* df_names = r_alloc_character(2); r_attrib_poke_names(df, df_names); r_chr_poke(df_names, 0, r_str("container_ids")); r_chr_poke(df_names, 1, r_str("haystack_proxy")); r_init_data_frame(df, size_haystack); o_haystack = KEEP_N(vec_order( df, chrs_asc, chrs_smallest, true, r_null ), &n_prot); r_list_poke(out, 0, o_haystack); r_list_poke(out, 1, container_ids); r_list_poke(out, 2, r_int(n_containers)); r_list_poke(out, 3, r_lgl(any_non_equi)); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static r_obj* compute_nesting_container_ids(r_obj* x, const int* v_order, const int* v_group_sizes, const int* v_outer_group_sizes, r_ssize size, r_ssize n_groups, bool has_outer_group_sizes) { if (!is_data_frame(x)) { r_stop_internal("`x` must be a data frame."); } int n_prot = 0; const r_ssize n_cols = r_length(x); r_obj* out = KEEP_N(r_alloc_list(2), &n_prot); r_obj* container_ids = r_alloc_integer(size); r_list_poke(out, 0, container_ids); int* v_container_ids = r_int_begin(container_ids); r_obj* n_container_ids = r_alloc_integer(1); r_list_poke(out, 1, n_container_ids); int* p_n_container_ids = r_int_begin(n_container_ids); // Initialize ids to 0, which is always our first container id value. // This means we start with 1 container. memset(v_container_ids, 0, size * sizeof(int)); *p_n_container_ids = 1; if (size == 0) { // Algorithm requires at least 1 row FREE(n_prot); return out; } if (n_cols == 1) { // If there is only 1 column, `x` is in increasing order already when // ordered by `v_order`. // If `v_outer_group_sizes` were supplied, within each group `x` will // be in increasing order (since the single `x` column is the one that // broke any ties), and that is all that is required. FREE(n_prot); return out; } struct r_dyn_array* p_prev_rows = r_new_dyn_vector(R_TYPE_integer, 10000); KEEP_N(p_prev_rows->shelter, &n_prot); struct poly_vec* p_poly_x = new_poly_vec(x, VCTRS_TYPE_dataframe); KEEP_N(p_poly_x->shelter, &n_prot); const void* v_x = p_poly_x->p_vec; // Will be used if `has_outer_group_sizes` is `true` r_ssize loc_outer_group_sizes = 0; r_ssize loc_next_outer_group_start = 0; r_ssize loc_group_start = 0; for (r_ssize i = 0; i < n_groups; ++i) { if (has_outer_group_sizes && loc_next_outer_group_start == loc_group_start) { // Start of a new outer group. Clear all stored previous rows. p_prev_rows->count = 0; loc_next_outer_group_start += v_outer_group_sizes[loc_outer_group_sizes]; ++loc_outer_group_sizes; } const r_ssize group_size = v_group_sizes[i]; const int cur_row = v_order[loc_group_start] - 1; int container_id = 0; int n_container_ids_group = p_prev_rows->count; for (; container_id < n_container_ids_group; ++container_id) { const int prev_row = r_dyn_int_get(p_prev_rows, container_id); if (p_nesting_container_df_compare_fully_ge_na_equal(v_x, cur_row, v_x, prev_row)) { // Current row is fully greater than or equal to previous row. // Meaning it is not a new `container_id`, and it falls in the current container. break; } } if (container_id == n_container_ids_group) { // New `container_id` for this outer group, which we add to the end r_dyn_push_back(p_prev_rows, &cur_row); ++n_container_ids_group; if (n_container_ids_group > *p_n_container_ids) { // `p_prev_rows` is reset for each outer group, // so we have to keep a running overall count *p_n_container_ids = n_container_ids_group; } } else { // Update stored row location to the current row, // since the current row is greater than or equal to it r_dyn_int_poke(p_prev_rows, container_id, cur_row); } for (r_ssize j = 0; j < group_size; ++j) { v_container_ids[v_order[loc_group_start] - 1] = container_id; ++loc_group_start; } } FREE(n_prot); return out; } static inline bool p_nesting_container_df_compare_fully_ge_na_equal(const void* x, r_ssize i, const void* y, r_ssize j) { // Checks if EVERY column of `x` is `>=` `y`. // Assumes original input that `x` and `y` came from is ordered, and that // `x` comes after `y` in terms of row location in that original input. This // means that the first column of `x` is always `>=` the first column of `y`, // so we can ignore it in the comparison. // Iterates backwards to (ideally) maximize chance of hitting the fastest // varying column. // All columns are integer vectors (ranks). const struct poly_df_data* x_data = (const struct poly_df_data*) x; const struct poly_df_data* y_data = (const struct poly_df_data*) y; const r_ssize n_col = x_data->n_col; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; for (r_ssize col = n_col - 1; col > 0; --col) { if (p_int_compare_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j) < 0) { return false; } } return true; } // ----------------------------------------------------------------------------- static inline int p_matches_df_compare_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters) { // First broken tie wins. // All columns are integer vectors (approximate ranks). const struct poly_df_data* x_data = (const struct poly_df_data*) x; const struct poly_df_data* y_data = (const struct poly_df_data*) y; const r_ssize n_col = x_data->n_col; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; for (r_ssize col = 0; col < n_col; ++col) { const enum vctrs_filter filter = v_filters[col]; switch (filter) { case VCTRS_FILTER_none: { break; } case VCTRS_FILTER_max: { const int cmp = p_int_compare_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j); if (cmp != 0) { // Want max, new value is greater (1), signal replace (1) // Want max, new value is smaller (-1), signal keep (-1) return cmp; } break; } case VCTRS_FILTER_min: { const int cmp = p_int_compare_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j); if (cmp != 0) { // Want min, new value is smaller (-1), signal replace (1) // Want min, new value is larger (1), signal keep (-1) return -cmp; } break; } default: { r_stop_internal("Unknown `filter`."); } } } // All columns are equal, or no columns. No need to update anything. return 0; } static inline bool p_matches_df_equal_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters) { // All columns are integer vectors (approximate ranks). const struct poly_df_data* x_data = (const struct poly_df_data*) x; const struct poly_df_data* y_data = (const struct poly_df_data*) y; const r_ssize n_col = x_data->n_col; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; for (r_ssize col = 0; col < n_col; ++col) { const enum vctrs_filter filter = v_filters[col]; if (filter == VCTRS_FILTER_none) { continue; } if (!p_int_equal_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j)) { return false; } } // All columns are equal, or no columns. return true; } // ----------------------------------------------------------------------------- static inline r_ssize midpoint(r_ssize lhs, r_ssize rhs) { return lhs + (rhs - lhs) / 2; } // ----------------------------------------------------------------------------- static inline void stop_matches_overflow(double size, struct r_lazy call) { r_obj* syms[3] = { syms_size, syms_call, NULL }; r_obj* args[3] = { KEEP(r_dbl(size)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_overflow, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_overflow"); } static inline void stop_matches_nothing(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_nothing, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_nothing"); } static inline void stop_matches_remaining(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_remaining, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_remaining"); } static inline void stop_matches_incomplete(r_ssize i, struct vctrs_arg* needles_arg, struct r_lazy call) { r_obj* syms[4] = { syms_i, syms_needles_arg, syms_call, NULL }; r_obj* args[4] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_incomplete, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_incomplete"); } static inline void stop_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_multiple, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_multiple"); } static inline void warn_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_warn_matches_multiple, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); FREE(5); } static inline void stop_matches_relationship_one_to_one(r_ssize i, const char* which, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[6] = { syms_i, syms_which, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[6] = { KEEP(r_int((int)i + 1)), KEEP(r_chr(which)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_relationship_one_to_one, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_relationship_one_to_one"); } static inline void stop_matches_relationship_one_to_many(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_relationship_one_to_many, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_relationship_one_to_many"); } static inline void stop_matches_relationship_many_to_one(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_relationship_many_to_one, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_relationship_many_to_one"); } static inline void warn_matches_relationship_many_to_many(r_ssize i, r_ssize j, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[6] = { syms_i, syms_j, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[6] = { KEEP(r_int((int)i + 1)), KEEP(r_int((int)j + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_warn_matches_relationship_many_to_many, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); FREE(6); } // ----------------------------------------------------------------------------- void vctrs_init_match(r_obj* ns) { args_incomplete_ = new_wrapper_arg(NULL, "incomplete"); args_no_match_ = new_wrapper_arg(NULL, "no_match"); args_remaining_ = new_wrapper_arg(NULL, "remaining"); } // ----------------------------------------------------------------------------- #undef SIGNAL_NO_MATCH #undef SIGNAL_INCOMPLETE vctrs/src/lazy.h0000644000176200001440000000575014315060310013332 0ustar liggesusers#ifndef VCTRS_LAZY_H #define VCTRS_LAZY_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- /* * @member self A RAWSXP for the struct memory. * @member data The RAWSXP that gets allocated lazily. * @member p_data A void pointer to the RAWSXP. * @member data_pi A protection index to `data` so it can reprotect itself * upon allocation. * @member size The total size of the RAWSXP to allocate. * This is computed as `size * n_bytes` in `new_lazy_raw()`, where `n_bytes` * is from `sizeof()`. */ struct lazy_raw { SEXP self; SEXP data; void* p_data; PROTECT_INDEX data_pi; r_ssize size; }; /* * @param size The size of the type you want to interpret the memory as. * @param n_bytes A `sizeof()` result for the type you are allocating * memory for. */ static inline struct lazy_raw* new_lazy_raw(r_ssize size, size_t n_bytes) { SEXP self = PROTECT(r_new_raw(sizeof(struct lazy_raw))); struct lazy_raw* p_out = (struct lazy_raw*) RAW(self); p_out->self = self; p_out->data = R_NilValue; p_out->size = size * n_bytes; UNPROTECT(1); return p_out; } /* * Allocate the lazy vector if it hasn't already been allocated. * This reprotects itself using the protection index. */ static inline void* init_lazy_raw(struct lazy_raw* p_x) { if (p_x->data != R_NilValue) { return p_x->p_data; } p_x->data = Rf_allocVector(RAWSXP, p_x->size); REPROTECT(p_x->data, p_x->data_pi); p_x->p_data = (void*) RAW(p_x->data); return p_x->p_data; } // ----------------------------------------------------------------------------- /* * @member self A RAWSXP for the struct memory. * @member data The STRSXP that gets allocated lazily. * @member p_data A constant pointer to `data`. Modification to `data` should * be done using `SET_STRING_ELT()`. * @member data_pi A protection index to `data` so it can reprotect itself * upon allocation. * @member size The total size of the STRSXP to allocate. */ struct lazy_chr { SEXP self; SEXP data; const SEXP* p_data; PROTECT_INDEX data_pi; r_ssize size; }; static inline struct lazy_chr* new_lazy_chr(r_ssize size) { SEXP self = PROTECT(r_new_raw(sizeof(struct lazy_chr))); struct lazy_chr* p_out = (struct lazy_chr*) RAW(self); p_out->self = self; p_out->data = R_NilValue; p_out->size = size; UNPROTECT(1); return p_out; } static inline const SEXP* init_lazy_chr(struct lazy_chr* p_x) { if (p_x->data != R_NilValue) { return p_x->p_data; } p_x->data = Rf_allocVector(STRSXP, p_x->size); REPROTECT(p_x->data, p_x->data_pi); p_x->p_data = STRING_PTR_RO(p_x->data); return p_x->p_data; } // ----------------------------------------------------------------------------- #define PROTECT_LAZY_VEC(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->data, &(p_info)->data_pi); \ *(p_n) += 2; \ } while (0) #endif vctrs/src/subscript-loc.h0000644000176200001440000000257514362266120015157 0ustar liggesusers#ifndef VCTRS_SUBSCRIPT_LOC_H #define VCTRS_SUBSCRIPT_LOC_H #include "vctrs-core.h" #include "utils.h" #include "subscript.h" enum subscript_missing { SUBSCRIPT_MISSING_PROPAGATE = 0, SUBSCRIPT_MISSING_REMOVE, SUBSCRIPT_MISSING_ERROR }; enum num_loc_negative { LOC_NEGATIVE_INVERT = 0, LOC_NEGATIVE_ERROR, LOC_NEGATIVE_IGNORE }; enum num_loc_oob { LOC_OOB_ERROR = 0, LOC_OOB_REMOVE, LOC_OOB_EXTEND }; enum num_loc_zero { LOC_ZERO_REMOVE = 0, LOC_ZERO_ERROR, LOC_ZERO_IGNORE }; struct location_opts { struct subscript_opts subscript_opts; enum num_loc_negative loc_negative; enum num_loc_oob loc_oob; enum num_loc_zero loc_zero; enum subscript_missing missing; }; static inline struct location_opts new_location_opts_assign(void) { return (struct location_opts) { .subscript_opts = new_subscript_opts_assign() }; } r_obj* vec_as_location(r_obj* i, r_ssize n, r_obj* names); r_obj* vec_as_location_ctxt(r_obj* subscript, r_ssize n, r_obj* names, struct vctrs_arg* arg, struct r_lazy call); r_obj* vec_as_location_opts(r_obj* subscript, r_ssize n, r_obj* names, const struct location_opts* location_opts); #endif vctrs/src/fill.c0000644000176200001440000001333514362266120013303 0ustar liggesusers#include "vctrs.h" #define INFINITE_FILL -1 static void parse_direction(SEXP x, bool* p_down, bool* p_leading); static int parse_max_fill(SEXP x); static SEXP vec_fill_missing(SEXP x, bool down, bool leading, int max_fill); // [[ register() ]] SEXP vctrs_fill_missing(SEXP x, SEXP direction, SEXP max_fill) { bool down; bool leading; parse_direction(direction, &down, &leading); int c_max_fill = parse_max_fill(max_fill); return vec_fill_missing(x, down, leading, c_max_fill); } static void vec_fill_missing_down(const int* p_na, r_ssize size, bool leading, int* p_loc); static void vec_fill_missing_down_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc); static void vec_fill_missing_up(const int* p_na, r_ssize size, bool leading, int* p_loc); static void vec_fill_missing_up_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc); static SEXP vec_fill_missing(SEXP x, bool down, bool leading, int max_fill) { r_ssize size = vec_size(x); SEXP na = PROTECT(vec_detect_missing(x)); const int* p_na = LOGICAL_RO(na); SEXP loc = PROTECT(r_new_integer(size)); int* p_loc = INTEGER(loc); const bool has_max_fill = max_fill != INFINITE_FILL; if (down) { if (has_max_fill) { vec_fill_missing_down_with_max_fill(p_na, size, leading, max_fill, p_loc); } else { vec_fill_missing_down(p_na, size, leading, p_loc); } } else { if (has_max_fill) { vec_fill_missing_up_with_max_fill(p_na, size, leading, max_fill, p_loc); } else { vec_fill_missing_up(p_na, size, leading, p_loc); } } SEXP out = vec_slice_unsafe(x, loc); UNPROTECT(2); return out; } static void vec_fill_missing_down(const int* p_na, r_ssize size, bool leading, int* p_loc) { r_ssize loc = 0; if (leading) { // Increment `loc` to the first non-missing value for (r_ssize i = 0; i < size; ++i) { if (!p_na[i]) { loc = i; break; } } // Back-fill with first non-missing value for (r_ssize i = loc - 1; i >= 0; --i) { p_loc[i] = loc + 1; } } for (r_ssize i = loc; i < size; ++i) { if (!p_na[i]) { loc = i; } p_loc[i] = loc + 1; } } static void vec_fill_missing_down_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc) { r_ssize loc = 0; if (leading) { // Increment `loc` to the first non-missing value for (r_ssize i = 0; i < size; ++i) { if (!p_na[i]) { loc = i; break; } } // Back-fill with first non-missing value with a max_fill r_ssize n_fill = 0; for (r_ssize i = loc - 1; i >= 0; --i) { if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } r_ssize n_fill = 0; for (r_ssize i = loc; i < size; ++i) { if (!p_na[i]) { loc = i; n_fill = 0; p_loc[i] = i + 1; continue; } if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } static void vec_fill_missing_up(const int* p_na, r_ssize size, bool leading, int* p_loc) { r_ssize loc = size - 1; if (leading) { // Decrement `loc` to the last non-missing value for (r_ssize i = size - 1; i >= 0; --i) { if (!p_na[i]) { loc = i; break; } } // Forward-fill with last non-missing value for (r_ssize i = loc + 1; i < size; ++i) { p_loc[i] = loc + 1; } } for (r_ssize i = loc; i >= 0; --i) { if (!p_na[i]) { loc = i; } p_loc[i] = loc + 1; } } static void vec_fill_missing_up_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc) { r_ssize loc = size - 1; if (leading) { // Decrement `loc` to the last non-missing value for (r_ssize i = size - 1; i >= 0; --i) { if (!p_na[i]) { loc = i; break; } } // Forward-fill with last non-missing value with a max_fill r_ssize n_fill = 0; for (r_ssize i = loc + 1; i < size; ++i) { if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } r_ssize n_fill = 0; for (r_ssize i = loc; i >= 0; --i) { if (!p_na[i]) { loc = i; n_fill = 0; p_loc[i] = i + 1; continue; } if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } // ----------------------------------------------------------------------------- static void stop_bad_direction(void); static void parse_direction(SEXP x, bool* p_down, bool* p_leading) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_bad_direction(); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "down")) { *p_down = true; *p_leading = false; return; } if (!strcmp(str, "up")) { *p_down = false; *p_leading = false; return; } if (!strcmp(str, "downup")) { *p_down = true; *p_leading = true; return; } if (!strcmp(str, "updown")) { *p_down = false; *p_leading = true; return; } stop_bad_direction(); never_reached("parse_direction"); } static void stop_bad_direction(void) { r_abort("`direction` must be one of \"down\", \"up\", \"downup\", or \"updown\"."); } static int parse_max_fill(r_obj* x) { if (x == R_NilValue) { return INFINITE_FILL; } x = KEEP(vec_cast(x, r_globals.empty_int, vec_args.max_fill, vec_args.empty, r_lazy_null)); if (!r_is_positive_number(x)) { r_abort("`max_fill` must be `NULL` or a single positive integer."); } int out = r_int_get(x, 0); FREE(1); return out; } vctrs/src/conditions.h0000644000176200001440000000064314315060310014520 0ustar liggesusers#ifndef VCTRS_CONDITIONS_H #define VCTRS_CONDITIONS_H #include "vctrs-core.h" r_no_return void stop_incompatible_size(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); #endif vctrs/src/order-sortedness.h0000644000176200001440000000426714315060310015657 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_SORTEDNESS_H #define VCTRS_ORDER_SORTEDNESS_H #include "vctrs-core.h" #include "order-groups.h" // ----------------------------------------------------------------------------- enum vctrs_sortedness { VCTRS_SORTEDNESS_unsorted, VCTRS_SORTEDNESS_sorted, VCTRS_SORTEDNESS_reversed, }; // ----------------------------------------------------------------------------- enum vctrs_sortedness dbl_sortedness(const double* p_x, r_ssize size, bool decreasing, bool na_last, bool nan_distinct, struct group_infos* p_group_infos); enum vctrs_sortedness int_sortedness(const int* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos); enum vctrs_sortedness chr_sortedness(const SEXP* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos); // ----------------------------------------------------------------------------- void ord_resolve_sortedness(enum vctrs_sortedness sortedness, r_ssize size, int* p_o); void ord_resolve_sortedness_chunk(enum vctrs_sortedness sortedness, r_ssize size, int* p_o); // ----------------------------------------------------------------------------- #endif vctrs/src/order-truelength.h0000644000176200001440000001264114315060310015642 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_TRUELENGTH_H #define VCTRS_ORDER_TRUELENGTH_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- // This seems to be a reasonable default to start with for tracking the unique // strings, and is what base R uses. It is expanded by 2x every time we need to // reallocate. #define TRUELENGTH_SIZE_ALLOC_DEFAULT 10000 // ----------------------------------------------------------------------------- /* * Struct of information required to track unique strings and their truelengths * when ordering them * * @member self A RAWSXP for the struct memory. * * @members strings,p_strings,strings_pi CHARSXPs originally containing a * TRUELENGTH value >0, implying that base R was already using it and we * need to reset it. These are rare. * @members truelengths,p_truelengths,truelengths_pi The original TRUELENGTHs * of `strings`. * @member n_strings_alloc The allocated length of `strings` * (and `truelengths`). * @member n_strings_used The number of `strings` currently in use. * * @members uniques,p_uniques,uniques_pi Unique CHARSXPs. Will be sorted in * place by `chr_mark_sorted_uniques()`. We reset the TRUELENGTH of these * to 0 (R's default) after ordering, then reset the TRUELENGTH of `strings`. * @member n_uniques_alloc The allocated length of `uniques`. * @member n_uniques_used The number of `uniques` currently in use. * * @members sizes,p_sizes,sizes_pi The sizes of each individual CHARSXP in * `uniques`. Kept in the same ordering as `uniques` while sorting. * @members sizes_aux, p_sizes_aux, sizes_aux_pi Auxiliary vector of sizes * that is used as working memory when sorting `sizes`. * @member n_sizes_alloc The allocated length of `sizes` (and `sizes_aux`). * @member n_sizes_used The number of `sizes` currently in use. * @member max_string_size The maximum string size of the unique strings stored * in `uniques`. This controls the depth of recursion in `chr_radix_order()`. * * @member n_max The maximum allowed allocation size for the SEXP * objects in this struct. Always set to the size of `x`, which would occur if * all strings were unique. */ struct truelength_info { SEXP self; SEXP strings; SEXP* p_strings; PROTECT_INDEX strings_pi; SEXP truelengths; r_ssize* p_truelengths; PROTECT_INDEX truelengths_pi; r_ssize n_strings_alloc; r_ssize n_strings_used; SEXP uniques; SEXP* p_uniques; PROTECT_INDEX uniques_pi; r_ssize n_uniques_alloc; r_ssize n_uniques_used; SEXP sizes; int* p_sizes; PROTECT_INDEX sizes_pi; SEXP sizes_aux; int* p_sizes_aux; PROTECT_INDEX sizes_aux_pi; r_ssize n_sizes_alloc; r_ssize n_sizes_used; int max_string_size; r_ssize n_max; }; #define PROTECT_TRUELENGTH_INFO(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->strings, &(p_info)->strings_pi); \ PROTECT_WITH_INDEX((p_info)->truelengths, &(p_info)->truelengths_pi); \ PROTECT_WITH_INDEX((p_info)->uniques, &(p_info)->uniques_pi); \ PROTECT_WITH_INDEX((p_info)->sizes, &(p_info)->sizes_pi); \ PROTECT_WITH_INDEX((p_info)->sizes_aux, &(p_info)->sizes_aux_pi); \ *(p_n) += 6; \ } while(0) struct truelength_info* new_truelength_info(r_ssize n_max); void truelength_reset(struct truelength_info* p_truelength_info); void truelength_realloc_strings(struct truelength_info* p_truelength_info); void truelength_realloc_uniques(struct truelength_info* p_truelength_info); void truelength_realloc_sizes(struct truelength_info* p_truelength_info); static inline void truelength_save_string(SEXP string, r_ssize truelength, struct truelength_info* p_truelength_info) { if (p_truelength_info->n_strings_used == p_truelength_info->n_strings_alloc) { truelength_realloc_strings(p_truelength_info); } p_truelength_info->p_strings[p_truelength_info->n_strings_used] = string; p_truelength_info->p_truelengths[p_truelength_info->n_strings_used] = truelength; ++p_truelength_info->n_strings_used; } static inline void truelength_save_unique(SEXP unique, struct truelength_info* p_truelength_info) { if (p_truelength_info->n_uniques_used == p_truelength_info->n_uniques_alloc) { truelength_realloc_uniques(p_truelength_info); } p_truelength_info->p_uniques[p_truelength_info->n_uniques_used] = unique; ++p_truelength_info->n_uniques_used; } static inline void truelength_save_size(int size, struct truelength_info* p_truelength_info) { if (p_truelength_info->n_sizes_used == p_truelength_info->n_sizes_alloc) { truelength_realloc_sizes(p_truelength_info); } p_truelength_info->p_sizes[p_truelength_info->n_sizes_used] = size; ++p_truelength_info->n_sizes_used; } // ----------------------------------------------------------------------------- #endif vctrs/src/arg.c0000644000176200001440000001626214316625146013136 0ustar liggesusers#include "vctrs.h" #include "decl/arg-decl.h" // Materialising argument tags ------------------------------------------ #define DEFAULT_ARG_BUF_SIZE 100 /** * This takes a `struct vctrs_arg{}` linked list and calls the * recursive function `fill_arg_buffer()`. It allocates a buffer in a * RAWSXP of size 100, which is grown by a factor of 1.5 each time the * `fill()` methods return a negative value. Returns a character * vector of size 1 containing the materialised argument tag. */ r_obj* vctrs_arg(struct vctrs_arg* arg) { if (!arg) { return chrs_empty; } r_ssize next_size = DEFAULT_ARG_BUF_SIZE; r_ssize size; r_obj* buf_holder = KEEP(r_null); char* buf; do { size = next_size; FREE(1); buf_holder = KEEP(r_alloc_raw(size)); buf = (char*) r_raw_begin(buf_holder); // Reallocate a larger buffer at the next iteration if the current // buffer turns out too small next_size *= 1.5; } while (fill_arg_buffer(arg, buf, 0, size) < 0); r_obj* out = r_chr(buf); FREE(1); return out; } // vmax-protected const char* vec_arg_format(struct vctrs_arg* p_arg) { r_obj* arg = KEEP(vctrs_arg(p_arg)); const char* out = r_format_error_arg(arg); FREE(1); return out; } /** * Takes a `struct vctrs_arg{}` linked list and a buffer and calls the * `fill()` method on each of those, recursively. Unless an error * occurred, it returns the current size written to the buffer so we * can track the remaining memory available in the buffer after * recursion. */ static int fill_arg_buffer(struct vctrs_arg* arg, char* buf, r_ssize cur_size, r_ssize tot_size) { if (arg->parent) { cur_size = fill_arg_buffer(arg->parent, buf, cur_size, tot_size); if (cur_size < 0) { return cur_size; } } r_ssize written = arg->fill(arg->data, buf + cur_size, tot_size - cur_size); if (written < 0) { return written; } else { return cur_size + written; } } static r_ssize str_arg_fill(const char* data, char* buf, r_ssize remaining) { size_t len = strlen(data); if (len >= remaining) { return -1; } memcpy(buf, data, len); buf[len] = '\0'; return len; } // Objects ------------------------------------------------------------- // Simple wrapper around a `const char*` argument tag struct vctrs_arg new_wrapper_arg(struct vctrs_arg* parent, const char* arg) { return (struct vctrs_arg) { .parent = parent, .fill = &wrapper_arg_fill, .data = (void*) arg }; } static r_ssize wrapper_arg_fill(void* data, char* buf, r_ssize remaining) { return str_arg_fill((const char*) data, buf, remaining); } // Wrapper that accesses a symbol in an environment, for lazy evaluation struct vctrs_arg new_lazy_arg(struct r_lazy* arg) { return (struct vctrs_arg) { .parent = NULL, .fill = &lazy_arg_fill, .data = arg }; } static r_ssize lazy_arg_fill(void* data_, char* buf, r_ssize remaining) { struct r_lazy* data = (struct r_lazy*) data_; r_obj* arg = KEEP(r_lazy_eval(*data)); const char* arg_str = ""; if (r_is_string(arg)) { arg_str = r_chr_get_c_string(arg, 0); } else if (arg != r_null) { r_abort("`arg` must be a string."); } r_ssize out = str_arg_fill(arg_str, buf, remaining); FREE(1); return out; } // Wrapper around a subscript, either numeric or character struct subscript_arg_data { struct vctrs_arg self; r_obj* names; r_ssize n; r_ssize* p_i; }; struct vctrs_arg* new_subscript_arg_vec(struct vctrs_arg* parent, r_obj* x, r_ssize* p_i) { r_obj* names = KEEP(vec_names(x)); struct vctrs_arg* p_arg = new_subscript_arg(parent, names, vec_size(x), p_i); FREE(1); return p_arg; } struct vctrs_arg* new_subscript_arg(struct vctrs_arg* parent, r_obj* names, r_ssize n, r_ssize* p_i) { r_obj* shelter = KEEP(r_alloc_list(2)); r_list_poke(shelter, 0, r_alloc_raw(sizeof(struct subscript_arg_data))); r_list_poke(shelter, 1, names); struct subscript_arg_data* p_data = r_raw_begin(r_list_get(shelter, 0)); p_data->self = (struct vctrs_arg) { .shelter = shelter, .parent = parent, .fill = &subscript_arg_fill, .data = p_data }; p_data->names = names; p_data->n = n; p_data->p_i = p_i; FREE(1); return (struct vctrs_arg*) p_data; } static r_ssize subscript_arg_fill(void* p_data_, char* buf, r_ssize remaining) { struct subscript_arg_data* p_data = (struct subscript_arg_data*) p_data_; r_ssize i = *p_data->p_i; r_obj* names = p_data->names; r_ssize n = p_data->n; if (i >= n) { r_stop_internal("`i = %" R_PRI_SSIZE "` can't be greater than `vec_size(x) = %" R_PRI_SSIZE "`.", i, n); } int len = 0; bool child = !is_empty_arg(p_data->self.parent); if (child) { if (r_has_name_at(names, i)) { len = snprintf(buf, remaining, "$%s", r_chr_get_c_string(names, i)); } else { len = snprintf(buf, remaining, "[[%td]]", i + 1); } } else { if (r_has_name_at(names, i)) { len = snprintf(buf, remaining, "%s", r_chr_get_c_string(names, i)); } else { len = snprintf(buf, remaining, "..%td", i + 1); } } if (len >= remaining) { return -1; } else { return len; } } // Wrapper around a counter representing the current position of the // argument struct vctrs_arg new_counter_arg(struct vctrs_arg* parent, struct arg_data_counter* data) { return (struct vctrs_arg) { .parent = parent, .fill = &counter_arg_fill, .data = (void*) data }; } struct arg_data_counter new_counter_arg_data(struct vctrs_arg* p_parent, r_ssize* i, r_obj** names, r_ssize* names_i) { return (struct arg_data_counter) { .p_parent = p_parent, .i = i, .names = names, .names_i = names_i }; } static r_ssize counter_arg_fill(void* data_, char* buf, r_ssize remaining) { struct arg_data_counter* data = (struct arg_data_counter*) data_; r_ssize i = *data->i; r_obj* names = *data->names; r_ssize names_i = *data->names_i; int len; bool child = !is_empty_arg(data->p_parent); // FIXME: Check for syntactic names if (child) { if (r_has_name_at(names, names_i)) { len = snprintf(buf, remaining, "$%s", r_chr_get_c_string(names, names_i)); } else { len = snprintf(buf, remaining, "[[%" R_PRI_SSIZE "]]", i + 1); } } else { if (r_has_name_at(names, names_i)) { len = snprintf(buf, remaining, "%s", r_chr_get_c_string(names, names_i)); } else { len = snprintf(buf, remaining, "..%" R_PRI_SSIZE, i + 1); } } if (len >= remaining) { return -1; } else { return len; } } static bool is_empty_arg(struct vctrs_arg* arg) { if (!arg) { return true; } char tmp[1]; return arg->fill(arg->data, tmp, 1) == 0; } vctrs/src/type-factor.h0000644000176200001440000000035714315060310014606 0ustar liggesusers#ifndef VCTRS_TYPE_FACTOR_H #define VCTRS_TYPE_FACTOR_H #include "vctrs-core.h" SEXP fct_ptype2(const struct ptype2_opts* opts); SEXP ord_ptype2(const struct ptype2_opts* opts); SEXP ord_as_ordered(const struct cast_opts* opts); #endif vctrs/src/expand.h0000644000176200001440000000062314362266120013635 0ustar liggesusers#ifndef VCTRS_EXPAND_H #define VCTRS_EXPAND_H #include "vctrs-core.h" #include "names.h" enum vctrs_expand_vary { VCTRS_EXPAND_VARY_slowest = 0, VCTRS_EXPAND_VARY_fastest = 1 }; r_obj* vec_expand_grid(r_obj* xs, enum vctrs_expand_vary vary, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call); #endif vctrs/src/order-collate.h0000644000176200001440000000221314315060310015076 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_COLLATE_H #define VCTRS_ORDER_COLLATE_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- /* * `proxy_apply_chr_proxy_collate()` iterates over `proxy`, applying * `chr_proxy_collate` on any character vectors that it detects. * * It expects that: * - If `proxy` is a data frame, it has been flattened by its corresponding * `vec_proxy_*()` function. * - All character vectors in `proxy` have already been normalized to UTF-8 * by `vec_normalize_encoding()`. */ SEXP proxy_apply_chr_proxy_collate(SEXP proxy, SEXP chr_proxy_collate); // ----------------------------------------------------------------------------- #endif vctrs/src/subscript-loc.c0000644000176200001440000005607514315060310015145 0ustar liggesusers#include "vctrs.h" #include "decl/subscript-loc-decl.h" r_obj* vec_as_location(r_obj* subscript, r_ssize n, r_obj* names) { const struct location_opts opts = { 0 }; return vec_as_location_opts(subscript, n, names, &opts); } r_obj* vec_as_location_ctxt(r_obj* subscript, r_ssize n, r_obj* names, struct vctrs_arg* arg, struct r_lazy call) { struct location_opts opts = { .subscript_opts = { .subscript_arg = arg, .call = call } }; return vec_as_location_opts(subscript, n, names, &opts); } r_obj* vec_as_location_opts(r_obj* subscript, r_ssize n, r_obj* names, const struct location_opts* opts) { ERR err = NULL; subscript = vec_as_subscript_opts(subscript, &opts->subscript_opts, &err); KEEP2(subscript, err); if (err) { r_cnd_signal(err); r_stop_unreachable(); } r_obj* out = r_null; switch (r_typeof(subscript)) { case R_TYPE_null: out = r_globals.empty_int; break; case R_TYPE_logical: out = lgl_as_location(subscript, n, opts); break; case R_TYPE_integer: out = int_as_location(subscript, n, opts); break; case R_TYPE_double: out = dbl_as_location(subscript, n, opts); break; case R_TYPE_character: out = chr_as_location(subscript, names, opts); break; default: r_stop_unimplemented_type(r_typeof(subscript)); } FREE(2); return out; } static r_obj* lgl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { r_ssize subscript_n = r_length(subscript); if (subscript_n == n) { bool na_propagate = false; switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: na_propagate = true; break; case SUBSCRIPT_MISSING_REMOVE: break; case SUBSCRIPT_MISSING_ERROR: { if (lgl_any_na(subscript)) { stop_subscript_missing(subscript, opts); } break; } } return r_lgl_which(subscript, na_propagate); } /* A single `TRUE` or `FALSE` index is recycled to the full vector * size. This means `TRUE` is synonym for missing index (subscript.e. no * subsetting) and `FALSE` is synonym for empty index. * * We could return the missing argument as sentinel to avoid * materialising the index vector for the `TRUE` case but this would * make `vec_as_location()` an option type just to optimise a rather * uncommon case. */ if (subscript_n == 1) { int elt = r_lgl_get(subscript, 0); r_ssize recycle_size = n; r_obj* out = r_null; r_keep_loc out_shelter; KEEP_HERE(out, &out_shelter); if (elt == r_globals.na_lgl) { switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: { out = r_alloc_integer(n); KEEP_AT(out, out_shelter); r_int_fill(out, r_globals.na_int, n); break; } case SUBSCRIPT_MISSING_REMOVE: { out = r_copy(r_globals.empty_int); KEEP_AT(out, out_shelter); recycle_size = 0; break; } case SUBSCRIPT_MISSING_ERROR: { stop_subscript_missing(subscript, opts); } } } else if (elt) { out = r_alloc_integer(n); KEEP_AT(out, out_shelter); r_int_fill_seq(out, 1, n); } else { out = r_copy(r_globals.empty_int); KEEP_AT(out, out_shelter); recycle_size = 0; } r_obj* nms = KEEP(r_names(subscript)); if (nms != R_NilValue) { r_obj* recycled_nms = r_alloc_character(recycle_size); r_attrib_poke_names(out, recycled_nms); r_chr_fill(recycled_nms, r_chr_get(nms, 0), recycle_size); } FREE(2); return out; } r_obj* n_obj = KEEP(r_int(n)); stop_indicator_size(subscript, n_obj, opts); r_stop_unreachable(); } static r_obj* int_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { const int* data = r_int_cbegin(subscript); r_ssize loc_n = r_length(subscript); // Zeros need to be filtered out from the subscript vector. // `int_invert_location()` filters them out for negative indices, but // positive indices need to go through and `int_filter_zero()`. r_ssize n_zero = 0; r_ssize n_oob = 0; r_ssize n_missing = 0; for (r_ssize i = 0; i < loc_n; ++i, ++data) { int elt = *data; if (elt == r_globals.na_int) { switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: break; case SUBSCRIPT_MISSING_REMOVE: ++n_missing; break; case SUBSCRIPT_MISSING_ERROR: stop_subscript_missing(subscript, opts); } } else if (elt == 0) { switch (opts->loc_zero) { case LOC_ZERO_REMOVE: ++n_zero; break; case LOC_ZERO_ERROR: stop_location_zero(subscript, opts); case LOC_ZERO_IGNORE: break; } } else if (elt < 0) { switch (opts->loc_negative) { case LOC_NEGATIVE_INVERT: return int_invert_location(subscript, n, opts); case LOC_NEGATIVE_ERROR: stop_location_negative(subscript, opts); case LOC_NEGATIVE_IGNORE: { if (abs(elt) > n) { switch (opts->loc_oob) { case LOC_OOB_ERROR: stop_subscript_oob_location(subscript, n, opts); case LOC_OOB_EXTEND: stop_subscript_negative_oob_location(subscript, n, opts); case LOC_OOB_REMOVE: ++n_oob; break; } } break; } } } else if (elt > n) { switch (opts->loc_oob) { case LOC_OOB_ERROR: stop_subscript_oob_location(subscript, n, opts); case LOC_OOB_EXTEND: ++n_oob; break; case LOC_OOB_REMOVE: ++n_oob; break; } } } r_keep_loc subscript_shelter; KEEP_HERE(subscript, &subscript_shelter); if (n_missing > 0) { subscript = int_filter_missing(subscript, n_missing); KEEP_AT(subscript, subscript_shelter); } if (n_zero > 0) { subscript = int_filter_zero(subscript, n_zero); KEEP_AT(subscript, subscript_shelter); } if (n_oob > 0) { switch (opts->loc_oob) { case LOC_OOB_ERROR: { r_stop_internal("An error should have been thrown on the first OOB value."); } case LOC_OOB_EXTEND: { int_check_consecutive(subscript, n, n_oob, opts); break; } case LOC_OOB_REMOVE: { subscript = int_filter_oob(subscript, n, n_oob); KEEP_AT(subscript, subscript_shelter); break; } } } FREE(1); return subscript; } static r_obj* int_invert_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { const int* data = r_int_cbegin(subscript); r_ssize loc_n = r_length(subscript); r_obj* sel = KEEP(r_alloc_logical(n)); r_lgl_fill(sel, 1, n); int* sel_data = r_lgl_begin(sel); for (r_ssize i = 0; i < loc_n; ++i, ++data) { int j = *data; if (j == r_globals.na_int) { // Following base R by erroring on `missing = "propagate"`, e.g. `1[c(NA, -1)]`. // Doesn't make sense to invert an `NA`, so we can't meaningfully propagate. switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: stop_location_negative_missing(subscript, opts); case SUBSCRIPT_MISSING_REMOVE: continue; case SUBSCRIPT_MISSING_ERROR: stop_location_negative_missing(subscript, opts); } } if (j >= 0) { if (j == 0) { switch (opts->loc_zero) { case LOC_ZERO_REMOVE: continue; case LOC_ZERO_IGNORE: continue; case LOC_ZERO_ERROR: stop_location_zero(subscript, opts); } } else { stop_location_negative_positive(subscript, opts); } } j = -j; if (j > n) { switch (opts->loc_oob) { case LOC_OOB_REMOVE: { continue; } case LOC_OOB_EXTEND: case LOC_OOB_ERROR: { // Setting `oob` to `"error"` and `"extend"` result in errors here, // because extending with a negative subscript is nonsensical stop_subscript_negative_oob_location(subscript, n, opts); } } } sel_data[j - 1] = 0; } r_obj* out = lgl_as_location(sel, n, opts); FREE(1); return out; } static r_obj* int_filter(r_obj* subscript, r_ssize n_filter, int value) { const r_ssize size = r_length(subscript); const int* v_subscript = r_int_cbegin(subscript); r_obj* out = KEEP(r_alloc_integer(size - n_filter)); int* v_out = r_int_begin(out); r_obj* names = r_names(subscript); const bool has_names = names != r_null; r_obj* const* v_names = NULL; r_obj* out_names = r_null; if (has_names) { v_names = r_chr_cbegin(names); out_names = r_alloc_character(size - n_filter); r_attrib_poke_names(out, out_names); } r_ssize j = 0; for (r_ssize i = 0; i < size; ++i) { const int elt = v_subscript[i]; if (elt != value) { v_out[j] = elt; if (has_names) { r_chr_poke(out_names, j, v_names[i]); } ++j; } } FREE(1); return out; } static r_obj* int_filter_zero(r_obj* subscript, r_ssize n_zero) { return int_filter(subscript, n_zero, 0); } static r_obj* int_filter_missing(r_obj* subscript, r_ssize n_missing) { return int_filter(subscript, n_missing, r_globals.na_int); } static r_obj* int_filter_oob(r_obj* subscript, r_ssize n, r_ssize n_oob) { const r_ssize n_subscript = r_length(subscript); const r_ssize n_out = n_subscript - n_oob; const int* v_subscript = r_int_cbegin(subscript); r_obj* out = KEEP(r_alloc_integer(n_out)); int* v_out = r_int_begin(out); r_obj* names = r_names(subscript); const bool has_names = names != r_null; r_obj* const* v_names = NULL; r_obj* out_names = r_null; if (has_names) { v_names = r_chr_cbegin(names); out_names = r_alloc_character(n_out); r_attrib_poke_names(out, out_names); } r_ssize j = 0; for (r_ssize i = 0; i < n_subscript; ++i) { const int elt = v_subscript[i]; if (abs(elt) <= n || elt == r_globals.na_int) { v_out[j] = elt; if (has_names) { r_chr_poke(out_names, j, v_names[i]); } ++j; } } FREE(1); return out; } static void int_check_consecutive(r_obj* subscript, r_ssize n, r_ssize n_extend, const struct location_opts* opts) { r_obj* extended = KEEP(r_alloc_integer(n_extend)); int* p_extended = r_int_begin(extended); int i_extend = 0; int new_n = n; int* p_subscript = r_int_begin(subscript); r_ssize n_subscript = Rf_length(subscript); for (r_ssize i = 0; i < n_subscript; ++i) { int elt = p_subscript[i]; // Missing value also covered here if (elt <= n) { continue; } // Special case: appending in ascending sequence at the end // should not require any sorting if (elt - 1 == new_n) { ++new_n; --n_extend; } else { p_extended[i_extend++] = elt - 1; } } if (n_extend != i_extend) { r_stop_internal("int_check_consecutive", "n_extend (%d) != i_extend (%d).", n_extend, i_extend); } if (i_extend == 0) { FREE(1); return; } // Only the first i_extend entries of the array are populated, // the rest is never touched. qsort(p_extended, i_extend, sizeof(int), &qsort_int_compare_scalar); for (r_ssize i = 0; i < i_extend; ++i) { int elt = p_extended[i]; if (elt != new_n + i) { stop_location_oob_non_consecutive(subscript, n, opts); } } FREE(1); } static r_obj* dbl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { subscript = KEEP(vec_cast(subscript, r_globals.empty_int, vec_args.empty, vec_args.empty, r_lazy_null)); subscript = int_as_location(subscript, n, opts); FREE(1); return subscript; } static r_obj* chr_as_location(r_obj* subscript, r_obj* names, const struct location_opts* opts) { if (names == R_NilValue) { r_abort("Can't use character names to index an unnamed vector."); } if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector."); } bool remove_missing = false; r_obj* matched = KEEP(Rf_match(names, subscript, r_globals.na_int)); r_attrib_poke_names(matched, r_names(subscript)); r_ssize n = r_length(matched); int* p = r_int_begin(matched); r_obj* const * ip = r_chr_cbegin(subscript); for (r_ssize k = 0; k < n; ++k) { const r_obj* elt = ip[k]; if (elt == r_strs.empty) { // `""` never matches, even if `names` contains a `""` name stop_subscript_empty(subscript, opts); } if (elt == r_globals.na_str) { // `NA_character_` never matches, even if `names` contains a missing name p[k] = r_globals.na_int; switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: continue; case SUBSCRIPT_MISSING_REMOVE: remove_missing = true; continue; case SUBSCRIPT_MISSING_ERROR: stop_subscript_missing(subscript, opts); } } if (p[k] == r_globals.na_int) { stop_subscript_oob_name(subscript, names, opts); } } if (remove_missing) { if (opts->missing != SUBSCRIPT_MISSING_REMOVE) { r_stop_internal("`missing = 'remove'` must be set if `n_missing > 0`."); } r_obj* not_missing = KEEP(vec_detect_complete(matched)); matched = KEEP(vec_slice(matched, not_missing)); FREE(2); } KEEP(matched); FREE(2); return matched; } // [[ register() ]] r_obj* ffi_as_location(r_obj* subscript, r_obj* ffi_n, r_obj* names, r_obj* loc_negative, r_obj* loc_oob, r_obj* loc_zero, r_obj* missing, r_obj* frame) { r_ssize n = 0; if (ffi_n == r_null && r_typeof(subscript) == R_TYPE_character) { n = r_length(subscript); } else { if (r_is_object(ffi_n) || r_typeof(ffi_n) != R_TYPE_integer) { ffi_n = vec_cast(ffi_n, r_globals.empty_int, vec_args.n, vec_args.empty, (struct r_lazy) { .x = frame, .env = r_null }); } KEEP(ffi_n); if (r_length(ffi_n) != 1) { r_stop_internal("`n` must be a scalar number."); } n = r_int_get(ffi_n, 0); FREE(1); } struct r_lazy arg_ = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_); struct r_lazy call = (struct r_lazy) { .x = syms_call, .env = frame }; struct location_opts opts = { .subscript_opts = { .subscript_arg = &arg, .call = call }, .missing = parse_subscript_arg_missing(missing, call), .loc_negative = parse_loc_negative(loc_negative, call), .loc_oob = parse_loc_oob(loc_oob, call), .loc_zero = parse_loc_zero(loc_zero, call) }; return vec_as_location_opts(subscript, n, names, &opts); } static enum subscript_missing parse_subscript_arg_missing(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_subscript_arg_missing(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "propagate")) return SUBSCRIPT_MISSING_PROPAGATE; if (!strcmp(str, "remove")) return SUBSCRIPT_MISSING_REMOVE; if (!strcmp(str, "error")) return SUBSCRIPT_MISSING_ERROR; stop_subscript_arg_missing(call); r_stop_unreachable(); } static enum num_loc_negative parse_loc_negative(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_bad_negative(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "invert")) return LOC_NEGATIVE_INVERT; if (!strcmp(str, "error")) return LOC_NEGATIVE_ERROR; if (!strcmp(str, "ignore")) return LOC_NEGATIVE_IGNORE; stop_bad_negative(call); r_stop_unreachable(); } static enum num_loc_oob parse_loc_oob(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_bad_oob(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "error")) return LOC_OOB_ERROR; if (!strcmp(str, "remove")) return LOC_OOB_REMOVE; if (!strcmp(str, "extend")) return LOC_OOB_EXTEND; stop_bad_oob(call); r_stop_unreachable(); } static enum num_loc_zero parse_loc_zero(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_bad_zero(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "remove")) return LOC_ZERO_REMOVE; if (!strcmp(str, "error")) return LOC_ZERO_ERROR; if (!strcmp(str, "ignore")) return LOC_ZERO_IGNORE; stop_bad_zero(call); r_stop_unreachable(); } static void stop_subscript_arg_missing(struct r_lazy call) { r_abort_call(call.env, "`missing` must be one of \"propagate\", \"remove\", or \"error\"."); } static void stop_bad_negative(struct r_lazy call) { r_abort_call(call.env, "`negative` must be one of \"invert\", \"error\", or \"ignore\"."); } static void stop_bad_oob(struct r_lazy call) { r_abort_call(call.env, "`oob` must be one of \"error\", \"remove\", or \"extend\"."); } static void stop_bad_zero(struct r_lazy call) { r_abort_call(call.env, "`zero` must be one of \"remove\", \"error\", or \"ignore\"."); } static void stop_subscript_missing(r_obj* i, const struct location_opts* opts) { r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask2(r_sym("stop_subscript_missing"), syms_i, i, syms_call, call); r_stop_unreachable(); } static void stop_subscript_empty(r_obj* i, const struct location_opts* opts) { r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask2(r_sym("stop_subscript_empty"), syms_i, i, syms_call, call); r_stop_unreachable(); } static void stop_location_negative_missing(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_negative_missing"), syms_i, i, syms_subscript_arg, arg, syms_call, call, syms_subscript_action, get_opts_action(&opts->subscript_opts)); r_stop_unreachable(); } static void stop_location_negative_positive(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_negative_positive"), syms_i, i, syms_subscript_arg, arg, syms_call, call, syms_subscript_action, get_opts_action(&opts->subscript_opts)); r_stop_unreachable(); } static void stop_subscript_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts) { r_obj* size_obj = KEEP(r_int(size)); r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask6(r_sym("stop_subscript_oob"), syms_i, i, syms_subscript_type, chrs_numeric, syms_size, size_obj, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_subscript_negative_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts) { struct location_opts error_opts = *opts; error_opts.subscript_opts.action = SUBSCRIPT_ACTION_NEGATE; stop_subscript_oob_location(i, size, &error_opts); } static void stop_subscript_oob_name(r_obj* i, r_obj* names, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask6(r_sym("stop_subscript_oob"), syms_i, i, syms_subscript_type, chrs_character, syms_names, names, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_location_negative(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_negative"), syms_i, i, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_location_zero(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_zero"), syms_i, i, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_indicator_size(r_obj* i, r_obj* n, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask5(r_sym("stop_indicator_size"), syms_i, i, syms_n, n, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_location_oob_non_consecutive(r_obj* i, r_ssize size, const struct location_opts* opts) { r_obj* size_obj = KEEP(r_int(size)); r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask5(r_sym("stop_location_oob_non_consecutive"), syms_i, i, syms_size, size_obj, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); FREE(1); r_stop_unreachable(); } void vctrs_init_subscript_loc(r_obj* ns) { } vctrs/src/altrep.h0000644000176200001440000000413014315060310013631 0ustar liggesusers#ifndef VCTRS_ALTREP_H #define VCTRS_ALTREP_H #include "Rversion.h" #if (R_VERSION < R_Version(3, 5, 0)) || \ (defined(_WIN32) && R_VERSION == R_Version(3, 5, 0)) # define HAS_ALTREP 0 #else # define HAS_ALTREP 1 #endif #if !HAS_ALTREP # define ALTREP(x) false # define ALTVEC_EXTRACT_SUBSET_PROXY(x, indx, call) \ ((void) x, (void) indx, (void) call, NULL) #else #include "R_ext/Altrep.h" #define ALTREP_METHODS \ R_altrep_UnserializeEX_method_t UnserializeEX; \ R_altrep_Unserialize_method_t Unserialize; \ R_altrep_Serialized_state_method_t Serialized_state; \ R_altrep_DuplicateEX_method_t DuplicateEX; \ R_altrep_Duplicate_method_t Duplicate; \ R_altrep_Coerce_method_t Coerce; \ R_altrep_Inspect_method_t Inspect; \ R_altrep_Length_method_t Length #define ALTVEC_METHODS \ ALTREP_METHODS; \ R_altvec_Dataptr_method_t Dataptr; \ R_altvec_Dataptr_or_null_method_t Dataptr_or_null; \ R_altvec_Extract_subset_method_t Extract_subset typedef struct { ALTREP_METHODS; } altrep_methods_t; typedef struct { ALTVEC_METHODS; } altvec_methods_t; #define CLASS_METHODS_TABLE(type_class) STDVEC_DATAPTR(type_class) #define GENERIC_METHODS_TABLE(x, type_class) \ ((type_class##_methods_t *) CLASS_METHODS_TABLE(ALTREP_CLASS(x))) #define ALTREP_METHODS_TABLE(x) GENERIC_METHODS_TABLE(x, altrep) #define ALTVEC_METHODS_TABLE(x) GENERIC_METHODS_TABLE(x, altvec) #define DISPATCH_TARGET_HELPER(x, ...) x #define DISPATCH_TARGET(...) DISPATCH_TARGET_HELPER(__VA_ARGS__, dummy) #define DO_DISPATCH(type, fun, ...) \ type##_METHODS_TABLE(DISPATCH_TARGET(__VA_ARGS__))->fun(__VA_ARGS__) #define ALTREP_DISPATCH(fun, ...) DO_DISPATCH(ALTREP, fun, __VA_ARGS__) #define ALTVEC_DISPATCH(fun, ...) DO_DISPATCH(ALTVEC, fun, __VA_ARGS__) static inline SEXP ALTVEC_EXTRACT_SUBSET_PROXY(SEXP x, SEXP indx, SEXP call) { return ALTVEC_DISPATCH(Extract_subset, x, indx, call); } #endif #endif vctrs/src/complete.c0000644000176200001440000001353314315060310014154 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" // ----------------------------------------------------------------------------- static SEXP vec_slice_complete(SEXP x); // [[ register() ]] SEXP vctrs_slice_complete(SEXP x) { return vec_slice_complete(x); } static SEXP vec_locate_complete(SEXP x); static SEXP vec_slice_complete(SEXP x) { SEXP loc = PROTECT(vec_locate_complete(x)); // Skip `vec_as_location()` in `vec_slice()` SEXP out = vec_slice_unsafe(x, loc); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_locate_complete(SEXP x) { return vec_locate_complete(x); } static SEXP vec_locate_complete(SEXP x) { SEXP where = PROTECT(vec_detect_complete(x)); SEXP out = r_lgl_which(where, false); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_detect_complete(SEXP x) { return vec_detect_complete(x); } static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out); // [[ include("complete.h") ]] SEXP vec_detect_complete(SEXP x) { SEXP proxy = PROTECT(vec_proxy_equal(x)); R_len_t size = vec_size(proxy); SEXP out = PROTECT(r_new_logical(size)); int* p_out = LOGICAL(out); // Initialize assuming fully complete for (R_len_t i = 0; i < size; ++i) { p_out[i] = 1; } vec_detect_complete_switch(proxy, size, p_out); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- static inline void lgl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void int_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void dbl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void cpl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void chr_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void raw_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void list_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void df_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_integer: int_detect_complete(x, size, p_out); break; case VCTRS_TYPE_double: dbl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_complex: cpl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_character: chr_detect_complete(x, size, p_out); break; case VCTRS_TYPE_raw: raw_detect_complete(x, size, p_out); break; case VCTRS_TYPE_list: list_detect_complete(x, size, p_out); break; case VCTRS_TYPE_dataframe: df_detect_complete(x, size, p_out); break; case VCTRS_TYPE_scalar: r_stop_internal("Can't detect missing values in scalars."); default: stop_unimplemented_vctrs_type("vec_detect_complete", vec_proxy_typeof(x)); } } // ----------------------------------------------------------------------------- /* * Avoid the temptation to add an extra if branch at the start of the for * loop like: * * ``` * if (!p_out[i]) { * continue; * } * ``` * * In theory this avoids calculations if we already know the row is incomplete, * but in practice it can wreck performance. I imagine it is due to the cost * of the extra branch + the volatility of this value, causing the result of * the branch to be "guessed" incorrectly many times. For example, the vctrs * result here gets 6x slower (i.e. slower than the R solution) by adding that * branch. * * ``` * # Place many NA values randomly in the first column * first <- sample(c(1, NA, 3), size = 1e6, replace = TRUE) * cols <- rep_len(list(rep(1, 1e6)), 100) * cols <- c(list(first), cols) * names(cols) <- paste0("a", 1:length(cols)) * df <- new_data_frame(cols) * bench::mark(vec_detect_complete(df), complete.cases(df)) * ``` */ #define VEC_DETECT_COMPLETE(CTYPE, CONST_DEREF, IS_MISSING) { \ const CTYPE* p_x = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i) { \ const CTYPE elt = p_x[i]; \ \ if (IS_MISSING(elt)) { \ p_out[i] = 0; \ } \ } \ } static inline void lgl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(int, LOGICAL_RO, lgl_is_missing); } static inline void int_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(int, INTEGER_RO, int_is_missing); } static inline void dbl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(double, REAL_RO, dbl_is_missing); } static inline void cpl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(Rcomplex, COMPLEX_RO, cpl_is_missing); } static inline void chr_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(SEXP, STRING_PTR_RO, chr_is_missing); } static inline void raw_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(unsigned char, RAW_RO, raw_is_missing); } static inline void list_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(SEXP, VECTOR_PTR_RO, list_is_missing); } #undef VEC_DETECT_COMPLETE // ----------------------------------------------------------------------------- static inline void df_detect_complete(SEXP x, R_len_t size, int* p_out) { r_ssize n_cols = r_length(x); const SEXP* p_x = VECTOR_PTR_RO(x); for (r_ssize i = 0; i < n_cols; ++i) { vec_detect_complete_switch(p_x[i], size, p_out); } } vctrs/src/proxy.c0000644000176200001440000001734314511320527013537 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/proxy-decl.h" r_obj* vec_proxy(r_obj* x) { return vec_proxy_2(x, VCTRS_RECURSE_false); } r_obj* vec_proxy_recurse(r_obj* x) { return vec_proxy_2(x, VCTRS_RECURSE_true); } static r_obj* vec_proxy_2(r_obj* x, enum vctrs_recurse recurse) { struct vctrs_type_info info = vec_type_info(x); KEEP(info.shelter); switch (info.type) { case VCTRS_TYPE_dataframe: { r_obj* out = recurse ? df_proxy_recurse(x) : x; FREE(1); return out; } case VCTRS_TYPE_s3: { r_obj* out = KEEP(vec_proxy_invoke(x, info.proxy_method)); if (recurse && is_data_frame(out)) { out = df_proxy_recurse(out); } FREE(2); return out; } default: FREE(1); return x; } } // Recurse into data frames static r_obj* df_proxy_recurse(r_obj* x) { r_obj* out = KEEP(r_clone(x)); r_ssize n = r_length(out); r_obj* const * v_out = r_list_cbegin(out); for (r_ssize i = 0; i < n; ++i) { r_list_poke(out, i, vec_proxy_recurse(v_out[i])); } FREE(1); return out; } // [[ register() ]] r_obj* vec_proxy_equal(r_obj* x) { r_obj* out = KEEP(vec_proxy_equal_impl(x)); if (is_data_frame(out)) { // Automatically proxy df-proxies recursively. // Also flattens and unwraps them (#1537, #1664). out = df_proxy(out, VCTRS_PROXY_KIND_equal); } FREE(1); return out; } // [[ register() ]] r_obj* vec_proxy_compare(r_obj* x) { r_obj* out = KEEP(vec_proxy_compare_impl(x)); if (is_data_frame(out)) { // Automatically proxy df-proxies recursively. // Also flattens and unwraps them (#1537, #1664). out = df_proxy(out, VCTRS_PROXY_KIND_compare); } FREE(1); return out; } // [[ register() ]] r_obj* vec_proxy_order(r_obj* x) { r_obj* out = KEEP(vec_proxy_order_impl(x)); if (is_data_frame(out)) { // Automatically proxy df-proxies recursively. // Also flattens and unwraps them (#1537, #1664). out = df_proxy(out, VCTRS_PROXY_KIND_order); } FREE(1); return out; } // Non-recursive variants called by the fallback path to ensure we only // fallback on the container itself (like a df or rcrd) and not its elements // (like columns or fields) #define VEC_PROXY_KIND_IMPL(METHOD, INVOKE) do { \ r_obj* method = KEEP(METHOD(x)); \ r_obj* out = INVOKE(x, method); \ FREE(1); \ return out; \ } while (0) \ static inline r_obj* vec_proxy_equal_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_equal_method, vec_proxy_equal_invoke); } static inline r_obj* vec_proxy_compare_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_compare_method, vec_proxy_compare_invoke); } static inline r_obj* vec_proxy_order_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_order_method, vec_proxy_order_invoke); } #undef VEC_PROXY_KIND_IMPL r_obj* vec_proxy_method(r_obj* x) { return s3_find_method("vec_proxy", x, vctrs_method_table); } // This should be faster than normal dispatch but also means that // proxy methods can't call `NextMethod()`. This could be changed if // it turns out a problem. r_obj* vec_proxy_invoke(r_obj* x, r_obj* method) { if (method == r_null) { return x; } else { return vctrs_dispatch1(syms_vec_proxy, method, syms_x, x); } } static inline r_obj* vec_proxy_method_impl(r_obj* x, const char* generic, r_obj* fn_proxy_array) { r_obj* cls = KEEP(s3_get_class(x)); r_obj* method = s3_class_find_method(generic, cls, vctrs_method_table); if (method != r_null) { FREE(1); return method; } /* FIXME: Stopgap check for bare arrays */ /* which equality functions don't handle well */ if (vec_dim_n(x) > 1) { FREE(1); return fn_proxy_array; } FREE(1); return r_null; } static inline r_obj* vec_proxy_equal_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_equal", fns_vec_proxy_equal_array); } static inline r_obj* vec_proxy_compare_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_compare", fns_vec_proxy_compare_array); } static inline r_obj* vec_proxy_order_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_order", fns_vec_proxy_order_array); } static inline r_obj* vec_proxy_invoke_impl(r_obj* x, r_obj* method, r_obj* vec_proxy_sym, r_obj* (*vec_proxy_impl_fn)(r_obj*)) { if (method != r_null) { return vctrs_dispatch1(vec_proxy_sym, method, syms_x, x); } /* Fallback on S3 objects with no proxy */ if (vec_typeof(x) == VCTRS_TYPE_s3) { return vec_proxy_impl_fn(x); } else { return x; } } static inline r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_equal, vec_proxy); } static inline r_obj* vec_proxy_compare_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_compare, &vec_proxy_equal_impl); } static inline r_obj* vec_proxy_order_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_order, &vec_proxy_compare_impl); } #define DF_PROXY(PROXY) do { \ const r_ssize n_cols = r_length(x); \ r_obj* const* v_x = r_list_cbegin(x); \ \ for (r_ssize i = 0; i < n_cols; ++i) { \ r_obj* col = v_x[i]; \ r_list_poke(x, i, PROXY(col)); \ } \ } while (0) static inline r_obj* df_proxy(r_obj* x, enum vctrs_proxy_kind kind) { // Always clone to avoid modifying the original object, even if it is one // we freshly created in C, because we often work with both the proxy and the // original object within the same function (#1837) x = KEEP(r_clone(x)); switch (kind) { case VCTRS_PROXY_KIND_equal: DF_PROXY(vec_proxy_equal); break; case VCTRS_PROXY_KIND_compare: DF_PROXY(vec_proxy_compare); break; case VCTRS_PROXY_KIND_order: DF_PROXY(vec_proxy_order); break; } x = KEEP(df_flatten(x)); x = vec_proxy_unwrap(x); FREE(2); return x; } r_obj* ffi_df_proxy(r_obj* x, r_obj* kind) { if (!r_is_number(kind)) { r_stop_internal("`kind` must be a single integer."); } enum vctrs_proxy_kind c_kind = r_int_get(kind, 0); return df_proxy(x, c_kind); } r_obj* vec_proxy_unwrap(r_obj* x) { if (r_typeof(x) == R_TYPE_list && r_length(x) == 1 && is_data_frame(x)) { x = vec_proxy_unwrap(r_list_get(x, 0)); } return x; } r_obj* ffi_unset_s4(r_obj* x) { x = r_clone_referenced(x); r_unmark_s4(x); return x; } void vctrs_init_data(r_obj* ns) { syms_vec_proxy = r_sym("vec_proxy"); syms_vec_proxy_equal = r_sym("vec_proxy_equal"); syms_vec_proxy_equal_array = r_sym("vec_proxy_equal.array"); syms_vec_proxy_compare = r_sym("vec_proxy_compare"); syms_vec_proxy_compare_array = r_sym("vec_proxy_compare.array"); syms_vec_proxy_order = r_sym("vec_proxy_order"); syms_vec_proxy_order_array = r_sym("vec_proxy_order.array"); fns_vec_proxy_equal_array = r_env_get(ns, syms_vec_proxy_equal_array); fns_vec_proxy_compare_array = r_env_get(ns, syms_vec_proxy_compare_array); fns_vec_proxy_order_array = r_env_get(ns, syms_vec_proxy_order_array); } r_obj* syms_vec_proxy = NULL; r_obj* syms_vec_proxy_equal = NULL; r_obj* syms_vec_proxy_equal_array = NULL; r_obj* syms_vec_proxy_compare = NULL; r_obj* syms_vec_proxy_compare_array = NULL; r_obj* syms_vec_proxy_order = NULL; r_obj* syms_vec_proxy_order_array = NULL; r_obj* fns_vec_proxy_equal_array = NULL; r_obj* fns_vec_proxy_compare_array = NULL; r_obj* fns_vec_proxy_order_array = NULL; vctrs/src/translate.c0000644000176200001440000001154314315060310014340 0ustar liggesusers#include "vctrs.h" // For testing // [[ register() ]] SEXP vctrs_normalize_encoding(SEXP x) { return vec_normalize_encoding(x); } static inline SEXP obj_normalize_encoding(SEXP x); /* * Recursively normalize encodings of character vectors. * * A CHARSXP is considered normalized if: * - It is the NA_STRING * - It is ASCII, which means the encoding will be unmarked * - It is marked as UTF-8 * * Attributes are translated as well. * * ASCII strings will never get marked with an encoding when they go * through `Rf_mkCharLenCE()`, but they will get marked as ASCII. Since * UTF-8 is fully compatible with ASCII and ASCII is by far the most common * case, we let ASCII strings through without translating them. * * This converts vectors that are completely marked as Latin-1 to UTF-8. In * theory we could leave these as Latin-1, and comparing within * a single vector would be fine, since the encoding would be consistent. * However, this makes comparing between vectors difficult because we then * have to normalize the vectors relative to each other's encodings. * Consistently converting to UTF-8 avoids this issue altogether. * * Vectors with "bytes" encodings are not supported, as they cannot be * converted to UTF-8 by `Rf_translateCharUTF8()`. * * [[ include("translate.h") ]] */ SEXP vec_normalize_encoding(SEXP x) { return obj_normalize_encoding(x); } // ----------------------------------------------------------------------------- static SEXP chr_normalize_encoding(SEXP x); static SEXP list_normalize_encoding(SEXP x); static SEXP obj_attrib_normalize_encoding(SEXP x, SEXP attrib); static inline SEXP obj_normalize_encoding(SEXP x) { switch (TYPEOF(x)) { case STRSXP: x = chr_normalize_encoding(x); break; case VECSXP: x = list_normalize_encoding(x); break; default: break; } // For performance, avoid `PROTECT()` / `UNPROTECT()` when not needed SEXP attrib = r_attrib(x); if (attrib != r_null) { PROTECT(x); x = obj_attrib_normalize_encoding(x, attrib); UNPROTECT(1); } return x; } // ----------------------------------------------------------------------------- static inline r_ssize chr_find_normalize_start(SEXP x, r_ssize size); static SEXP chr_normalize_encoding(SEXP x) { r_ssize size = r_length(x); r_ssize start = chr_find_normalize_start(x, size); if (size == start) { return x; } x = PROTECT(r_clone_referenced(x)); const SEXP* p_x = STRING_PTR_RO(x); const void* vmax = vmaxget(); for (r_ssize i = start; i < size; ++i) { const SEXP elt = p_x[i]; if (string_is_normalized(elt)) { continue; } SET_STRING_ELT(x, i, string_normalize(elt)); } vmaxset(vmax); UNPROTECT(1); return x; } static inline r_ssize chr_find_normalize_start(SEXP x, r_ssize size) { const SEXP* p_x = STRING_PTR_RO(x); for (r_ssize i = 0; i < size; ++i) { const SEXP elt = p_x[i]; if (string_is_normalized(elt)) { continue; } return i; } return size; } // ----------------------------------------------------------------------------- static SEXP list_normalize_encoding(SEXP x) { PROTECT_INDEX pi; PROTECT_WITH_INDEX(x, &pi); r_ssize size = r_length(x); const SEXP* p_x = VECTOR_PTR_RO(x); for (r_ssize i = 0; i < size; ++i) { SEXP elt_old = p_x[i]; SEXP elt_new = obj_normalize_encoding(elt_old); if (elt_old == elt_new) { continue; } PROTECT(elt_new); // Cloned once, at which point `x` is free of references if (MAYBE_REFERENCED(x)) { x = r_clone(x); REPROTECT(x, pi); p_x = VECTOR_PTR_RO(x); } SET_VECTOR_ELT(x, i, elt_new); UNPROTECT(1); } UNPROTECT(1); return x; } // ----------------------------------------------------------------------------- static SEXP attrib_normalize_encoding(SEXP x); static SEXP obj_attrib_normalize_encoding(SEXP x, SEXP attrib) { SEXP attrib_new = attrib_normalize_encoding(attrib); if (attrib_new == attrib) { return x; } PROTECT(attrib_new); x = PROTECT(r_clone_referenced(x)); r_poke_attrib(x, attrib_new); UNPROTECT(2); return x; } static SEXP attrib_normalize_encoding(SEXP x) { r_ssize loc = 0; bool owned = false; PROTECT_INDEX pi; PROTECT_WITH_INDEX(x, &pi); for (SEXP node = x; node != r_null; node = r_node_cdr(node), ++loc) { SEXP elt_old = r_node_car(node); SEXP elt_new = obj_normalize_encoding(elt_old); if (elt_old == elt_new) { continue; } PROTECT(elt_new); if (!owned) { // Shallow clone entire pairlist if not owned. // Should be fast because these are generally short. x = r_clone(x); REPROTECT(x, pi); owned = true; node = x; // Restore original positioning post-clone for (r_ssize i = 0; i < loc; ++i) { node = r_node_cdr(node); } } r_node_poke_car(node, elt_new); UNPROTECT(1); } UNPROTECT(1); return x; } vctrs/src/shape.c0000644000176200001440000001305714516740336013465 0ustar liggesusers#include "vctrs.h" #include "decl/shape-decl.h" // Computes the common shape of `x` and `y` and attaches it as the // dimensions of `ptype`. If `x` and `y` are both atomic with `NULL` dimensions, // then no dimensions are attached and `ptype` is returned unmodified. // [[ include("shape.h") ]] r_obj* vec_shaped_ptype(r_obj* ptype, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { r_obj* ptype_dimensions = KEEP(vec_shape2(x, y, p_x_arg, p_y_arg)); if (ptype_dimensions == r_null) { FREE(1); return ptype; } ptype = KEEP(r_clone_referenced(ptype)); r_attrib_poke_dim(ptype, ptype_dimensions); FREE(2); return ptype; } r_obj* ffi_vec_shaped_ptype(r_obj* ptype, r_obj* x, r_obj* y, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); return vec_shaped_ptype(ptype, x, y, &x_arg, &y_arg); } // ----------------------------------------------------------------------------- static r_obj* vec_shape2(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { r_obj* x_dimensions = KEEP(r_dim(x)); r_obj* y_dimensions = KEEP(r_dim(y)); r_obj* out = vec_shape2_impl(x_dimensions, y_dimensions, x, y, p_x_arg, p_y_arg); FREE(2); return out; } r_obj* ffi_vec_shape2(r_obj* x, r_obj* y, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); return vec_shape2(x, y, &x_arg, &y_arg); } /* * Returns `NULL` if `x` and `y` are atomic. * Otherwise returns a dimensions vector where the first dimension length * is forcibly set to 0, and the rest are the common shape of `x` and `y`. */ static r_obj* vec_shape2_impl(r_obj* x_dimensions, r_obj* y_dimensions, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { if (x_dimensions == r_null) { return vec_shape(y_dimensions); } if (y_dimensions == r_null) { return vec_shape(x_dimensions); } r_ssize x_dimensionality = r_length(x_dimensions); r_ssize y_dimensionality = r_length(y_dimensions); r_obj* max_dimensions; r_ssize max_dimensionality; r_ssize min_dimensionality; if (x_dimensionality >= y_dimensionality) { max_dimensions = x_dimensions; max_dimensionality = x_dimensionality; min_dimensionality = y_dimensionality; } else { max_dimensions = y_dimensions; max_dimensionality = y_dimensionality; min_dimensionality = x_dimensionality; } // Sanity check, should never be true if (max_dimensionality == 0) { r_stop_internal("`max_dimensionality` must have length."); } const int* p_x_dimensions = r_int_cbegin(x_dimensions); const int* p_y_dimensions = r_int_cbegin(y_dimensions); const int* p_max_dimensions = r_int_cbegin(max_dimensions); r_obj* out = KEEP(r_alloc_integer(max_dimensionality)); int* p_out = r_int_begin(out); // Set the first axis to zero p_out[0] = 0; // Start loop at the second axis r_ssize i = 1; for (; i < min_dimensionality; ++i) { const int axis = i + 1; const int x_dimension = p_x_dimensions[i]; const int y_dimension = p_y_dimensions[i]; p_out[i] = vec_dimension2(x_dimension, y_dimension, axis, x, y, p_x_arg, p_y_arg); } for (; i < max_dimensionality; ++i) { p_out[i] = p_max_dimensions[i]; } FREE(1); return out; } // ----------------------------------------------------------------------------- // Sets the first axis to zero static r_obj* vec_shape(r_obj* dimensions) { if (dimensions == r_null) { return r_null; } dimensions = KEEP(r_clone_referenced(dimensions)); if (r_length(dimensions) == 0) { r_stop_internal("`dimensions` must have length."); } if (r_typeof(dimensions) != R_TYPE_integer) { r_stop_internal("`dimensions` must be an integer vector."); } r_int_begin(dimensions)[0] = 0; FREE(1); return dimensions; } static inline int vec_dimension2(int x_dimension, int y_dimension, int axis, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { if (x_dimension == y_dimension) { return x_dimension; } else if (x_dimension == 1) { return y_dimension; } else if (y_dimension == 1) { return x_dimension; } else { stop_incompatible_shape(x, y, x_dimension, y_dimension, axis, p_x_arg, p_y_arg); } } // ----------------------------------------------------------------------------- r_obj* vec_shape_broadcast(r_obj* out, const struct cast_opts* p_opts) { r_obj* r_x_arg = KEEP(vctrs_arg(p_opts->p_x_arg)); r_obj* r_to_arg = KEEP(vctrs_arg(p_opts->p_to_arg)); r_obj* call = KEEP(r_lazy_eval(p_opts->call)); out = KEEP(r_clone_referenced(out)); r_attrib_poke_dim(out, r_dim(p_opts->x)); r_attrib_poke_dim_names(out, r_dim_names(p_opts->x)); out = vctrs_eval_mask5(r_sym("shape_broadcast"), r_syms.x, out, r_sym("to"), p_opts->to, syms.x_arg, r_x_arg, syms.to_arg, r_to_arg, r_syms.call, call); FREE(4); return out; } vctrs/src/proxy-restore.c0000644000176200001440000001575314416313204015221 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/proxy-restore-decl.h" // FIXME: Having `owned` as an argument to `vec_restore()` may be // unnecessary once we have recursive proxy / restore mechanisms. // It currently helps resolve performance issues in `vec_rbind()`'s usage of // `df_assign()`, which repeatedly proxies and restores each column, // causing duplication to occur. Passing `owned` through here allows us to // call `vec_clone_referenced()`, which won't attempt to clone if we know we // own the object. See #1151. r_obj* vec_restore(r_obj* x, r_obj* to, enum vctrs_owned owned) { return vec_restore_4(x, to, owned, VCTRS_RECURSE_false); } r_obj* vec_restore_recurse(r_obj* x, r_obj* to, enum vctrs_owned owned) { return vec_restore_4(x, to, owned, VCTRS_RECURSE_true); } r_obj* ffi_vec_restore(r_obj* x, r_obj* to) { return vec_restore(x, to, vec_owned(x)); } r_obj* ffi_vec_restore_recurse(r_obj* x, r_obj* to) { return vec_restore_recurse(x, to, vec_owned(x)); } static r_obj* vec_restore_4(r_obj* x, r_obj* to, enum vctrs_owned owned, enum vctrs_recurse recurse) { enum vctrs_class_type to_type = class_type(to); switch (to_type) { case VCTRS_CLASS_bare_factor: case VCTRS_CLASS_bare_ordered: case VCTRS_CLASS_none: return vec_restore_default(x, to, owned); case VCTRS_CLASS_bare_date: return vec_date_restore(x, to, owned); case VCTRS_CLASS_bare_posixct: return vec_posixct_restore(x, to, owned); case VCTRS_CLASS_bare_posixlt: return vec_posixlt_restore(x, to, owned); case VCTRS_CLASS_bare_data_frame: case VCTRS_CLASS_bare_tibble: return vec_bare_df_restore(x, to, owned, recurse); case VCTRS_CLASS_data_frame: return vec_df_restore(x, to, owned, recurse); default: if (recurse && is_data_frame(x)) { return vec_df_restore(x, to, owned, recurse); } else { return vec_restore_dispatch(x, to); } } } static r_obj* vec_restore_dispatch(r_obj* x, r_obj* to) { return vctrs_dispatch2(syms_vec_restore_dispatch, fns_vec_restore_dispatch, syms_x, x, syms_to, to); } // Copy attributes except names and dim. This duplicates `x` if needed. r_obj* vec_restore_default(r_obj* x, r_obj* to, enum vctrs_owned owned) { r_obj* attrib = r_attrib(to); const bool is_s4 = IS_S4_OBJECT(to); if (attrib == r_null && !is_s4) { return x; } int n_prot = 0; attrib = KEEP(r_clone(attrib)); ++n_prot; x = KEEP(vec_clone_referenced(x, owned)); ++n_prot; // Remove vectorised attributes which might be incongruent after reshaping. // Shouldn't matter for GNU R but other R implementations might have checks. // Also record class to set it later with `r_attrib_poke()`. This restores // the OBJECT bit and is likely more compatible with other implementations. r_obj* cls = r_null; { r_obj* node = attrib; r_obj* prev = r_null; while (node != r_null) { r_obj* tag = r_node_tag(node); // Skip special attributes if (tag == r_syms.names || tag == r_syms.dim || tag == r_syms.dim_names || tag == r_syms.class_ || tag == r_syms.row_names) { if (tag == r_syms.class_) { cls = r_node_car(node); } if (prev == r_null) { attrib = r_node_cdr(attrib); } else { r_node_poke_cdr(prev, r_node_cdr(node)); } node = r_node_cdr(node); continue; } prev = node; node = r_node_cdr(node); } } // Copy attributes but keep names and dims. Don't restore names for // shaped objects since those are generated from dimnames. r_obj* dim = KEEP(r_attrib_get(x, r_syms.dim)); ++n_prot; if (dim == r_null) { r_obj* nms = KEEP(r_attrib_get(x, r_syms.names)); // Check if `to` is a data frame early. If `x` and `to` point // to the same reference, then `r_poke_attrib()` would alter `to`. r_obj* rownms = KEEP(df_rownames(x)); const bool restore_rownms = rownms != r_null && is_data_frame(to); r_poke_attrib(x, attrib); r_attrib_poke(x, r_syms.names, nms); // Don't restore row names if `to` isn't a data frame if (restore_rownms) { r_attrib_poke(x, r_syms.row_names, rownms); } FREE(2); } else { r_obj* dimnames = KEEP(r_attrib_get(x, r_syms.dim_names)); r_poke_attrib(x, attrib); r_attrib_poke(x, r_syms.dim, dim); r_attrib_poke(x, r_syms.dim_names, dimnames); FREE(1); } if (cls != r_null) { r_attrib_poke(x, r_syms.class_, cls); } if (is_s4) { r_mark_s4(x); } FREE(n_prot); return x; } r_obj* ffi_vec_restore_default(r_obj* x, r_obj* to) { return vec_restore_default(x, to, vec_owned(x)); } r_obj* vec_df_restore(r_obj* x, r_obj* to, enum vctrs_owned owned, enum vctrs_recurse recurse) { r_obj* out = KEEP(vec_bare_df_restore(x, to, owned, recurse)); out = vec_restore_dispatch(out, to); FREE(1); return out; } r_obj* vec_bare_df_restore(r_obj* x, r_obj* to, enum vctrs_owned owned, enum vctrs_recurse recurse) { if (r_typeof(x) != R_TYPE_list) { r_stop_internal("Attempt to restore data frame from a %s.", r_type_as_c_string(r_typeof(x))); } int n_prot = 0; if (!is_data_frame(to)) { to = KEEP_N(vec_proxy(to), &n_prot); if (!is_data_frame(to)) { r_stop_internal("Expected restoration target to have a df proxy."); } } if (recurse) { r_ssize n_cols = r_length(x); if (n_cols != r_length(to)) { r_stop_internal("Shape of `x` doesn't match `to` in recursive df restoration."); }; r_obj* const * v_x = r_list_cbegin(x); r_obj* const * v_to = r_list_cbegin(to); for (r_ssize i = 0; i < n_cols; ++i) { r_obj* x_restored = vec_restore_recurse(v_x[i], v_to[i], owned); r_list_poke(x, i, x_restored); } } x = KEEP(vec_restore_default(x, to, owned)); if (r_attrib_get(x, r_syms.names) == r_null) { r_obj* names = KEEP(r_alloc_character(r_length(x))); r_attrib_poke(x, r_syms.names, names); FREE(1); } r_obj* rownames = KEEP(df_rownames(x)); if (rownames == r_null) { r_ssize size = df_raw_size(x); init_compact_rownames(x, size); } else if (rownames_type(rownames) == ROWNAMES_TYPE_identifiers) { rownames = KEEP(vec_as_names(rownames, p_unique_repair_silent_opts)); x = vec_proxy_set_names(x, rownames, owned); FREE(1); } FREE(2); FREE(n_prot); return x; } r_obj* ffi_vec_bare_df_restore(r_obj* x, r_obj* to) { return vec_bare_df_restore(x, to, vec_owned(x), VCTRS_RECURSE_false); } void vctrs_init_proxy_restore(r_obj* ns) { syms_vec_restore_dispatch = r_sym("vec_restore_dispatch"); fns_vec_restore_dispatch = r_eval(syms_vec_restore_dispatch, ns); } static r_obj* syms_vec_restore_dispatch = NULL; static r_obj* fns_vec_restore_dispatch = NULL; vctrs/src/split.c0000644000176200001440000000101514402367170013502 0ustar liggesusers#include "vctrs.h" // [[ register() ]] SEXP vec_split(SEXP x, SEXP by) { if (vec_size(x) != vec_size(by)) { Rf_errorcall(R_NilValue, "`x` and `by` must have the same size."); } SEXP out = PROTECT(vec_group_loc(by)); SEXP indices = VECTOR_ELT(out, 1); SEXP val = vec_chop_unsafe(x, indices, r_null); SET_VECTOR_ELT(out, 1, val); SEXP names = PROTECT(Rf_getAttrib(out, R_NamesSymbol)); SET_STRING_ELT(names, 1, strings_val); Rf_setAttrib(out, R_NamesSymbol, names); UNPROTECT(2); return out; } vctrs/src/ptype-common.h0000644000176200001440000000142314362266120015004 0ustar liggesusers#ifndef VCTRS_PTYPE_COMMON_H #define VCTRS_PTYPE_COMMON_H #include "vctrs-core.h" #include "ptype2.h" #include "utils.h" struct ptype_common_opts { struct r_lazy call; struct vctrs_arg* p_arg; struct fallback_opts fallback; }; static inline bool vec_is_common_class_fallback(r_obj* ptype) { return r_inherits(ptype, c_strs_vctrs_common_class_fallback); } r_obj* vec_ptype_common_params(r_obj* dots, r_obj* ptype, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call); r_obj* vec_ptype_common_opts(r_obj* dots, r_obj* ptype, const struct ptype_common_opts* opts); #endif vctrs/vignettes/0000755000176200001440000000000014532404540013424 5ustar liggesusersvctrs/vignettes/type-size.Rmd0000644000176200001440000003155214511320527016026 0ustar liggesusers--- title: "Prototypes and sizes" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Prototypes and sizes} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Rather than using `class()` and `length()`, vctrs has notions of prototype (`vec_ptype_show()`) and size (`vec_size()`). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules. Size and prototype are motivated by thinking about the optimal behaviour for `c()` and `rbind()`, particularly inspired by data frames with columns that are matrices or data frames. ```{r} library(vctrs) ``` ## Prototype The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the `class()` of an object is inadequate for this purpose: * The `class()` doesn't include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a `POSIXct`. You cannot combine two factors or two `POSIXct`s without thinking about the attributes. * The `class()` of a matrix is "matrix" and doesn't include the type of the underlying vector or the dimensionality. Instead, vctrs takes advantage of R's vectorised nature and uses a __prototype__, a 0-observation slice of the vector (this is basically `x[0]` but with some subtleties we'll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data. Conveniently, you can create many prototypes using existing base functions (e.g, `double()` and `factor(levels = c("a", "b"))`). vctrs provides a few helpers (e.g. `new_date()`, `new_datetime()`, and `new_duration()`) where the equivalents in base R are missing. ### Base prototypes `vec_ptype()` creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides `vec_ptype_show()`, which prints the prototype in a friendly way (and returns nothing). Using `vec_ptype_show()` allows us to see the prototypes base R classes: * Atomic vectors have no attributes and just display the underlying `typeof()`: ```{r} vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ``` * The prototype of matrices and arrays include the base type and the dimensions after the first: ```{r} vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ``` * The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it's highly likely that their levels are also equal. ```{r} vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ``` While `vec_ptype_show()` prints only the hash, the prototype object itself does contain all levels: ```{r} vec_ptype(factor("a")) ``` * Base R has three key date time classes: dates, date-times (`POSIXct`), and durations (`difftime)`. Date-times have a timezone, and durations have a unit. ```{r} vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ``` * Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column: ```{r} vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ``` Data frames can have columns that are themselves data frames, making this a "recursive" type: ```{r} df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ``` ### Coercing to common type It's often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via `vec_ptype_common()`. `vec_ptype_common()` possesses the following invariants: * `class(vec_ptype_common(x, y))` equals `class(vec_ptype_common(y, x))`. * `class(vec_ptype_common(x, vec_ptype_common(y, z))` equals `class(vec_ptype_common(vec_ptype_common(x, y), z))`. * `vec_ptype_common(x, NULL) == vec_ptype(x)`. i.e., `vec_ptype_common()` is both commutative and associative (with respect to class) and has an identity element, `NULL`; i.e., it's a __commutative monoid__. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects. Like with `vec_ptype()`, the easiest way to explore `vec_ptype_common()` is with `vec_ptype_show()`: when given multiple inputs, it will print their common prototype. (In other words: program with `vec_ptype_common()` but play with `vec_ptype_show()`.) * The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically: ```{r, error = TRUE} vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ``` * Matrices and arrays are automatically broadcast to higher dimensions: ```{r} vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ``` Provided that the dimensions follow the vctrs recycling rules: ```{r, error = TRUE} vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ``` * Factors combine levels in the order in which they appear. ```{r} fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ``` * Combining a date and date-time yields a date-time: ```{r} vec_ptype_show(new_date(), new_datetime()) ``` When combining two date times, the timezone is taken from the first input: ```{r} vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ``` Unless it's the local timezone, in which case any explicit time zone will win: ```{r} vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ``` * The common type of two data frames is the common type of each column that occurs in both data frames: ```{r} vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ``` And the union of the columns that only occur in one: ```{r} vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ``` Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled. ### Casting to specified type `vec_ptype_common()` finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That's the job of `vec_cast_common()`: ```{r} str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ``` Alternatively, you can cast to a specific prototype using `vec_cast()`: ```{r, error = TRUE} # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ``` If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error. ```{r, error = TRUE} vec_cast(c(1.5, 2), integer()) ``` You can suppress the lossy cast errors with `allow_lossy_cast()`: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ``` This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ``` The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound. ## Size `vec_size()` was motivated by the need to have an invariant that describes the number of "observations" in a data structure. This is particularly important for data frames, as it's useful to have some function such that `f(data.frame(x))` equals `f(x)`. No base function has this property: * `length(data.frame(x))` equals `1` because the length of a data frame is the number of columns. * `nrow(data.frame(x))` does not equal `nrow(x)` because `nrow()` of a vector is `NULL`. * `NROW(data.frame(x))` equals `NROW(x)` for vector `x`, so is almost what we want. But because `NROW()` is defined in terms of `length()`, it returns a value for every object, even types that can't go in a data frame, e.g. `data.frame(mean)` errors even though `NROW(mean)` is `1`. We define `vec_size()` as follows: * It is the length of 1d vectors. * It is the number of rows of data frames, matrices, and arrays. * It throws error for non vectors. Given `vec_size()`, we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns. ### Slicing `vec_slice()` is to `vec_size()` as `[` is to `length()`; i.e., it allows you to select observations regardless of the dimensionality of the underlying object. `vec_slice(x, i)` is equivalent to: * `x[i]` when `x` is a vector. * `x[i, , drop = FALSE]` when `x` is a data frame. * `x[i, , , drop = FALSE]` when `x` is a 3d array. ```{r} x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ``` `vec_slice(data.frame(x), i)` equals `data.frame(vec_slice(x, i))` (modulo variable and row names). Prototypes are generated with `vec_slice(x, 0L)`; given a prototype, you can initialize a vector of given size (filled with `NA`s) with `vec_init()`. ### Common sizes: recycling rules Closely related to the definition of size are the __recycling rules__. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in `vec_size_common()`, which gives the common size of a set of vectors: ```{r} vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ``` vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like `dest == c("IAH", "HOU"))`, at the cost of occasionally requiring an explicit calls to `rep()`. ```{r, echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates an error"} knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ``` You can apply the recycling rules in two ways: * If you have a vector and desired size, use `vec_recycle()`: ```{r} vec_recycle(1:3, 3) vec_recycle(1, 10) ``` * If you have multiple vectors and you want to recycle them to the same size, use `vec_recycle_common()`: ```{r} vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ``` ## Appendix: recycling in base R The recycling rules in base R are described in [The R Language Definition](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Recycling-rules) but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions. Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer: ```{r} rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ``` If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning: ```{r} invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ``` But some functions recycle silently: ```{r} length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ``` And `data.frame()` throws an error: ```{r, error = TRUE} data.frame(1:2, 1:3) ``` The R language definition states that "any arithmetic operation involving a zero-length vector has a zero-length result". But outside of arithmetic, this rule is not consistently followed: ```{r, error = TRUE} # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) ``` vctrs/vignettes/pillar.Rmd0000644000176200001440000002126014315060310015344 0ustar liggesusers--- title: "Printing vectors nicely in tibbles" author: "Kirill Müller, Hadley Wickham" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Printing vectors nicely in tibbles} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` You can get basic control over how a vector is printed in a tibble by providing a `format()` method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics: * `vctrs::vec_ptype_abbr()` determines what goes into the column header. * `pillar::pillar_shaft()` determines what goes into the body, or the shaft, of the column. Technically a [*pillar*](https://en.wikipedia.org/wiki/Column#Nomenclature) is composed of a *shaft* (decorated with an *ornament*), with a *capital* above and a *base* below. Multiple pillars form a *colonnade*, which can be stacked in multiple *tiers*. This is the motivation behind the names in our API. This short vignette shows the basics of column styling using a `"latlon"` vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you'll need to create documentation and the `NAMESPACE` file. In this vignette, we'll attach pillar and vctrs: ```{r setup} library(vctrs) library(pillar) ``` You don't need to do this in a package. Instead, you'll need to _import_ the packages by then to the `Imports:` section of your `DESCRIPTION`. The following helper does this for you: ```{r, eval = FALSE} usethis::use_package("vctrs") usethis::use_package("pillar") ``` ## Prerequisites To illustrate the basic ideas we're going to create a `"latlon"` class that encodes geographic coordinates in a record. We'll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using `vctrs_rcrd()`, we already get the infrastructure to make this class fully compatible with data frames for free. See `vignette("s3-vector", package = "vctrs")` for details on the record data type. ```{r} #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ``` ## Using in a tibble Columns of this class can be used in a tibble right away because we've made a class using the vctrs infrastructure and have provided a `format()` method: ```{r} library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ``` This output is ok, but we could improve it by: 1. Using a more description type abbreviation than ``. 1. Using a dash of colour to highlight the most important parts of the value. 1. Providing a narrower view when horizontal space is at a premium. The following sections show how to enhance the rendering. ## Fixing the data type Instead of `` we'd prefer to use ``. We can do that by implementing the `vec_ptype_abbr()` method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that's under 6 characters. ```{r} #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ``` ## Custom rendering The `format()` method is used by default for rendering. For custom formatting you need to implement the `pillar_shaft()` method. This function should always return a pillar shaft object, created by `new_pillar_shaft_simple()` or similar. `new_pillar_shaft_simple()` accepts ANSI escape codes for colouring, and pillar includes some built in styles like `style_subtle()`. We can use subtle style for the degree and minute separators to make the data more obvious. First we define a degree formatter that makes use of `style_subtle()`: ```{r} deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ``` And then we pass that to our `format()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ``` Currently, ANSI escapes are not rendered in vignettes, so this result doesn't look any different, but if you run the code yourself you'll see an improved display. ```{r} data ``` As well as the functions in pillar, the [cli](https://cli.r-lib.org/) package provides a variety of tools for styling text. ## Truncation Tibbles can automatically compacts columns when there's no enough horizontal space to display everything: ```{r} print(data, width = 30) ``` Currently the latlon class isn't ever compacted because we haven't specified a minimum width when constructing the shaft. Let's fix that and re-print the data: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ``` ## Adaptive rendering Truncation may be useful for character data, but for lat-lon data it'd be nicer to show full degrees and remove the minutes. We'll first write a function that does this: ```{r} deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ``` Then use it as part of more sophisticated implementation of the `pillar_shaft()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ``` Now the `pillar_shaft()` method returns an object of class `"pillar_shaft_latlon"` created by `new_pillar_shaft()`. This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (`get_max_extent()` is a helper that computes the maximum display width occupied by the values in a character vector.) All that's left to do is to implement a `format()` method for our new `"pillar_shaft_latlon"` class. This method will be called with a `width` argument, which then determines which of the formats to choose. The formatting of our choice is passed to the `new_ornament()` function: ```{r} #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ``` ## Testing If you want to test the output of your code, you can compare it with a known state recorded in a text file. The `testthat::expect_snapshot()` function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won't make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN. Use this testthat expectation in one of your test files to create a snapshot test: ```{r eval = FALSE} expect_snapshot(pillar_shaft(data$loc)) ``` See for more information. vctrs/vignettes/s3-vector.Rmd0000644000176200001440000013076514511320527015730 0ustar liggesusers--- title: "S3 vectors" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{S3 vectors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ``` This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you'll also need to provide methods that actually make the vector useful. I assume that you're already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of [the S3 chapter](https://adv-r.hadley.nz/s3.html) of *Advanced R*. This article refers to "vectors of numbers" as *double vectors*. Here, "double" stands for ["double precision floating point number"](https://en.wikipedia.org/wiki/Double-precision_floating-point_format), see also `double()`. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` This vignette works through five big topics: - The basics of creating a new vector class with vctrs. - The coercion and casting system. - The record and list-of types. - Equality and comparison proxies. - Arithmetic operators. They're collectively demonstrated with a number of simple S3 classes: - Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting. - Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions. - Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care. - Rational: a pair of integer vectors that defines a rational number like `2 / 3`. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for `+`, `-`, and friends. - Polynomial: a list of integer vectors that define polynomials like `1 + x - x^3`. Sorting such vectors correctly requires a custom equality method. - Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties. - Period and frequency: a pair of classes represent a period, or its inverse, frequency. This allows us to explore more arithmetic operators. ## Basics In this section you'll learn how to create a new vctrs class by calling `new_vctr()`. This creates an object with class `vctrs_vctr` which has a number of methods. These are designed to make your life as easy as possible. For example: - The `print()` and `str()` methods are defined in terms of `format()` so you get a pleasant, consistent display as soon as you've made your `format()` method. - You can immediately put your new vector class in a data frame because `as.data.frame.vctrs_vctr()` does the right thing. - Subsetting (`[`, `[[`, and `$`), `length<-`, and `rep()` methods automatically preserve attributes because they use `vec_restore()`. A default `vec_restore()` works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data. - Default subset-assignment methods (`[<-`, `[[<-`, and `$<-`) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages. ### Percent class In this section, I'll show you how to make a `percent` class, i.e., a double vector that is printed as a percentage. We start by defining a low-level [constructor](https://adv-r.hadley.nz/s3.html#s3-constrcutor) to check types and/or sizes and call `new_vctr()`. `percent` is built on a double vector of any length and doesn't have any attributes. ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ``` Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as [blob](https://blob.tidyverse.org/)), it's fine to use the package name without prefix as the class name. We then follow up with a user friendly [helper](https://adv-r.hadley.nz/s3.html#helpers). Here we'll use `vec_cast()` to allow it to accept anything coercible to a double: ```{r} percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype. ```{r} new_percent() percent() ``` For the convenience of your users, consider implementing an `is_percent()` function: ```{r} is_percent <- function(x) { inherits(x, "vctrs_percent") } ``` ### `format()` method The first method for every class should almost always be a `format()` method. This should return a character vector the same length as `x`. The easiest way to do this is to rely on one of R's low-level formatting functions like `formatC()`: ```{r} format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ``` ```{r, include = FALSE} # As of R 3.5, print.vctr can not find format.percent since it's not in # its lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ``` ```{r} x ``` (Note the use of `vec_data()` so `format()` doesn't get stuck in an infinite loop, and that I take a little care to not convert `NA` to `"NA"`; this leads to better printing.) The format method is also used by data frames, tibbles, and `str()`: ```{r} data.frame(x) ``` For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in `str()`: ```{r} vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ``` If you need more control over printing in tibbles, implement a method for `pillar::pillar_shaft()`. See `vignette("pillar", package = "vctrs")` for details. ## Casting and coercion The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens *implicitly* (e.g in `c()`) we call it **coercion**; when the change happens *explicitly* (e.g. with `as.integer(x)`), we call it **casting**. One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it's possible to make accurate predictions about what (e.g.) `c(x, y)` should do when `x` and `y` have different prototypes. vctrs achieves this goal through two generics: - `vec_ptype2(x, y)` defines possible set of coercions. It returns a prototype if `x` and `y` can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes. - `vec_cast(x, to)` defines the possible sets of casts. It returns `x` translated to have prototype `to`, or throws an error if the conversion isn't possible. The set of possible casts is a superset of possible coercions because they're requested explicitly. ### Double dispatch Both generics use [**double dispatch**](https://en.wikipedia.org/wiki/Double_dispatch) which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means: - You end up with method names with two classes, like `vec_ptype2.foo.bar()`. - You don't need to implement default methods (they would never be called if you do). - You can't call `NextMethod()`. ### Percent class {#percent} We'll make our percent class coercible back and forth with double vectors. `vec_ptype2()` provides a user friendly error message if the coercion doesn't exist and makes sure `NA` is handled in a standard way. `NA` is technically a logical vector, but we want to stand in for a missing value of any type. ```{r, error = TRUE} vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ``` By default and in simple cases, an object of the same class is compatible with itself: ```{r} vec_ptype2(percent(), percent()) ``` However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we'll start by saying that a `vctrs_percent` combined with a `vctrs_percent` yields a `vctrs_percent`, which we indicate by returning a prototype generated by the constructor. ```{r} vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ``` Next we define methods that say that combining a `percent` and double should yield a `double`. We avoid returning a `percent` here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers. Because double dispatch is a bit of a hack, we need to provide two methods. It's your responsibility to ensure that each member of the pair returns the same result: if they don't you will get weird and unpredictable behaviour. The double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. If we implemented `vec_ptype2.vctrs_percent.numeric()`, it would never be called. ```{r} vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ``` We can check that we've implemented this correctly with `vec_ptype_show()`: ```{r} vec_ptype_show(percent(), double(), percent()) ``` The `vec_ptype2()` methods define which input is the richer type that vctrs should coerce to. However, they don't perform any conversion. This is the job of `vec_cast()`, which we implement next. We'll provide a method to cast a percent to a percent: ```{r} vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ``` And then for converting back and forth between doubles. To convert a double to a percent we use the `percent()` helper (not the constructor; this is unvalidated user input). To convert a `percent` to a double, we strip the attributes. Note that for historical reasons the order of argument in the signature is the opposite as for `vec_ptype2()`. The class for `to` comes first, and the class for `x` comes second. Again, the double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. Implementing `vec_cast.vctrs_percent.numeric()` has no effect. ```{r} vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ``` Then we can check this works with `vec_cast()`: ```{r} vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ``` Once you've implemented `vec_ptype2()` and `vec_cast()`, you get `vec_c()`, `[<-`, and `[[<-` implementations for free. ```{r, error = TRUE} vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ``` You'll also get mostly correct behaviour for `c()`. The exception is when you use `c()` with a base R class: ```{r, error = TRUE} # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ``` Unfortunately there's no way to fix this problem with the current design of `c()`. Again, as a convenience, consider providing an `as_percent()` function that makes use of the casts defined in your `vec_cast.vctrs_percent()` methods: ```{r} as_percent <- function(x) { vec_cast(x, new_percent()) } ``` Occasionally, it is useful to provide conversions that go beyond what's allowed in casting. For example, we could offer a parsing method for character vectors. In this case, `as_percent()` should be generic, the default method should cast, and then additional methods should implement more flexible conversion: ```{r} as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ``` ### Decimal class Now that you've seen the basics with a very simple S3 class, we'll gradually explore more complicated scenarios. This section creates a `decimal` class that prints with the specified number of decimal places. This is very similar to `percent` but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1). We start off as before, defining a low-level constructor, a user-friendly constructor, a `format()` method, and a `vec_ptype_abbr()`. Note that additional object attributes are simply passed along to `new_vctr()`: ```{r} new_decimal <- function(x = double(), digits = 2L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_integer(digits)) { abort("`digits` must be an integer vector.") } vec_check_size(digits, size = 1L) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ``` Note that I provide a little helper to extract the `digits` attribute. This makes the code a little easier to read and should not be exported. By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You'll see what to do if the attributes are data dependent in the next section. ```{r} x[1:2] x[[1]] ``` For the sake of exposition, we'll assume that `digits` is an important attribute of the class and should be included in the full type: ```{r} vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ``` Now consider `vec_cast()` and `vec_ptype2()`. Casting and coercing from one decimal to another requires a little thought as the values of the `digits` attribute might be different, and we need some way to reconcile them. Here I've decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error. ```{r} vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ``` Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal). ```{r} vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ``` If type `x` has greater resolution than `y`, there will be some inputs that lose precision. These should generate errors using `stop_lossy_cast()`. You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution. ```{r, error = TRUE} vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ``` ### Cached sum class {#cached-sum} The next level up in complexity is an object that has data-dependent attributes. To explore this idea we'll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors: ```{r} new_cached_sum <- function(x = double(), sum = 0L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_double(sum)) { abort("`sum` must be a double vector.") } vec_check_size(sum, size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ``` For this class, we can use the default `format()` method, and instead, we'll customise the `obj_print_footer()` method. This is a good place to display user facing attributes. ```{r} obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ``` We'll also override `sum()` and `mean()` to use the attribute. This is easiest to do with `vec_math()`, which you'll learn about later. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ``` As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they'll work, but return the incorrect result: ```{r} x[1:2] ``` To fix this, you need to provide a `vec_restore()` method. Note that this method dispatches on the `to` argument. ```{r} vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ``` This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with `vec_data()` and then reapplying them again with `vec_restore()`. The default `vec_restore()` method copies over all attributes, which is not appropriate when the attributes depend on the data. Note that `vec_restore.class` is subtly different from `vec_cast.class.class()`. `vec_restore()` is used when restoring attributes that have been lost; `vec_cast()` is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with `new_vctr()`. `vec_restore.factor()` would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with `vec_cast()`. ## Record-style objects Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is `POSIXlt`, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override `length()` and subsetting methods to conceal this implementation detail. ```{r} x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ``` vctrs makes it easy to create new record-style classes using `new_rcrd()`, which has a wide selection of default methods. ### Rational class A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short). As usual we start with low-level and user-friendly constructors. The low-level constructor calls `new_rcrd()`, which needs a named list of equal-length vectors. ```{r} new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer vector.") } if (!is_integer(d)) { abort("`d` must be an integer vector.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ``` Our user friendly constructor casts `n` and `d` to integers and recycles them to the same length. ```{r} rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ``` Behind the scenes, `x` is a named list with two elements. But those details are hidden so that it behaves like a vector: ```{r} names(x) length(x) ``` To access the underlying fields we need to use `field()` and `fields()`: ```{r} fields(x) field(x, "n") ``` Notice that we can't `print()` or `str()` the new rational vector `x` yet. Printing causes an error: ```{r, error = TRUE} x str(x) ``` This is because we haven't defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call `vec_data(x)`. ```{r} vec_data(x) str(vec_data(x)) ``` It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way: ```{r} format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ``` vctrs uses the `format()` method in `str()`, hiding the underlying implementation details from the user: ```{r} str(x) ``` For `rational`, `vec_ptype2()` and `vec_cast()` follow the same pattern as `percent()`. We allow coercion from integer and to doubles. ```{r} vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ``` ### Decimal2 class The previous implementation of `decimal` was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems. A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a `scale`). The following code is a very quick sketch of how you might start creating such a class: ```{r} new_decimal2 <- function(l, r, scale = 2L) { if (!is_integer(l)) { abort("`l` must be an integer vector.") } if (!is_integer(r)) { abort("`r` must be an integer vector.") } if (!is_integer(scale)) { abort("`scale` must be an integer vector.") } vec_check_size(scale, size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ``` ## Equality and comparison vctrs provides four "proxy" generics. Two of these let you control how your class determines equality and comparison: - `vec_proxy_equal()` returns a data vector suitable for comparison. It underpins `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()`. - `vec_proxy_compare()` specifies how to compare the elements of your vector. This proxy is used in `<`, `<=`, `>=`, `>`, `min()`, `max()`, `median()`, and `quantile()`. Two other proxy generic are used for sorting for unordered data types and for accessing the raw data for exotic storage formats: - `vec_proxy_order()` specifies how to sort the elements of your vector. It is used in `xtfrm()`, which in turn is called by the `order()` and `sort()` functions. This proxy was added to implement the behaviour of lists, which are sortable (their order proxy sorts by first occurrence) but not comparable (comparison operators cause an error). Its default implementation for other classes calls `vec_proxy_compare()` and you normally don't need to implement this proxy. - `vec_proxy()` returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn't need to implement `vec_proxy()`. The default behavior is as follows: - `vec_proxy_equal()` calls `vec_proxy()` - `vec_proxy_compare()` calls `vec_proxy_equal()` - `vec_proxy_order()` calls `vec_proxy_compare()` You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work. These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C. ### Rational class Let's explore these ideas by with the rational class we started on above. By default, `vec_proxy()` converts a record to a data frame, and the default comparison works column by column: ```{r} x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ``` This makes sense as a default but isn't correct here because `rational(1, 1)` represents the same number as `rational(2, 2)`, so they should be equal. We can fix that by implementing a `vec_proxy_equal()` method that divides `n` and `d` by their greatest common divisor: ```{r} # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ``` `vec_proxy_equal()` is also used by `unique()`: ```{r} unique(x) ``` We now need to fix the comparison operations similarly, since comparison currently happens lexicographically by `n`, then by `d`: ```{r} rational(1, 2) < rational(2, 3) rational(2, 4) < rational(2, 3) ``` The easiest fix is to convert the fraction to a floating point number and use this as a proxy: ```{r} vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational(2, 4) < rational(2, 3) ``` This also fixes `sort()`, because the default implementation of `vec_proxy_order()` calls `vec_proxy_compare()`. ```{r} sort(x) ``` (We could have used the same approach in `vec_proxy_equal()`, but when working with floating point numbers it's not necessarily true that `x == y` implies that `d * x == d * y`.) ### Polynomial class A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like `1 + 3x - 2x^2`) using a list of integer vectors (like `c(1, 3, -2)`). Note the use of `new_list_of()` in the constructor. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) new_poly(x) } new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly_list") } vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly" format.vctrs_poly_list <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly_list <- function(x, ...) { if (length(x) != 0) { print(format(x), quote = FALSE) } } p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` The resulting objects will inherit from the `vctrs_list_of` class, which provides tailored methods for `$`, `[[`, the corresponding assignment operators, and other methods. ```{r} class(p) p[2] p[[2]] ``` The class implements the list interface: ```{r} obj_is_list(p) ``` This is fine for the internal implementation of this class but it would be more appropriate if it behaved like an atomic vector rather than a list. #### Make an atomic polynomial vector An atomic vector is a vector like integer or character for which `[[` returns the same type. Unlike lists, you can't reach inside an atomic vector. To make the polynomial class an atomic vector, we'll wrap the internal `list_of()` class within a record vector. Usually records are used because they can store several fields of data for each observation. Here we have only one, but we use the class anyway to inherit its atomicity. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) x <- new_poly(x) new_rcrd(list(data = x), class = "vctrs_poly") } format.vctrs_poly <- function(x, ...) { format(field(x, "data")) } ``` The new `format()` method delegates to the one we wrote for the internal list. The vector looks just like before: ```{r} p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` Making the class atomic means that `obj_is_list()` now returns `FALSE`. This prevents recursive algorithms that traverse lists from reaching too far inside the polynomial internals. ```{r} obj_is_list(p) ``` Most importantly, it prevents users from reaching into the internals with `[[`: ```{r} p[[2]] ``` #### Implementing equality and comparison Equality works out of the box because we can tell if two integer vectors are equal: ```{r} p == poly(c(1, 0, 1)) ``` We can't compare individual elements, because the data is stored in a list and by default lists are not comparable: ```{r, error = TRUE} p < p[2] ``` To enable comparison, we implement a `vec_proxy_compare()` method: ```{r} vec_proxy_compare.vctrs_poly <- function(x, ...) { # Get the list inside the record vector x_raw <- vec_data(field(x, "data")) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } p < p[2] ``` Often, this is sufficient to also implement `sort()`. However, for lists, there is already a default `vec_proxy_order()` method that sorts by first occurrence: ```{r} sort(p) sort(p[c(1:3, 1:2)]) ``` To ensure consistency between ordering and comparison, we forward `vec_proxy_order()` to `vec_proxy_compare()`: ```{r} vec_proxy_order.vctrs_poly <- function(x, ...) { vec_proxy_compare(x, ...) } sort(p) ``` ## Arithmetic vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once: - `vec_math(fn, x, ...)` specifies the behaviour of mathematical functions like `abs()`, `sum()`, and `mean()`. (Note that `var()` and `sd()` can't be overridden, see `?vec_math()` for the complete list supported by `vec_math()`.) - `vec_arith(op, x, y)` specifies the behaviour of the arithmetic operations like `+`, `-`, and `%%`. (See `?vec_arith()` for the complete list.) Both generics define the behaviour for multiple functions because `sum.vctrs_vctr(x)` calls `vec_math.vctrs_vctr("sum", x)`, and `x + y` calls `vec_math.x_class.y_class("+", x, y)`. They're accompanied by `vec_math_base()` and `vec_arith_base()` which make it easy to call the underlying base R functions. `vec_arith()` uses double dispatch and needs the following standard boilerplate: ```{r} vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Correctly exporting `vec_arith()` methods from a package is currently a little awkward. See the instructions in the Arithmetic section of the "Implementing a vctrs S3 class in a package" section below. ### Cached sum class I showed an example of `vec_math()` to define `sum()` and `mean()` methods for `cached_sum`. Now let's talk about exactly how it works. Most `vec_math()` functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to `vec_math_base()` for those that you don't care about. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ``` ### Meter class To explore the infix arithmetic operators exposed by `vec_arith()` I'll create a new class that represents a measurement in `meter`s: ```{r} new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ``` Because `meter` is built on top of a double vector, basic mathematic operations work: ```{r} sum(x) mean(x) ``` But we can't do arithmetic: ```{r, error = TRUE} x + 1 meter(10) + meter(1) meter(10) * 3 ``` To allow these infix functions to work, we'll need to provide `vec_arith()` generic. But before we do that, let's think about what combinations of inputs we should support: - It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can't multiply meters (because that would yield an area). - For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don't make much sense as we, strictly speaking, are dealing with objects of different nature. `vec_arith()` is another function that uses double dispatch, so as usual we start with a template. ```{r} vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Then write the method for two meter objects. We use a switch statement to cover the cases we care about and `stop_incompatible_op()` to throw an informative error message for everything else. ```{r, error = TRUE} vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ``` Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while `meter(10) / 2` makes sense, `2 / meter(10)` does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over `numeric` here instead of `double`. ```{r, error = TRUE} vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ``` For completeness, we also need `vec_arith.vctrs_meter.MISSING` for the unary `+` and `-` operators: ```{r} vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ``` ## Implementing a vctrs S3 class in a package Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things: - Register the S3 methods by listing them in the `NAMESPACE` file. - Create documentation around your methods, for the sake of your user and to satisfy `R CMD check`. Let's assume that the `percent` class is implemented in the pizza package in the file `R/percent.R`. Here we walk through the major sections of this hypothetical file. You've seen all of this code before, but now it's augmented by the roxygen2 directives that produce the correct `NAMESPACE` entries and help topics. ### Getting started First, the pizza package needs to include vctrs in the `Imports` section of its `DESCRIPTION` (perhaps by calling `usethis::use_package("vctrs")`. While vctrs is under very active development, it probably makes sense to state a minimum version. Imports: a_package, another_package, ... vctrs (>= x.y.z), ... Then we make all vctrs functions available within the pizza package by including the directive `#' @import vctrs` somewhere. Usually, it's not good practice to `@import` the entire namespace of a package, but vctrs is deliberately designed with this use case in mind. Where should we put `#' @import vctrs`? There are two natural locations: - With package-level docs in `R/pizza-doc.R`. You can use `usethis::use_package_doc()` to initiate this package-level documentation. - In `R/percent.R`. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package. We also must use one of these locations to dump some internal documentation that's needed to avoid `R CMD check` complaints. We don't expect any human to ever read this documentation. Here's how this dummy documentation should look, combined with the `#' @import vctrs` directive described above. ```{r eval = FALSE} #' Internal vctrs methods #' #' @import vctrs #' @keywords internal #' @name pizza-vctrs NULL ``` This should appear in `R/pizza-doc.R` (package-level docs) or in `R/percent.R` (class-focused file). Remember to call `devtools::document()` regularly, as you develop, to regenerate `NAMESPACE` and the `.Rd` files. From this point on, the code shown is expected to appear in `R/percent.R`. ### Low-level and user-friendly constructors Next we add our constructor: ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "pizza_percent") } ``` Note that the name of the package must be included in the class name (`pizza_percent`), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class. We can also add a call to `setOldClass()` for compatibility with S4: ```{r} # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ``` Because we've used a function from the methods package, you'll also need to add methods to `Imports`, with (e.g.) `usethis::use_package("methods")`. This is a "free" dependency because methods is bundled with every R install. Next we implement, export, and document a user-friendly helper: `percent()`. ```{r} #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` (Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do `pizza::percent()`; it would be redundant to have `pizza::pizza_percent()`.) ### Other helpers It's a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor `percent()`: ```{r} #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ``` You'll also need to update the `percent()` documentation to reflect that `x` now means two different things: ```{r} #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ``` Next we provide the key methods to make printing work. These are S3 methods, so they don't need to be documented, but they do need to be exported. ```{r eval = FALSE} #' @export format.pizza_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } #' @export vec_ptype_abbr.pizza_percent <- function(x, ...) { "prcnt" } ``` Finally, we implement methods for `vec_ptype2()` and `vec_cast()`. ```{r, eval = FALSE} #' @export vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() #' @export vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() #' @export vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x #' @export vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) #' @export vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ``` ### Arithmetic Writing double dispatch methods for `vec_arith()` is currently more awkward than writing them for `vec_ptype2()` or `vec_cast()`. We plan to improve this in the future. For now, you can use the following instructions. If you define a new type and want to write `vec_arith()` methods for it, you'll need to provide a new single dispatch S3 generic for it of the following form: ```{r, eval=FALSE} #' @export #' @method vec_arith my_type vec_arith.my_type <- function(op, x, y, ...) { UseMethod("vec_arith.my_type", y) } ``` Note that this actually functions as both an S3 method for `vec_arith()` and an S3 generic called `vec_arith.my_type()` that dispatches off `y`. roxygen2 only recognizes it as an S3 generic, so you have to register the S3 method part of this with an explicit `@method` call. After that, you can define double dispatch methods, but you still need an explicit `@method` tag to ensure it is registered with the correct generic: ```{r, eval=FALSE} #' @export #' @method vec_arith.my_type my_type vec_arith.my_type.my_type <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.my_type integer vec_arith.my_type.integer <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.integer my_type vec_arith.integer.my_type <- function(op, x, y, ...) { # implementation here } ``` vctrs provides the hybrid S3 generics/methods for most of the base R types, like `vec_arith.integer()`. If you don't fully import vctrs with `@import vctrs`, then you will need to explicitly import the generic you are registering double dispatch methods for with `@importFrom vctrs vec_arith.integer`. ### Testing It's good practice to test your new class. Specific recommendations: - `R/percent.R` is the type of file where you really do want 100% test coverage. You can use `devtools::test_coverage_file()` to check this. - Make sure to test behaviour with zero-length inputs and missing values. - Use `testthat::verify_output()` to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about `verify_output()` in the [testthat v2.3.0 blog post](https://www.tidyverse.org/blog/2019/11/testthat-2-3-0/); it's an example of a so-called [golden test](https://ro-che.info/articles/2017-12-04-golden-tests). - Check for method symmetry; use `expect_s3_class()`, probably with `exact = TRUE`, to ensure that `vec_c(x, y)` and `vec_c(y, x)` return the same type of output for the important `x`s and `y`s in your domain. - Use `testthat::expect_error()` to check that inputs that can't be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include `vctrs_error_assert_ptype`, `vctrs_error_assert_size`, and `vctrs_error_incompatible_type`. ```{r, eval = FALSE} expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") ``` If your tests pass when run by `devtools::test()`, but fail when run in `R CMD check`, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated `NAMESPACE`. ### Existing classes Before you build your own class, you might want to consider using, or subclassing existing classes. You can check [awesome-vctrs](https://github.com/krlmlr/awesome-vctrs) for a curated list of R vector classes, some of which are built with vctrs. If you've built or extended a class, consider adding it to that list so other people can use it. vctrs/vignettes/stability.Rmd0000644000176200001440000003142614376223322016105 0ustar liggesusers--- title: "Type and size stability" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Type and size stability} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the "shape" of the output you only need to know the "shape"s of the inputs. This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can't predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` ## Definitions We say a function is __type-stable__ iff: 1. You can predict the output type knowing only the input types. 1. The order of arguments in ... does not affect the output type. Similarly, a function is __size-stable__ iff: 1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size. Very few base R functions are size-stable, so I'll also define a slightly weaker condition. I'll call a function __length-stable__ iff: 1. You can predict the output _length_ knowing only the input _lengths_, or there is a single numeric input that specifies the output _length_. (But note that length-stable is not a particularly robust definition because `length()` returns a value for things that are not vectors.) We'll call functions that don't obey these principles __type-unstable__ and __size-unstable__ respectively. On top of type- and size-stability it's also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions. The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again. ### Examples To make these ideas concrete, let's apply them to a few base functions: 1. `mean()` is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error). 1. Surprisingly, `median()` is type-unstable: ```{r} vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ``` It is, however, size-stable, since it always returns a vector of length 1. 1. `sapply()` is type-unstable because you can't predict the output type only knowing the input types: ```{r} vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ``` It's not quite size-stable; `vec_size(sapply(x, f))` is `vec_size(x)` for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns). 1. `vapply()` is a type-stable version of `sapply()` because `vec_ptype_show(vapply(x, fn, template))` is always `vec_ptype_show(template)`. It is size-unstable for the same reasons as `sapply()`. 1. `c()` is type-unstable because `c(x, y)` doesn't always output the same type as `c(y, x)`. ```{r} vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ``` `c()` is *almost always* length-stable because `length(c(x, y))` *almost always* equals `length(x) + length(y)`. One common source of instability here is dealing with non-vectors (see the later section "Non-vectors"): ```{r} env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ``` 1. `paste(x1, x2)` is length-stable because `length(paste(x1, x2))` equals `max(length(x1), length(x2))`. However, it doesn't follow the usual arithmetic recycling rules because `paste(1:2, 1:3)` doesn't generate a warning. 1. `ifelse()` is length-stable because `length(ifelse(cond, true, false))` is always `length(cond)`. `ifelse()` is type-unstable because the output type depends on the value of `cond`: ```{r} vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ``` 1. `read.csv(file)` is type-unstable and size-unstable because, while you know it will return a data frame, you don't know what columns it will return or how many rows it will have. Similarly, `df[[i]]` is not type-stable because the result depends on the _value_ of `i`. There are many important functions that can not be made type-stable or size-stable! With this understanding of type- and size-stability in hand, we'll use them to analyse some base R functions in greater depth and then propose alternatives with better properties. ## `c()` and `vctrs::vec_c()` In this section we'll compare and contrast `c()` and `vec_c()`. `vec_c()` is both type- and size-stable because it possesses the following invariants: * `vec_ptype(vec_c(x, y))` equals `vec_ptype_common(x, y)`. * `vec_size(vec_c(x, y))` equals `vec_size(x) + vec_size(y)`. `c()` has another undesirable property in that it's not consistent with `unlist()`; i.e., `unlist(list(x, y))` does not always equal `c(x, y)`; i.e., base R has multiple sets of type-coercion rules. I won't consider this problem further here. I have two goals here: * To fully document the quirks of `c()`, hence motivating the development of an alternative. * To discuss non-obvious consequences of the type- and size-stability above. ### Atomic vectors If we only consider atomic vectors, `c()` is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical. ```{r} c(FALSE, 1L, 2.5) ``` `vec_c()` obeys similar rules: ```{r} vec_c(FALSE, 1L, 2.5) ``` But it does not automatically coerce to character vectors or lists: ```{r, error = TRUE} c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ``` ### Incompatible vectors and non-vectors In general, most base methods do not throw an error: ```{r} c(10.5, factor("x")) ``` If the inputs aren't vectors, `c()` automatically puts them in a list: ```{r} c(mean, globalenv()) ``` For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list: ```{r, error = TRUE} c(getRversion(), "x") c("x", getRversion()) ``` `vec_c()` throws an error if the inputs are not vectors or not automatically coercible: ```{r, error = TRUE} vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ``` ### Factors Combining two factors returns an integer vector: ```{r} fa <- factor("a") fb <- factor("b") c(fa, fb) ``` (This is documented in `c()` but is still undesirable.) `vec_c()` returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with `dplyr::bind_rows()`, which is stricter and is a common source of user difficulty.) ```{r} vec_c(fa, fb) vec_c(fb, fa) ``` ### Date-times `c()` strips the time zone associated with date-times: ```{r} datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ``` This behaviour is documented in `?DateTimeClasses` but is the source of considerable user pain. `vec_c()` preserves time zones: ```{r} vec_c(datetime_nz) ``` What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there's no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone: ```{r} datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ``` ### Dates and date-times Combining dates and date-times with `c()` gives silently incorrect results: ```{r} date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ``` This behaviour arises because neither `c.Date()` nor `c.POSIXct()` check that all inputs are of the same type. `vec_c()` uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time). ```{r} vec_c(date, datetime) vec_c(date, datetime_nz) ``` ### Missing values If a missing value comes at the beginning of the inputs, `c()` falls back to the internal behaviour, which strips all attributes: ```{r} c(NA, fa) c(NA, date) c(NA, datetime) ``` `vec_c()` takes a different approach treating a logical vector consisting only of `NA` as the `unspecified()` class which can be converted to any other 1d type: ```{r} vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ``` ### Data frames Because it is *almost always* length-stable, `c()` combines data frames column wise (into a list): ```{r} df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ``` `vec_c()` is size-stable, which implies it will row-bind data frames: ```{r} vec_c(df1, df2) ``` ### Matrices and arrays The same reasoning applies to matrices: ```{r} m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ``` One difference is that `vec_c()` will "broadcast" a vector to match the dimensions of a matrix: ```{r} c(m, 1) vec_c(m, 1) ``` ### Implementation The basic implementation of `vec_c()` is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with `vec_init()`, and then insert each input into the correct place in the output. ```{r, eval = FALSE} vec_c <- function(...) { args <- compact(list2(...)) ptype <- vec_ptype_common(!!!args) if (is.null(ptype)) return(NULL) ns <- map_int(args, vec_size) out <- vec_init(ptype, sum(ns)) pos <- 1 for (i in seq_along(ns)) { n <- ns[[i]] x <- vec_cast(args[[i]], to = ptype) vec_slice(out, pos:(pos + n - 1)) <- x pos <- pos + n } out } ``` (The real `vec_c()` is a bit more complicated in order to handle inner and outer names). ## `ifelse()` One of the functions that motivate the development of vctrs is `ifelse()`. It has the surprising property that the result value is "A vector of the same length and attributes (including dimensions and class) as `test`". To me, it seems more reasonable for the type of the output to be controlled by the type of the `yes` and `no` arguments. In `dplyr::if_else()` I swung too far towards strictness: it throws an error if `yes` and `no` are not the same type. This is annoying in practice because it requires typed missing values (`NA_character_` etc), and because the checks are only on the class (not the full prototype), it's easy to create invalid output. I found it much easier to understand what `ifelse()` _should_ do once I internalised the ideas of type- and size-stability: * The first argument must be logical. * `vec_ptype(if_else(test, yes, no))` equals `vec_ptype_common(yes, no)`. Unlike `ifelse()` this implies that `if_else()` must always evaluate both `yes` and `no` in order to figure out the correct type. I think this is consistent with `&&` (scalar operation, short circuits) and `&` (vectorised, evaluates both sides). * `vec_size(if_else(test, yes, no))` equals `vec_size_common(test, yes, no)`. I think the output could have the same size as `test` (i.e., the same behaviour as `ifelse`), but I _think_ as a general rule that your inputs should either be mutually recycling or not. This leads to the following implementation: ```{r} if_else <- function(test, yes, no) { if (!is_logical(test)) { abort("`test` must be a logical vector.") } c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ``` By using `vec_size()` and `vec_slice()`, this definition of `if_else()` automatically works with data.frames and matrices: ```{r} if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) ``` vctrs/R/0000755000176200001440000000000014520724751011623 5ustar liggesusersvctrs/R/partial-factor.R0000644000176200001440000000464614376223321014664 0ustar liggesusers#' Partially specify a factor #' #' @description #' `r lifecycle::badge("experimental")` #' #' This special class can be passed as a `ptype` in order to specify that the #' result should be a factor that contains at least the specified levels. #' #' @inheritParams new_factor #' @keywords internal #' @export #' @examples #' pf <- partial_factor(levels = c("x", "y")) #' pf #' #' vec_ptype_common(factor("v"), factor("w"), .ptype = pf) #' partial_factor <- function(levels = character()) { partial <- new_factor(levels = levels) new_partial_factor(partial) } new_partial_factor <- function(partial = factor(), learned = factor()) { stopifnot( is.factor(partial), is.factor(learned) ) # Fails if `learned` is not compatible with `partial` vec_ptype2(partial, learned) new_partial( partial = partial, learned = learned, class = "vctrs_partial_factor" ) } #' @export vec_ptype_full.vctrs_partial_factor <- function(x, ...) { empty <- "" levels <- map(x, levels) hashes <- map_chr(levels, hash_label) needs_indent <- hashes != empty hashes[needs_indent] <- map_chr(hashes[needs_indent], function(x) paste0(" ", x)) source <- rep_named(names(hashes), empty) if (hashes["partial"] != empty) { source["partial"] <- " {partial}" } details <- paste0(hashes, source) details <- details[details != empty] paste0( "partial_factor<\n", paste0(details, collapse = "\n"), "\n>" ) } #' @export vec_ptype_abbr.vctrs_partial_factor <- function(x, ...) { "prtl_fctr" } #' @method vec_ptype2 vctrs_partial_factor #' @export vec_ptype2.vctrs_partial_factor <- function(x, y, ...) { UseMethod("vec_ptype2.vctrs_partial_factor") } #' @method vec_ptype2.vctrs_partial_factor vctrs_partial_factor #' @export vec_ptype2.vctrs_partial_factor.vctrs_partial_factor <- function(x, y, ...) { partial <- vec_ptype2(x$partial, y$partial) learned <- vec_ptype2(x$learned, y$learned) new_partial_factor(partial, learned) } #' @method vec_ptype2.vctrs_partial_factor factor #' @export vec_ptype2.vctrs_partial_factor.factor <- function(x, y, ...) { new_partial_factor(x$partial, vec_ptype2(x$learned, y)) } #' @method vec_ptype2.factor vctrs_partial_factor #' @export vec_ptype2.factor.vctrs_partial_factor <- function(x, y, ...) { new_partial_factor(y$partial, vec_ptype2(y$learned, x)) } #' @export vec_ptype_finalise.vctrs_partial_factor <- function(x, ...) { vec_ptype2(x$learned, x$partial) } vctrs/R/set.R0000644000176200001440000001164514362266120012543 0ustar liggesusers#' Set operations #' #' @description #' - `vec_set_intersect()` returns all values in both `x` and `y`. #' #' - `vec_set_difference()` returns all values in `x` but not `y`. Note #' that this is an asymmetric set difference, meaning it is not commutative. #' #' - `vec_set_union()` returns all values in either `x` or `y`. #' #' - `vec_set_symmetric_difference()` returns all values in either `x` or `y` #' but not both. This is a commutative difference. #' #' Because these are _set_ operations, these functions only return unique values #' from `x` and `y`, returned in the order they first appeared in the original #' input. Names of `x` and `y` are retained on the result, but names are always #' taken from `x` if the value appears in both inputs. #' #' These functions work similarly to [intersect()], [setdiff()], and [union()], #' but don't strip attributes and can be used with data frames. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x,y A pair of vectors. #' #' @param ptype If `NULL`, the default, the output type is determined by #' computing the common type between `x` and `y`. If supplied, both `x` and #' `y` will be cast to this type. #' #' @param x_arg,y_arg Argument names for `x` and `y`. These are used in error #' messages. #' #' @returns #' A vector of the common type of `x` and `y` (or `ptype`, if supplied) #' containing the result of the corresponding set function. #' #' @details #' Missing values are treated as equal to other missing values. For doubles and #' complexes, `NaN` are equal to other `NaN`, but not to `NA`. #' #' @section Dependencies: #' #' ## `vec_set_intersect()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' #' ## `vec_set_difference()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' #' ## `vec_set_union()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' - [vec_c()] #' #' ## `vec_set_symmetric_difference()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' - [vec_c()] #' #' @name vec-set #' @examples #' x <- c(1, 2, 1, 4, 3) #' y <- c(2, 5, 5, 1) #' #' # All unique values in both `x` and `y`. #' # Duplicates in `x` and `y` are always removed. #' vec_set_intersect(x, y) #' #' # All unique values in `x` but not `y` #' vec_set_difference(x, y) #' #' # All unique values in either `x` or `y` #' vec_set_union(x, y) #' #' # All unique values in either `x` or `y` but not both #' vec_set_symmetric_difference(x, y) #' #' # These functions can also be used with data frames #' x <- data_frame( #' a = c(2, 3, 2, 2), #' b = c("j", "k", "j", "l") #' ) #' y <- data_frame( #' a = c(1, 2, 2, 2, 3), #' b = c("j", "l", "j", "l", "j") #' ) #' #' vec_set_intersect(x, y) #' vec_set_difference(x, y) #' vec_set_union(x, y) #' vec_set_symmetric_difference(x, y) #' #' # Vector names don't affect set membership, but if you'd like to force #' # them to, you can transform the vector into a two column data frame #' x <- c(a = 1, b = 2, c = 2, d = 3) #' y <- c(c = 2, b = 1, a = 3, d = 3) #' #' vec_set_intersect(x, y) #' #' x <- data_frame(name = names(x), value = unname(x)) #' y <- data_frame(name = names(y), value = unname(y)) #' #' vec_set_intersect(x, y) NULL #' @rdname vec-set #' @export vec_set_intersect <- function(x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env()) { check_dots_empty0(...) .Call(ffi_vec_set_intersect, x, y, ptype, environment()) } #' @rdname vec-set #' @export vec_set_difference <- function(x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env()) { check_dots_empty0(...) .Call(ffi_vec_set_difference, x, y, ptype, environment()) } #' @rdname vec-set #' @export vec_set_union <- function(x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env()) { check_dots_empty0(...) .Call(ffi_vec_set_union, x, y, ptype, environment()) } #' @rdname vec-set #' @export vec_set_symmetric_difference <- function(x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env()) { check_dots_empty0(...) .Call(ffi_vec_set_symmetric_difference, x, y, ptype, environment()) } vctrs/R/names.R0000644000176200001440000004424714402444671013063 0ustar liggesusers#' Retrieve and repair names #' #' @description #' #' `vec_as_names()` takes a character vector of names and repairs it #' according to the `repair` argument. It is the r-lib and tidyverse #' equivalent of [base::make.names()]. #' #' vctrs deals with a few levels of name repair: #' #' * `minimal` names exist. The `names` attribute is not `NULL`. The #' name of an unnamed element is `""` and never `NA`. For instance, #' `vec_as_names()` always returns minimal names and data frames #' created by the tibble package have names that are, at least, #' `minimal`. #' #' * `unique` names are `minimal`, have no duplicates, and can be used #' where a variable name is expected. Empty names, `...`, and #' `..` followed by a sequence of digits are banned. #' #' - All columns can be accessed by name via `df[["name"]]` and #' ``df$`name` `` and ``with(df, `name`)``. #' #' * `universal` names are `unique` and syntactic (see Details for #' more). #' #' - Names work everywhere, without quoting: `df$name` and `with(df, #' name)` and `lm(name1 ~ name2, data = df)` and #' `dplyr::select(df, name)` all work. #' #' `universal` implies `unique`, `unique` implies `minimal`. These #' levels are nested. #' #' #' @inheritParams rlang::args_error_context #' @inheritParams rlang::args_dots_empty #' #' @param names A character vector. #' @param repair Either a string or a function. If a string, it must be one of #' `"check_unique"`, `"minimal"`, `"unique"`, `"universal"`, `"unique_quiet"`, #' or `"universal_quiet"`. If a function, it is invoked with a vector of #' minimal names and must return minimal names, otherwise an error is thrown. #' #' * Minimal names are never `NULL` or `NA`. When an element doesn't #' have a name, its minimal name is an empty string. #' #' * Unique names are unique. A suffix is appended to duplicate #' names to make them unique. #' #' * Universal names are unique and syntactic, meaning that you can #' safely use the names as variables without causing a syntax #' error. #' #' The `"check_unique"` option doesn't perform any name repair. #' Instead, an error is raised if the names don't suit the #' `"unique"` criteria. #' #' The options `"unique_quiet"` and `"universal_quiet"` are here to help the #' user who calls this function indirectly, via another function which exposes #' `repair` but not `quiet`. Specifying `repair = "unique_quiet"` is like #' specifying `repair = "unique", quiet = TRUE`. When the `"*_quiet"` options #' are used, any setting of `quiet` is silently overridden. #' @param repair_arg If specified and `repair = "check_unique"`, any errors #' will include a hint to set the `repair_arg`. #' @param quiet By default, the user is informed of any renaming #' caused by repairing the names. This only concerns unique and #' universal repairing. Set `quiet` to `TRUE` to silence the #' messages. #' #' Users can silence the name repair messages by setting the #' `"rlib_name_repair_verbosity"` global option to `"quiet"`. #' #' @section `minimal` names: #' #' `minimal` names exist. The `names` attribute is not `NULL`. The #' name of an unnamed element is `""` and never `NA`. #' #' Examples: #' #' ``` #' Original names of a vector with length 3: NULL #' minimal names: "" "" "" #' #' Original names: "x" NA #' minimal names: "x" "" #' ``` #' #' #' @section `unique` names: #' #' `unique` names are `minimal`, have no duplicates, and can be used #' (possibly with backticks) in contexts where a variable is #' expected. Empty names, `...`, and `..` followed by a sequence of #' digits are banned. If a data frame has `unique` names, you can #' index it by name, and also access the columns by name. In #' particular, `df[["name"]]` and `` df$`name` `` and also ``with(df, #' `name`)`` always work. #' #' There are many ways to make names `unique`. We append a suffix of the form #' `...j` to any name that is `""` or a duplicate, where `j` is the position. #' We also change `..#` and `...` to `...#`. #' #' Example: #' #' ``` #' Original names: "" "x" "" "y" "x" "..2" "..." #' unique names: "...1" "x...2" "...3" "y" "x...5" "...6" "...7" #' ``` #' #' Pre-existing suffixes of the form `...j` are always stripped, prior #' to making names `unique`, i.e. reconstructing the suffixes. If this #' interacts poorly with your names, you should take control of name #' repair. #' #' #' @section `universal` names: #' #' `universal` names are `unique` and syntactic, meaning they: #' #' * Are never empty (inherited from `unique`). #' * Have no duplicates (inherited from `unique`). #' * Are not `...`. Do not have the form `..i`, where `i` is a #' number (inherited from `unique`). #' * Consist of letters, numbers, and the dot `.` or underscore `_` #' characters. #' * Start with a letter or start with the dot `.` not followed by a #' number. #' * Are not a [reserved] word, e.g., `if` or `function` or `TRUE`. #' #' If a vector has `universal` names, variable names can be used #' "as is" in code. They work well with nonstandard evaluation, e.g., #' `df$name` works. #' #' vctrs has a different method of making names syntactic than #' [base::make.names()]. In general, vctrs prepends one or more dots #' `.` until the name is syntactic. #' #' Examples: #' #' ``` #' Original names: "" "x" NA "x" #' universal names: "...1" "x...2" "...3" "x...4" #' #' Original names: "(y)" "_z" ".2fa" "FALSE" #' universal names: ".y." "._z" "..2fa" ".FALSE" #' ``` #' #' @seealso [rlang::names2()] returns the names of an object, after #' making them `minimal`. #' @examples #' # By default, `vec_as_names()` returns minimal names: #' vec_as_names(c(NA, NA, "foo")) #' #' # You can make them unique: #' vec_as_names(c(NA, NA, "foo"), repair = "unique") #' #' # Universal repairing fixes any non-syntactic name: #' vec_as_names(c("_foo", "+"), repair = "universal") #' @export vec_as_names <- function(names, ..., repair = c("minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), repair_arg = NULL, quiet = FALSE, call = caller_env()) { check_dots_empty0(...) .Call( ffi_vec_as_names, names, repair, quiet, environment() ) } # TODO! Error calls validate_name_repair_arg <- function(repair) { .Call(vctrs_validate_name_repair_arg, repair) } validate_minimal_names <- function(names, n = NULL) { .Call(vctrs_validate_minimal_names, names, n) } validate_unique <- function(names, arg = "", n = NULL, call = caller_env()) { validate_minimal_names(names, n) empty_names <- detect_empty_names(names) if (has_length(empty_names)) { stop_names_cannot_be_empty(names, call = call) } dot_dot_name <- detect_dot_dot(names) if (has_length(dot_dot_name)) { stop_names_cannot_be_dot_dot(names, call = call) } if (anyDuplicated(names)) { stop_names_must_be_unique(names, arg, call = call) } invisible(names) } detect_empty_names <- function(names) { which(names == "") } detect_dot_dot <- function(names) { grep("^[.][.](?:[.]|[1-9][0-9]*)$", names) } #' Get or set the names of a vector #' #' @description #' These functions work like [rlang::names2()], [names()] and [names<-()], #' except that they return or modify the the rowwise names of the vector. These are: #' * The usual `names()` for atomic vectors and lists #' * The row names for data frames and matrices #' * The names of the first dimension for arrays #' Rowwise names are size consistent: the length of the names always equals #' [vec_size()]. #' #' `vec_names2()` returns the repaired names from a vector, even if it is unnamed. #' See [vec_as_names()] for details on name repair. #' #' `vec_names()` is a bare-bones version that returns `NULL` if the vector is #' unnamed. #' #' `vec_set_names()` sets the names or removes them. #' #' @param x A vector with names #' @param names A character vector, or `NULL`. #' @inheritParams vec_as_names #' #' @return #' `vec_names2()` returns the names of `x`, repaired. #' `vec_names()` returns the names of `x` or `NULL` if unnamed. #' `vec_set_names()` returns `x` with names updated. #' #' @name vec_names #' @export #' @examples #' vec_names2(1:3) #' vec_names2(1:3, repair = "unique") #' vec_names2(c(a = 1, b = 2)) #' #' # `vec_names()` consistently returns the rowwise names of data frames and arrays: #' vec_names(data.frame(a = 1, b = 2)) #' names(data.frame(a = 1, b = 2)) #' vec_names(mtcars) #' names(mtcars) #' vec_names(Titanic) #' names(Titanic) #' #' vec_set_names(1:3, letters[1:3]) #' vec_set_names(data.frame(a = 1:3), letters[1:3]) vec_names2 <- function(x, ..., repair = c("minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), quiet = FALSE) { check_dots_empty0(...) repair <- validate_name_repair_arg(repair) if (is_function(repair)) { names <- minimal_names(x) new_names <- validate_minimal_names(repair(names), n = length(names)) if (!quiet) { describe_repair(names, new_names) } return(new_names) } switch( repair, minimal = minimal_names(x), unique = unique_names(x, quiet = quiet), universal = as_universal_names(minimal_names(x), quiet = quiet), check_unique = validate_unique(minimal_names(x)), unique_quiet = unique_names(x, quiet = TRUE), universal_quiet = as_universal_names(minimal_names(x), quiet = TRUE) ) } vec_repair_names <- function(x, repair = c("minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), ..., quiet = FALSE) { if (is.data.frame(x)) { x } else { vec_set_names(x, vec_names2(x, ..., repair = repair, quiet = quiet)) } } minimal_names <- function(x) { .Call(ffi_minimal_names, x) } unique_names <- function(x, quiet = FALSE) { .Call(ffi_unique_names, x, quiet) } #' @rdname vec_names #' @export vec_names <- function(x) { .Call(vctrs_names, x) } as_minimal_names <- function(names) { .Call(ffi_as_minimal_names, names) } as_unique_names <- function(names, quiet = FALSE) { .Call(vctrs_as_unique_names, names, quiet) } as_universal_names <- function(names, quiet = FALSE) { new_names <- names new_names[] <- "" naked_names <- strip_pos(two_to_three_dots(names)) empty <- naked_names %in% c("", "...") new_names[!empty] <- make_syntactic(naked_names[!empty]) needs_suffix <- empty | vec_duplicate_detect(new_names) new_names <- append_pos(new_names, needs_suffix = needs_suffix) if (!quiet) { describe_repair(names, new_names) } new_names } two_to_three_dots <- function(names) { sub("(^[.][.][1-9][0-9]*$)", ".\\1", names) } append_pos <- function(names, needs_suffix) { need_append_pos <- which(needs_suffix) names[need_append_pos] <- paste0(names[need_append_pos], "...", need_append_pos) names } strip_pos <- function(names) { rx <- "([.][.][.][1-9][0-9]*)+$" gsub(rx, "", names) %|% "" } # Makes each individual name syntactic but does not enforce unique-ness make_syntactic <- function(names) { names[is.na(names)] <- "" names[names == ""] <- "." names[names == "..."] <- "...." names <- sub("^_", "._", names) new_names <- make.names(names) X_prefix <- grepl("^X", new_names) & !grepl("^X", names) new_names[X_prefix] <- sub("^X", "", new_names[X_prefix]) dot_suffix <- which(new_names == paste0(names, ".")) new_names[dot_suffix] <- sub("^(.*)[.]$", ".\\1", new_names[dot_suffix]) # Illegal characters have been replaced with '.' via make.names() # however, we have: # * Declined its addition of 'X' prefixes. # * Turned its '.' suffixes to '.' prefixes. regex <- paste0( "^(?[.]{0,2})", "(?[0-9]*)", "(?[^0-9]?.*$)" ) re <- re_match(new_names, pattern = regex) needs_dots <- which(re$numbers != "") needs_third_dot <- (re$leftovers[needs_dots] == "") re$leading_dots[needs_dots] <- ifelse(needs_third_dot, "...", "..") new_names <- paste0(re$leading_dots, re$numbers, re$leftovers) new_names } # From rematch2, except we don't add tbl_df or tbl classes to the return value re_match <- function(text, pattern, perl = TRUE, ...) { stopifnot( is.character(pattern), length(pattern) == 1, !is.na(pattern) ) text <- as.character(text) match <- regexpr(pattern, text, perl = perl, ...) start <- as.vector(match) length <- attr(match, "match.length") end <- start + length - 1L matchstr <- substring(text, start, end) matchstr[ start == -1 ] <- NA_character_ res <- data.frame( stringsAsFactors = FALSE, .text = text, .match = matchstr ) if (!is.null(attr(match, "capture.start"))) { gstart <- attr(match, "capture.start") glength <- attr(match, "capture.length") gend <- gstart + glength - 1L groupstr <- substring(text, gstart, gend) groupstr[ gstart == -1 ] <- NA_character_ dim(groupstr) <- dim(gstart) res <- cbind(groupstr, res, stringsAsFactors = FALSE) } names(res) <- c(attr(match, "capture.names"), ".text", ".match") res } describe_repair <- function(orig_names, names) { names_inform_repair(orig_names, names) } bullets <- function(..., header = NULL) { problems <- c(...) MAX_BULLETS <- 6L if (length(problems) >= MAX_BULLETS) { n_more <- length(problems) - MAX_BULLETS + 1L problems[[MAX_BULLETS]] <- "..." length(problems) <- MAX_BULLETS } info <- paste0("* ", problems, collapse = "\n") if (!is.null(header)) { info <- paste0(header, "\n", info) } info } # Used in names.c set_rownames_dispatch <- function(x, names) { rownames(x) <- names x } # Used in names.c set_names_dispatch <- function(x, names) { names(x) <- names x } #' @rdname vec_names #' @export vec_set_names <- function(x, names) { .Call(vctrs_set_names, x, names) } #' Repair names with legacy method #' #' This standardises names with the legacy approach that was used in #' tidyverse packages (such as tibble, tidyr, and readxl) before #' [vec_as_names()] was implemented. This tool is meant to help #' transitioning to the new name repairing standard and will be #' deprecated and removed from the package some time in the future. #' #' @inheritParams vec_as_names #' @param prefix,sep Prefix and separator for repaired names. #' #' @examples #' if (rlang::is_installed("tibble")) { #' #' library(tibble) #' #' # Names repair is turned off by default in tibble: #' try(tibble(a = 1, a = 2)) #' #' # You can turn it on by supplying a repair method: #' tibble(a = 1, a = 2, .name_repair = "universal") #' #' # If you prefer the legacy method, use `vec_as_names_legacy()`: #' tibble(a = 1, a = 2, .name_repair = vec_as_names_legacy) #' #' } #' @keywords internal #' @export vec_as_names_legacy <- function(names, prefix = "V", sep = "") { if (length(names) == 0) { return(character()) } blank <- names == "" names[!blank] <- make.unique(names[!blank], sep = sep) new_nms <- setdiff(paste(prefix, seq_along(names), sep = sep), names) names[blank] <- new_nms[seq_len(sum(blank))] names } #' Name specifications #' #' @description #' #' A name specification describes how to combine an inner and outer #' names. This sort of name combination arises when concatenating #' vectors or flattening lists. There are two possible cases: #' #' * Named vector: #' #' ``` #' vec_c(outer = c(inner1 = 1, inner2 = 2)) #' ``` #' #' * Unnamed vector: #' #' ``` #' vec_c(outer = 1:2) #' ``` #' #' In r-lib and tidyverse packages, these cases are errors by default, #' because there's no behaviour that works well for every case. #' Instead, you can provide a name specification that describes how to #' combine the inner and outer names of inputs. Name specifications #' can refer to: #' #' * `outer`: The external name recycled to the size of the input #' vector. #' #' * `inner`: Either the names of the input vector, or a sequence of #' integer from 1 to the size of the vector if it is unnamed. #' #' @param name_spec,.name_spec A name specification for combining #' inner and outer names. This is relevant for inputs passed with a #' name, when these inputs are themselves named, like `outer = #' c(inner = 1)`, or when they have length greater than 1: `outer = #' 1:2`. By default, these cases trigger an error. You can resolve #' the error by providing a specification that describes how to #' combine the names or the indices of the inner vector with the #' name of the input. This specification can be: #' #' * A function of two arguments. The outer name is passed as a #' string to the first argument, and the inner names or positions #' are passed as second argument. #' #' * An anonymous function as a purrr-style formula. #' #' * A glue specification of the form `"{outer}_{inner}"`. #' #' * An [rlang::zap()] object, in which case both outer and inner #' names are ignored and the result is unnamed. #' #' See the [name specification topic][name_spec]. #' #' @examples #' # By default, named inputs must be length 1: #' vec_c(name = 1) # ok #' try(vec_c(name = 1:3)) # bad #' #' # They also can't have internal names, even if scalar: #' try(vec_c(name = c(internal = 1))) # bad #' #' # Pass a name specification to work around this. A specification #' # can be a glue string referring to `outer` and `inner`: #' vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}") #' vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}_{inner}") #' #' # They can also be functions: #' my_spec <- function(outer, inner) paste(outer, inner, sep = "_") #' vec_c(name = 1:3, other = 4:5, .name_spec = my_spec) #' #' # Or purrr-style formulas for anonymous functions: #' vec_c(name = 1:3, other = 4:5, .name_spec = ~ paste0(.x, .y)) #' @name name_spec NULL apply_name_spec <- function(name_spec, outer, inner, n = length(inner)) { .Call(ffi_apply_name_spec, name_spec, outer, inner, n) } glue_as_name_spec <- function(`_spec`) { function(inner, outer) { glue::glue(`_spec`) } } # Evaluate glue specs in a child of base for now environment(glue_as_name_spec) <- baseenv() vctrs/R/import-standalone-linked-version.R0000644000176200001440000000426514377215505020346 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-linked-version.R # last-updated: 2022-05-26 # license: https://unlicense.org # --- # # nocov start check_linked_version <- local({ # Keep in sync with standalone-downstream-deps.R howto_reinstall_msg <- function(pkg) { os <- tolower(Sys.info()[["sysname"]]) if (os == "windows") { url <- "https://github.com/jennybc/what-they-forgot/issues/62" c( i = sprintf("Please update %s to the latest version.", pkg), i = sprintf("Updating packages on Windows requires precautions:\n <%s>", url) ) } else { c( i = sprintf("Please update %s with `install.packages(\"%s\")` and restart R.", pkg, pkg) ) } } function(pkg, with_rlang = requireNamespace("rlang", quietly = TRUE)) { ver <- utils::packageVersion(pkg) ns <- asNamespace(pkg) linked_ver_ptr <- ns[[paste0(pkg, "_linked_version")]] if (is.null(linked_ver_ptr)) { linked_ver <- "" } else { # Construct call to avoid NOTE when argument to `.Call()` is not # statically analysable linked_ver <- do.call(".Call", list(linked_ver_ptr)) } if (nzchar(linked_ver) && ver == linked_ver) { return(invisible(NULL)) } header <- sprintf("The %s package is not properly installed.", pkg) if (nzchar(linked_ver)) { msg <- c(x = sprintf( "The DLL version (%s) does not correspond to the package version (%s).", linked_ver, ver )) } else { # Package does not have a version pointer. This happens when DLL # updating fails for the first version that includes the pointer. msg <- c(x = "The DLL version does not correspond to the package version.") } msg <- c(msg, howto_reinstall_msg(pkg)) if (with_rlang) { msg <- paste(header, rlang::format_error_bullets(msg), sep = "\n") rlang::abort(msg) } else { msg <- paste(c(header, msg), collapse = "\n") stop(msg, call. = FALSE) } } }) # nocov end vctrs/R/empty.R0000644000176200001440000000074014315060307013075 0ustar liggesusers#' Drop empty elements from a list #' #' `list_drop_empty()` removes empty elements from a list. This includes `NULL` #' elements along with empty vectors, like `integer(0)`. This is equivalent to, #' but faster than, `vec_slice(x, list_sizes(x) != 0L)`. #' #' @section Dependencies: #' - [vec_slice()] #' #' @param x A list. #' #' @export #' @examples #' x <- list(1, NULL, integer(), 2) #' list_drop_empty(x) list_drop_empty <- function(x) { .Call(vctrs_list_drop_empty, x) } vctrs/R/altrep-lazy-character.R0000644000176200001440000000221414511320527016134 0ustar liggesusers#' Lazy character vector #' #' `new_lazy_character()` takes a function with no arguments which must return #' a character vector of arbitrary length. The function will be evaluated #' exactly once whenever any properties of the character vector are required #' (including the length or any vector elements). #' #' A "real" production level implementation might work more like #' `carrier::crate()`, where the function is isolated and users must explicitly #' provide any data required to evaluate the function, since the time of #' evaluation is unknown. #' #' As of June 2023, running `x <- new_lazy_character(~ c("x", "y"))` in the #' RStudio console will call the ALTREP length method, which materializes the #' object. Doing this in a terminal session running R does not, so it is an #' RStudio issue. This doesn't affect tests run within a `test_that()` block. #' #' @param fn A function with no arguments returning a character vector. #' #' @noRd new_lazy_character <- function(fn) { fn <- as_function(fn) .Call(ffi_altrep_new_lazy_character, fn) } lazy_character_is_materialized <- function(x) { .Call(ffi_altrep_lazy_character_is_materialized, x) } vctrs/R/type-data-table.R0000644000176200001440000000171714405105465014726 0ustar liggesusersdelayedAssign("as.data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "as.data.table") } else { function(...) abort("`data.table` must be installed.") } }) dt_ptype2 <- function(x, y, ...) { as.data.table(df_ptype2(x, y, ...)) } dt_cast <- function(x, to, ...) { as.data.table(df_cast(x, to, ...)) } #' @export vec_ptype2.data.table.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_cast.data.table.data.table <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.table.data.frame <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.frame.data.table <- function(x, to, ...) { df_cast(x, to, ...) } #' @export vec_ptype_abbr.data.table <- function(x, ...) { "dt" } vctrs/R/type-misc.R0000644000176200001440000001035414405105465013660 0ustar liggesusers # `numeric_version` from base ---------------------------------------- #' @export vec_proxy.numeric_version <- function(x, ...) { x } #' @export vec_proxy_equal.numeric_version <- function(x, ...) { proxy_equal_numeric_version(x) } # To generate data agnostic proxies of ``, we enforce a # restriction that each version can have at most 8 components. This allows us # to `vec_compare()` them without needing a "joint" comparison proxy, unlike # what `.encode_numeric_version()` returns. proxy_equal_numeric_version <- function(x, error_call = caller_env()) { N_COMPONENTS <- 8L x <- unclass(x) size <- length(x) sizes <- lengths(x) if (length(sizes) != 0L) { max <- max(sizes) } else { max <- N_COMPONENTS } if (max > N_COMPONENTS) { cli::cli_abort( "`x` can't contain more than {N_COMPONENTS} version components.", call = error_call ) } if (any(sizes != max)) { # Pad with zeros where needed to be able to transpose. # This is somewhat slow if required. pad_sizes <- max - sizes pad_needed <- which(pad_sizes != 0L) x[pad_needed] <- map2( x[pad_needed], pad_sizes[pad_needed], function(elt, pad_size) { c(elt, vec_rep(0L, times = pad_size)) } ) } # Transpose with combination of `vec_interleave()` and `vec_chop()` x <- vec_interleave(!!!x, .ptype = integer()) out <- vec_chop(x, sizes = vec_rep(size, times = max)) n_zeros <- N_COMPONENTS - max if (n_zeros != 0L) { # Pad columns of zeros out to `N_COMPONENTS` columns zero <- list(vec_rep(0L, times = size)) out <- c(out, vec_rep(zero, times = n_zeros)) } # Use a data frame as the proxy names(out) <- paste0("...", seq_len(N_COMPONENTS)) out <- new_data_frame(out, n = size) # A `` internally stored as `integer()` is considered the # `NA` value. We patch that in at the very end if needed. It is hard to create # so should be very uncommon. missing <- sizes == 0L if (any(missing)) { na <- vec_init(out) out <- vec_assign(out, missing, na) } out } # `omit` from base --------------------------------------------------- #' @export vec_proxy.omit <- function(x, ...) { x } #' @export vec_restore.omit <- function(x, ...) { structure(x, class = "omit") } #' @export vec_ptype2.omit.omit <- function(x, y, ...) { x } #' @export vec_ptype2.integer.omit <- function(x, y, ...) { x } #' @export vec_ptype2.omit.integer <- function(x, y, ...) { y } #' @export vec_ptype2.double.omit <- function(x, y, ...) { x } #' @export vec_ptype2.omit.double <- function(x, y, ...) { y } #' @export vec_cast.omit.omit <- function(x, to, ...) { x } #' @export vec_cast.integer.omit <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.omit.integer <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } #' @export vec_cast.double.omit <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.omit.double <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } # `exclude` from base ------------------------------------------------ #' @export vec_proxy.exclude <- function(x, ...) { x } #' @export vec_restore.exclude <- function(x, ...) { structure(x, class = "exclude") } #' @export vec_ptype2.exclude.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.integer.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.exclude.integer <- function(x, y, ...) { y } #' @export vec_ptype2.double.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.exclude.double <- function(x, y, ...) { y } #' @export vec_cast.exclude.exclude <- function(x, to, ...) { x } #' @export vec_cast.integer.exclude <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.exclude.integer <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } #' @export vec_cast.double.exclude <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.exclude.double <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } vctrs/R/type-list-of.R0000644000176200001440000001426114401377400014277 0ustar liggesusers#' `list_of` S3 class for homogenous lists #' #' A `list_of` object is a list where each element has the same type. #' Modifying the list with `$`, `[`, and `[[` preserves the constraint #' by coercing all input items. #' #' Unlike regular lists, setting a list element to `NULL` using `[[` #' does not remove it. #' #' @inheritParams vec_c #' @param x For `as_list_of()`, a vector to be coerced to list_of. #' @param y,to Arguments to `vec_ptype2()` and `vec_cast()`. #' @export #' @examples #' x <- list_of(1:3, 5:6, 10:15) #' if (requireNamespace("tibble", quietly = TRUE)) { #' tibble::tibble(x = x) #' } #' #' vec_c(list_of(1, 2), list_of(FALSE, TRUE)) list_of <- function(..., .ptype = NULL) { args <- list2(...) list_as_list_of(args, ptype = .ptype) } #' @export #' @rdname list_of as_list_of <- function(x, ...) { UseMethod("as_list_of") } #' @export as_list_of.vctrs_list_of <- function(x, ..., .ptype = NULL) { if (!is.null(.ptype)) { x <- unclass(x) list_as_list_of(x, ptype = .ptype) } else { x } } #' @export as_list_of.list <- function(x, ..., .ptype = NULL) { list_as_list_of(x, ptype = .ptype) } #' Create list_of subclass #' #' @param x A list #' @param ptype The prototype which every element of `x` belongs to #' @param ... Additional attributes used by subclass #' @param class Optional subclass name #' @keywords internal #' @export new_list_of <- function(x = list(), ptype = logical(), ..., class = character()) { if (!obj_is_list(x)) { abort("`x` must be a list.") } if (vec_size(ptype) != 0L) { abort("`ptype` must have size 0.") } new_list_of0(x = x, ptype = ptype, ..., class = class) } new_list_of0 <- function(x, ptype, ..., class = character()) { new_vctr(x, ..., ptype = ptype, class = c(class, "vctrs_list_of")) } list_of_unstructure <- function(x) { attr(x, "ptype") <- NULL attr(x, "class") <- NULL x } #' @export #' @rdname list_of is_list_of <- function(x) { inherits(x, "vctrs_list_of") } #' @export vec_proxy.vctrs_list_of <- function(x, ...) { unclass(x) } # Formatting -------------------------------------------------------------- #' @export obj_print_data.vctrs_list_of <- function(x, ...) { if (length(x) == 0) return() print(vec_data(x)) } #' @export format.vctrs_list_of <- function(x, ...) { format.default(x) } #' @export vec_ptype_full.vctrs_list_of <- function(x, ...) { param <- vec_ptype_full(attr(x, "ptype")) if (grepl("\n", param)) { param <- paste0(indent(paste0("\n", param), 2), "\n") } paste0("list_of<", param, ">") } #' @export vec_ptype_abbr.vctrs_list_of <- function(x, ...) { paste0("list<", vec_ptype_abbr(attr(x, "ptype")), ">") } # vctr methods ------------------------------------------------------------ #' @export as.list.vctrs_list_of <- function(x, ...) { list_of_unstructure(x) } #' @export as.character.vctrs_list_of <- function(x, ...) { # For compatibility with the RStudio Viewer. See tidyverse/tidyr#654. map_chr(x, function(elt) paste0("<", vec_ptype_abbr(elt), ">")) } #' @export `[[.vctrs_list_of` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `$.vctrs_list_of` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `[<-.vctrs_list_of` <- function(x, i, value) { wrapped_type <- attr(x, "ptype") value <- map(value, vec_cast, to = wrapped_type) value <- new_list_of0(value, ptype = wrapped_type) NextMethod() } #' @export `[[<-.vctrs_list_of` <- function(x, i, value) { if (is.null(value)) { # Setting to NULL via [[ shortens the list! Example: # `[[<-`(list(1), 1, NULL) x[i] <- list(value) return(x) } value <- vec_cast(value, attr(x, "ptype")) NextMethod() } #' @export `$<-.vctrs_list_of` <- function(x, i, value) { value <- vec_cast(value, attr(x, "ptype")) NextMethod() } # Type system ------------------------------------------------------------- #' @rdname list_of #' @inheritParams vec_ptype2 #' @export vec_ptype2.vctrs_list_of #' @method vec_ptype2 vctrs_list_of #' @export vec_ptype2.vctrs_list_of <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.vctrs_list_of") } #' @method vec_ptype2.vctrs_list_of vctrs_list_of #' @export vec_ptype2.vctrs_list_of.vctrs_list_of <- function(x, y, ..., x_arg = "", y_arg = "") { x_ptype <- attr(x, "ptype", exact = TRUE) y_ptype <- attr(y, "ptype", exact = TRUE) if (identical(x_ptype, y_ptype)) { return(x) } tryCatch( expr = { ptype <- vec_ptype2(x_ptype, y_ptype, x_arg = x_arg, y_arg = y_arg) new_list_of0(x = list(), ptype = ptype) }, vctrs_error_incompatible_type = function(cnd) { list() } ) } #' @export vec_ptype2.list.vctrs_list_of <- function(x, y, ...) { list() } #' @export vec_ptype2.vctrs_list_of.list <- function(x, y, ...) { list() } #' @rdname list_of #' @export vec_cast.vctrs_list_of #' @method vec_cast vctrs_list_of #' @export vec_cast.vctrs_list_of <- function(x, to, ...) { UseMethod("vec_cast.vctrs_list_of") } #' @export #' @method vec_cast.vctrs_list_of vctrs_list_of vec_cast.vctrs_list_of.vctrs_list_of <- function(x, to, ..., call = caller_env()) { x_ptype <- attr(x, "ptype", exact = TRUE) to_ptype <- attr(to, "ptype", exact = TRUE) if (identical(x_ptype, to_ptype)) { # FIXME: Suboptimal check for "same type", but should be good enough for the # common case of unchopping a list of identically generated list-ofs (#875). # Would be fixed by https://github.com/r-lib/vctrs/issues/1688. x } else { x <- unclass(x) list_as_list_of(x, ptype = to_ptype, error_call = call) } } #' @export vec_cast.list.vctrs_list_of <-function(x, to, ...) { list_of_unstructure(x) } #' @export vec_cast.vctrs_list_of.list <-function(x, to, ..., call = caller_env()) { list_as_list_of( x, attr(to, "ptype"), error_call = call ) } # Helpers ----------------------------------------------------------------- list_as_list_of <- function(x, ptype = NULL, error_call = caller_env()) { ptype <- vec_ptype_common(!!!x, .ptype = ptype, .call = error_call) if (is.null(ptype)) { abort("Can't find common type for elements of `x`.", call = error_call) } x <- vec_cast_common(!!!x, .to = ptype, .call = error_call) new_list_of0(x = x, ptype = ptype) } vctrs/R/type-date-time.R0000644000176200001440000003763314315060307014602 0ustar liggesusers#' Date, date-time, and duration S3 classes #' #' * A `date` ([Date]) is a double vector. Its value represent the number #' of days since the Unix "epoch", 1970-01-01. It has no attributes. #' * A `datetime` ([POSIXct] is a double vector. Its value represents the #' number of seconds since the Unix "Epoch", 1970-01-01. It has a single #' attribute: the timezone (`tzone`)) #' * A `duration` ([difftime]) #' #' These function help the base `Date`, `POSIXct`, and `difftime` classes fit #' into the vctrs type system by providing constructors, coercion functions, #' and casting functions. #' #' @param x A double vector representing the number of days since UNIX #' epoch for `new_date()`, number of seconds since UNIX epoch for #' `new_datetime()`, and number of `units` for `new_duration()`. #' @param tzone Time zone. A character vector of length 1. Either `""` for #' the local time zone, or a value from [OlsonNames()] #' @param units Units of duration. #' @export #' @keywords internal #' @examples #' new_date(0) #' new_datetime(0, tzone = "UTC") #' new_duration(1, "hours") new_date <- function(x = double()) { .Call(vctrs_new_date, x) } #' @export #' @rdname new_date new_datetime <- function(x = double(), tzone = "") { .Call(vctrs_new_datetime, x, tzone) } #' @export #' @rdname new_date new_duration <- function(x = double(), units = c("secs", "mins", "hours", "days", "weeks")) { stopifnot(is.double(x)) units <- arg_match0(units, c("secs", "mins", "hours", "days", "weeks")) structure( x, units = units, class = "difftime" ) } #' @export vec_proxy.Date <- function(x, ...) { date_validate(x) } #' @export vec_proxy.POSIXct <- function(x, ...) { datetime_validate(x) } #' @export vec_proxy.POSIXlt <- function(x, ...) { new_data_frame(unclass(x)) } #' @export vec_proxy_equal.POSIXlt <- function(x, ...) { x <- vec_cast(x, new_datetime(tzone = tzone(x))) vec_proxy_equal(x, ...) } #' @export vec_proxy_compare.POSIXlt <- function(x, ...) { x <- vec_cast(x, new_datetime(tzone = tzone(x))) vec_proxy_compare(x) } #' @export vec_restore.Date <- function(x, to, ...) { NextMethod() } #' @export vec_restore.POSIXct <- function(x, to, ...) { NextMethod() } #' @export vec_restore.POSIXlt <- function(x, to, ...) { NextMethod() } # Print ------------------------------------------------------------------ #' @export vec_ptype_full.Date <- function(x, ...) { "date" } #' @export vec_ptype_abbr.Date <- function(x, ...) { "date" } #' @export vec_ptype_full.POSIXct <- function(x, ...) { tzone <- if (tzone_is_local(x)) "local" else tzone(x) paste0("datetime<", tzone, ">") } #' @export vec_ptype_full.POSIXlt <- function(x, ...) { tzone <- if (tzone_is_local(x)) "local" else tzone(x) paste0("POSIXlt<", tzone, ">") } #' @export vec_ptype_abbr.POSIXct <- function(x, ...) { "dttm" } #' @export vec_ptype_abbr.POSIXlt <- function(x, ...) { "dttm" } #' @export vec_ptype_full.difftime <- function(x, ...) { paste0("duration<", attr(x, "units"), ">") } #' @export vec_ptype_abbr.difftime <- function(x, ...) { "drtn" } # Coerce ------------------------------------------------------------------ #' @rdname new_date #' @export vec_ptype2.Date #' @method vec_ptype2 Date #' @export vec_ptype2.Date <- function(x, y, ...) { UseMethod("vec_ptype2.Date") } #' @method vec_ptype2.Date Date #' @export vec_ptype2.Date.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.Date") } #' @method vec_ptype2.Date POSIXct #' @export vec_ptype2.Date.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.POSIXct") } #' @method vec_ptype2.Date POSIXlt #' @export vec_ptype2.Date.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.POSIXlt") } #' @rdname new_date #' @export vec_ptype2.POSIXct #' @method vec_ptype2 POSIXct #' @export vec_ptype2.POSIXct <- function(x, y, ...) { UseMethod("vec_ptype2.POSIXct") } #' @method vec_ptype2.POSIXct POSIXct #' @export vec_ptype2.POSIXct.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.POSIXct") } #' @method vec_ptype2.POSIXct Date #' @export vec_ptype2.POSIXct.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.Date") } #' @method vec_ptype2.POSIXct POSIXlt #' @export vec_ptype2.POSIXct.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.POSIXlt") } #' @rdname new_date #' @export vec_ptype2.POSIXlt #' @method vec_ptype2 POSIXlt #' @export vec_ptype2.POSIXlt <- function(x, y, ...) { UseMethod("vec_ptype2.POSIXlt") } #' @method vec_ptype2.POSIXlt POSIXlt #' @export vec_ptype2.POSIXlt.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.POSIXlt") } #' @method vec_ptype2.POSIXlt Date #' @export vec_ptype2.POSIXlt.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.Date") } #' @method vec_ptype2.POSIXlt POSIXct #' @export vec_ptype2.POSIXlt.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.POSIXct") } #' @rdname new_date #' @export vec_ptype2.difftime #' @method vec_ptype2 difftime #' @export vec_ptype2.difftime <- function(x, y, ...) UseMethod("vec_ptype2.difftime") #' @method vec_ptype2.difftime difftime #' @export vec_ptype2.difftime.difftime <- function(x, y, ...) new_duration(units = units_union(x, y)) # Cast -------------------------------------------------------------------- #' @rdname new_date #' @export vec_cast.Date #' @method vec_cast Date #' @export vec_cast.Date <- function(x, to, ...) { UseMethod("vec_cast.Date") } #' @export #' @method vec_cast.Date Date vec_cast.Date.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.Date.Date") } #' @export #' @method vec_cast.Date POSIXct vec_cast.Date.POSIXct <- function(x, to, ...) { # TODO: Mark with `stop_native_implementation()` when we use lazy errors date_cast(x, to, ...) } #' @export #' @method vec_cast.Date POSIXlt vec_cast.Date.POSIXlt <- function(x, to, ...) { # TODO: Mark with `stop_native_implementation()` when we use lazy errors date_cast(x, to, ...) } # TODO: Remove when we have lazy errors date_cast <- function(x, to, ..., x_arg = "", to_arg = "") { out <- as.Date(x, tz = tzone(x)) x_ct <- as.POSIXct(x) out_ct <- as.POSIXct(as.character(out), tz = tzone(x)) lossy <- abs(x_ct - out_ct) > 1e-9 & !is.na(x) maybe_lossy_cast(out, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } #' @rdname new_date #' @export vec_cast.POSIXct #' @method vec_cast POSIXct #' @export vec_cast.POSIXct <- function(x, to, ...) { UseMethod("vec_cast.POSIXct") } #' @export #' @method vec_cast.POSIXct Date vec_cast.POSIXct.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.Date") } #' @export #' @method vec_cast.POSIXct POSIXlt vec_cast.POSIXct.POSIXlt <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.POSIXlt") } #' @export #' @method vec_cast.POSIXct POSIXct vec_cast.POSIXct.POSIXct <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.POSIXct") } #' @rdname new_date #' @export vec_cast.POSIXlt #' @method vec_cast POSIXlt #' @export vec_cast.POSIXlt <- function(x, to, ...) { UseMethod("vec_cast.POSIXlt") } #' @export #' @method vec_cast.POSIXlt Date vec_cast.POSIXlt.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.Date") } #' @export #' @method vec_cast.POSIXlt POSIXlt vec_cast.POSIXlt.POSIXlt <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.POSIXlt") } #' @export #' @method vec_cast.POSIXlt POSIXct vec_cast.POSIXlt.POSIXct <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.POSIXct") } #' @rdname new_date #' @export vec_cast.difftime #' @method vec_cast difftime #' @export vec_cast.difftime <- function(x, to, ...) { UseMethod("vec_cast.difftime") } #' @export #' @method vec_cast.difftime difftime vec_cast.difftime.difftime <- function(x, to, ...) { if (identical(units(x), units(to))) { if (typeof(x) == "integer") { # Catch corrupt difftime objects (#1602) storage.mode(x) <- "double" } x } else { # Hack: I can't see any obvious way of changing the units origin <- as.POSIXct(0, origin = "1970-01-01") difftime(origin, origin - x, units = units(to)) } } # Arithmetic -------------------------------------------------------------- #' @rdname new_date #' @export vec_arith.Date #' @method vec_arith Date #' @export vec_arith.Date <- function(op, x, y, ...) UseMethod("vec_arith.Date", y) #' @rdname new_date #' @export vec_arith.POSIXct #' @method vec_arith POSIXct #' @export vec_arith.POSIXct <- function(op, x, y, ...) UseMethod("vec_arith.POSIXct", y) #' @rdname new_date #' @export vec_arith.POSIXlt #' @method vec_arith POSIXlt #' @export vec_arith.POSIXlt <- function(op, x, y, ...) UseMethod("vec_arith.POSIXlt", y) #' @rdname new_date #' @export vec_arith.difftime #' @method vec_arith difftime #' @export vec_arith.difftime <- function(op, x, y, ...) UseMethod("vec_arith.difftime", y) #' @method vec_arith.Date default #' @export vec_arith.Date.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.POSIXct default #' @export vec_arith.POSIXct.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.POSIXlt default #' @export vec_arith.POSIXlt.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.difftime default #' @export vec_arith.difftime.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.Date Date #' @export vec_arith.Date.Date <- function(op, x, y, ...) { switch(op, `-` = difftime(x, y, units = "days"), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct POSIXct #' @export vec_arith.POSIXct.POSIXct <- function(op, x, y, ...) { switch(op, `-` = difftime(x, y, units = "secs"), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXlt POSIXlt #' @export vec_arith.POSIXlt.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXct Date #' @export vec_arith.POSIXct.Date <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date POSIXct #' @export vec_arith.Date.POSIXct <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXlt Date #' @export vec_arith.POSIXlt.Date <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date POSIXlt #' @export vec_arith.Date.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXlt POSIXct #' @export vec_arith.POSIXlt.POSIXct <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXct POSIXlt #' @export vec_arith.POSIXct.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date numeric #' @export vec_arith.Date.numeric <- function(op, x, y, ...) { switch(op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.numeric Date #' @export vec_arith.numeric.Date <- function(op, x, y, ...) { switch(op, `+` = vec_restore(vec_arith_base(op, x, y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct numeric #' @export vec_arith.POSIXct.numeric <- vec_arith.Date.numeric #' @method vec_arith.numeric POSIXct #' @export vec_arith.numeric.POSIXct <- vec_arith.numeric.Date #' @method vec_arith.POSIXlt numeric #' @export vec_arith.POSIXlt.numeric <- function(op, x, y, ...) { vec_arith.POSIXct.numeric(op, as.POSIXct(x), y, ...) } #' @method vec_arith.numeric POSIXlt #' @export vec_arith.numeric.POSIXlt <- function(op, x, y, ...) { vec_arith.numeric.POSIXct(op, x, as.POSIXct(y), ...) } #' @method vec_arith.Date difftime #' @export vec_arith.Date.difftime <- function(op, x, y, ...) { y <- vec_cast(y, new_duration(units = "days")) switch(op, `+` = , `-` = vec_restore(vec_arith_base(op, x, lossy_floor(y, x)), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime Date #' @export vec_arith.difftime.Date <- function(op, x, y, ...) { x <- vec_cast(x, new_duration(units = "days")) switch(op, `+` = vec_restore(vec_arith_base(op, lossy_floor(x, y), y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct difftime #' @export vec_arith.POSIXct.difftime <- function(op, x, y, ...) { y <- vec_cast(y, new_duration(units = "secs")) switch(op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime POSIXct #' @export vec_arith.difftime.POSIXct <- function(op, x, y, ...) { x <- vec_cast(x, new_duration(units = "secs")) switch(op, `+` = vec_restore(vec_arith_base(op, x, y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXlt difftime #' @export vec_arith.POSIXlt.difftime <- function(op, x, y, ...) { vec_arith.POSIXct.difftime(op, as.POSIXct(x), y, ...) } #' @method vec_arith.difftime POSIXlt #' @export vec_arith.difftime.POSIXlt <- function(op, x, y, ...) { vec_arith.difftime.POSIXct(op, x, as.POSIXct(y), ...) } #' @method vec_arith.difftime difftime #' @export vec_arith.difftime.difftime <- function(op, x, y, ...) { # Ensure x and y have same units args <- vec_cast_common(x, y) x <- args[[1L]] y <- args[[2L]] switch(op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), `/` = vec_arith_base(op, x, y), `%/%` = vec_arith_base(op, x, y), `%%` = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime MISSING #' @export vec_arith.difftime.MISSING <- function(op, x, y, ...) { switch(op, `-` = vec_restore(-vec_data(x), x), `+` = x, stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime numeric #' @export vec_arith.difftime.numeric <- function(op, x, y, ...) { vec_restore(vec_arith_base(op, x, y), x) } #' @method vec_arith.numeric difftime #' @export vec_arith.numeric.difftime <- function(op, x, y, ...) { switch(op, `/` = stop_incompatible_op(op, x, y), vec_restore(vec_arith_base(op, x, y), y) ) } # Helpers ----------------------------------------------------------------- # The tz attribute for POSIXlt can have 3 components # (time zone name, abbreviated name, abbreviated DST name) tzone <- function(x) { attr(x, "tzone")[[1]] %||% "" } tzone_is_local <- function(x) { identical(tzone(x), "") } tzone_union <- function(x, y) { if (tzone_is_local(x)) { tzone(y) } else { tzone(x) } } units_union <- function(x, y) { if (identical(units(x), units(y))) { units(x) } else { "secs" } } date_validate <- function(x) { .Call(vctrs_date_validate, x) } datetime_validate <- function(x) { .Call(vctrs_datetime_validate, x) } # as.character.Date() calls format() which tries to guess a simplified format. # Supplying a known format is faster and much more memory efficient. date_as_character <- function(x) { format(x, format = "%Y-%m-%d") } # `as.POSIXlt.character()` tries multiple formats. Supplying # a known format is much faster and more memory efficient. chr_date_as_posixlt <- function(x, tzone) { as.POSIXlt.character(x, tz = tzone, format = "%Y-%m-%d") } # `as.POSIXct.default()` for characters goes through `as.POSIXlt.character()` chr_date_as_posixct <- function(x, tzone) { out <- chr_date_as_posixlt(x, tzone) as.POSIXct.POSIXlt(out, tzone) } lossy_floor <- function(x, to, x_arg = "", to_arg = "") { x_floor <- floor(x) lossy <- x != x_floor maybe_lossy_cast(x_floor, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } # Guarantees the presence of a `tzone` attribute # by going through `as.POSIXlt.POSIXct()`. # Useful for testing, since we always try to restore a `tzone`. as_posixlt <- function(x, tz = "") { as.POSIXlt(as.POSIXct(x, tz)) } # Math -------------------------------------------------------------------- #' @export vec_math.Date <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_math.POSIXct <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_math.POSIXlt <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } vctrs/R/register-s3.R0000644000176200001440000001561114362266120014114 0ustar liggesusers# This source code file is licensed under the unlicense license # https://unlicense.org #' Register a method for a suggested dependency #' #' Generally, the recommend way to register an S3 method is to use the #' `S3Method()` namespace directive (often generated automatically by the #' `@export` roxygen2 tag). However, this technique requires that the generic #' be in an imported package, and sometimes you want to suggest a package, #' and only provide a method when that package is loaded. `s3_register()` #' can be called from your package's `.onLoad()` to dynamically register #' a method only if the generic's package is loaded. #' #' For R 3.5.0 and later, `s3_register()` is also useful when demonstrating #' class creation in a vignette, since method lookup no longer always involves #' the lexical scope. For R 3.6.0 and later, you can achieve a similar effect #' by using "delayed method registration", i.e. placing the following in your #' `NAMESPACE` file: #' #' ``` #' if (getRversion() >= "3.6.0") { #' S3method(package::generic, class) #' } #' ``` #' #' @section Usage in other packages: #' To avoid taking a dependency on vctrs, you copy the source of #' [`s3_register()`](https://github.com/r-lib/vctrs/blob/main/R/register-s3.R) #' into your own package. It is licensed under the permissive #' [unlicense](https://choosealicense.com/licenses/unlicense/) to make it #' crystal clear that we're happy for you to do this. There's no need to include #' the license or even credit us when using this function. #' #' @usage NULL #' @param generic Name of the generic in the form `pkg::generic`. #' @param class Name of the class #' @param method Optionally, the implementation of the method. By default, #' this will be found by looking for a function called `generic.class` #' in the package environment. #' #' Note that providing `method` can be dangerous if you use #' devtools. When the namespace of the method is reloaded by #' `devtools::load_all()`, the function will keep inheriting from #' the old namespace. This might cause crashes because of dangling #' `.Call()` pointers. #' @export #' @examples #' # A typical use case is to dynamically register tibble/pillar methods #' # for your class. That way you avoid creating a hard dependency on packages #' # that are not essential, while still providing finer control over #' # printing when they are used. #' #' .onLoad <- function(...) { #' s3_register("pillar::pillar_shaft", "vctrs_vctr") #' s3_register("tibble::type_sum", "vctrs_vctr") #' } #' @keywords internal # nocov start s3_register <- function(generic, class, method = NULL) { stopifnot(is.character(generic), length(generic) == 1) stopifnot(is.character(class), length(class) == 1) pieces <- strsplit(generic, "::")[[1]] stopifnot(length(pieces) == 2) package <- pieces[[1]] generic <- pieces[[2]] caller <- parent.frame() get_method_env <- function() { top <- topenv(caller) if (isNamespace(top)) { asNamespace(environmentName(top)) } else { caller } } get_method <- function(method) { if (is.null(method)) { get(paste0(generic, ".", class), envir = get_method_env()) } else { method } } register <- function(...) { envir <- asNamespace(package) # Refresh the method each time, it might have been updated by # `devtools::load_all()` method_fn <- get_method(method) stopifnot(is.function(method_fn)) # Only register if generic can be accessed if (exists(generic, envir)) { registerS3method(generic, class, method_fn, envir = envir) } else if (identical(Sys.getenv("NOT_CRAN"), "true")) { warn <- .rlang_s3_register_compat("warn") warn(c( sprintf( "Can't find generic `%s` in package %s to register S3 method.", generic, package ), "i" = "This message is only shown to developers using devtools.", "i" = sprintf("Do you need to update %s to the latest version?", package) )) } } # Always register hook in case package is later unloaded & reloaded setHook(packageEvent(package, "onLoad"), function(...) { register() }) # For compatibility with R < 4.0 where base isn't locked is_sealed <- function(pkg) { identical(pkg, "base") || environmentIsLocked(asNamespace(pkg)) } # Avoid registration failures during loading (pkgload or regular). # Check that environment is locked because the registering package # might be a dependency of the package that exports the generic. In # that case, the exports (and the generic) might not be populated # yet (#1225). if (isNamespaceLoaded(package) && is_sealed(package)) { register() } invisible() } .rlang_s3_register_compat <- function(fn, try_rlang = TRUE) { # Compats that behave the same independently of rlang's presence out <- switch( fn, is_installed = return(function(pkg) requireNamespace(pkg, quietly = TRUE)) ) # Only use rlang if it is fully loaded (#1482) if (try_rlang && requireNamespace("rlang", quietly = TRUE) && environmentIsLocked(asNamespace("rlang"))) { switch( fn, is_interactive = return(rlang::is_interactive) ) # Make sure rlang knows about "x" and "i" bullets if (utils::packageVersion("rlang") >= "0.4.2") { switch( fn, abort = return(rlang::abort), warn = return((rlang::warn)), inform = return(rlang::inform) ) } } # Fall back to base compats is_interactive_compat <- function() { opt <- getOption("rlang_interactive") if (!is.null(opt)) { opt } else { interactive() } } format_msg <- function(x) paste(x, collapse = "\n") switch( fn, is_interactive = return(is_interactive_compat), abort = return(function(msg) stop(format_msg(msg), call. = FALSE)), warn = return(function(msg) warning(format_msg(msg), call. = FALSE)), inform = return(function(msg) message(format_msg(msg))) ) stop(sprintf("Internal error in rlang shims: Unknown function `%s()`.", fn)) } on_load({ s3_register <- replace_from("s3_register", "rlang") }) knitr_defer <- function(expr, env = caller_env()) { roxy_caller <- detect(sys.frames(), env_inherits, ns_env("knitr")) if (is_null(roxy_caller)) { abort("Internal error: can't find knitr on the stack.") } blast( withr::defer(!!substitute(expr), !!roxy_caller), env ) } blast <- function(expr, env = caller_env()) { eval_bare(enexpr(expr), env) } knitr_local_registration <- function(generic, class, env = caller_env()) { stopifnot(is.character(generic), length(generic) == 1) stopifnot(is.character(class), length(class) == 1) pieces <- strsplit(generic, "::")[[1]] stopifnot(length(pieces) == 2) package <- pieces[[1]] generic <- pieces[[2]] name <- paste0(generic, ".", class) method <- env_get(env, name) old <- env_bind(global_env(), !!name := method) knitr_defer(env_bind(global_env(), !!!old)) } # nocov end vctrs/R/type-tibble.R0000644000176200001440000000254614420030332014155 0ustar liggesusers#' @rdname df_ptype2 #' @export tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { .Call( ffi_tib_ptype2, x = x, y = y, x_arg = x_arg, y_arg = y_arg, frame = environment() ) } #' @rdname df_ptype2 #' @export tib_cast <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { .Call( ffi_tib_cast, x = x, to = to, x_arg = x_arg, to_arg = to_arg, frame = environment() ) } df_as_tibble <- function(df) { class(df) <- c("tbl_df", "tbl", "data.frame") df } # Conditionally registered in .onLoad() vec_ptype2_tbl_df_tbl_df <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_ptype2_tbl_df_data.frame <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_ptype2_data.frame_tbl_df <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_cast_tbl_df_tbl_df <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vec_cast_data.frame_tbl_df <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vec_cast_tbl_df_data.frame <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vctrs/R/utils.R0000644000176200001440000001701614315060307013103 0ustar liggesusersstr_dup <- function(x, times) { paste0(rep(x, times = times), collapse = "") } indent <- function(x, n) { pad <- str_dup(" ", n) map_chr(x, gsub, pattern = "(\n+)", replacement = paste0("\\1", pad)) } ones <- function(...) { array(1, dim = c(...)) } vec_coerce_bare <- function(x, type) { # FIXME! Unexported wrapper around Rf_coerceVector() coerce <- env_get(ns_env("rlang"), "vec_coerce") coerce(x, type) } # Matches the semantics of c() - based on experimenting with the output # of c(), not reading the source code. outer_names <- function(names, outer, n) { .Call(ffi_outer_names, names, outer, vec_cast(n, int())) } has_inner_names <- function(x) { !all(map_lgl(map(x, vec_names), is.null)) } cat_line <- function(...) { cat(paste0(..., "\n", collapse = "")) } set_partition <- function(x, y) { list( both = intersect(x, y), only_x = setdiff(x, y), only_y = setdiff(y, x) ) } all_equal <- function(x) all(x == x[[1]]) inline_list <- function(title, x, width = getOption("width"), quote = "") { label_width <- width - nchar(title) x <- glue::glue_collapse( encodeString(x, quote = quote), sep = ", ", width = label_width ) paste0(title, x) } has_unique_names <- function(x) { nms <- names(x) if (length(nms) != length(x)) { return(FALSE) } if (any(is.na(nms) | nms == "")) { return(FALSE) } !anyDuplicated(nms) } compact <- function(x) { is_null <- map_lgl(x, is.null) x[!is_null] } paste_line <- function (...) { paste(chr(...), collapse = "\n") } # Experimental result <- function(ok = NULL, err = NULL) { structure( list(ok = ok, err = err), class = "rlang_result" ) } result_get <- function(x) { if (!is_null(x$err)) { cnd_signal(x$err) } x$ok } obj_type <- function(x) { if (vec_is(x)) { vec_ptype_full(x) } else if (is.object(x)) { paste(class(x), collapse = "/") } else if (is_function(x)) { "function" } else { typeof(x) } } new_opts <- function(x, opts, subclass = NULL, arg = NULL) { if (!all(x %in% opts)) { if (is_null(arg)) { arg <- "Argument" } else { arg <- glue::glue("`{arg}`") } opts <- encodeString(opts, quote = "\"") opts <- glue::glue_collapse(opts, sep = ", ", last = " or ") abort(glue::glue("{arg} must be one of {opts}.")) } structure( set_names(opts %in% x, opts), class = c(subclass, "vctrs_opts") ) } glue_data_bullets <- function(.data, ..., .env = caller_env()) { glue_data <- function(...) glue::glue_data(.data, ..., .envir = .env) format_error_bullets(map_chr(chr(...), glue_data)) } unstructure <- function(x) { attributes(x) <- NULL x } # We almost never want `stringsAsFactors = TRUE`, and `FALSE` became # the default in R 4.0.0. This wrapper ensures that our tests are compliant # with versions of R before and after this change. Keeping it in `utils.R` # rather than as a testthat helper ensures that it is sourced before any other # testthat helpers. data.frame <- function(..., stringsAsFactors = NULL) { stringsAsFactors <- stringsAsFactors %||% FALSE base::data.frame(..., stringsAsFactors = stringsAsFactors) } try_catch_callback <- function(data, cnd) { .Call(vctrs_try_catch_callback, data, cnd) } try_catch_hnd <- function(data) { function(cnd) { try_catch_callback(data, cnd) } } try_catch_impl <- function(data, ...) { tryCatch( try_catch_callback(data, NULL), ... ) } ns_methods <- function(name) { ns_env(name)$.__S3MethodsTable__. } s3_find_method <- function(x, generic, ns = "base") { stopifnot( is_string(generic), is_string(ns) ) table <- ns_methods(ns_env(ns)) .Call(vctrs_s3_find_method, generic, x, table) } s3_get_method <- function(class, generic, ns = "base") { stopifnot( is_string(class), is_string(generic), is_string(ns) ) table <- ns_methods(ns_env(ns)) .Call(ffi_s3_get_method, generic, class, table) } s3_method_specific <- function(x, generic, ns = "base", default = TRUE) { classes <- class(x)[[1]] if (default) { classes <- c(classes, "default") } for (class in classes) { method <- s3_get_method(class, generic, ns = ns) if (!is_null(method)) { return(method) } } cli::cli_abort("Can't find {.fn {generic}} method for {.cls {class}}.") } df_has_base_subset <- function(x) { method <- s3_find_method(x, "[", ns = "base") is_null(method) || identical(method, `[.data.frame`) } last <- function(x) { x[[length(x)]] } # Find the longest common suffix of two vectors vec_common_suffix <- function(x, y) { common <- vec_cast_common(x = x, y = y) x <- common$x y <- common$y x_size <- vec_size(x) y_size <- vec_size(y) n <- min(x_size, y_size) if (!n) { return(vec_slice(x, int())) } # Truncate the start of the vectors so they have equal size if (x_size < y_size) { y <- vec_slice(y, seq2(y_size - x_size + 1, y_size)) } else if (y_size < x_size) { x <- vec_slice(x, seq2(x_size - y_size + 1, x_size)) } # Find locations of unequal elements. Elements after the last # location are the common suffix. common <- vec_equal(x, y) i <- which(!common) # Slice the suffix after the last unequal element if (length(i)) { vec_slice(x, seq2(max(i) + 1, n)) } else { x } } import_from <- function(ns, names, env = caller_env()) { objs <- env_get_list(ns_env(ns), names) env_bind(env, !!!objs) } fast_c <- function(x, y) { .Call(vctrs_fast_c, x, y) } # Based on r-lib/bench (itself based on gaborcsardi/prettyunits) #' @export format.vctrs_bytes <- function(x, scientific = FALSE, digits = 3, drop0trailing = TRUE, ...) { nms <- names(x) bytes <- unclass(x) unit <- map_chr(x, find_unit, byte_units) res <- round(bytes / byte_units[unit], digits = digits) ## Zero bytes res[bytes == 0] <- 0 unit[bytes == 0] <- "B" ## NA and NaN bytes res[is.na(bytes)] <- NA_real_ res[is.nan(bytes)] <- NaN unit[is.na(bytes)] <- "" # Includes NaN as well # Append an extra B to each unit large_units <- unit %in% names(byte_units)[-1] unit[large_units] <- paste0(unit[large_units], "B") res <- format(res, scientific = scientific, digits = digits, drop0trailing = drop0trailing, ...) stats::setNames(paste0(res, unit), nms) } #' @export print.vctrs_bytes <- function(x, ...) { print(format(x, ...), quote = FALSE) } tolerance <- sqrt(.Machine$double.eps) find_unit <- function(x, units) { if (is.na(x) || is.nan(x) || x <= 0 || is.infinite(x)) { return(NA_character_) } epsilon <- 1 - (x * (1 / units)) names(utils::tail(n = 1, which(epsilon < tolerance))) } byte_units <- c( 'B' = 1, 'K' = 1024, 'M' = 1024 ^ 2, 'G' = 1024 ^ 3, 'T' = 1024 ^ 4, 'P' = 1024 ^ 5, 'E' = 1024 ^ 6, 'Z' = 1024 ^ 7, 'Y' = 1024 ^ 8 ) new_vctrs_bytes <- function(x) { structure(x, class = c("vctrs_bytes", "numeric")) } named <- function(x) { if (is_null(names(x))) { names(x) <- names2(x) } x } browser <- function(..., skipCalls = 0, frame = parent.frame()) { if (!identical(stdout(), getConnection(1))) { sink(getConnection(1)) withr::defer(sink(), envir = frame) } # Calling `browser()` on exit avoids RStudio displaying the # `browser2()` location. We still need one `n` to get to the # expected place. Ideally `skipCalls` would not skip but exit the # contexts. on.exit(base::browser(..., skipCalls = skipCalls + 1)) } vec_paste0 <- function(...) { args <- vec_recycle_common(...) exec(paste0, !!!args) } vctrs/R/zzz.R0000644000176200001440000001235114420030332012565 0ustar liggesusers# nocov start .onLoad <- function(libname, pkgname) { check_linked_version(pkgname) ns <- ns_env("vctrs") run_on_load() on_package_load("testthat", { s3_register("testthat::is_informative_error", "vctrs_error_cast_lossy", is_informative_error_vctrs_error_cast_lossy) s3_register("testthat::is_informative_error", "vctrs_error_cast_lossy_dropped", is_informative_error_vctrs_error_cast_lossy_dropped) }) s3_register("generics::as.factor", "vctrs_vctr") s3_register("generics::as.ordered", "vctrs_vctr") s3_register("generics::as.difftime", "vctrs_vctr") # Remove once tibble has implemented the methods on_package_load("tibble", { if (!env_has(ns_env("tibble"), "vec_ptype2.tbl_df.tbl_df")) { s3_register("vctrs::vec_ptype2", "tbl_df.tbl_df", vec_ptype2_tbl_df_tbl_df) s3_register("vctrs::vec_ptype2", "tbl_df.data.frame", vec_ptype2_tbl_df_data.frame) s3_register("vctrs::vec_ptype2", "data.frame.tbl_df", vec_ptype2_data.frame_tbl_df) } if (!env_has(ns_env("tibble"), "vec_cast.tbl_df.tbl_df")) { s3_register("vctrs::vec_cast", "tbl_df.tbl_df", vec_cast_tbl_df_tbl_df) s3_register("vctrs::vec_cast", "tbl_df.data.frame", vec_cast_tbl_df_data.frame) s3_register("vctrs::vec_cast", "data.frame.tbl_df", vec_cast_data.frame_tbl_df) } }) on_package_load("dplyr", { if (!env_has(ns_env("dplyr"), "vec_restore.grouped_df")) { s3_register("vctrs::vec_restore", "grouped_df", vec_restore_grouped_df) } if (!env_has(ns_env("dplyr"), "vec_ptype2.grouped_df.grouped_df")) { s3_register("vctrs::vec_ptype2", "grouped_df.grouped_df", vec_ptype2_grouped_df_grouped_df) s3_register("vctrs::vec_ptype2", "grouped_df.data.frame", vec_ptype2_grouped_df_data.frame) s3_register("vctrs::vec_ptype2", "grouped_df.tbl_df", vec_ptype2_grouped_df_tbl_df) s3_register("vctrs::vec_ptype2", "data.frame.grouped_df", vec_ptype2_data.frame_grouped_df) s3_register("vctrs::vec_ptype2", "tbl_df.grouped_df", vec_ptype2_tbl_df_grouped_df) } if (!env_has(ns_env("dplyr"), "vec_cast.grouped_df.grouped_df")) { s3_register("vctrs::vec_cast", "grouped_df.grouped_df", vec_cast_grouped_df_grouped_df) s3_register("vctrs::vec_cast", "grouped_df.data.frame", vec_cast_grouped_df_data.frame) s3_register("vctrs::vec_cast", "grouped_df.tbl_df", vec_cast_grouped_df_tbl_df) s3_register("vctrs::vec_cast", "data.frame.grouped_df", vec_cast_data.frame_grouped_df) s3_register("vctrs::vec_cast", "tbl_df.grouped_df", vec_cast_tbl_df_grouped_df) } if (!env_has(ns_env("dplyr"), "vec_restore.rowwise_df")) { s3_register("vctrs::vec_restore", "rowwise_df", vec_restore_rowwise_df) } if (!env_has(ns_env("dplyr"), "vec_ptype2.rowwise_df.rowwise_df")) { s3_register("vctrs::vec_ptype2", "rowwise_df.rowwise_df", vec_ptype2_rowwise_df_rowwise_df) s3_register("vctrs::vec_ptype2", "rowwise_df.data.frame", vec_ptype2_rowwise_df_data.frame) s3_register("vctrs::vec_ptype2", "rowwise_df.tbl_df", vec_ptype2_rowwise_df_tbl_df) s3_register("vctrs::vec_ptype2", "data.frame.rowwise_df", vec_ptype2_data.frame_rowwise_df) s3_register("vctrs::vec_ptype2", "tbl_df.rowwise_df", vec_ptype2_tbl_df_rowwise_df) } if (!env_has(ns_env("dplyr"), "vec_cast.rowwise_df.rowwise_df")) { s3_register("vctrs::vec_cast", "rowwise_df.rowwise_df", vec_cast_rowwise_df_rowwise_df) s3_register("vctrs::vec_cast", "rowwise_df.data.frame", vec_cast_rowwise_df_data.frame) s3_register("vctrs::vec_cast", "rowwise_df.tbl_df", vec_cast_rowwise_df_tbl_df) s3_register("vctrs::vec_cast", "data.frame.rowwise_df", vec_cast_data.frame_rowwise_df) s3_register("vctrs::vec_cast", "tbl_df.rowwise_df", vec_cast_tbl_df_rowwise_df) } }) on_package_load("sf", { import_from("sf", sf_deps, env = sf_env) if (!env_has(ns_env("sf"), "vec_restore.sf")) { s3_register("vctrs::vec_proxy", "sf", vec_proxy_sf) s3_register("vctrs::vec_restore", "sf", vec_restore_sf) } if (!env_has(ns_env("sf"), "vec_ptype2.sf.sf")) { s3_register("vctrs::vec_ptype2", "sf.sf", vec_ptype2_sf_sf) s3_register("vctrs::vec_ptype2", "sf.data.frame", vec_ptype2_sf_data.frame) s3_register("vctrs::vec_ptype2", "data.frame.sf", vec_ptype2_data.frame_sf) s3_register("vctrs::vec_ptype2", "sf.tbl_df", vec_ptype2_sf_tbl_df) s3_register("vctrs::vec_ptype2", "tbl_df.sf", vec_ptype2_tbl_df_sf) s3_register("vctrs::vec_cast", "sf.sf", vec_cast_sf_sf) s3_register("vctrs::vec_cast", "sf.data.frame", vec_cast_sf_data.frame) s3_register("vctrs::vec_cast", "data.frame.sf", vec_cast_data.frame_sf) } if (!env_has(ns_env("sf"), "vec_proxy_order.sfc")) { s3_register("vctrs::vec_proxy_order", "sfc", vec_proxy_order_sfc) } }) utils::globalVariables("vec_set_attributes") # Prevent two copies from being made by `attributes(x) <- attrib` on R < 3.6.0 if (getRversion() >= '3.6.0') { vec_set_attributes <- function(x, attrib) { attributes(x) <- attrib x } } else { vec_set_attributes <- function(x, attrib) { .Call(vctrs_set_attributes, x, attrib) } } env_bind(ns, vec_set_attributes = vec_set_attributes) .Call(vctrs_init_library, ns_env()) } # nocov end vctrs/R/type-sclr.R0000644000176200001440000000737413651552525013705 0ustar liggesusersnew_sclr <- function(..., class = character()) { fields <- list(...) stopifnot(has_unique_names(fields)) structure( list(...), class = c(class, "vctrs_sclr") ) } # Subsetting -------------------------------------------------------------- #' @export `[[.vctrs_sclr` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `$.vctrs_sclr` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `[[<-.vctrs_sclr` <- function(x, i, value) { .Call(vctrs_list_set, x, i, value) } #' @export `$<-.vctrs_sclr` <- function(x, i, value) { .Call(vctrs_list_set, x, i, value) } # Shared niceties --------------------------------------------------------- #' @export print.vctrs_sclr <- function(x, ...) { obj_print(x, ...) invisible(x) } #' @export as.list.vctrs_sclr <- function(x, ...) { vec_set_attributes(x, list(names = names(x))) } #' @export as.data.frame.vctrs_sclr <- function(x, row.names = NULL, optional = FALSE, ..., nm = paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ") ) { force(nm) cols <- list(list(x)) if (!optional) { names(cols) <- nm } new_data_frame(cols, n = 1L) } # Vector behaviours ------------------------------------------------------- #' @export `[.vctrs_sclr` <- function(x, ...) { stop_unsupported(x, "[") } #' @export `[<-.vctrs_sclr` <- function(x, ..., value) { stop_unsupported(x, "[<-") } #' @export c.vctrs_sclr <- function(...) { stop_unsupported(..1, "c") } #' @export Math.vctrs_sclr <- function(x, ...) { stop_unsupported(x, .Generic) } #' @export Ops.vctrs_sclr <- function(e1, e2) { stop_unsupported(e1, .Generic) } #' @export Complex.vctrs_sclr <- function(z) { stop_unsupported(z, .Generic) } #' @export Summary.vctrs_sclr <- function(..., na.rm = TRUE) { stop_unsupported(..1, .Generic) } #' @export `names<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "names<-") } #' @export xtfrm.vctrs_sclr <- function(x) { stop_unsupported(x, "xtfrm") } #' @export `dim<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "dim<-") } #' @export `dimnames<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "dimnames<-") } #' @export levels.vctrs_sclr <- function(x) { stop_unsupported(x, "levels") } #' @export `levels<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "levels<-") } #' @export `t.vctrs_sclr` <- function(x) { stop_unsupported(x, "t") } #' @export `is.na<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "is.na<-") } #' @export unique.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export duplicated.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export anyDuplicated.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export as.logical.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.logical") } #' @export as.integer.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.integer") } #' @export as.double.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.double") } #' @export as.character.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.character") } #' @export as.Date.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.Date") } #' @export as.POSIXct.vctrs_sclr <- function(x, tz = "", ...) { stop_unsupported(x, "as.POSIXct") } # Unimplemented ----------------------------------------------------------- #' @export summary.vctrs_sclr <- function(object, ...) { # nocov start stop_unimplemented(object, "summary") # nocov end } vctrs/R/fields.R0000644000176200001440000000164313347722504013220 0ustar liggesusers#' Tools for accessing the fields of a record. #' #' A [rcrd] behaves like a vector, so `length()`, `names()`, and `$` can #' not provide access to the fields of the underlying list. These helpers do: #' `fields()` is equivalent to `names()`; `n_fields()` is equivalent to #' `length()`; `field()` is equivalent to `$`. #' #' @param x A [rcrd], i.e. a list of equal length vectors with unique names. #' @keywords internal #' @export #' @examples #' x <- new_rcrd(list(x = 1:3, y = 3:1, z = letters[1:3])) #' n_fields(x) #' fields(x) #' #' field(x, "y") #' field(x, "y") <- runif(3) #' field(x, "y") fields <- function(x) { .Call(vctrs_fields, x) } #' @export #' @rdname fields n_fields <- function(x) { .Call(vctrs_n_fields, x) } #' @export #' @rdname fields field <- function(x, i) { .Call(vctrs_field_get, x, i) } #' @export #' @rdname fields `field<-` <- function(x, i, value) { .Call(vctrs_field_set, x, i, value) } vctrs/R/type-integer64.R0000644000176200001440000000752414276722575014556 0ustar liggesusers#' @export vec_proxy_equal.integer64 <- function(x, ...) { if (is.array(x)) { # Stopgap to convert arrays to data frames, then run them through # `vec_proxy_equal()` again, which will proxy each column x <- as_data_frame_from_array(x) x <- vec_proxy_equal(x) return(x) } integer64_proxy(x) } # Print ------------------------------------------------------------------- #' 64 bit integers #' #' A `integer64` is a 64 bits integer vector, implemented in the `bit64` package. #' #' These functions help the `integer64` class from `bit64` in to #' the vctrs type system by providing coercion functions #' and casting functions. #' #' @keywords internal #' @rdname int64 #' @export vec_ptype_full.integer64 <- function(x, ...) { "integer64" } #' @rdname int64 #' @export vec_ptype_abbr.integer64 <- function(x, ...) { "int64" } # Coerce ------------------------------------------------------------------ #' @export #' @rdname int64 #' @export vec_ptype2.integer64 #' @method vec_ptype2 integer64 vec_ptype2.integer64 <- function(x, y, ...) { UseMethod("vec_ptype2.integer64") } #' @method vec_ptype2.integer64 integer64 #' @export vec_ptype2.integer64.integer64 <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer64 integer #' @export vec_ptype2.integer64.integer <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer integer64 #' @export vec_ptype2.integer.integer64 <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer64 logical #' @export vec_ptype2.integer64.logical <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.logical integer64 #' @export vec_ptype2.logical.integer64 <- function(x, y, ...) bit64::integer64() # Cast -------------------------------------------------------------------- #' @export #' @rdname int64 #' @export vec_cast.integer64 #' @method vec_cast integer64 vec_cast.integer64 <- function(x, to, ...) { UseMethod("vec_cast.integer64") } #' @export #' @method vec_cast.integer64 integer64 vec_cast.integer64.integer64 <- function(x, to, ...) { x } #' @export #' @method vec_cast.integer64 integer vec_cast.integer64.integer <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.integer integer64 vec_cast.integer.integer64 <- function(x, to, ...) { as.integer(x) } #' @export #' @method vec_cast.integer64 logical vec_cast.integer64.logical <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.logical integer64 vec_cast.logical.integer64 <- function(x, to, ...) { as.logical(x) } #' @export #' @method vec_cast.integer64 double vec_cast.integer64.double <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.double integer64 vec_cast.double.integer64 <- function(x, to, ...) { as.double(x) } # ------------------------------------------------------------------------------ integer64_proxy <- function(x) { .Call(vctrs_integer64_proxy, x) } integer64_restore <- function(x) { .Call(vctrs_integer64_restore, x) } # ------------------------------------------------------------------------------ as_data_frame_from_array <- function(x) { # Alternative to `as.data.frame.array()` that always strips 1-D arrays # of their dimensions. Unlike `as.data.frame2()`, it doesn't unclass the # input, which means that each column retains its original class. # This function doesn't attempt to keep the names of `x` at all. dim <- dim(x) n_dim <- length(dim) if (n_dim == 1) { # Treat 1-D arrays as 1 column matrices dim(x) <- c(dim, 1L) n_dim <- 2L } n_row <- dim[[1L]] n_col <- prod(dim[-1L]) n_col_seq <- seq_len(n_col) dim(x) <- c(n_row, n_col) out <- vector("list", n_col) names(out) <- as_unique_names(rep("", n_col), quiet = TRUE) for (i in n_col_seq) { out[[i]] <- x[, i, drop = TRUE] } new_data_frame(out, n = n_row) } vctrs/R/subscript-loc.R0000644000176200001440000004254214373205357014550 0ustar liggesusers#' Create a vector of locations #' #' @description #' #' These helpers provide a means of standardizing common indexing #' methods such as integer, character or logical indexing. #' #' * `vec_as_location()` accepts integer, character, or logical vectors #' of any size. The output is always an integer vector that is #' suitable for subsetting with `[` or [vec_slice()]. It might be a #' different size than the input because negative selections are #' transformed to positive ones and logical vectors are transformed #' to a vector of indices for the `TRUE` locations. #' #' * `vec_as_location2()` accepts a single number or string. It returns #' a single location as a integer vector of size 1. This is suitable #' for extracting with `[[`. #' #' * `num_as_location()` and `num_as_location2()` are specialized variants #' that have extra options for numeric indices. #' #' @inheritParams vec_slice #' @inheritParams rlang::args_error_context #' #' @param n A single integer representing the total size of the #' object that `i` is meant to index into. #' #' @param names If `i` is a character vector, `names` should be a character #' vector that `i` will be matched against to construct the index. Otherwise, #' not used. The default value of `NULL` will result in an error #' if `i` is a character vector. #' #' @param missing How should missing `i` values be handled? #' - `"error"` throws an error. #' - `"propagate"` returns them as is. #' - `"remove"` removes them. #' #' By default, vector subscripts propagate missing values but scalar #' subscripts error on them. #' #' Propagated missing values can't be combined with negative indices when #' `negative = "invert"`, because they can't be meaningfully inverted. #' #' @param arg The argument name to be displayed in error messages. #' #' @return #' - `vec_as_location()` and `num_as_location()` return an integer vector that #' can be used as an index in a subsetting operation. #' #' - `vec_as_location2()` and `num_as_location2()` return an integer of size 1 #' that can be used a scalar index for extracting an element. #' #' @examples #' x <- array(1:6, c(2, 3)) #' dimnames(x) <- list(c("r1", "r2"), c("c1", "c2", "c3")) #' #' # The most common use case validates row indices #' vec_as_location(1, vec_size(x)) #' #' # Negative indices can be used to index from the back #' vec_as_location(-1, vec_size(x)) #' #' # Character vectors can be used if `names` are provided #' vec_as_location("r2", vec_size(x), rownames(x)) #' #' # You can also construct an index for dimensions other than the first #' vec_as_location(c("c2", "c1"), ncol(x), colnames(x)) #' #' @keywords internal #' @export vec_as_location <- function(i, n, names = NULL, ..., missing = c("propagate", "remove", "error"), arg = caller_arg(i), call = caller_env()) { check_dots_empty0(...) .Call( ffi_as_location, i = i, n = n, names = names, loc_negative = "invert", loc_oob = "error", loc_zero = "remove", missing = missing, frame = environment() ) } #' @rdname vec_as_location #' #' @param negative How should negative `i` values be handled? #' - `"error"` throws an error. #' - `"ignore"` returns them as is. #' - `"invert"` returns the positive location generated by inverting the #' negative location. When inverting, positive and negative locations #' can't be mixed. This option is only applicable for `num_as_location()`. #' #' @param oob How should out-of-bounds `i` values be handled? #' - `"error"` throws an error. #' - `"remove"` removes both positive and negative out-of-bounds locations. #' - `"extend"` allows positive out-of-bounds locations if they directly #' follow the end of a vector. This can be used to implement extendable #' vectors, like `letters[1:30]`. #' #' @param zero How should zero `i` values be handled? #' - `"error"` throws an error. #' - `"remove"` removes them. #' - `"ignore"` returns them as is. #' #' @export num_as_location <- function(i, n, ..., missing = c("propagate", "remove", "error"), negative = c("invert", "error", "ignore"), oob = c("error", "remove", "extend"), zero = c("remove", "error", "ignore"), arg = caller_arg(i), call = caller_env()) { check_dots_empty0(...) if (is.object(i) || !(is_integer(i) || is_double(i))) { abort("`i` must be a numeric vector.") } .Call( ffi_as_location, i = i, n = n, names = NULL, loc_negative = negative, loc_oob = oob, loc_zero = zero, missing = missing, env = environment() ) } #' @rdname vec_as_location #' @export vec_as_location2 <- function(i, n, names = NULL, ..., missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env()) { check_dots_empty0(...) result_get(vec_as_location2_result( i, n = n, names = names, negative = "error", missing = missing, arg = arg, call = call )) } #' @rdname vec_as_location #' @export num_as_location2 <- function(i, n, ..., negative = c("error", "ignore"), missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env()) { check_dots_empty0(...) if (!is_integer(i) && !is_double(i)) { abort("`i` must be a numeric vector.", call = call) } result_get(vec_as_location2_result( i, n = n, names = NULL, negative = negative, missing = missing, arg = arg, call = call )) } vec_as_location2_result <- function(i, n, names, missing, negative, arg, call) { allow_missing <- arg_match0(missing, c("error", "propagate")) == "propagate" allow_negative <- arg_match0(negative, c("error", "ignore")) == "ignore" result <- vec_as_subscript2_result( i = i, arg = arg, call = call ) if (!is_null(result$err)) { parent <- result$err return(result(err = new_error_location2_type( i = i, subscript_arg = arg, body = parent$body, call = call ))) } # Locations must be size 1, can't be NA, and must be positive i <- result$ok if (length(i) != 1L) { return(result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_scalar, call = call ))) } neg <- typeof(i) == "integer" && !is.na(i) && i < 0L if (allow_negative && neg) { i <- -i } if (is.na(i)) { if (!allow_missing && is.na(i)) { result <- result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_present, call = call )) } else { result <- result(i) } return(result) } if (identical(i, 0L)) { return(result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_positive, call = call ))) } if (!allow_negative && neg) { return(result(err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_positive, call = call ))) } err <- NULL i <- tryCatch( vec_as_location(i, n, names = names, arg = arg, call = call), vctrs_error_subscript = function(err) { err[["subscript_scalar"]] <- TRUE err <<- err i } ) if (!is_null(err)) { return(result(err = err)) } if (neg) { i <- -i } result(i) } stop_location_negative_missing <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, ..., body = cnd_body_vctrs_error_location_negative_missing, call = call )) } cnd_body_vctrs_error_location_negative_missing <- function(cnd, ...) { missing_loc <- which(is.na(cnd$i)) arg <- append_arg("Subscript", cnd$subscript_arg) if (length(missing_loc) == 1) { loc <- glue::glue("{arg} has a missing value at location {missing_loc}.") } else { n_loc <- length(missing_loc) missing_loc <- ensure_full_stop(enumerate(missing_loc)) loc <- glue::glue( "{arg} has {n_loc} missing values at locations {missing_loc}" ) } format_error_bullets(c( x = "Negative locations can't have missing values.", i = loc )) } stop_location_negative_positive <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, ..., body = cnd_body_vctrs_error_location_negative_positive, call = call )) } cnd_body_vctrs_error_location_negative_positive <- function(cnd, ...) { positive_loc <- which(cnd$i > 0) arg <- append_arg("Subscript", cnd$subscript_arg) if (length(positive_loc) == 1) { loc <- glue::glue("{arg} has a positive value at location {positive_loc}.") } else { n_loc <- length(positive_loc) positive_loc <- ensure_full_stop(enumerate(positive_loc)) loc <- glue::glue( "{arg} has {n_loc} positive values at locations {positive_loc}" ) } format_error_bullets(c( x = "Negative and positive locations can't be mixed.", i = loc )) } new_error_location2_type <- function(i, ..., class = NULL) { new_error_subscript2_type( class = class, i = i, numeric = "cast", character = "cast", ... ) } cnd_bullets_location2_need_scalar <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} must be size 1, not {length(i)}.") )) } cnd_bullets_location2_need_present <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} must be a location, not {obj_type_friendly(i)}.") )) } cnd_bullets_location2_need_positive <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} must be a positive location, not {i}.") )) } stop_location_negative <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, body = cnd_bullets_location_need_non_negative, ..., call = call )) } cnd_bullets_location_need_non_negative <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't contain negative locations.") )) } stop_location_zero <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, body = cnd_bullets_location_need_non_zero, ..., call = call )) } cnd_bullets_location_need_non_zero <- function(cnd, ...) { zero_loc <- which(cnd$i == 0) zero_loc_size <- length(zero_loc) arg <- append_arg("Subscript", cnd$subscript_arg) if (zero_loc_size == 1) { loc <- glue::glue("It has a `0` value at location {zero_loc}.") } else { zero_loc <- ensure_full_stop(enumerate(zero_loc)) loc <- glue::glue( "It has {zero_loc_size} `0` values at locations {zero_loc}" ) } format_error_bullets(c( x = glue::glue("{arg} can't contain `0` values."), i = loc )) } stop_subscript_missing <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i = i, body = cnd_bullets_subscript_missing, ..., call = call )) } cnd_bullets_subscript_missing <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) missing_loc <- which(is.na(cnd$i)) if (length(missing_loc) == 1) { missing_line <- glue::glue("It has a missing value at location {missing_loc}.") } else { missing_enum <- ensure_full_stop(enumerate(missing_loc)) missing_line <- glue::glue("It has missing values at locations {missing_enum}") } format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't contain missing values."), x = missing_line )) } stop_subscript_empty <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i = i, body = cnd_bullets_subscript_empty, ..., call = call )) } cnd_bullets_subscript_empty <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) loc <- which(cnd$i == "") if (length(loc) == 1) { line <- glue::glue("It has an empty string at location {loc}.") } else { enum <- ensure_full_stop(enumerate(loc)) line <- glue::glue("It has an empty string at locations {enum}") } format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't contain the empty string."), x = line )) } stop_indicator_size <- function(i, n, ..., call = caller_env()) { cnd_signal(new_error_subscript_size( i, n = n, ..., body = cnd_body_vctrs_error_indicator_size, call = call )) } cnd_body_vctrs_error_indicator_size <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Logical subscript", cnd$subscript_arg) glue_data_bullets( cnd, x = "{subscript_arg} must be size 1 or {n}, not {vec_size(i)}." ) } stop_subscript_oob <- function(i, subscript_type, ..., call = caller_env()) { stop_subscript( class = "vctrs_error_subscript_oob", i = i, subscript_type = subscript_type, ..., call = call ) } #' @export cnd_header.vctrs_error_subscript_oob <- function(cnd, ...) { if (cnd_subscript_oob_non_consecutive(cnd)) { return(cnd_header_vctrs_error_subscript_oob_non_consecutive(cnd, ...)) } elt <- cnd_subscript_element(cnd) action <- cnd_subscript_action(cnd) type <- cnd_subscript_type(cnd) if (action %in% c("rename", "relocate") || type == "character") { glue::glue("Can't {action} {elt[[2]]} that don't exist.") } else { glue::glue("Can't {action} {elt[[2]]} past the end.") } } #' @export cnd_body.vctrs_error_subscript_oob <- function(cnd, ...) { switch(cnd_subscript_type(cnd), numeric = if (cnd_subscript_oob_non_consecutive(cnd)) { cnd_body_vctrs_error_subscript_oob_non_consecutive(cnd, ...) } else { cnd_body_vctrs_error_subscript_oob_location(cnd, ...) }, character = cnd_body_vctrs_error_subscript_oob_name(cnd, ...), abort("Internal error: subscript type can't be `logical` for OOB errors.") ) } cnd_body_vctrs_error_subscript_oob_location <- function(cnd, ...) { i <- cnd$i # In case of missing locations i <- i[!is.na(i)] if (cnd_subscript_action(cnd) == "negate") { # Only report negative indices i <- i[i < 0L] } # In case of negative indexing i <- abs(i) oob <- i[i > cnd$size] oob_enum <- vctrs_cli_vec(oob) n_loc <- length(oob) n <- cnd$size elt <- cnd_subscript_element_cli(n, cnd) # TODO: Switch to `format_inline()` and format bullets lazily through rlang cli::format_error(c( "i" = "{cli::qty(n_loc)} Location{?s} {oob_enum} do{?esn't/n't} exist.", "i" = "There {cli::qty(n)} {?is/are} only {elt}." )) } cnd_body_vctrs_error_subscript_oob_name <- function(cnd, ...) { elt <- cnd_subscript_element(cnd, capital = TRUE) oob <- cnd$i[!cnd$i %in% cnd$names] oob_enum <- enumerate(glue::backtick(oob)) format_error_bullets(c( x = glue::glue(ngettext( length(oob), "{elt[[1]]} {oob_enum} doesn't exist.", "{elt[[2]]} {oob_enum} don't exist." )) )) } vctrs_cli_vec <- function(x, ..., vec_trunc = 5) { cli::cli_vec(as.character(x), list(..., vec_trunc = vec_trunc)) } stop_location_oob_non_consecutive <- function(i, size, ..., call = caller_env()) { stop_subscript_oob( i = i, size = size, subscript_type = "numeric", subscript_oob_non_consecutive = TRUE, ..., call = call ) } cnd_header_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) { action <- cnd_subscript_action(cnd) elt <- cnd_subscript_element(cnd) glue::glue("Can't {action} {elt[[2]]} beyond the end with non-consecutive locations.") } cnd_body_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) { i <- sort(cnd$i) i <- i[i > cnd$size] non_consecutive <- i[c(TRUE, diff(i) != 1L)] arg <- append_arg("Subscript", cnd$subscript_arg) if (length(non_consecutive) == 1) { x_line <- glue::glue("{arg} contains non-consecutive location {non_consecutive}.") } else { non_consecutive <- ensure_full_stop(enumerate(non_consecutive)) x_line <- glue::glue("{arg} contains non-consecutive locations {non_consecutive}") } glue_data_bullets( cnd, i = "Input has size {size}.", x = x_line ) } cnd_subscript_oob_non_consecutive <- function(cnd) { out <- cnd$subscript_oob_non_consecutive %||% FALSE check_bool(out) out } vctrs/R/split.R0000644000176200001440000000216114276722575013113 0ustar liggesusers#' Split a vector into groups #' #' This is a generalisation of [split()] that can split by any type of vector, #' not just factors. Instead of returning the keys in the character names, #' the are returned in a separate parallel vector. #' #' @param x Vector to divide into groups. #' @param by Vector whose unique values defines the groups. #' @return A data frame with two columns and size equal to #' `vec_size(vec_unique(by))`. The `key` column has the same type as #' `by`, and the `val` column is a list containing elements of type #' `vec_ptype(x)`. #' #' Note for complex types, the default `data.frame` print method will be #' suboptimal, and you will want to coerce into a tibble to better #' understand the output. #' @export #' #' @section Dependencies: #' - [vec_group_loc()] #' - [vec_chop()] #' #' @examples #' vec_split(mtcars$cyl, mtcars$vs) #' vec_split(mtcars$cyl, mtcars[c("vs", "am")]) #' #' if (require("tibble")) { #' as_tibble(vec_split(mtcars$cyl, mtcars[c("vs", "am")])) #' as_tibble(vec_split(mtcars, mtcars[c("vs", "am")])) #' } vec_split <- function(x, by) { .Call(vctrs_split, x, by) } vctrs/R/assert.R0000644000176200001440000003004414401377400013241 0ustar liggesusers#' Assert an argument has known prototype and/or size #' #' @description #' `r lifecycle::badge("questioning")` #' #' * `vec_is()` is a predicate that checks if its input is a vector that #' conforms to a prototype and/or a size. #' #' * `vec_assert()` throws an error when the input is not a vector or #' doesn't conform. #' #' @inheritSection vector-checks Vectors and scalars #' #' @section Error types: #' #' `vec_is()` never throws. #' `vec_assert()` throws the following errors: #' #' * If the input is not a vector, an error of class #' `"vctrs_error_scalar_type"` is raised. #' #' * If the prototype doesn't match, an error of class #' `"vctrs_error_assert_ptype"` is raised. #' #' * If the size doesn't match, an error of class #' `"vctrs_error_assert_size"` is raised. #' #' Both errors inherit from `"vctrs_error_assert"`. #' #' @section Lifecycle: #' #' Both `vec_is()` and `vec_assert()` are questioning because their `ptype` #' arguments have semantics that are challenging to define clearly and are #' rarely useful. #' #' - Use [obj_is_vector()] or [obj_check_vector()] for vector checks #' #' - Use [vec_check_size()] for size checks #' #' - Use [vec_cast()], [inherits()], or simple type predicates like #' [rlang::is_logical()] for specific type checks #' #' @inheritParams rlang::args_error_context #' #' @param x A vector argument to check. #' @param ptype Prototype to compare against. If the prototype has a #' class, its [vec_ptype()] is compared to that of `x` with #' `identical()`. Otherwise, its [typeof()] is compared to that of #' `x` with `==`. #' @param size A single integer size against which to compare. #' @param arg Name of argument being checked. This is used in error #' messages. The label of the expression passed as `x` is taken as #' default. #' #' @return `vec_is()` returns `TRUE` or `FALSE`. `vec_assert()` either #' throws a typed error (see section on error types) or returns `x`, #' invisibly. #' @keywords internal #' @export vec_assert <- function(x, ptype = NULL, size = NULL, arg = caller_arg(x), call = caller_env()) { if (!obj_is_vector(x)) { stop_scalar_type(x, arg, call = call) } if (!is_null(ptype)) { ptype <- vec_ptype(ptype) x_type <- vec_ptype_finalise(vec_ptype(x)) if (!is_same_type(x_type, ptype)) { msg <- vec_assert_type_explain(x_type, ptype, arg) abort( msg, class = c("vctrs_error_assert_ptype", "vctrs_error_assert"), required = ptype, actual = x_type, call = call ) } } if (!is_null(size)) { size <- vec_cast(size, integer(), x_arg = "size") n_size <- length(size) if (n_size != 1L) { abort(glue::glue("`size` must be length 1, not length {n_size}.")) } x_size <- vec_size(x) if (!identical(x_size, size)) { stop_assert_size( x_size, size, arg, call = call ) } } invisible(x) } # Also thrown from C stop_assert_size <- function(actual, required, arg, call = caller_env()) { if (!nzchar(arg)) { arg <- "Input" } else { arg <- glue::backtick(arg) } message <- glue::glue("{arg} must have size {required}, not size {actual}.") stop_assert( message, class = "vctrs_error_assert_size", actual = actual, required = required, call = call ) } stop_assert <- function(message = NULL, class = NULL, ..., call = caller_env()) { stop_vctrs( message, class = c(class, "vctrs_error_assert"), ..., call = call ) } #' @rdname vec_assert #' @export vec_is <- function(x, ptype = NULL, size = NULL) { if (!obj_is_vector(x)) { return(FALSE) } if (!is_null(ptype)) { ptype <- vec_ptype(ptype) x_type <- vec_ptype_finalise(vec_ptype(x)) if (!is_same_type(x_type, ptype)) { return(FALSE) } } if (!is_null(size)) { size <- vec_recycle(vec_cast(size, integer()), 1L) x_size <- vec_size(x) if (!identical(x_size, size)) { return(FALSE) } } TRUE } #' Vector checks #' #' @description #' #' - `obj_is_vector()` tests if `x` is considered a vector in the vctrs sense. #' See _Vectors and scalars_ below for the exact details. #' #' - `obj_check_vector()` uses `obj_is_vector()` and throws a standardized and #' informative error if it returns `FALSE`. #' #' - `vec_check_size()` tests if `x` has size `size`, and throws an informative #' error if it doesn't. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x For `obj_*()` functions, an object. For `vec_*()` functions, a #' vector. #' #' @param size The size to check for. #' #' @returns #' - `obj_is_vector()` returns a single `TRUE` or `FALSE`. #' #' - `obj_check_vector()` returns `NULL` invisibly, or errors. #' #' - `vec_check_size()` returns `NULL` invisibly, or errors. #' #' @section Vectors and scalars: #' #' Informally, a vector is a collection that makes sense to use as column in a #' data frame. The following rules define whether or not `x` is considered a #' vector. #' #' If no [vec_proxy()] method has been registered, `x` is a vector if: #' #' - The [base type][typeof] of the object is atomic: `"logical"`, `"integer"`, #' `"double"`, `"complex"`, `"character"`, or `"raw"`. #' #' - `x` is a list, as defined by [obj_is_list()]. #' #' - `x` is a [data.frame]. #' #' If a `vec_proxy()` method has been registered, `x` is a vector if: #' #' - The proxy satisfies one of the above conditions. #' #' - The base type of the proxy is `"list"`, regardless of its class. S3 lists #' are thus treated as scalars unless they implement a `vec_proxy()` method. #' #' Otherwise an object is treated as scalar and cannot be used as a vector. #' In particular: #' #' - `NULL` is not a vector. #' #' - S3 lists like `lm` objects are treated as scalars by default. #' #' - Objects of type [expression] are not treated as vectors. #' #' @section Technical limitations: #' #' - Support for S4 vectors is currently limited to objects that inherit from an #' atomic type. #' #' - Subclasses of [data.frame] that *append* their class to the back of the #' `"class"` attribute are not treated as vectors. If you inherit from an S3 #' class, always prepend your class to the front of the `"class"` attribute #' for correct dispatch. This matches our general principle of allowing #' subclasses but not mixins. #' #' @name vector-checks #' @examples #' obj_is_vector(1) #' #' # Data frames are vectors #' obj_is_vector(data_frame()) #' #' # Bare lists are vectors #' obj_is_vector(list()) #' #' # S3 lists are vectors if they explicitly inherit from `"list"` #' x <- structure(list(), class = c("my_list", "list")) #' obj_is_list(x) #' obj_is_vector(x) #' #' # But if they don't explicitly inherit from `"list"`, they aren't #' # automatically considered to be vectors. Instead, vctrs considers this #' # to be a scalar object, like a linear model returned from `lm()`. #' y <- structure(list(), class = "my_list") #' obj_is_list(y) #' obj_is_vector(y) #' #' # `obj_check_vector()` throws an informative error if the input #' # isn't a vector #' try(obj_check_vector(y)) #' #' # `vec_check_size()` throws an informative error if the size of the #' # input doesn't match `size` #' vec_check_size(1:5, size = 5) #' try(vec_check_size(1:5, size = 4)) NULL #' @export #' @rdname vector-checks obj_is_vector <- function(x) { .Call(ffi_obj_is_vector, x) } #' @export #' @rdname vector-checks obj_check_vector <- function(x, ..., arg = caller_arg(x), call = caller_env()) { check_dots_empty0(...) invisible(.Call(ffi_obj_check_vector, x, environment())) } #' @export #' @rdname vector-checks vec_check_size <- function(x, size, ..., arg = caller_arg(x), call = caller_env()) { check_dots_empty0(...) invisible(.Call(ffi_vec_check_size, x, size, environment())) } #' List checks #' #' @description #' - `obj_is_list()` tests if `x` is considered a list in the vctrs sense. It #' returns `TRUE` if: #' - `x` is a bare list with no class. #' - `x` is a list explicitly inheriting from `"list"`. #' #' - `list_all_vectors()` takes a list and returns `TRUE` if all elements of #' that list are vectors. #' #' - `list_all_size()` takes a list and returns `TRUE` if all elements of that #' list have the same `size`. #' #' - `obj_check_list()`, `list_check_all_vectors()`, and `list_check_all_size()` #' use the above functions, but throw a standardized and informative error if #' they return `FALSE`. #' #' @inheritParams rlang::args_error_context #' @inheritParams rlang::args_dots_empty #' #' @param x For `vec_*()` functions, an object. For `list_*()` functions, a #' list. #' #' @param size The size to check each element for. #' #' @details #' Notably, data frames and S3 record style classes like POSIXlt are not #' considered lists. #' #' @seealso [list_sizes()] #' @export #' @examples #' obj_is_list(list()) #' obj_is_list(list_of(1)) #' obj_is_list(data.frame()) #' #' list_all_vectors(list(1, mtcars)) #' list_all_vectors(list(1, environment())) #' #' list_all_size(list(1:2, 2:3), 2) #' list_all_size(list(1:2, 2:4), 2) #' #' # `list_`-prefixed functions assume a list: #' try(list_all_vectors(environment())) obj_is_list <- function(x) { .Call(ffi_obj_is_list, x) } #' @rdname obj_is_list #' @export obj_check_list <- function(x, ..., arg = caller_arg(x), call = caller_env()) { check_dots_empty0(...) invisible(.Call(ffi_check_list, x, environment())) } #' @rdname obj_is_list #' @export list_all_vectors <- function(x) { .Call(ffi_list_all_vectors, x, environment()) } #' @rdname obj_is_list #' @export list_check_all_vectors <- function(x, ..., arg = caller_arg(x), call = caller_env()) { check_dots_empty0(...) invisible(.Call(ffi_list_check_all_vectors, x, environment())) } #' @rdname obj_is_list #' @export list_all_size <- function(x, size) { .Call(ffi_list_all_size, x, size, environment()) } #' @rdname obj_is_list #' @export list_check_all_size <- function(x, size, ..., arg = caller_arg(x), call = caller_env()) { check_dots_empty0(...) invisible(.Call(ffi_list_check_all_size, x, size, environment())) } # Called from C stop_non_list_type <- function(x, arg, call) { if (nzchar(arg)) { arg <- cli::format_inline("{.arg {arg}}") } else { arg <- "Input" } cli::cli_abort( "{arg} must be a list, not {obj_type_friendly(x)}.", call = call ) } is_same_type <- function(x, ptype) { if (is_partial(ptype)) { env <- environment() ptype <- tryCatch( vctrs_error_incompatible_type = function(...) return_from(env, FALSE), vec_ptype_common(x, ptype) ) } x <- vec_slice(x, integer()) ptype <- vec_slice(ptype, integer()) # FIXME: Remove row names for matrices and arrays, and handle empty # but existing dimnames x <- vec_set_names(x, NULL) ptype <- vec_set_names(ptype, NULL) identical(x, ptype) } vec_assert_type_explain <- function(x, type, arg) { arg <- str_backtick(arg) x <- paste0("<", vec_ptype_full(x), ">") type <- paste0("<", vec_ptype_full(type), ">") intro <- paste0(arg, " must be a vector with type") intro <- layout_type(intro, type) outro <- paste0("Instead, it has type") outro <- layout_type(outro, x) paste_line( !!!intro, if (str_is_multiline(intro)) "", !!!outro ) } layout_type <- function(start, type) { if (str_is_multiline(type)) { paste_line( paste0(start, ":"), "", paste0(" ", indent(type, 2)) ) } else { paste0(start, " ", type, ".") } } vctrs/R/order.R0000644000176200001440000003116114341667017013065 0ustar liggesusers# TODO: Use this NEWS bullet when we move to the new `vec_order()` algorithm # # * `vec_order()` and `vec_sort()` now use a custom radix sort algorithm, rather # than relying on `order()`. The implementation is based on data.table’s # `forder()` and their earlier contribution to R’s `order()`. There are four # major changes, outlined below, the first two of which are breaking changes. # If you need to retain the old ordering behavior, use `vec_order_base()`. # # * Character vectors now order in the C locale by default, which is _much_ # faster than ordering in the system's locale. To order in a specific locale, # you can provide a character proxy function through `chr_proxy_collate`, # such as `stringi::stri_sort_key()`. # # * Optional arguments, such as `direction` and `na_value`, must now be # specified by name. Specifying by position will result in an error. # # * When ordering data frames, you can now control the behavior of `direction` # and `na_value` on a per column basis. # # * There is a new `nan_distinct` argument for differentiating between `NaN` # and `NA` in double and complex vectors. #' Order and sort vectors #' #' @description #' `vec_order_radix()` computes the order of `x`. For data frames, the order is #' computed along the rows by computing the order of the first column and #' using subsequent columns to break ties. #' #' `vec_sort_radix()` sorts `x`. It is equivalent to `vec_slice(x, vec_order_radix(x))`. #' #' @inheritParams rlang::args_dots_empty #' #' @param x A vector #' @param direction Direction to sort in. #' - A single `"asc"` or `"desc"` for ascending or descending order #' respectively. #' - For data frames, a length `1` or `ncol(x)` character vector containing #' only `"asc"` or `"desc"`, specifying the direction for each column. #' @param na_value Ordering of missing values. #' - A single `"largest"` or `"smallest"` for ordering missing values as the #' largest or smallest values respectively. #' - For data frames, a length `1` or `ncol(x)` character vector containing #' only `"largest"` or `"smallest"`, specifying how missing values should #' be ordered within each column. #' @param nan_distinct A single logical specifying whether or not `NaN` should #' be considered distinct from `NA` for double and complex vectors. If `TRUE`, #' `NaN` will always be ordered between `NA` and non-missing numbers. #' @param chr_proxy_collate A function generating an alternate representation #' of character vectors to use for collation, often used for locale-aware #' ordering. #' - If `NULL`, no transformation is done. #' - Otherwise, this must be a function of one argument. If the input contains #' a character vector, it will be passed to this function after it has been #' translated to UTF-8. This function should return a character vector with #' the same length as the input. The result should sort as expected in the #' C-locale, regardless of encoding. #' #' For data frames, `chr_proxy_collate` will be applied to all character #' columns. #' #' Common transformation functions include: `tolower()` for case-insensitive #' ordering and `stringi::stri_sort_key()` for locale-aware ordering. #' #' @return #' * `vec_order_radix()` an integer vector the same size as `x`. #' * `vec_sort_radix()` a vector with the same size and type as `x`. #' #' @section Differences with `order()`: #' #' Unlike the `na.last` argument of `order()` which decides the positions of #' missing values irrespective of the `decreasing` argument, the `na_value` #' argument of `vec_order_radix()` interacts with `direction`. If missing values #' are considered the largest value, they will appear last in ascending order, #' and first in descending order. #' #' Character vectors are ordered in the C-locale. This is different from #' `base::order()`, which respects `base::Sys.setlocale()`. Sorting in a #' consistent locale can produce more reproducible results between different #' sessions and platforms, however, the results of sorting in the C-locale #' can be surprising. For example, capital letters sort before lower case #' letters. Sorting `c("b", "C", "a")` with `vec_sort_radix()` will return #' `c("C", "a", "b")`, but with `base::order()` will return `c("a", "b", "C")` #' unless `base::order(method = "radix")` is explicitly set, which also uses #' the C-locale. While sorting with the C-locale can be useful for #' algorithmic efficiency, in many real world uses it can be the cause of #' data analysis mistakes. To balance these trade-offs, you can supply a #' `chr_proxy_collate` function to transform character vectors into an #' alternative representation that orders in the C-locale in a less surprising #' way. For example, providing [base::tolower()] as a transform will order the #' original vector in a case-insensitive manner. Locale-aware ordering can be #' achieved by providing `stringi::stri_sort_key()` as a transform, setting the #' collation options as appropriate for your locale. #' #' Character vectors are always translated to UTF-8 before ordering, and before #' any transform is applied by `chr_proxy_collate`. #' #' For complex vectors, if either the real or imaginary component is `NA` or #' `NaN`, then the entire observation is considered missing. #' #' @section Dependencies of `vec_order_radix()`: #' * [vec_proxy_order()] #' #' @section Dependencies of `vec_sort_radix()`: #' * [vec_order_radix()] #' * [vec_slice()] #' #' @name order-radix #' @keywords internal #' #' @examples #' if (FALSE) { #' #' x <- round(sample(runif(5), 9, replace = TRUE), 3) #' x <- c(x, NA) #' #' vec_order_radix(x) #' vec_sort_radix(x) #' vec_sort_radix(x, direction = "desc") #' #' # Can also handle data frames #' df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) #' vec_order_radix(df) #' vec_sort_radix(df) #' vec_sort_radix(df, direction = "desc") #' #' # For data frames, `direction` and `na_value` are allowed to be vectors #' # with length equal to the number of columns in the data frame #' vec_sort_radix( #' df, #' direction = c("desc", "asc"), #' na_value = c("largest", "smallest") #' ) #' #' # Character vectors are ordered in the C locale, which orders capital letters #' # below lowercase ones #' y <- c("B", "A", "a") #' vec_sort_radix(y) #' #' # To order in a case-insensitive manner, provide a `chr_proxy_collate` #' # function that transforms the strings to all lowercase #' vec_sort_radix(y, chr_proxy_collate = tolower) #' #' } NULL #' @rdname order-radix vec_order_radix <- function(x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL) { check_dots_empty0(...) .Call(vctrs_order, x, direction, na_value, nan_distinct, chr_proxy_collate) } #' @rdname order-radix vec_sort_radix <- function(x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL) { check_dots_empty0(...) idx <- vec_order_radix( x = x, direction = direction, na_value = na_value, nan_distinct = nan_distinct, chr_proxy_collate = chr_proxy_collate ) vec_slice(x, idx) } # ------------------------------------------------------------------------------ #' Locate sorted groups #' #' @description #' `r lifecycle::badge("experimental")` #' #' `vec_locate_sorted_groups()` returns a data frame containing a `key` column #' with sorted unique groups, and a `loc` column with the locations of each #' group in `x`. It is similar to [vec_group_loc()], except the groups are #' returned sorted rather than by first appearance. #' #' @details #' `vec_locate_sorted_groups(x)` is equivalent to, but faster than: #' #' ``` #' info <- vec_group_loc(x) #' vec_slice(info, vec_order(info$key)) #' ``` #' #' @inheritParams order-radix #' #' @return #' A two column data frame with size equal to `vec_size(vec_unique(x))`. #' * A `key` column of type `vec_ptype(x)`. #' * A `loc` column of type list, with elements of type integer. #' #' @section Dependencies of `vec_locate_sorted_groups()`: #' * [vec_proxy_order()] #' #' @export #' @keywords internal #' @examples #' df <- data.frame( #' g = sample(2, 10, replace = TRUE), #' x = c(NA, sample(5, 9, replace = TRUE)) #' ) #' #' # `vec_locate_sorted_groups()` is similar to `vec_group_loc()`, except keys #' # are returned ordered rather than by first appearance. #' vec_locate_sorted_groups(df) #' #' vec_group_loc(df) vec_locate_sorted_groups <- function(x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL) { check_dots_empty0(...) .Call( vctrs_locate_sorted_groups, x, direction, na_value, nan_distinct, chr_proxy_collate ) } # ------------------------------------------------------------------------------ vec_order_info <- function(x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL, chr_ordered = TRUE) { check_dots_empty0(...) .Call(vctrs_order_info, x, direction, na_value, nan_distinct, chr_proxy_collate, chr_ordered) } # ------------------------------------------------------------------------------ #' Order and sort vectors #' #' @inheritParams rlang::args_dots_empty #' #' @param x A vector #' @param direction Direction to sort in. Defaults to `asc`ending. #' @param na_value Should `NA`s be treated as the largest or smallest values? #' @return #' * `vec_order()` an integer vector the same size as `x`. #' * `vec_sort()` a vector with the same size and type as `x`. #' #' @section Differences with `order()`: #' Unlike the `na.last` argument of `order()` which decides the #' positions of missing values irrespective of the `decreasing` #' argument, the `na_value` argument of `vec_order()` interacts with #' `direction`. If missing values are considered the largest value, #' they will appear last in ascending order, and first in descending #' order. #' #' @section Dependencies of `vec_order()`: #' * [vec_proxy_order()] #' #' @section Dependencies of `vec_sort()`: #' * [vec_proxy_order()] #' * [vec_order()] #' * [vec_slice()] #' @export #' @examples #' x <- round(c(runif(9), NA), 3) #' vec_order(x) #' vec_sort(x) #' vec_sort(x, direction = "desc") #' #' # Can also handle data frames #' df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) #' vec_order(df) #' vec_sort(df) #' vec_sort(df, direction = "desc") #' #' # Missing values interpreted as largest values are last when #' # in increasing order: #' vec_order(c(1, NA), na_value = "largest", direction = "asc") #' vec_order(c(1, NA), na_value = "largest", direction = "desc") vec_order <- function(x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest")) { check_dots_empty0(...) direction <- arg_match0(direction, c("asc", "desc")) na_value <- arg_match0(na_value, c("largest", "smallest")) decreasing <- !identical(direction, "asc") na.last <- identical(na_value, "largest") if (decreasing) { na.last <- !na.last } proxy <- vec_proxy_order(x) if (is.data.frame(proxy)) { if (length(proxy) == 0L) { # Work around type-instability in `base::order()` return(vec_seq_along(proxy)) } args <- map(unstructure(proxy), function(.x) { if (is.data.frame(.x)) { .x <- order(vec_order(.x, direction = direction, na_value = na_value)) } .x }) exec("order", !!!args, decreasing = decreasing, na.last = na.last) } else if (is_character(proxy) || is_logical(proxy) || is_integer(proxy) || is_double(proxy) || is.complex(proxy)) { if (is.object(proxy)) { proxy <- unstructure(proxy) } order(proxy, decreasing = decreasing, na.last = na.last) } else { abort("Invalid type returned by `vec_proxy_order()`.") } } #' @export #' @rdname vec_order vec_sort <- function(x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest")) { check_dots_empty0(...) direction <- arg_match0(direction, c("asc", "desc")) na_value <- arg_match0(na_value, c("largest", "smallest")) idx <- vec_order(x, direction = direction, na_value = na_value) vec_slice(x, idx) } vctrs/R/type-data-frame.R0000644000176200001440000003101414511524374014724 0ustar liggesusers#' Assemble attributes for data frame construction #' #' `new_data_frame()` constructs a new data frame from an existing list. It is #' meant to be performant, and does not check the inputs for correctness in any #' way. It is only safe to use after a call to [df_list()], which collects and #' validates the columns used to construct the data frame. #' #' @seealso #' [df_list()] for a way to safely construct a data frame's underlying #' data structure from individual columns. This can be used to create a #' named list for further use by `new_data_frame()`. #' #' @param x A named list of equal-length vectors. The lengths are not #' checked; it is responsibility of the caller to make sure they are #' equal. #' @param n Number of rows. If `NULL`, will be computed from the length of #' the first element of `x`. #' @param ...,class Additional arguments for creating subclasses. #' #' The following attributes have special behavior: #' - `"names"` is preferred if provided, overriding existing names in `x`. #' - `"row.names"` is preferred if provided, overriding both `n` and the size #' implied by `x`. #' #' @export #' @examples #' new_data_frame(list(x = 1:10, y = 10:1)) new_data_frame <- function(x = list(), n = NULL, ..., class = NULL) { .External(ffi_new_data_frame, x, n, class, ...) } new_data_frame <- fn_inline_formals(new_data_frame, "x") #' Collect columns for data frame construction #' #' `df_list()` constructs the data structure underlying a data #' frame, a named list of equal-length vectors. It is often used in #' combination with [new_data_frame()] to safely and consistently create #' a helper function for data frame subclasses. #' #' @section Properties: #' #' - Inputs are [recycled][theory-faq-recycling] to a common size with #' [vec_recycle_common()]. #' #' - With the exception of data frames, inputs are not modified in any way. #' Character vectors are never converted to factors, and lists are stored #' as-is for easy creation of list-columns. #' #' - Unnamed data frame inputs are automatically unpacked. Named data frame #' inputs are stored unmodified as data frame columns. #' #' - `NULL` inputs are completely ignored. #' #' - The dots are dynamic, allowing for splicing of lists with `!!!` and #' unquoting. #' #' @seealso #' [new_data_frame()] for constructing data frame subclasses from a validated #' input. [data_frame()] for a fast data frame creation helper. #' #' @inheritParams rlang::args_error_context #' #' @param ... Vectors of equal-length. When inputs are named, those names #' are used for names of the resulting list. #' @param .size The common size of vectors supplied in `...`. If `NULL`, this #' will be computed as the common size of the inputs. #' @param .unpack Should unnamed data frame inputs be unpacked? Defaults to #' `TRUE`. #' @param .name_repair One of `"check_unique"`, `"unique"`, `"universal"`, #' `"minimal"`, `"unique_quiet"`, or `"universal_quiet"`. See [vec_as_names()] #' for the meaning of these options. #' #' @export #' @examples #' # `new_data_frame()` can be used to create custom data frame constructors #' new_fancy_df <- function(x = list(), n = NULL, ..., class = NULL) { #' new_data_frame(x, n = n, ..., class = c(class, "fancy_df")) #' } #' #' # Combine this constructor with `df_list()` to create a safe, #' # consistent helper function for your data frame subclass #' fancy_df <- function(...) { #' data <- df_list(...) #' new_fancy_df(data) #' } #' #' df <- fancy_df(x = 1) #' class(df) df_list <- function(..., .size = NULL, .unpack = TRUE, .name_repair = c("check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env()) { .Call(ffi_df_list, list2(...), .size, .unpack, .name_repair, environment()) } df_list <- fn_inline_formals(df_list, ".name_repair") #' Construct a data frame #' #' @description #' `data_frame()` constructs a data frame. It is similar to #' [base::data.frame()], but there are a few notable differences that make it #' more in line with vctrs principles. The Properties section outlines these. #' #' @details #' If no column names are supplied, `""` will be used as a default name for all #' columns. This is applied before name repair occurs, so the default name #' repair of `"check_unique"` will error if any unnamed inputs are supplied and #' `"unique"` (or `"unique_quiet"`) will repair the empty string column names #' appropriately. If the column names don't matter, use a `"minimal"` name #' repair for convenience and performance. #' #' @inheritSection df_list Properties #' #' @seealso #' [df_list()] for safely creating a data frame's underlying data structure from #' individual columns. [new_data_frame()] for constructing the actual data #' frame from that underlying data structure. Together, these can be useful #' for developers when creating new data frame subclasses supporting #' standard evaluation. #' #' @inheritParams rlang::args_error_context #' #' @param ... Vectors to become columns in the data frame. When inputs are #' named, those names are used for column names. #' @param .size The number of rows in the data frame. If `NULL`, this will #' be computed as the common size of the inputs. #' @param .name_repair One of `"check_unique"`, `"unique"`, `"universal"`, #' `"minimal"`, `"unique_quiet"`, or `"universal_quiet"`. See [vec_as_names()] #' for the meaning of these options. #' #' @export #' @examples #' data_frame(x = 1, y = 2) #' #' # Inputs are recycled using tidyverse recycling rules #' data_frame(x = 1, y = 1:3) #' #' # Strings are never converted to factors #' class(data_frame(x = "foo")$x) #' #' # List columns can be easily created #' df <- data_frame(x = list(1:2, 2, 3:4), y = 3:1) #' #' # However, the base print method is suboptimal for displaying them, #' # so it is recommended to convert them to tibble #' if (rlang::is_installed("tibble")) { #' tibble::as_tibble(df) #' } #' #' # Named data frame inputs create data frame columns #' df <- data_frame(x = data_frame(y = 1:2, z = "a")) #' #' # The `x` column itself is another data frame #' df$x #' #' # Again, it is recommended to convert these to tibbles for a better #' # print method #' if (rlang::is_installed("tibble")) { #' tibble::as_tibble(df) #' } #' #' # Unnamed data frame input is automatically unpacked #' data_frame(x = 1, data_frame(y = 1:2, z = "a")) data_frame <- function(..., .size = NULL, .name_repair = c("check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env()) { .Call(ffi_data_frame, list2(...), .size, .name_repair, environment()) } data_frame <- fn_inline_formals(data_frame, ".name_repair") #' @export vec_ptype_abbr.data.frame <- function(x, ...) { "df" } # For testing # Keep in sync with `enum vctrs_proxy_kind` in `vctrs.h` df_proxy <- function(x, kind) { .Call(ffi_df_proxy, x, kind) } VCTRS_PROXY_KIND_equal <- 0L VCTRS_PROXY_KIND_compare <- 1L VCTRS_PROXY_KIND_order <- 2L df_is_coercible <- function(x, y, opts) { vec_is_coercible( new_data_frame(x), new_data_frame(y), opts = opts ) } # Coercion ---------------------------------------------------------------- #' Coercion between two data frames #' #' `df_ptype2()` and `df_cast()` are the two functions you need to #' call from `vec_ptype2()` and `vec_cast()` methods for data frame #' subclasses. See [?howto-faq-coercion-data-frame][howto-faq-coercion-data-frame]. #' Their main job is to determine the common type of two data frames, #' adding and coercing columns as needed, or throwing an incompatible #' type error when the columns are not compatible. #' #' @param x,y,to Subclasses of data frame. #' @param ... If you call `df_ptype2()` or `df_cast()` from a #' `vec_ptype2()` or `vec_cast()` method, you must forward the dots #' passed to your method on to `df_ptype2()` or `df_cast()`. #' @inheritParams vec_ptype2 #' @inheritParams vec_cast #' #' @return #' * When `x` and `y` are not compatible, an error of class #' `vctrs_error_incompatible_type` is thrown. #' * When `x` and `y` are compatible, `df_ptype2()` returns the common #' type as a bare data frame. `tib_ptype2()` returns the common type #' as a bare tibble. #' #' @export df_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { .Call( ffi_df_ptype2_opts, x, y, opts = match_fallback_opts(...), environment() ) } #' @rdname df_ptype2 #' @export df_cast <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { .Call( ffi_df_cast_opts, x, to, opts = match_fallback_opts(...), environment() ) } df_ptype2_opts <- function(x, y, ..., opts, x_arg = "", y_arg = "", call = caller_env()) { .Call(ffi_df_ptype2_opts, x, y, opts = opts, environment()) } df_cast_opts <- function(x, to, ..., opts = fallback_opts(), x_arg = "", to_arg = "", call = caller_env()) { .Call( ffi_df_cast_opts, x, to, opts, environment() ) } df_cast_params <- function(x, to, ..., x_arg = "", to_arg = "", s3_fallback = NULL) { opts <- fallback_opts(s3_fallback = s3_fallback) df_cast_opts(x, to, opts = opts, x_arg = x_arg, to_arg = to_arg) } #' vctrs methods for data frames #' #' These functions help the base data.frame class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name vctrs-data-frame NULL #' @rdname vctrs-data-frame #' @export vec_ptype2.data.frame #' @method vec_ptype2 data.frame #' @export vec_ptype2.data.frame <- function(x, y, ...) { UseMethod("vec_ptype2.data.frame") } #' @method vec_ptype2.data.frame data.frame #' @export vec_ptype2.data.frame.data.frame <- function(x, y, ...) { df_ptype2(x, y, ...) } # Fallback for data frame subclasses (#981) vec_ptype2_df_fallback <- function(x, y, opts, x_arg = "", y_arg = "", call = caller_env()) { vec_ptype2_params( as_base_df(x), as_base_df(y), s3_fallback = opts$s3_fallback, x_arg = x_arg, y_arg = y_arg, call = call ) } as_base_df <- function(x) { if (inherits(x, "tbl_df")) { new_data_frame(x, class = c("tbl_df", "tbl")) } else { new_data_frame(x) } } # Cast -------------------------------------------------------------------- #' @rdname vctrs-data-frame #' @export vec_cast.data.frame #' @method vec_cast data.frame #' @export vec_cast.data.frame <- function(x, to, ...) { UseMethod("vec_cast.data.frame") } #' @export #' @method vec_cast.data.frame data.frame vec_cast.data.frame.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) } #' @export vec_restore.data.frame <- function(x, to, ...) { .Call(ffi_vec_bare_df_restore, x, to) } # Helpers ----------------------------------------------------------------- df_size <- function(x) { .Call(vctrs_df_size, x) } df_lossy_cast <- function(out, x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { extra <- setdiff(names(x), names(to)) maybe_lossy_cast( result = out, x = x, to = to, lossy = length(extra) > 0, locations = int(), x_arg = x_arg, to_arg = to_arg, call = call, details = inline_list("Dropped variables: ", extra, quote = "`"), class = "vctrs_error_cast_lossy_dropped" ) } is_informative_error_vctrs_error_cast_lossy_dropped <- function(x, ...) { FALSE } df_attrib <- function(x) { attributes(x)[c("row.names", "names")] } non_df_attrib <- function(x) { attrib <- attributes(x) attrib <- attrib[!names(attrib) %in% c("row.names", "names")] # Sort to allow comparison attrib[order(names(attrib))] } vctrs/R/type-unspecified.R0000644000176200001440000000176014276722575015241 0ustar liggesusers#' A 1d vector of unspecified type #' #' This is a [partial type][new_partial] used to represent logical vectors #' that only contain `NA`. These require special handling because we want to #' allow `NA` to specify missingness without requiring a type. #' #' @keywords internal #' @param n Length of vector #' @export #' @examples #' vec_ptype_show() #' vec_ptype_show(NA) #' #' vec_c(NA, factor("x")) #' vec_c(NA, Sys.Date()) #' vec_c(NA, Sys.time()) #' vec_c(NA, list(1:3, 4:5)) unspecified <- function(n = 0) { .Call(vctrs_unspecified, n) } #' @export `[.vctrs_unspecified` <- function(x, i, ...) { unspecified(length(NextMethod())) } #' @export print.vctrs_unspecified <- function(x, ...) { cat(" [", length(x), "]\n", sep = "") } #' @export vec_ptype_abbr.vctrs_unspecified <- function(x, ...) { "???" } is_unspecified <- function(x) { .Call(vctrs_is_unspecified, x) } ununspecify <- function(x) { if (is_unspecified(x)) { new_logical(length(x)) } else { x } } vctrs/R/vctrs-deprecated.R0000644000176200001440000001202014401377400015171 0ustar liggesusers#' Is a vector empty #' #' @description #' #' `r lifecycle::badge("defunct")` #' #' This function is defunct, please use [vec_is_empty()]. #' #' @param x An object. #' #' @keywords internal #' @export vec_empty <- function(x) { # Defunct: 2019-06 lifecycle::deprecate_stop( when = "0.2.0", what = "vec_empty()", with = "vec_is_empty()" ) } #' Deprecated type functions #' #' @description #' #' `r lifecycle::badge("deprecated")` #' #' These functions have been renamed: #' #' * `vec_type()` => [vec_ptype()] #' * `vec_type2()` => [vec_ptype2()] #' * `vec_type_common()` => [vec_ptype_common()] #' #' @param x,y,...,.ptype Arguments for deprecated functions. #' #' @keywords internal #' @export vec_type <- function(x) { # Deprecated: 2019-06 lifecycle::deprecate_warn( when = "0.2.0", what = "vec_type()", with = "vec_ptype()", always = TRUE ) vec_ptype(x) } #' @rdname vec_type #' @export vec_type_common <- function(..., .ptype = NULL) { # Deprecated: 2019-06 lifecycle::deprecate_warn( when = "0.2.0", what = "vec_type_common()", with = "vec_ptype_common()", always = TRUE ) vec_ptype_common(..., .ptype = .ptype) } #' @rdname vec_type #' @export vec_type2 <- function(x, y, ...) { # Deprecated: 2019-06 lifecycle::deprecate_warn( when = "0.2.0", what = "vec_type2()", with = "vec_ptype2()", always = TRUE ) vec_ptype2(x, y, ...) } #' Convert to an index vector #' #' @description #' #' `r lifecycle::badge("deprecated")` #' #' `vec_as_index()` has been renamed to [vec_as_location()] and is #' deprecated as of vctrs 0.2.2. #' #' @inheritParams vec_as_location #' #' @keywords internal #' @export vec_as_index <- function(i, n, names = NULL) { # Soft-deprecated: 2020-01 lifecycle::deprecate_soft( when = "0.2.2", what = "vec_as_index()", with = "vec_as_location()" ) n <- vec_cast(n, integer()) vec_check_size(n, size = 1L) i <- vec_as_subscript(i) # Picked up from the environment at the C level arg <- NULL .Call( ffi_as_location, i = i, n = n, names = names, loc_negative = "invert", loc_oob = "error", loc_zero = "remove", missing = "propagate", env = environment() ) } #' Expand the length of a vector #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `vec_repeat()` has been replaced with [vec_rep()] and [vec_rep_each()] and is #' deprecated as of vctrs 0.3.0. #' #' @param x A vector. #' @param each Number of times to repeat each element of `x`. #' @param times Number of times to repeat the whole vector of `x`. #' @return A vector the same type as `x` with size `vec_size(x) * times * each`. #' @keywords internal #' @export vec_repeat <- function(x, each = 1L, times = 1L) { # Soft-deprecated: 2020-03 lifecycle::deprecate_soft( when = "0.3.0", what = "vec_repeat()", with = I("either `vec_rep()` or `vec_rep_each()`") ) vec_check_size(each, size = 1L) vec_check_size(times, size = 1L) idx <- rep(vec_seq_along(x), times = times, each = each) vec_slice(x, idx) } #' Chopping #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `vec_unchop()` has been renamed to [list_unchop()] and is deprecated as of #' vctrs 0.5.0. #' #' @inheritParams list_unchop #' @inherit list_unchop return #' #' @keywords internal #' @export vec_unchop <- function(x, indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal")) { # Soft-deprecated: 2022-09 lifecycle::deprecate_soft("0.5.0", "vec_unchop()", "list_unchop()") list_unchop( x = x, indices = indices, ptype = ptype, name_spec = name_spec, name_repair = name_repair ) } #' Missing values #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `vec_equal_na()` has been renamed to [vec_detect_missing()] and is deprecated #' as of vctrs 0.5.0. #' #' @inheritParams vec_detect_missing #' #' @return #' A logical vector the same size as `x`. #' #' @keywords internal #' @export vec_equal_na <- function(x) { # Soft-deprecated: 2022-09 lifecycle::deprecate_soft("0.5.0", "vec_equal_na()", "vec_detect_missing()") vec_detect_missing(x) } #' List checks #' #' @description #' `r lifecycle::badge("deprecated")` #' #' These functions have been deprecated as of vctrs 0.6.0. #' #' - `vec_is_list()` has been renamed to [obj_is_list()]. #' - `vec_check_list()` has been renamed to [obj_check_list()]. #' #' @inheritParams obj_is_list #' #' @keywords internal #' @export vec_is_list <- function(x) { # Silently-deprecated: 2023-03 # lifecycle::deprecate_soft("0.6.0", "vec_is_list()", "obj_is_list()") obj_is_list(x) } #' @rdname vec_is_list #' @export vec_check_list <- function(x, ..., arg = caller_arg(x), call = caller_env()) { # Silently-deprecated: 2023-03 # lifecycle::deprecate_soft("0.6.0", "vec_check_list()", "obj_check_list()") obj_check_list(x, ..., arg = arg, call = call) } vctrs/R/import-standalone-types-check.R0000644000176200001440000002556514377215505017642 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-types-check.R # last-updated: 2023-02-15 # license: https://unlicense.org # dependencies: standalone-obj-type.R # --- # # ## Changelog # # 2023-02-15: # - Added `check_logical()`. # # - `check_bool()`, `check_number_whole()`, and # `check_number_decimal()` are now implemented in C. # # - For efficiency, `check_number_whole()` and # `check_number_decimal()` now take a `NULL` default for `min` and # `max`. This makes it possible to bypass unnecessary type-checking # and comparisons in the default case of no bounds checks. # # 2022-10-07: # - `check_number_whole()` and `_decimal()` no longer treat # non-numeric types such as factors or dates as numbers. Numeric # types are detected with `is.numeric()`. # # 2022-10-04: # - Added `check_name()` that forbids the empty string. # `check_string()` allows the empty string by default. # # 2022-09-28: # - Removed `what` arguments. # - Added `allow_na` and `allow_null` arguments. # - Added `allow_decimal` and `allow_infinite` arguments. # - Improved errors with absent arguments. # # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Added changelog. # # nocov start # Scalars ----------------------------------------------------------------- .standalone_types_check_dot_call <- .Call check_bool <- function(x, ..., allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x) && .standalone_types_check_dot_call(ffi_standalone_is_bool_1.0.7, x, allow_na, allow_null)) { return(invisible(NULL)) } stop_input_type( x, c("`TRUE`", "`FALSE`"), ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_string <- function(x, ..., allow_empty = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = allow_empty, allow_na = allow_na, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a single string", ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .rlang_check_is_string <- function(x, allow_empty, allow_na, allow_null) { if (is_string(x)) { if (allow_empty || !is_string(x, "")) { return(TRUE) } } if (allow_null && is_null(x)) { return(TRUE) } if (allow_na && (identical(x, NA) || identical(x, na_chr))) { return(TRUE) } FALSE } check_name <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = FALSE, allow_na = FALSE, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a valid name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } IS_NUMBER_true <- 0 IS_NUMBER_false <- 1 IS_NUMBER_oob <- 2 check_number_decimal <- function(x, ..., min = NULL, max = NULL, allow_infinite = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = TRUE, min, max, allow_infinite, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = TRUE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_number_whole <- function(x, ..., min = NULL, max = NULL, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = FALSE, min, max, allow_infinite = FALSE, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = FALSE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .stop_not_number <- function(x, ..., exit_code, allow_decimal, min, max, allow_na, allow_null, arg, call) { if (exit_code == IS_NUMBER_oob) { min <- min %||% -Inf max <- max %||% Inf if (min > -Inf && max < Inf) { what <- sprintf("a number between %s and %s", min, max) } else if (x < min) { what <- sprintf("a number larger than %s", min) } else if (x > max) { what <- sprintf("a number smaller than %s", max) } else { abort("Unexpected state in OOB check", .internal = TRUE) } } else if (allow_decimal) { what <- "a number" } else { what <- "a whole number" } stop_input_type( x, what, ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_symbol <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a symbol", ..., allow_null = allow_null, arg = arg, call = call ) } check_arg <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an argument name", ..., allow_null = allow_null, arg = arg, call = call ) } check_call <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_call(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a defused call", ..., allow_null = allow_null, arg = arg, call = call ) } check_environment <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_environment(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an environment", ..., allow_null = allow_null, arg = arg, call = call ) } check_function <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_function(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a function", ..., allow_null = allow_null, arg = arg, call = call ) } check_closure <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_closure(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an R function", ..., allow_null = allow_null, arg = arg, call = call ) } check_formula <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_formula(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a formula", ..., allow_null = allow_null, arg = arg, call = call ) } # Vectors ----------------------------------------------------------------- check_character <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_character(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a character vector", ..., allow_null = allow_null, arg = arg, call = call ) } check_logical <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_logical(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a logical vector", ..., allow_null = allow_null, arg = arg, call = call ) } # nocov end vctrs/R/type-dplyr.R0000644000176200001440000000605014420030332014040 0ustar liggesusers # All methods in this file are conditionally registered in .onLoad() ### `grouped_df` ----------------------------------------------------- group_intersect <- function(x, new) { intersect(dplyr::group_vars(x), names(new)) } vec_restore_grouped_df <- function(x, to, ...) { vars <- group_intersect(to, x) drop <- dplyr::group_by_drop_default(to) dplyr::grouped_df(x, vars, drop = drop) } # `vec_ptype2()` ----------------------------------------------------- vec_ptype2_grouped_df_grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_grouped_df_data.frame <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_data.frame_grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_grouped_df_tbl_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_tbl_df_grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } gdf_ptype2 <- function(x, y, ...) { common <- df_ptype2(x, y, ...) x_vars <- dplyr::group_vars(x) y_vars <- dplyr::group_vars(y) vars <- union(x_vars, y_vars) drop <- dplyr::group_by_drop_default(x) && dplyr::group_by_drop_default(y) dplyr::grouped_df(common, vars, drop = drop) } # `vec_cast()` ------------------------------------------------------- vec_cast_grouped_df_grouped_df <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast_grouped_df_data.frame <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast_data.frame_grouped_df <- function(x, to, ...) { df_cast(x, to, ...) } vec_cast_grouped_df_tbl_df <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast_tbl_df_grouped_df <- function(x, to, ...) { tib_cast(x, to, ...) } gdf_cast <- function(x, to, ...) { df <- df_cast(x, to, ...) vars <- dplyr::group_vars(to) drop <- dplyr::group_by_drop_default(to) dplyr::grouped_df(df, vars, drop = drop) } ### `rowwise` -------------------------------------------------------- vec_restore_rowwise_df <- function(x, to, ...) { dplyr::rowwise(x) } # `vec_ptype2()` ----------------------------------------------------- vec_ptype2_rowwise_df_rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_rowwise_df_data.frame <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_data.frame_rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_rowwise_df_tbl_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_tbl_df_rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } rww_ptype2 <- function(x, y, ...) { dplyr::rowwise(df_ptype2(x, y, ...)) } # `vec_cast()` ------------------------------------------------------- vec_cast_rowwise_df_rowwise_df <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast_rowwise_df_data.frame <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast_data.frame_rowwise_df <- function(x, to, ...) { df_cast(x, to, ...) } vec_cast_rowwise_df_tbl_df <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast_tbl_df_rowwise_df <- function(x, to, ...) { tib_cast(x, to, ...) } rww_cast <- function(x, to, ...) { dplyr::rowwise(df_cast(x, to, ...)) } vctrs/R/shape.R0000644000176200001440000000453414315060307013044 0ustar liggesusers# The dimensionality of an matrix/array is partition into two parts: # * the first dimension = the number of observations # * all other dimensions = the shape parameter of the type # These helpers work with the shape parameter new_shape <- function(type, shape = integer()) { structure(type, dim = c(0L, shape)) } vec_shaped_ptype <- function(ptype, x, y, ..., x_arg = "", y_arg = "") { check_dots_empty0(...) .Call(ffi_vec_shaped_ptype, ptype, x, y, environment()) } vec_shape2 <- function(x, y, ..., x_arg = "", y_arg = "") { check_dots_empty0(...) .Call(ffi_vec_shape2, x, y, environment()) } # Should take same signature as `vec_cast()` shape_broadcast <- function(x, to, ..., x_arg, to_arg, call = caller_env()) { if (is.null(x) || is.null(to)) { return(x) } dim_x <- vec_dim(x) dim_to <- vec_dim(to) # Don't set dimensions for vectors if (length(dim_x) == 1L && length(dim_to) == 1L) { return(x) } if (length(dim_x) > length(dim_to)) { details <- sprintf( "Can't decrease dimensionality from %s to %s.", length(dim_x), length(dim_to) ) stop_incompatible_cast( x, to, details = details, x_arg = x_arg, to_arg = to_arg, call = call ) } dim_x <- n_dim2(dim_x, dim_to)$x dim_to[[1]] <- dim_x[[1]] # don't change number of observations ok <- dim_x == dim_to | dim_x == 1 if (any(!ok)) { stop_incompatible_cast( x, to, details = "Non-recyclable dimensions.", x_arg = x_arg, to_arg = to_arg, call = call ) } # Increase dimensionality if required if (vec_dim_n(x) != length(dim_x)) { dim(x) <- dim_x } recycle <- dim_x != dim_to # Avoid expensive subset if (all(!recycle)) { return(x) } indices <- rep(list(missing_arg()), length(dim_to)) indices[recycle] <- map(dim_to[recycle], rep_len, x = 1L) eval_bare(expr(x[!!!indices, drop = FALSE])) } # Helpers ----------------------------------------------------------------- n_dim2 <- function(x, y) { nx <- length(x) ny <- length(y) if (nx == ny) { list(x = x, y = y) } else if (nx < ny) { list(x = c(x, rep(1L, ny - nx)), y = y) } else { list(x = x, y = c(y, rep(1L, nx - ny))) } } vctrs/R/type-table.R0000644000176200001440000000314414520724751014016 0ustar liggesusers#' Table S3 class #' #' These functions help the base table class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name table NULL #' @export vec_restore.table <- function(x, to, ...) { new_table(x, dim = dim(x), dimnames = dimnames(x)) } # Print ------------------------------------------------------------------- #' @export vec_ptype_full.table <- function(x, ...) { paste0("table", vec_ptype_shape(x)) } #' @export vec_ptype_abbr.table <- function(x, ...) { "table" } # Coercion ---------------------------------------------------------------- #' @export vec_ptype2.table.table <- function(x, y, ..., x_arg = "", y_arg = "") { ptype <- vec_ptype2(unstructure(x), unstructure(y)) vec_shaped_ptype(new_table(ptype), x, y, x_arg = x_arg, y_arg = y_arg) } #' @export vec_cast.table.table <- function(x, to, ...) { out <- vec_cast(unstructure(x), unstructure(to)) out <- new_table(out, dim = dim(x), dimnames = dimnames(x)) shape_broadcast(out, to, ...) } # ------------------------------------------------------------------------------ new_table <- function(x = integer(), dim = NULL, dimnames = NULL) { if (is_null(dim)) { dim <- length(x) } else if (!is.integer(dim)) { abort("`dim` must be an integer vector.") } n_elements <- prod(dim) n_x <- length(x) if (n_elements != n_x) { abort(glue::glue( "Length implied by `dim`, {n_elements}, must match the length of `x`, {n_x}." )) } structure(x, dim = dim, dimnames = dimnames, class = "table") } is_bare_table <- function(x) { identical(class(x), "table") } vctrs/R/type-bare.R0000644000176200001440000002634014315060307013633 0ustar liggesusers# Type2 ------------------------------------------------------------------- # Left generics ----------------------------------------------------------- #' @rdname vec_ptype2 #' @export vec_ptype2.logical #' @method vec_ptype2 logical #' @export vec_ptype2.logical <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.logical") } #' @rdname vec_ptype2 #' @export vec_ptype2.integer #' @method vec_ptype2 integer #' @export vec_ptype2.integer <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.integer") } #' @rdname vec_ptype2 #' @export vec_ptype2.double #' @method vec_ptype2 double #' @export vec_ptype2.double <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.double") } #' @rdname vec_ptype2 #' @export vec_ptype2.complex #' @method vec_ptype2 complex #' @export vec_ptype2.complex <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.complex") } #' @rdname vec_ptype2 #' @export vec_ptype2.character #' @method vec_ptype2 character #' @export vec_ptype2.character <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.character") } #' @rdname vec_ptype2 #' @export vec_ptype2.raw #' @method vec_ptype2 raw #' @export vec_ptype2.raw <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.raw") } #' @rdname vec_ptype2 #' @export vec_ptype2.list #' @method vec_ptype2 list #' @export vec_ptype2.list <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.list") } # Numeric-ish #' @method vec_ptype2.logical logical #' @export vec_ptype2.logical.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.logical") } #' @export #' @method vec_ptype2.integer integer vec_ptype2.integer.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.integer") } #' @export #' @method vec_ptype2.logical integer vec_ptype2.logical.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.integer") } #' @export #' @method vec_ptype2.integer logical vec_ptype2.integer.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.logical") } #' @export #' @method vec_ptype2.double double vec_ptype2.double.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.double") } #' @export #' @method vec_ptype2.logical double vec_ptype2.logical.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.double") } #' @export #' @method vec_ptype2.double logical vec_ptype2.double.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.logical") } #' @export #' @method vec_ptype2.integer double vec_ptype2.integer.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.double") } #' @export #' @method vec_ptype2.double integer vec_ptype2.double.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.integer") } #' @export #' @method vec_ptype2.complex complex vec_ptype2.complex.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.complex") } #' @export #' @method vec_ptype2.integer complex vec_ptype2.integer.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.complex") } #' @export #' @method vec_ptype2.complex integer vec_ptype2.complex.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.integer") } #' @export #' @method vec_ptype2.double complex vec_ptype2.double.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.complex") } #' @export #' @method vec_ptype2.complex double vec_ptype2.complex.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.double") } # Character #' @method vec_ptype2.character character #' @export vec_ptype2.character.character <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.character.character") } # Raw #' @export #' @method vec_ptype2.raw raw vec_ptype2.raw.raw <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.raw.raw") } # Lists #' @method vec_ptype2.list list #' @export vec_ptype2.list.list <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.list.list") } # Cast -------------------------------------------------------------------- # These methods for base types are handled at the C level unless # inputs have shape or have lossy casts #' @export #' @rdname vec_cast #' @export vec_cast.logical #' @method vec_cast logical vec_cast.logical <- function(x, to, ...) { UseMethod("vec_cast.logical") } #' @export #' @method vec_cast.logical logical vec_cast.logical.logical <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.logical integer vec_cast.logical.integer <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { out <- vec_coerce_bare(x, "logical") out <- shape_broadcast( out, to, x_arg = x_arg, to_arg = to_arg, call = call ) lossy <- !x %in% c(0L, 1L, NA_integer_) maybe_lossy_cast( out, x, to, lossy, x_arg = x_arg, to_arg = to_arg, call = call ) } #' @export #' @method vec_cast.logical double vec_cast.logical.double <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { out <- vec_coerce_bare(x, "logical") out <- shape_broadcast( out, to, x_arg = x_arg, to_arg = to_arg, call = call ) lossy <- !x %in% c(0, 1, NA_real_) maybe_lossy_cast( out, x, to, lossy, x_arg = x_arg, to_arg = to_arg, call = call ) } #' @export #' @rdname vec_cast #' @export vec_cast.integer #' @method vec_cast integer vec_cast.integer <- function(x, to, ...) { UseMethod("vec_cast.integer") } #' @export #' @method vec_cast.integer logical vec_cast.integer.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "integer") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.integer integer vec_cast.integer.integer <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.integer double vec_cast.integer.double <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { out <- suppressWarnings(vec_coerce_bare(x, "integer")) x_na <- is.na(x) lossy <- (out != x & !x_na) | xor(x_na, is.na(out)) out <- shape_broadcast( out, to, x_arg = x_arg, to_arg = to_arg, call = call ) maybe_lossy_cast( out, x, to, lossy, x_arg = x_arg, to_arg = to_arg, call = call ) } #' @export #' @rdname vec_cast #' @export vec_cast.double #' @method vec_cast double vec_cast.double <- function(x, to, ...) { UseMethod("vec_cast.double") } #' @export #' @method vec_cast.double logical vec_cast.double.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "double") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.double integer vec_cast.double.integer <- vec_cast.double.logical #' @export #' @method vec_cast.double double vec_cast.double.double <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.complex #' @method vec_cast complex vec_cast.complex <- function(x, to, ...) { UseMethod("vec_cast.complex") } #' @export #' @method vec_cast.complex logical vec_cast.complex.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "complex") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.complex integer vec_cast.complex.integer <- vec_cast.complex.logical #' @export #' @method vec_cast.complex double vec_cast.complex.double <- vec_cast.complex.logical #' @export #' @method vec_cast.complex complex vec_cast.complex.complex <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.raw #' @method vec_cast raw vec_cast.raw <- function(x, to, ...) { UseMethod("vec_cast.raw") } #' @export #' @method vec_cast.raw raw vec_cast.raw.raw <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.character #' @method vec_cast character vec_cast.character <- function(x, to, ...) { UseMethod("vec_cast.character") } #' @export #' @method vec_cast.character character vec_cast.character.character <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @rdname vec_cast #' @export vec_cast.list #' @method vec_cast list #' @export vec_cast.list <- function(x, to, ...) { UseMethod("vec_cast.list") } #' @export #' @method vec_cast.list list vec_cast.list.list <- function(x, to, ...) { shape_broadcast(x, to, ...) } # equal -------------------------------------------------------------- #' @export vec_proxy_equal.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_equal(x) } # compare ------------------------------------------------------------ #' @export vec_proxy_compare.raw <- function(x, ...) { # because: # order(as.raw(1:3)) # #> Error in order(as.raw(1:3)): unimplemented type 'raw' in 'orderVector1' as.integer(x) } #' @export vec_proxy_compare.list <- function(x, ...) { stop_unsupported(x, "vec_proxy_compare") } #' @export vec_proxy_compare.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_compare(x) } # order ------------------------------------------------------------ #' @export vec_proxy_order.raw <- function(x, ...) { # Can't rely on fallthrough behavior to `vec_proxy_compare()` because this # isn't an S3 object. Have to call it manually. vec_proxy_compare(x) } #' @export vec_proxy_order.list <- function(x, ...) { # Order lists by first appearance. # This allows list elements to be grouped in `vec_order()`. # Have to separately ensure missing values are propagated. out <- vec_duplicate_id(x) if (vec_any_missing(x)) { missing <- vec_detect_missing(x) out <- vec_assign(out, missing, NA_integer_) } out } #' @export vec_proxy_order.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_order(x) } vctrs/R/utils-cli.R0000644000176200001440000000214713551660764013665 0ustar liggesusers parens <- function(x, left = TRUE) { x_lines <- strsplit(x, "\n") x_lines <- map(x_lines, paren, left = left) map_chr(x_lines, paste0, collapse = "\n") } paren <- function(x, left = TRUE) { if (length(x) <= 1) { if (left) { paste0("( ", x) } else { paste0(x, " )") } } else { if (left) { paste0(c("\u250c ", rep("\u2502 ", length(x) - 2), "\u2514 "), x) } else { paste0(format(x), c(" \u2510", rep(" \u2502", length(x) - 2), " \u2518")) } } } pad_height <- function(x) { pad <- function(x, n) c(x, rep("", n - length(x))) lines <- strsplit(x, "\n") height <- max(map_int(lines, length)) lines <- map(lines, pad, height) map_chr(lines, paste0, "\n", collapse = "") } pad_width <- function(x) { lines <- strsplit(x, "\n", fixed = TRUE) # fix up strsplit bug n <- map_int(lines, length) lines[n == 0] <- "" width <- max(unlist(map(lines, nchar))) lines <- map(lines, format, width = width) map_chr(lines, paste, collapse = "\n") } str_backtick <- function(x) { paste0("`", x, "`") } str_is_multiline <- function(x) { grepl("\n", x) } vctrs/R/recycle.R0000644000176200001440000000327614511524374013403 0ustar liggesusers#' Vector recycling #' #' `vec_recycle(x, size)` recycles a single vector to a given size. #' `vec_recycle_common(...)` recycles multiple vectors to their common size. All #' functions obey the [vctrs recycling rules][theory-faq-recycling], and will #' throw an error if recycling is not possible. See [vec_size()] for the precise #' definition of size. #' #' @inheritParams rlang::args_error_context #' #' @param x A vector to recycle. #' @param ... Depending on the function used: #' * For `vec_recycle_common()`, vectors to recycle. #' * For `vec_recycle()`, these dots should be empty. #' @param size Desired output size. #' @param .size Desired output size. If omitted, #' will use the common size from [vec_size_common()]. #' @param x_arg Argument name for `x`. These are used in error #' messages to inform the user about which argument has an #' incompatible size. #' #' @section Dependencies: #' - [vec_slice()] #' #' @export #' @examples #' # Inputs with 1 observation are recycled #' vec_recycle_common(1:5, 5) #' vec_recycle_common(integer(), 5) #' \dontrun{ #' vec_recycle_common(1:5, 1:2) #' } #' #' # Data frames and matrices are recycled along their rows #' vec_recycle_common(data.frame(x = 1), 1:5) #' vec_recycle_common(array(1:2, c(1, 2)), 1:5) #' vec_recycle_common(array(1:3, c(1, 3, 1)), 1:5) vec_recycle <- function(x, size, ..., x_arg = "", call = caller_env()) { check_dots_empty0(...) .Call(ffi_recycle, x, size, environment()) } #' @export #' @rdname vec_recycle vec_recycle_common <- function(..., .size = NULL, .arg = "", .call = caller_env()) { .External2(ffi_recycle_common, .size) } vctrs/R/arith.R0000644000176200001440000000616214315060307013052 0ustar liggesusers#' Arithmetic operations #' #' This generic provides a common double dispatch mechanism for all infix #' operators (`+`, `-`, `/`, `*`, `^`, `%%`, `%/%`, `!`, `&`, `|`). It is used #' to power the default arithmetic and boolean operators for [vctr]s objects, #' overcoming the limitations of the base [Ops] generic. #' #' `vec_arith_base()` is provided as a convenience for writing methods. It #' recycles `x` and `y` to common length then calls the base operator with the #' underlying [vec_data()]. #' #' `vec_arith()` is also used in `diff.vctrs_vctr()` method via `-`. #' #' @param op An arithmetic operator as a string #' @param x,y A pair of vectors. For `!`, unary `+` and unary `-`, `y` will be #' a sentinel object of class `MISSING`, as created by `MISSING()`. #' @inheritParams rlang::args_dots_empty #' #' @seealso [stop_incompatible_op()] for signalling that an arithmetic #' operation is not permitted/supported. #' @seealso See [vec_math()] for the equivalent for the unary mathematical #' functions. #' @export #' @keywords internal #' @examples #' d <- as.Date("2018-01-01") #' dt <- as.POSIXct("2018-01-02 12:00") #' t <- as.difftime(12, unit = "hours") #' #' vec_arith("-", dt, 1) #' vec_arith("-", dt, t) #' vec_arith("-", dt, d) #' #' vec_arith("+", dt, 86400) #' vec_arith("+", dt, t) #' vec_arith("+", t, t) #' #' vec_arith("/", t, t) #' vec_arith("/", t, 2) #' #' vec_arith("*", t, 2) vec_arith <- function(op, x, y, ...) { check_dots_empty0(...) UseMethod("vec_arith", x) } #' @export #' @rdname vec_arith vec_arith.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } # Atomic vectors ---------------------------------------------------------- #' @rdname vec_arith #' @export vec_arith.logical #' @method vec_arith logical #' @export vec_arith.logical <- function(op, x, y, ...) UseMethod("vec_arith.logical", y) #' @method vec_arith.logical default #' @export vec_arith.logical.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.logical logical #' @export vec_arith.logical.logical <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @method vec_arith.logical numeric #' @export vec_arith.logical.numeric <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @rdname vec_arith #' @export vec_arith.numeric #' @method vec_arith numeric #' @export vec_arith.numeric <- function(op, x, y, ...) UseMethod("vec_arith.numeric", y) #' @method vec_arith.numeric default #' @export vec_arith.numeric.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.numeric logical #' @export vec_arith.numeric.logical <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @method vec_arith.numeric numeric #' @export vec_arith.numeric.numeric <- function(op, x, y, ...) vec_arith_base(op, x, y) # Helpers ----------------------------------------------------------------- #' @export #' @rdname vec_arith vec_arith_base <- function(op, x, y) { args <- vec_recycle_common(x, y) op_fn <- getExportedValue("base", op) op_fn(vec_data(args[[1L]]), vec_data(args[[2L]])) } #' @export #' @rdname vec_arith MISSING <- function() { structure(list(), class = "MISSING") } vctrs/R/ptype-abbr-full.R0000644000176200001440000000716714315060307014756 0ustar liggesusers#' Vector type as a string #' #' `vec_ptype_full()` displays the full type of the vector. `vec_ptype_abbr()` #' provides an abbreviated summary suitable for use in a column heading. #' #' @section S3 dispatch: #' The default method for `vec_ptype_full()` uses the first element of the #' class vector. Override this method if your class has parameters that should #' be prominently displayed. #' #' The default method for `vec_ptype_abbr()` [abbreviate()]s `vec_ptype_full()` #' to 8 characters. You should almost always override, aiming for 4-6 #' characters where possible. #' #' These arguments are handled by the generic and not passed to methods: #' * `prefix_named` #' * `suffix_shape` #' #' @param x A vector. #' @param prefix_named If `TRUE`, add a prefix for named vectors. #' @param suffix_shape If `TRUE` (the default), append the shape of #' the vector. #' @inheritParams rlang::args_dots_empty #' #' @keywords internal #' @return A string. #' @export #' @examples #' cat(vec_ptype_full(1:10)) #' cat(vec_ptype_full(iris)) #' #' cat(vec_ptype_abbr(1:10)) vec_ptype_full <- function(x, ...) { check_dots_empty0(...) # Data frames and their subclasses have internal handling in the # default method to get the inner types format method <- s3_method_specific(x, "vec_ptype_full", ns = "vctrs") return(method(x, ...)) UseMethod("vec_ptype_full") } #' @export #' @rdname vec_ptype_full vec_ptype_abbr <- function(x, ..., prefix_named = FALSE, suffix_shape = TRUE) { check_dots_empty0(...) method <- s3_method_specific(x, "vec_ptype_abbr", ns = "vctrs") abbr <- method(x, ...) named <- if ((prefix_named || is_bare_list(x)) && !is.null(vec_names(x))) "named " shape <- if (suffix_shape) vec_ptype_shape(x) abbr <- paste0(named, abbr, shape) return(abbr) UseMethod("vec_ptype_abbr") } #' @export vec_ptype_full.NULL <- function(x, ...) "NULL" #' @export vec_ptype_abbr.NULL <- function(x, ...) "NULL" # Default: base types and fallback for S3/S4 ------------------------------ #' @export vec_ptype_full.default <- function(x, ...) { if (is.data.frame(x)) { vec_ptype_full_data_frame(x, ...) } else if (is.object(x)) { class(x)[[1]] } else if (is_vector(x)) { paste0(typeof(x), vec_ptype_shape(x)) } else { abort("Not a vector.") } } #' @export vec_ptype_abbr.default <- function(x, ...) { if (is.object(x)) { type <- class(x)[[1]] } else if (is_vector(x)) { type <- vec_ptype_abbr_bare(x, ...) } else { abort("Not a vector.") } unname(abbreviate(type, 8)) } vec_ptype_full_data_frame <- function(x, ...) { if (length(x) == 0) { return(paste0(class(x)[[1]], "<>")) } else if (length(x) == 1) { return(paste0(class(x)[[1]], "<", names(x), ":", vec_ptype_full(x[[1]]), ">")) } # Needs to handle recursion with indenting types <- map_chr(x, vec_ptype_full) needs_indent <- grepl("\n", types) types[needs_indent] <- map(types[needs_indent], function(x) indent(paste0("\n", x), 4)) names <- paste0(" ", format(names(x))) paste0( class(x)[[1]], "<\n", paste0(names, ": ", types, collapse = "\n"), "\n>" ) } vec_ptype_abbr_bare <- function(x, ...) { switch(typeof(x), list = "list", logical = "lgl", integer = "int", double = "dbl", character = "chr", complex = "cpl", list = "list", expression = "expr", raw = "raw", typeof(x) ) } # Helpers ----------------------------------------------------------------- vec_ptype_shape <- function(x) { dim <- dim2(x) if (length(dim) == 1) { if (is_null(dim(x))) { "" } else { "[1d]" } } else { paste0("[,", paste(dim[-1], collapse = ","), "]") } } vctrs/R/faq-developer.R0000644000176200001440000000216114511524374014477 0ustar liggesusers#' FAQ - Is my class compatible with vctrs? #' #' @includeRmd man/faq/developer/reference-compatibility.Rmd description #' #' @name reference-faq-compatibility NULL #' FAQ - How does coercion work in vctrs? #' #' @includeRmd man/faq/developer/theory-coercion.Rmd description #' #' @name theory-faq-coercion NULL # Also see the `redirects:` section in `_pkgdown.yml` # for `vector_recycling_rules.html` #' FAQ - How does recycling work in vctrs and the tidyverse? #' #' @includeRmd man/faq/developer/theory-recycling.Rmd description #' #' @name theory-faq-recycling #' @aliases vector_recycling_rules NULL #' FAQ - How to implement ptype2 and cast methods? #' #' @includeRmd man/faq/developer/howto-coercion.Rmd description #' #' @name howto-faq-coercion NULL #' FAQ - How to implement ptype2 and cast methods? (Data frames) #' #' @includeRmd man/faq/developer/howto-coercion-data-frame.Rmd description #' #' @name howto-faq-coercion-data-frame NULL #' FAQ - Why isn't my class treated as a vector? #' #' @includeRmd man/faq/developer/howto-faq-fix-scalar-type-error.Rmd description #' #' @name howto-faq-fix-scalar-type-error NULL vctrs/R/type-asis.R0000644000176200001440000000516314315060307013661 0ustar liggesusers#' AsIs S3 class #' #' These functions help the base AsIs class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name as-is NULL # ------------------------------------------------------------------------------ # Printing #' @export vec_ptype_full.AsIs <- function(x, ...) { x <- asis_strip(x) paste0("I<", vec_ptype_full(x), ">") } #' @export vec_ptype_abbr.AsIs <- function(x, ...) { x <- asis_strip(x) paste0("I<", vec_ptype_abbr(x), ">") } # ------------------------------------------------------------------------------ # Proxy / restore # Arises with base df ctor: `data.frame(x = I(list(1, 2:3)))` #' @export vec_proxy.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy(x) } #' @export vec_restore.AsIs <- function(x, to, ...) { asis_restore(x) } #' @export vec_proxy_equal.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy_equal(x) } #' @export vec_proxy_compare.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy_compare(x) } #' @export vec_proxy_order.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy_order(x) } # ------------------------------------------------------------------------------ # Coercion #' @rdname as-is #' @export vec_ptype2.AsIs #' @method vec_ptype2 AsIs #' @export vec_ptype2.AsIs <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.AsIs") } #' @method vec_ptype2.AsIs AsIs #' @export vec_ptype2.AsIs.AsIs <- function(x, y, ..., x_arg = "", y_arg = "") { x <- asis_strip(x) y <- asis_strip(y) vec_ptype2_asis(x, y, ..., x_arg = x_arg, y_arg = y_arg) } vec_ptype2_asis_left <- function(x, y, ...) { x <- asis_strip(x) vec_ptype2_asis(x, y, ...) } vec_ptype2_asis_right <- function(x, y, ...) { y <- asis_strip(y) vec_ptype2_asis(x, y, ...) } vec_ptype2_asis <- function(x, y, ...) { out <- vec_ptype2(x, y, ...) asis_restore(out) } # ------------------------------------------------------------------------------ # Casting vec_cast_from_asis <- function(x, to, ..., call = caller_env()) { x <- asis_strip(x) vec_cast(x, to, ..., call = call) } vec_cast_to_asis <- function(x, to, ..., call = caller_env()) { to <- asis_strip(to) out <- vec_cast(x, to, ..., call = call) asis_restore(out) } # ------------------------------------------------------------------------------ is_asis <- function(x) { inherits(x, "AsIs") } asis_strip <- function(x) { class(x) <- setdiff(class(x), "AsIs") x } asis_restore <- function(x) { # Using `oldClass()` here to return `NULL` for atomics # so that their implicit class isn't added class(x) <- c("AsIs", oldClass(x)) x } vctrs/R/subscript.R0000644000176200001440000002401614512002112013744 0ustar liggesusers#' Convert to a base subscript type #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' Convert `i` to the base type expected by [vec_as_location()] or #' [vec_as_location2()]. The values of the subscript type are #' not checked in any way (length, missingness, negative elements). #' #' @inheritParams vec_as_location #' #' @param logical,numeric,character How to handle logical, numeric, #' and character subscripts. #' #' If `"cast"` and the subscript is not one of the three base types #' (logical, integer or character), the subscript is #' [cast][vec_cast] to the relevant base type, e.g. factors are #' coerced to character. `NULL` is treated as an empty integer #' vector, and is thus coercible depending on the setting of #' `numeric`. Symbols are treated as character vectors and thus #' coercible depending on the setting of `character`. #' #' If `"error"`, the subscript type is disallowed and triggers an #' informative error. #' @keywords internal #' @export vec_as_subscript <- function(i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env()) { check_dots_empty0(...) .Call( ffi_as_subscript, i = i, logical = logical, numeric = numeric, character = character, frame = environment() ) } vec_as_subscript_result <- function(i, arg, call, logical, numeric, character) { .Call( ffi_as_subscript_result, i = i, logical = logical, numeric = numeric, character = character, frame = environment() ) } #' @rdname vec_as_subscript #' @export vec_as_subscript2 <- function(i, ..., numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env()) { check_dots <- function(..., logical = "error", call = caller_env()) { if (!is_string(logical, "error")) { abort( "`vctrs::vec_as_subscript2(logical = 'cast')` is deprecated.", call = caller_env() ) } check_dots_empty0(..., call = call) } check_dots(...) result_get(vec_as_subscript2_result( i, arg, call, numeric = numeric, character = character )) } vec_as_subscript2_result <- function(i, arg, call, numeric = "cast", character = "cast") { numeric <- arg_match0(numeric, c("cast", "error")) character <- arg_match0(character, c("cast", "error")) result <- vec_as_subscript_result( i, arg = arg, call = call, logical = "error", numeric = numeric, character = character ) # This should normally be a `vctrs_error_subscript`. Indicate to # message methods that this error refers to a `[[` subscript. if (!is_null(result$err)) { result$err$subscript_scalar <- TRUE } result } subscript_type_opts <- c("logical", "numeric", "character") subscript_type_opts_indefinite_singular <- c("a logical flag", "a location", "a name") subscript_type_opts_indefinite_plural <- c("logical flags", "locations", "names") as_opts_subscript_type <- function(x, arg = NULL) { if (inherits(x, "vctrs_opts_subscript_type")) { return(x) } new_opts( x, subscript_type_opts, subclass = "vctrs_opts_subscript_type", arg = arg ) } as_opts_subscript2_type <- function(x, arg = NULL) { if ("logical" %in% x) { abort("Logical subscripts can't be converted to a single location.") } as_opts_subscript_type(x, arg = arg) } stop_subscript <- function(i, ..., class = NULL, call = caller_env()) { abort( class = c(class, "vctrs_error_subscript"), i = i, ..., call = call ) } new_error_subscript <- function(class = NULL, i, ...) { error_cnd( c(class, "vctrs_error_subscript"), i = i, ... ) } new_error_subscript_type <- function(i, logical = "cast", numeric = "cast", character = "cast", ..., call = NULL, class = NULL) { new_error_subscript( class = c(class, "vctrs_error_subscript_type"), i = i, logical = logical, numeric = numeric, character = character, ..., call = call ) } #' @export cnd_header.vctrs_error_subscript_type <- function(cnd, ...) { arg <- cnd[["subscript_arg"]] if (is_subscript_arg(arg)) { with <- glue::glue(" with {format_subscript_arg(arg)}") } else { with <- "" } action <- cnd_subscript_action(cnd, assign_to = FALSE) elt <- cnd_subscript_element(cnd) if (cnd_subscript_scalar(cnd)) { glue::glue("Can't {action} {elt[[1]]}{with}.") } else { glue::glue("Can't {action} {elt[[2]]}{with}.") } } #' @export cnd_body.vctrs_error_subscript_type <- function(cnd, ...) { arg <- cnd_subscript_arg(cnd) type <- obj_type_friendly(cnd$i) expected_types <- cnd_subscript_expected_types(cnd) format_error_bullets(c( x = cli::format_inline("{arg} must be {.or {expected_types}}, not {type}.") )) } new_cnd_bullets_subscript_lossy_cast <- function(lossy_err) { function(cnd, ...) { format_error_bullets(c(x = cnd_header(lossy_err))) } } collapse_subscript_type <- function(cnd) { types <- cnd_subscript_expected_types(cnd) if (length(types) == 2) { last <- " or " } else { last <- ", or " } glue::glue_collapse(types, sep = ", ", last = last) } cnd_subscript_expected_types <- function(cnd) { types <- c("logical", "numeric", "character") allowed <- cnd[types] != "error" types[allowed] } new_error_subscript_size <- function(i, ..., class = NULL) { new_error_subscript( class = c(class, "vctrs_error_subscript_size"), i = i, ... ) } #' @export cnd_header.vctrs_error_subscript_size <- function(cnd, ...) { cnd_header.vctrs_error_subscript_type(cnd, ...) } new_error_subscript2_type <- function(i, numeric, character, ...) { new_error_subscript_type( i = i, logical = "error", numeric = numeric, character = character, subscript_scalar = TRUE, ... ) } cnd_body_subscript_dim <- function(cnd, ...) { arg <- append_arg("Subscript", cnd$subscript_arg) dim <- length(dim(cnd$i)) if (dim < 2) { abort("Internal error: Unexpected dimensionality in `cnd_body_subcript_dim()`.") } if (dim == 2) { shape <- "a matrix" } else { shape <- "an array" } format_error_bullets(c( x = glue::glue("{arg} must be a simple vector, not {shape}.") )) } cnd_subscript_element <- function(cnd, capital = FALSE) { elt <- cnd$subscript_elt %||% "element" if (!is_string(elt, c("element", "row", "column", "table"))) { abort(paste0( "Internal error: `cnd$subscript_elt` must be one of ", "`element`, `row`, `column` or `table`." )) } if (capital) { switch( elt, element = c("Element", "Elements"), row = c("Row", "Rows"), column = c("Column", "Columns"), table = c("Table", "Tables") ) } else { switch(elt, element = c("element", "elements"), row = c("row", "rows"), column = c("column", "columns"), table = c("table", "tables") ) } } cnd_subscript_element_cli <- function(n, cnd, capital = FALSE) { elt <- cnd$subscript_elt %||% "element" if (!is_string(elt, c("element", "row", "column", "table"))) { abort(paste0( "Internal error: `cnd$subscript_elt` must be one of ", "`element`, `row`, `column` or `table`." )) } if (capital) { elt <- switch( elt, element = "Element{?s}", row = "Row{?s}", column = "Column{?s}", table = "Table{?s}" ) } else { elt <- switch( elt, element = "element{?s}", row = "row{?s}", column = "column{?s}", table = "table{?s}" ) } cli::pluralize("{n} ", elt) } subscript_actions <- c( "select", "subset", "extract", "assign", "rename", "relocate", "remove", "negate" ) cnd_subscript_action <- function(cnd, assign_to = TRUE) { action <- cnd$subscript_action if (is_null(action)) { if (cnd_subscript_scalar(cnd)) { action <- "extract" } else { action <- "subset" } } if (!is_string(action, subscript_actions)) { cli::cli_abort( "`cnd$subscript_action` must be one of {.or {.arg {subscript_actions}}}.", .internal = TRUE ) } if (assign_to && action == "assign") { "assign to" } else { action } } cnd_subscript_arg <- function(cnd, ...) { format_subscript_arg(cnd[["subscript_arg"]], ...) } format_subscript_arg <- function(arg, capitalise = TRUE) { if (is_subscript_arg(arg)) { if (!is_string(arg)) { arg <- as_label(arg) } cli::format_inline("{.arg {arg}}") } else { if (capitalise) { "Subscript" } else { "subscript" } } } is_subscript_arg <- function(x) { !is_null(x) && !is_string(x, "") } cnd_subscript_type <- function(cnd) { type <- cnd$subscript_type if (!is_string(type, c("logical", "numeric", "character"))) { abort("Internal error: `cnd$subscript_type` must be `logical`, `numeric`, or `character`.") } type } cnd_subscript_scalar <- function(cnd) { out <- cnd$subscript_scalar %||% FALSE if (!is_bool(out)) { abort("Internal error: `cnd$subscript_scalar` must be a boolean.") } out } vctrs/R/size.R0000644000176200001440000001211614511524374012720 0ustar liggesusers#' Number of observations #' #' @description #' #' `vec_size(x)` returns the size of a vector. `vec_is_empty()` #' returns `TRUE` if the size is zero, `FALSE` otherwise. #' #' The size is distinct from the [length()] of a vector because it #' generalises to the "number of observations" for 2d structures, #' i.e. it's the number of rows in matrix or a data frame. This #' definition has the important property that every column of a data #' frame (even data frame and matrix columns) have the same size. #' `vec_size_common(...)` returns the common size of multiple vectors. #' #' `list_sizes()` returns an integer vector containing the size of each element #' of a list. It is nearly equivalent to, but faster than, #' `map_int(x, vec_size)`, with the exception that `list_sizes()` will #' error on non-list inputs, as defined by [obj_is_list()]. `list_sizes()` is #' to `vec_size()` as [lengths()] is to [length()]. #' #' @seealso [vec_slice()] for a variation of `[` compatible with `vec_size()`, #' and [vec_recycle()] to [recycle][theory-faq-recycling] vectors to common #' length. #' @section Invariants: #' * `vec_size(dataframe)` == `vec_size(dataframe[[i]])` #' * `vec_size(matrix)` == `vec_size(matrix[, i, drop = FALSE])` #' * `vec_size(vec_c(x, y))` == `vec_size(x)` + `vec_size(y)` #' #' @inheritParams rlang::args_error_context #' #' @param x,... Vector inputs or `NULL`. #' @param .size If `NULL`, the default, the output size is determined by #' recycling the lengths of all elements of `...`. Alternatively, you can #' supply `.size` to force a known size; in this case, `x` and `...` are #' ignored. #' @param .absent The size used when no input is provided, or when all input #' is `NULL`. If left as `NULL` when no input is supplied, an error is thrown. #' @return An integer (or double for long vectors). #' #' `vec_size_common()` returns `.absent` if all inputs are `NULL` or #' absent, `0L` by default. #' #' #' @details #' #' There is no vctrs helper that retrieves the number of columns: as this #' is a property of the [type][vec_ptype_show()]. #' #' `vec_size()` is equivalent to `NROW()` but has a name that is easier to #' pronounce, and throws an error when passed non-vector inputs. #' #' #' @section The size of NULL: #' #' The size of `NULL` is hard-coded to `0L` in `vec_size()`. #' `vec_size_common()` returns `.absent` when all inputs are `NULL` #' (if only some inputs are `NULL`, they are simply ignored). #' #' A default size of 0 makes sense because sizes are most often #' queried in order to compute a total size while assembling a #' collection of vectors. Since we treat `NULL` as an absent input by #' principle, we return the identity of sizes under addition to #' reflect that an absent input doesn't take up any size. #' #' Note that other defaults might make sense under different #' circumstances. For instance, a default size of 1 makes sense for #' finding the common size because 1 is the identity of the recycling #' rules. #' #' @section Dependencies: #' - [vec_proxy()] #' #' @export #' @examples #' vec_size(1:100) #' vec_size(mtcars) #' vec_size(array(dim = c(3, 5, 10))) #' #' vec_size_common(1:10, 1:10) #' vec_size_common(1:10, 1) #' vec_size_common(integer(), 1) #' #' list_sizes(list("a", 1:5, letters)) vec_size <- function(x) { .Call(ffi_size, x, environment()) } #' @export #' @rdname vec_size vec_size_common <- function(..., .size = NULL, .absent = 0L, .arg = "", .call = caller_env()) { .External2(ffi_size_common, .size, .absent) } #' @rdname vec_size #' @export list_sizes <- function(x) { .Call(ffi_list_sizes, x, environment()) } #' @rdname vec_size #' @export vec_is_empty <- function(x) { vec_size(x) == 0L } #' Default value for empty vectors #' #' Use this inline operator when you need to provide a default value for #' empty (as defined by [vec_is_empty()]) vectors. #' #' @param x A vector #' @param y Value to use if `x` is empty. To preserve type-stability, should #' be the same type as `x`. #' @rdname op-empty-default #' @export #' @examples #' 1:10 %0% 5 #' integer() %0% 5 `%0%` <- function(x, y) { if (vec_is_empty(x)) y else x } # sequences ------------------------------------------------------------------- #' Useful sequences #' #' `vec_seq_along()` is equivalent to [seq_along()] but uses size, not length. #' `vec_init_along()` creates a vector of missing values with size matching #' an existing object. #' #' @param x,y Vectors #' @return #' * `vec_seq_along()` an integer vector with the same size as `x`. #' * `vec_init_along()` a vector with the same type as `x` and the same size #' as `y`. #' @export #' @examples #' vec_seq_along(mtcars) #' vec_init_along(head(mtcars)) vec_seq_along <- function(x) { seq_len(vec_size(x)) } #' @export #' @rdname vec_seq_along vec_init_along <- function(x, y = x) { vec_slice(x, rep_len(NA_integer_, vec_size(y))) } vec_as_short_length <- function(n, arg = caller_arg(n), call = caller_env()) { .Call(ffi_as_short_length, n, environment()) } vctrs/R/cast.R0000644000176200001440000001561314420030332012666 0ustar liggesusers#' Cast a vector to a specified type #' #' @description #' #' `vec_cast()` provides directional conversions from one type of #' vector to another. Along with [vec_ptype2()], this generic forms #' the foundation of type coercions in vctrs. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @inheritParams rlang::args_error_context #' @param x Vectors to cast. #' @param ... For `vec_cast_common()`, vectors to cast. For #' `vec_cast()`, `vec_cast_default()`, and `vec_restore()`, these #' dots are only for future extensions and should be empty. #' @param to,.to Type to cast to. If `NULL`, `x` will be returned as is. #' @param x_arg Argument name for `x`, used in error messages to #' inform the user about the locations of incompatible types #' (see [stop_incompatible_type()]). #' @param to_arg Argument name `to` used in error messages to #' inform the user about the locations of incompatible types #' (see [stop_incompatible_type()]). #' @return A vector the same length as `x` with the same type as `to`, #' or an error if the cast is not possible. An error is generated if #' information is lost when casting between compatible types (i.e. when #' there is no 1-to-1 mapping for a specific value). #' #' @section Dependencies of `vec_cast_common()`: #' #' ## vctrs dependencies #' #' - [vec_ptype2()] #' - [vec_cast()] #' #' #' ## base dependencies #' #' Some functions enable a base-class fallback for #' `vec_cast_common()`. In that case the inputs are deemed compatible #' when they have the same [base type][base::typeof] and inherit from #' the same base class. #' #' @seealso Call [stop_incompatible_cast()] when you determine from the #' attributes that an input can't be cast to the target type. #' @export #' @examples #' # x is a double, but no information is lost #' vec_cast(1, integer()) #' #' # When information is lost the cast fails #' try(vec_cast(c(1, 1.5), integer())) #' try(vec_cast(c(1, 2), logical())) #' #' # You can suppress this error and get the partial results #' allow_lossy_cast(vec_cast(c(1, 1.5), integer())) #' allow_lossy_cast(vec_cast(c(1, 2), logical())) #' #' # By default this suppress all lossy cast errors without #' # distinction, but you can be specific about what cast is allowed #' # by supplying prototypes #' allow_lossy_cast(vec_cast(c(1, 1.5), integer()), to_ptype = integer()) #' try(allow_lossy_cast(vec_cast(c(1, 2), logical()), to_ptype = integer())) #' #' # No sensible coercion is possible so an error is generated #' try(vec_cast(1.5, factor("a"))) #' #' # Cast to common type #' vec_cast_common(factor("a"), factor(c("a", "b"))) vec_cast <- function(x, to, ..., x_arg = caller_arg(x), to_arg = "", call = caller_env()) { if (!missing(...)) { check_ptype2_dots_empty(...) } return(.Call(ffi_cast, x, to, environment())) UseMethod("vec_cast", to) } vec_cast_dispatch <- function(x, to, ..., x_arg = "", to_arg = "") { UseMethod("vec_cast", to) } vec_cast_dispatch_native <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { .Call( ffi_cast_dispatch_native, x, to, match_fallback_opts(...), x_arg, to_arg, environment() ) } #' @export #' @rdname vec_cast vec_cast_common <- function(..., .to = NULL, .arg = "", .call = caller_env()) { .External2(ffi_cast_common, .to) } vec_cast_common_opts <- function(..., .to = NULL, .opts = fallback_opts(), .arg = "", .call = caller_env()) { .External2(ffi_cast_common_opts, .to, .opts) } vec_cast_common_params <- function(..., .to = NULL, .s3_fallback = NULL, .arg = "", .call = caller_env()) { opts <- fallback_opts( s3_fallback = .s3_fallback ) vec_cast_common_opts( ..., .to = .to, .opts = opts, .arg = .arg, .call = .call ) } vec_cast_common_fallback <- function(..., .to = NULL, .arg = "", .call = caller_env()) { vec_cast_common_opts( ..., .to = .to, .opts = full_fallback_opts(), .arg = .arg, .call = .call ) } #' @rdname vec_default_ptype2 #' @inheritParams vec_cast #' @export vec_default_cast <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { if (is_asis(x)) { return(vec_cast_from_asis( x, to, x_arg = x_arg, to_arg = to_arg, call = call )) } if (is_asis(to)) { return(vec_cast_to_asis( x, to, x_arg = x_arg, to_arg = to_arg, call = call )) } if (inherits(to, "vctrs_vctr") && !inherits(to, c("vctrs_rcrd", "vctrs_list_of"))) { return(vctr_cast( x, to, x_arg = x_arg, to_arg = to_arg, call = call )) } opts <- match_fallback_opts(...) if (is_common_class_fallback(to) && length(common_class_suffix(x, to))) { return(x) } # Data frames have special bare class and same type fallbacks if (is.data.frame(x) && is.data.frame(to)) { out <- df_cast_opts( x, to, ..., opts = opts, x_arg = x_arg, to_arg = to_arg, call = call ) # Same-type fallback for data frames. If attributes of the empty # data frames are congruent, just reproduce these attributes. This # eschews any constraints on rows and cols that `[` and `[<-` # methods might have. If that is a problem, the class needs to # implement vctrs methods. if (identical(non_df_attrib(x), non_df_attrib(to))) { attributes(out) <- c(df_attrib(out), non_df_attrib(to)) return(out) } # Bare-class fallback for data frames. # FIXME: Should we only allow it when target is a bare df? if (inherits(to, "tbl_df")) { out <- df_as_tibble(out) } return(out) } if (is_same_type(x, to)) { return(x) } withRestarts( stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, `vctrs:::from_dispatch` = match_from_dispatch(...), call = call ), vctrs_restart_cast = function(out) { out } ) } is_bare_df <- function(x) { inherits_only(x, "data.frame") || inherits_only(x, c("tbl_df", "tbl", "data.frame")) } is_informative_error_vctrs_error_cast_lossy <- function(x, ...) { FALSE } vctrs/R/aaa.R0000644000176200001440000000104014315060307012453 0ustar liggesusersreplace_from <- function(what, pkg, to = topenv(caller_env())) { if (what %in% getNamespaceExports(pkg)) { env <- ns_env(pkg) } else { env <- to } env_get(env, what, inherit = TRUE) } # nocov start # Useful for micro-optimising default arguments requiring evaluation, # such as `param = c("foo", "bar")`. Buys about 0.6us on my desktop. fn_inline_formals <- function(fn, names) { stopifnot(typeof(fn) == "closure") fmls <- formals(fn) fmls[names] <- lapply(fmls[names], eval) formals(fn) <- fmls fn } # nocov end vctrs/R/slice.R0000644000176200001440000001523514376223321013047 0ustar liggesusers#' Get or set observations in a vector #' #' This provides a common interface to extracting and modifying observations #' for all vector types, regardless of dimensionality. It is an analog to `[` #' that matches [vec_size()] instead of `length()`. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x A vector #' @param i An integer, character or logical vector specifying the #' locations or names of the observations to get/set. Specify #' `TRUE` to index all elements (as in `x[]`), or `NULL`, `FALSE` or #' `integer()` to index none (as in `x[NULL]`). #' @param value Replacement values. `value` is cast to the type of #' `x`, but only if they have a common type. See below for examples #' of this rule. #' @param x_arg,value_arg Argument names for `x` and `value`. These are used #' in error messages to inform the user about the locations of #' incompatible types and sizes (see [stop_incompatible_type()] and #' [stop_incompatible_size()]). #' #' @return A vector of the same type as `x`. #' #' @section Genericity: #' #' Support for S3 objects depends on whether the object implements a #' [vec_proxy()] method. #' #' * When a `vec_proxy()` method exists, the proxy is sliced and #' `vec_restore()` is called on the result. #' #' * Otherwise `vec_slice()` falls back to the base generic `[`. #' #' Note that S3 lists are treated as scalars by default, and will #' cause an error if they don't implement a [vec_proxy()] method. #' #' @section Differences with base R subsetting: #' #' * `vec_slice()` only slices along one dimension. For #' two-dimensional types, the first dimension is subsetted. #' #' * `vec_slice()` preserves attributes by default. #' #' * `vec_slice<-()` is type-stable and always returns the same type #' as the LHS. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_proxy()] #' - [vec_restore()] #' #' ## base dependencies #' #' - \code{base::`[`} #' #' If a non-data-frame vector class doesn't have a [vec_proxy()] #' method, the vector is sliced with `[` instead. #' #' @export #' @keywords internal #' @examples #' x <- sample(10) #' x #' vec_slice(x, 1:3) #' #' # You can assign with the infix variant: #' vec_slice(x, 2) <- 100 #' x #' #' # Or with the regular variant that doesn't modify the original input: #' y <- vec_assign(x, 3, 500) #' y #' x #' #' #' # Slicing objects of higher dimension: #' vec_slice(mtcars, 1:3) #' #' # Type stability -------------------------------------------------- #' #' # The assign variant is type stable. It always returns the same #' # type as the input. #' x <- 1:5 #' vec_slice(x, 2) <- 20.0 #' #' # `x` is still an integer vector because the RHS was cast to the #' # type of the LHS: #' vec_ptype(x) #' #' # Compare to `[<-`: #' x[2] <- 20.0 #' vec_ptype(x) #' #' #' # Note that the types must be coercible for the cast to happen. #' # For instance, you can cast a double vector of whole numbers to an #' # integer vector: #' vec_cast(1, integer()) #' #' # But not fractional doubles: #' try(vec_cast(1.5, integer())) #' #' # For this reason you can't assign fractional values in an integer #' # vector: #' x <- 1:3 #' try(vec_slice(x, 2) <- 1.5) vec_slice <- function(x, i, ..., error_call = current_env()) { check_dots_empty0(...) .Call(ffi_slice, x, i, environment()) } # Called when `x` has dimensions vec_slice_fallback <- function(x, i) { out <- unclass(vec_proxy(x)) obj_check_vector(out) d <- vec_dim_n(out) if (d == 2) { out <- out[i, , drop = FALSE] } else { miss_args <- rep(list(missing_arg()), d - 1) out <- eval_bare(expr(out[i, !!!miss_args, drop = FALSE])) } vec_restore(out, x) } vec_slice_fallback_integer64 <- function(x, i) { d <- vec_dim_n(x) if (d == 2) { out <- x[i, , drop = FALSE] } else { miss_args <- rep(list(missing_arg()), d - 1) out <- eval_bare(expr(x[i, !!!miss_args, drop = FALSE])) } is_na <- is.na(i) if (!any(is_na)) { return(out) } if (d == 2) { out[is_na,] <- bit64::NA_integer64_ } else { eval_bare(expr(out[is_na, !!!miss_args] <- bit64::NA_integer64_)) } out } # bit64::integer64() objects do not have support for `NA_integer_` # slicing. This manually replaces the garbage values that are created # any time a slice with `NA_integer_` is made. vec_slice_dispatch_integer64 <- function(x, i) { out <- x[i] is_na <- is.na(i) if (!any(is_na)) { return(out) } out[is_na] <- bit64::NA_integer64_ out } #' @rdname vec_slice #' @export `vec_slice<-` <- function(x, i, value) { x_arg <- "" # Substitution is `*tmp*` delayedAssign("value_arg", as_label(substitute(value))) .Call(ffi_assign, x, i, value, environment()) } #' @rdname vec_slice #' @export vec_assign <- function(x, i, value, ..., x_arg = "", value_arg = "") { check_dots_empty0(...) .Call(ffi_assign, x, i, value, environment()) } vec_assign_fallback <- function(x, i, value) { # Work around bug in base `[<-` existing <- !is.na(i) i <- vec_slice(i, existing) value <- vec_slice(value, existing) d <- vec_dim_n(x) miss_args <- rep(list(missing_arg()), d - 1) eval_bare(expr(x[i, !!!miss_args] <- value)) x } # `start` is 0-based vec_assign_seq <- function(x, value, start, size, increasing = TRUE) { .Call(ffi_assign_seq, x, value, start, size, increasing) } vec_assign_params <- function(x, i, value, assign_names = FALSE) { .Call(ffi_assign_params, x, i, value, assign_names) } vec_remove <- function(x, i) { vec_slice(x, -vec_as_location(i, length(x), names(x))) } vec_index <- function(x, i, ...) { i <- maybe_missing(i, TRUE) out <- vec_slice(x, i) if (!dots_n(...)) { return(out) } # Need to unclass to avoid infinite recursion through `[` proxy <- vec_data(out) out <- proxy[, ..., drop = FALSE] vec_restore(out, x) } #' Initialize a vector #' #' @param x Template of vector to initialize. #' @param n Desired size of result. #' @export #' @section Dependencies: #' * vec_slice() #' @examples #' vec_init(1:10, 3) #' vec_init(Sys.Date(), 5) #' vec_init(mtcars, 2) vec_init <- function(x, n = 1L) { .Call(ffi_init, x, n, environment()) } # Exposed for testing (`start` is 0-based) vec_slice_seq <- function(x, start, size, increasing = TRUE) { .Call(ffi_slice_seq, x, start, size, increasing) } # Exposed for testing (`i` is 1-based) vec_slice_rep <- function(x, i, n) { .Call(ffi_slice_rep, x, i, n) } # Forwards arguments to `base::rep()` base_vec_rep <- function(x, ...) { i <- rep(seq_len(vec_size(x)), ...) vec_slice(x, i) } # Emulates `length<-` vec_size_assign <- function(x, n) { x_size <- vec_size(x) if (n > x_size) { i <- seq_len(x_size) i <- c(i, vec_init(int(), n - x_size)) } else { i <- seq_len(n) } vec_slice(x, i) } vctrs/R/partial-frame.R0000644000176200001440000000502614276722575014507 0ustar liggesusers#' Partially specify columns of a data frame #' #' @description #' `r lifecycle::badge("experimental")` #' #' This special class can be passed to `.ptype` in order to specify the #' types of only some of the columns in a data frame. #' #' @param ... Attributes of subclass #' @keywords internal #' @export #' @examples #' pf <- partial_frame(x = double()) #' pf #' #' vec_rbind( #' data.frame(x = 1L, y = "a"), #' data.frame(x = FALSE, z = 10), #' .ptype = partial_frame(x = double(), a = character()) #' ) partial_frame <- function(...) { args <- list2(...) args <- lapply(args, vec_ptype) partial <- new_data_frame(args, n = 0L) new_partial_frame(partial) } new_partial_frame <- function(partial = data.frame(), learned = data.frame()) { stopifnot( is.data.frame(partial), is.data.frame(learned) ) # Fails if `learned` is not compatible with `partial` vec_ptype2(partial, learned) new_partial( partial = partial, learned = learned, class = "vctrs_partial_frame" ) } #' @export vec_ptype_full.vctrs_partial_frame <- function(x, ...) { both <- c(as.list(x$partial), as.list(x$learned)) types <- map_chr(both, vec_ptype_full) needs_indent <- grepl("\n", types) types[needs_indent] <- map(types[needs_indent], function(x) indent(paste0("\n", x), 4)) source <- c(rep(" {partial}", length(x$partial)), rep("", length(x$learned))) names <- paste0(" ", format(names(both))) paste0( "partial_frame<\n", paste0(names, ": ", types, source, collapse = "\n"), "\n>" ) } #' @export vec_ptype_abbr.vctrs_partial_frame <- function(x, ...) { "prtl" } #' @method vec_ptype2 vctrs_partial_frame #' @export vec_ptype2.vctrs_partial_frame <- function(x, y, ...) { UseMethod("vec_ptype2.vctrs_partial_frame") } #' @method vec_ptype2.vctrs_partial_frame vctrs_partial_frame #' @export vec_ptype2.vctrs_partial_frame.vctrs_partial_frame <- function(x, y, ...) { partial <- vec_ptype2(x$partial, y$partial) learned <- vec_ptype2(x$learned, y$learned) new_partial_frame(partial, learned) } #' @method vec_ptype2.vctrs_partial_frame data.frame #' @export vec_ptype2.vctrs_partial_frame.data.frame <- function(x, y, ...) { new_partial_frame(x$partial, vec_ptype2(x$learned, y)) } #' @method vec_ptype2.data.frame vctrs_partial_frame #' @export vec_ptype2.data.frame.vctrs_partial_frame <- function(x, y, ...) { new_partial_frame(y$partial, vec_ptype2(y$learned, x)) } #' @export vec_ptype_finalise.vctrs_partial_frame <- function(x, ...) { out <- x$learned out[names(x$partial)] <- x$partial out } vctrs/R/rep.R0000644000176200001440000000646014511524374012541 0ustar liggesusers#' Repeat a vector #' #' @description #' - `vec_rep()` repeats an entire vector a set number of `times`. #' #' - `vec_rep_each()` repeats each element of a vector a set number of `times`. #' #' - `vec_unrep()` compresses a vector with repeated values. The repeated values #' are returned as a `key` alongside the number of `times` each key is #' repeated. #' #' @details #' Using `vec_unrep()` and `vec_rep_each()` together is similar to using #' [base::rle()] and [base::inverse.rle()]. The following invariant shows #' the relationship between the two functions: #' #' ``` #' compressed <- vec_unrep(x) #' identical(x, vec_rep_each(compressed$key, compressed$times)) #' ``` #' #' There are two main differences between `vec_unrep()` and [base::rle()]: #' #' - `vec_unrep()` treats adjacent missing values as equivalent, while `rle()` #' treats them as different values. #' #' - `vec_unrep()` works along the size of `x`, while `rle()` works along its #' length. This means that `vec_unrep()` works on data frames by compressing #' repeated rows. #' #' @inheritParams rlang::args_error_context #' @inheritParams rlang::args_dots_empty #' @param x A vector. #' @param times #' For `vec_rep()`, a single integer for the number of times to repeat #' the entire vector. #' #' For `vec_rep_each()`, an integer vector of the number of times to repeat #' each element of `x`. `times` will be [recycled][theory-faq-recycling] to #' the size of `x`. #' @param x_arg,times_arg Argument names for errors. #' #' @return #' For `vec_rep()`, a vector the same type as `x` with size #' `vec_size(x) * times`. #' #' For `vec_rep_each()`, a vector the same type as `x` with size #' `sum(vec_recycle(times, vec_size(x)))`. #' #' For `vec_unrep()`, a data frame with two columns, `key` and `times`. `key` #' is a vector with the same type as `x`, and `times` is an integer vector. #' #' @section Dependencies: #' - [vec_slice()] #' #' @name vec-rep #' @examples #' # Repeat the entire vector #' vec_rep(1:2, 3) #' #' # Repeat within each vector #' vec_rep_each(1:2, 3) #' x <- vec_rep_each(1:2, c(3, 4)) #' x #' #' # After using `vec_rep_each()`, you can recover the original vector #' # with `vec_unrep()` #' vec_unrep(x) #' #' df <- data.frame(x = 1:2, y = 3:4) #' #' # `rep()` repeats columns of data frames, and returns lists #' rep(df, each = 2) #' #' # `vec_rep()` and `vec_rep_each()` repeat rows, and return data frames #' vec_rep(df, 2) #' vec_rep_each(df, 2) #' #' # `rle()` treats adjacent missing values as different #' y <- c(1, NA, NA, 2) #' rle(y) #' #' # `vec_unrep()` treats them as equivalent #' vec_unrep(y) NULL #' @rdname vec-rep #' @export vec_rep <- function(x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times") { check_dots_empty0(...) .Call(ffi_vec_rep, x, times, environment()) } #' @rdname vec-rep #' @export vec_rep_each <- function(x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times") { check_dots_empty0(...) .Call(ffi_vec_rep_each, x, times, environment()) } #' @rdname vec-rep #' @export vec_unrep <- function(x) { .Call(ffi_vec_unrep, x, environment()) } vctrs/R/missing.R0000644000176200001440000000337014315060307013412 0ustar liggesusers#' Missing values #' #' @description #' - `vec_detect_missing()` returns a logical vector the same size as `x`. For #' each element of `x`, it returns `TRUE` if the element is missing, and `FALSE` #' otherwise. #' #' - `vec_any_missing()` returns a single `TRUE` or `FALSE` depending on whether #' or not `x` has _any_ missing values. #' #' ## Differences with [is.na()] #' #' Data frame rows are only considered missing if every element in the row is #' missing. Similarly, [record vector][new_rcrd()] elements are only considered #' missing if every field in the record is missing. Put another way, rows with #' _any_ missing values are considered [incomplete][vec_detect_complete()], but #' only rows with _all_ missing values are considered missing. #' #' List elements are only considered missing if they are `NULL`. #' #' @param x A vector #' #' @return #' - `vec_detect_missing()` returns a logical vector the same size as `x`. #' #' - `vec_any_missing()` returns a single `TRUE` or `FALSE`. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @name missing #' @seealso [vec_detect_complete()] #' #' @examples #' x <- c(1, 2, NA, 4, NA) #' #' vec_detect_missing(x) #' vec_any_missing(x) #' #' # Data frames are iterated over rowwise, and only report a row as missing #' # if every element of that row is missing. If a row is only partially #' # missing, it is said to be incomplete, but not missing. #' y <- c("a", "b", NA, "d", "e") #' df <- data_frame(x = x, y = y) #' #' df$missing <- vec_detect_missing(df) #' df$incomplete <- !vec_detect_complete(df) #' df NULL #' @rdname missing #' @export vec_detect_missing <- function(x) { .Call(ffi_vec_detect_missing, x) } #' @rdname missing #' @export vec_any_missing <- function(x) { .Call(ffi_vec_any_missing, x) } vctrs/R/dictionary.R0000644000176200001440000002120214351104227014100 0ustar liggesusers#' Count unique values in a vector #' #' Count the number of unique values in a vector. `vec_count()` has two #' important differences to `table()`: it returns a data frame, and when #' given multiple inputs (as a data frame), it only counts combinations that #' appear in the input. #' #' @param x A vector (including a data frame). #' @param sort One of "count", "key", "location", or "none". #' * "count", the default, puts most frequent values at top #' * "key", orders by the output key column (i.e. unique values of `x`) #' * "location", orders by location where key first seen. This is useful #' if you want to match the counts up to other unique/duplicated functions. #' * "none", leaves unordered. This is not guaranteed to produce the same #' ordering across R sessions, but is the fastest method. #' @return A data frame with columns `key` (same type as `x`) and #' `count` (an integer vector). #' #' @section Dependencies: #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_order()] #' @export #' @examples #' vec_count(mtcars$vs) #' vec_count(iris$Species) #' #' # If you count a data frame you'll get a data frame #' # column in the output #' str(vec_count(mtcars[c("vs", "am")])) #' #' # Sorting --------------------------------------- #' #' x <- letters[rpois(100, 6)] #' # default is to sort by frequency #' vec_count(x) #' #' # by can sort by key #' vec_count(x, sort = "key") #' #' # or location of first value #' vec_count(x, sort = "location") #' head(x) #' #' # or not at all #' vec_count(x, sort = "none") vec_count <- function(x, sort = c("count", "key", "location", "none")) { sort <- arg_match0(sort, c("count", "key", "location", "none")) # Returns info pair giving index of first occurrence value and count info <- vec_count_impl(x) # Sorting based on rearranging `info` if (sort == "location") { loc <- vec_order(info$loc) info <- vec_slice(info, loc) } else if (sort == "count") { # Order by descending count, but ascending original location. # This retains stable ordering in case of ties in the `count`. # Need `vec_order_radix()` to handle different `direction`s. loc <- vec_order_radix(info[c("count", "loc")], direction = c("desc", "asc")) info <- vec_slice(info, loc) } out <- data_frame( key = vec_slice(x, info$loc), count = info$count ) # Sorting based on rearranging `out` if (sort == "key") { loc <- vec_order(out$key) out <- vec_slice(out, loc) } out } vec_count_impl <- function(x) { .Call(vctrs_count, x) } # Duplicates -------------------------------------------------------------- #' Find duplicated values #' #' * `vec_duplicate_any()`: detects the presence of duplicated values, #' similar to [anyDuplicated()]. #' * `vec_duplicate_detect()`: returns a logical vector describing if each #' element of the vector is duplicated elsewhere. Unlike [duplicated()], it #' reports all duplicated values, not just the second and subsequent #' repetitions. #' * `vec_duplicate_id()`: returns an integer vector giving the location of #' the first occurrence of the value. #' #' @section Missing values: #' In most cases, missing values are not considered to be equal, i.e. #' `NA == NA` is not `TRUE`. This behaviour would be unappealing here, #' so these functions consider all `NAs` to be equal. (Similarly, #' all `NaN` are also considered to be equal.) #' #' @param x A vector (including a data frame). #' @return #' * `vec_duplicate_any()`: a logical vector of length 1. #' * `vec_duplicate_detect()`: a logical vector the same length as `x`. #' * `vec_duplicate_id()`: an integer vector the same length as `x`. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @seealso [vec_unique()] for functions that work with the dual of duplicated #' values: unique values. #' @name vec_duplicate #' @examples #' vec_duplicate_any(1:10) #' vec_duplicate_any(c(1, 1:10)) #' #' x <- c(10, 10, 20, 30, 30, 40) #' vec_duplicate_detect(x) #' # Note that `duplicated()` doesn't consider the first instance to #' # be a duplicate #' duplicated(x) #' #' # Identify elements of a vector by the location of the first element that #' # they're equal to: #' vec_duplicate_id(x) #' # Location of the unique values: #' vec_unique_loc(x) #' # Equivalent to `duplicated()`: #' vec_duplicate_id(x) == seq_along(x) NULL #' @rdname vec_duplicate #' @export vec_duplicate_any <- function(x) { .Call(vctrs_duplicated_any, x) } #' @rdname vec_duplicate #' @export vec_duplicate_detect <- function(x) { .Call(vctrs_duplicated, x) } #' @rdname vec_duplicate #' @export vec_duplicate_id <- function(x) { .Call(vctrs_id, x) } # Unique values ----------------------------------------------------------- #' Find and count unique values #' #' * `vec_unique()`: the unique values. Equivalent to [unique()]. #' * `vec_unique_loc()`: the locations of the unique values. #' * `vec_unique_count()`: the number of unique values. #' #' @inherit vec_duplicate sections #' @param x A vector (including a data frame). #' @return #' * `vec_unique()`: a vector the same type as `x` containing only unique #' values. #' * `vec_unique_loc()`: an integer vector, giving locations of unique values. #' * `vec_unique_count()`: an integer vector of length 1, giving the #' number of unique values. #' @seealso [vec_duplicate] for functions that work with the dual of #' unique values: duplicated values. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @export #' @examples #' x <- rpois(100, 8) #' vec_unique(x) #' vec_unique_loc(x) #' vec_unique_count(x) #' #' # `vec_unique()` returns values in the order that encounters them #' # use sort = "location" to match to the result of `vec_count()` #' head(vec_unique(x)) #' head(vec_count(x, sort = "location")) #' #' # Normally missing values are not considered to be equal #' NA == NA #' #' # But they are for the purposes of considering uniqueness #' vec_unique(c(NA, NA, NA, NA, 1, 2, 1)) vec_unique <- function(x) { vec_slice(x, vec_unique_loc(x)) } #' @rdname vec_unique #' @export vec_unique_loc <- function(x) { .Call(vctrs_unique_loc, x) } #' @rdname vec_unique #' @export vec_unique_count <- function(x) { .Call(vctrs_n_distinct, x) } # Matching ---------------------------------------------------------------- #' Find matching observations across vectors #' #' `vec_in()` returns a logical vector based on whether `needle` is found in #' haystack. `vec_match()` returns an integer vector giving location of #' `needle` in `haystack`, or `NA` if it's not found. #' #' `vec_in()` is equivalent to [%in%]; `vec_match()` is equivalent to `match()`. #' #' @section Missing values: #' In most cases places in R, missing values are not considered to be equal, #' i.e. `NA == NA` is not `TRUE`. The exception is in matching functions #' like [match()] and [merge()], where an `NA` will match another `NA`. #' By `vec_match()` and `vec_in()` will match `NA`s; but you can control #' this behaviour with the `na_equal` argument. #' #' @param needles,haystack Vector of `needles` to search for in vector haystack. #' `haystack` should usually be unique; if not `vec_match()` will only #' return the location of the first match. #' #' `needles` and `haystack` are coerced to the same type prior to #' comparison. #' @inheritParams rlang::args_dots_empty #' @param na_equal If `TRUE`, missing values in `needles` can be #' matched to missing values in `haystack`. If `FALSE`, they #' propagate, missing values in `needles` are represented as `NA` in #' the return value. #' @param needles_arg,haystack_arg Argument tags for `needles` and #' `haystack` used in error messages. #' @return A vector the same length as `needles`. `vec_in()` returns a #' logical vector; `vec_match()` returns an integer vector. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_proxy_equal()] #' #' @export #' @examples #' hadley <- strsplit("hadley", "")[[1]] #' vec_match(hadley, letters) #' #' vowels <- c("a", "e", "i", "o", "u") #' vec_match(hadley, vowels) #' vec_in(hadley, vowels) #' #' # Only the first index of duplicates is returned #' vec_match(c("a", "b"), c("a", "b", "a", "b")) vec_match <- function(needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "") { check_dots_empty0(...) .Call(vctrs_match, needles, haystack, na_equal, environment()) } #' @export #' @rdname vec_match vec_in <- function(needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "") { check_dots_empty0(...) .Call(vctrs_in, needles, haystack, na_equal, environment()) } vctrs/R/equal.R0000644000176200001440000000453614376223321013061 0ustar liggesusers#' Equality proxy #' #' Returns a proxy object (i.e. an atomic vector or data frame of atomic #' vectors). For [vctr]s, this determines the behaviour of `==` and #' `!=` (via [vec_equal()]); [unique()], [duplicated()] (via #' [vec_unique()] and [vec_duplicate_detect()]); [is.na()] and [anyNA()] #' (via [vec_detect_missing()]). #' #' The default method calls [vec_proxy()], as the default underlying #' vector data should be equal-able in most cases. If your class is #' not equal-able, provide a `vec_proxy_equal()` method that throws an #' error. #' #' @section Data frames: #' If the proxy for `x` is a data frame, the proxy function is automatically #' recursively applied on all columns as well. After applying the proxy #' recursively, if there are any data frame columns present in the proxy, then #' they are unpacked. Finally, if the resulting data frame only has a single #' column, then it is unwrapped and a vector is returned as the proxy. #' #' @param x A vector x. #' @inheritParams rlang::args_dots_empty #' #' @return A 1d atomic vector or a data frame. #' @keywords internal #' #' @section Dependencies: #' - [vec_proxy()] called by default #' #' @export vec_proxy_equal <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_proxy_equal, x)) UseMethod("vec_proxy_equal") } #' @export vec_proxy_equal.default <- function(x, ...) { stop_native_implementation("vec_proxy_equal.default") } #' Equality #' #' `vec_equal()` tests if two vectors are equal. #' #' @inheritParams vec_compare #' @return A logical vector the same size as the common size of `x` and `y`. #' Will only contain `NA`s if `na_equal` is `FALSE`. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_recycle_common()] #' - [vec_proxy_equal()] #' #' @seealso [vec_detect_missing()] #' #' @export #' @examples #' vec_equal(c(TRUE, FALSE, NA), FALSE) #' vec_equal(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) #' #' vec_equal(5, 1:10) #' vec_equal("d", letters[1:10]) #' #' df <- data.frame(x = c(1, 1, 2, 1), y = c(1, 2, 1, NA)) #' vec_equal(df, data.frame(x = 1, y = 2)) vec_equal <- function(x, y, na_equal = FALSE, .ptype = NULL) { check_bool(na_equal) args <- vec_recycle_common(x, y) args <- vec_cast_common_params(!!!args, .to = .ptype) .Call(vctrs_equal, args[[1]], args[[2]], na_equal) } obj_equal <- function(x, y) { .Call(vctrs_equal_object, x, y) } vctrs/R/rank.R0000644000176200001440000000742414315060307012700 0ustar liggesusers#' Compute ranks #' #' `vec_rank()` computes the sample ranks of a vector. For data frames, ranks #' are computed along the rows, using all columns after the first to break #' ties. #' #' @details #' Unlike [base::rank()], when `incomplete = "rank"` all missing values are #' given the same rank, rather than an increasing sequence of ranks. When #' `nan_distinct = FALSE`, `NaN` values are given the same rank as `NA`, #' otherwise they are given a rank that differentiates them from `NA`. #' #' Like [vec_order_radix()], ordering is done in the C-locale. This can affect #' the ranks of character vectors, especially regarding how uppercase and #' lowercase letters are ranked. See the documentation of [vec_order_radix()] #' for more information. #' #' @inheritParams order-radix #' @inheritParams rlang::args_dots_empty #' #' @param ties Ranking of duplicate values. #' - `"min"`: Use the current rank for all duplicates. The next non-duplicate #' value will have a rank incremented by the number of duplicates present. #' #' - `"max"`: Use the current rank `+ n_duplicates - 1` for all duplicates. #' The next non-duplicate value will have a rank incremented by the number of #' duplicates present. #' #' - `"sequential"`: Use an increasing sequence of ranks starting at the #' current rank, applied to duplicates in order of appearance. #' #' - `"dense"`: Use the current rank for all duplicates. The next #' non-duplicate value will have a rank incremented by `1`, effectively #' removing any gaps in the ranking. #' #' @param incomplete Ranking of missing and [incomplete][vec_detect_complete] #' observations. #' #' - `"rank"`: Rank incomplete observations normally. Missing values within #' incomplete observations will be affected by `na_value` and `nan_distinct`. #' #' - `"na"`: Don't rank incomplete observations at all. Instead, they are #' given a rank of `NA`. In this case, `na_value` and `nan_distinct` have #' no effect. #' #' @section Dependencies: #' #' - [vec_order_radix()] #' - [vec_slice()] #' #' @export #' @examples #' x <- c(5L, 6L, 3L, 3L, 5L, 3L) #' #' vec_rank(x, ties = "min") #' vec_rank(x, ties = "max") #' #' # Sequential ranks use an increasing sequence for duplicates #' vec_rank(x, ties = "sequential") #' #' # Dense ranks remove gaps between distinct values, #' # even if there are duplicates #' vec_rank(x, ties = "dense") #' #' y <- c(NA, x, NA, NaN) #' #' # Incomplete values match other incomplete values by default, and their #' # overall position can be adjusted with `na_value` #' vec_rank(y, na_value = "largest") #' vec_rank(y, na_value = "smallest") #' #' # NaN can be ranked separately from NA if required #' vec_rank(y, nan_distinct = TRUE) #' #' # Rank in descending order. Since missing values are the largest value, #' # they are given a rank of `1` when ranking in descending order. #' vec_rank(y, direction = "desc", na_value = "largest") #' #' # Give incomplete values a rank of `NA` by setting `incomplete = "na"` #' vec_rank(y, incomplete = "na") #' #' # Can also rank data frames, using columns after the first to break ties #' z <- c(2L, 3L, 4L, 4L, 5L, 2L) #' df <- data_frame(x = x, z = z) #' df #' #' vec_rank(df) vec_rank <- function(x, ..., ties = c("min", "max", "sequential", "dense"), incomplete = c("rank", "na"), direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL) { check_dots_empty0(...) ties <- arg_match0(ties, c("min", "max", "sequential", "dense"), "ties") incomplete <- arg_match0(incomplete, c("rank", "na"), "incomplete") .Call( vctrs_rank, x, ties, incomplete, direction, na_value, nan_distinct, chr_proxy_collate ) } vctrs/R/dim.R0000644000176200001440000000311314276722575012527 0ustar liggesusers#' Actual vector dimensions #' #' @description #' * `vec_dim_n()` gives the dimensionality (i.e. number of dimensions) #' * `vec_dim()` returns the size of each dimension #' #' These functions access the raw `"dim"` attribute of the object #' and do not dispatch over the [dim()] generic. #' #' @details #' Unlike base R, we treat vectors with `NULL` dimensions as 1d. This #' simplifies the type system by eliding a special case. Compared to the base R #' equivalent, `vec_dim()` returns [length()], not `NULL`, when `x` is 1d. #' #' @seealso #' [dim2()], a variant of [dim()] that returns [length()] if an object #' doesn't have dimensions. #' #' @param x A vector #' @noRd #' @examples #' # Compared to base R #' x <- 1:5 #' dim(x) #' vec_dim(x) NULL # FIXME: Should `vec_dim()` return the size instead of the length? vec_dim <- function(x) { .Call(vctrs_dim, x) } vec_dim_n <- function(x) { .Call(vctrs_dim_n, x) } has_dim <- function(x) { .Call(vctrs_has_dim, x) } #' Perceived vector dimensions #' #' @description #' `dim2()` is a variant of [dim()] that returns [vec_size()] if an object #' doesn't have dimensions. #' #' @details #' Unlike base R, we treat vectors with `NULL` dimensions as 1d. This #' simplifies the type system by eliding a special case. Compared to the base R #' equivalent, `dim2()` returns [length()], not `NULL`, when `x` is 1d. #' #' @seealso #' [vec_dim()], a variant that never dispatches over the [dim()] generic. #' #' @param x A vector #' @noRd #' @examples #' # Compared to base R #' x <- 1:5 #' dim(x) #' vec_dim(x) dim2 <- function(x) { dim(x) %||% length(x) } vctrs/R/vctrs-package.R0000644000176200001440000000107114401435202014463 0ustar liggesusers#' @description #' `r lifecycle::badge("maturing")` #' #' Defines new notions of prototype and size that are #' used to provide tools for consistent and well-founded type-coercion #' and size-recycling, and are in turn connected to ideas of type- and #' size-stability useful for analysing function interfaces. #' #' @keywords internal #' @import rlang #' @useDynLib vctrs, .registration = TRUE "_PACKAGE" release_extra_revdeps <- function() { # Extra revdeps to run before release. # Recognized by `usethis::use_release_issue()`. c("dplyr", "tidyr", "purrr") } vctrs/R/type-explore.R0000644000176200001440000000130613532250523014374 0ustar liggesuserscoerces_to <- function(x, y, using = "strict") { type_max <- switch(using, strict = vec_ptype2, base_c = c, base_unlist = function(x, y) unlist(list(x, y)), base_modify = function(x, y) `[<-`(x, 2, value = y) ) tryCatch({ type <- suppressWarnings(type_max(x, y)) vec_ptype_full(type) }, error = function(e) { NA_character_ }) } maxtype_mat <- function(types, using = "strict") { names(types) <- map_chr(types, function(x) vec_ptype_full(vec_ptype(x))) grid <- expand.grid(x = types, y = types) grid$max <- map2_chr(grid$x, grid$y, coerces_to, using = using) matrix( grid$max, nrow = length(types), dimnames = list(names(types), names(types)) ) } vctrs/R/complete.R0000644000176200001440000000300514315060307013544 0ustar liggesusers#' Complete #' #' @description #' `vec_detect_complete()` detects "complete" observations. An observation is #' considered complete if it is non-missing. For most vectors, this implies that #' `vec_detect_complete(x) == !vec_detect_missing(x)`. #' #' For data frames and matrices, a row is only considered complete if all #' elements of that row are non-missing. To compare, `!vec_detect_missing(x)` #' detects rows that are partially complete (they have at least one non-missing #' value). #' #' @details #' A [record][new_rcrd] type vector is similar to a data frame, and is only #' considered complete if all fields are non-missing. #' #' @param x A vector #' #' @return #' A logical vector with the same size as `x`. #' #' @seealso [stats::complete.cases()] #' @export #' @examples #' x <- c(1, 2, NA, 4, NA) #' #' # For most vectors, this is identical to `!vec_detect_missing(x)` #' vec_detect_complete(x) #' !vec_detect_missing(x) #' #' df <- data_frame( #' x = x, #' y = c("a", "b", NA, "d", "e") #' ) #' #' # This returns `TRUE` where all elements of the row are non-missing. #' # Compare that with `!vec_detect_missing()`, which detects rows that have at #' # least one non-missing value. #' df2 <- df #' df2$all_non_missing <- vec_detect_complete(df) #' df2$any_non_missing <- !vec_detect_missing(df) #' df2 vec_detect_complete <- function(x) { .Call(vctrs_detect_complete, x) } vec_slice_complete <- function(x) { .Call(vctrs_slice_complete, x) } vec_locate_complete <- function(x) { .Call(vctrs_locate_complete, x) } vctrs/R/print-str.R0000644000176200001440000000656214276722575013733 0ustar liggesusers# print ------------------------------------------------------------------- #' `print()` and `str()` generics. #' #' These are constructed to be more easily extensible since you can override #' the `_header()`, `_data()` or `_footer()` components individually. The #' default methods are built on top of `format()`. #' #' @param x A vector #' @param ... Additional arguments passed on to methods. See [print()] and #' [str()] for commonly used options #' @keywords internal #' @export obj_print <- function(x, ...) { obj_print_header(x, ...) obj_print_data(x, ...) obj_print_footer(x, ...) invisible(x) } #' @export #' @rdname obj_print obj_print_header <- function(x, ...) { UseMethod("obj_print_header") } #' @export obj_print_header.default <- function(x, ...) { cat_line("<", vec_ptype_full(x), "[", vec_size(x), "]>") invisible(x) } #' @export #' @rdname obj_print obj_print_data <- function(x, ...) { UseMethod("obj_print_data") } #' @export obj_print_data.default <- function(x, ...) { if (length(x) == 0) return(invisible(x)) out <- stats::setNames(format(x), names(x)) print(out, quote = FALSE) invisible(x) } #' @export #' @rdname obj_print obj_print_footer <- function(x, ...) { UseMethod("obj_print_footer") } #' @export obj_print_footer.default <- function(x, ...) { invisible(x) } # str --------------------------------------------------------------------- #' @export #' @rdname obj_print obj_str <- function(x, ...) { obj_str_header(x, ...) obj_str_data(x, ...) obj_str_footer(x, ...) } #' @export #' @rdname obj_print obj_str_header <- function(x, ...) { UseMethod("obj_str_header") } #' @export obj_str_header.default <- function(x, ...) { invisible(x) } #' @export #' @rdname obj_print obj_str_data <- function(x, ...) { UseMethod("obj_str_data") } #' @export obj_str_data.default <- function(x, ...) { if (is.list(x)) { obj_str_recursive(x, ...) } else { obj_str_leaf(x, ...) } } obj_str_recursive <- function(x, ..., indent.str = "", nest.lev = 0) { if (nest.lev != 0L) cat(" ") cat_line(glue::glue("{vec_ptype_abbr(x)} [1:{vec_size(x)}] ")) utils::str( vec_data(x), no.list = TRUE, ..., nest.lev = nest.lev + 1L, indent.str = indent.str ) } obj_str_leaf <- function(x, ..., indent.str = "", width = getOption("width")) { width <- width - nchar(indent.str) - 2 # Avoid spending too much time formatting elements that won't see length <- ceiling(width / 2) if (length(x) > length) { out <- x[seq2(1, length)] } else { out <- x } title <- glue::glue(" {vec_ptype_abbr(x)} [1:{length(x)}] ") cat_line(inline_list(title, format(out), width = width)) invisible(x) } #' @export #' @rdname obj_print obj_str_footer <- function(x, ...) { UseMethod("obj_str_footer") } #' @export obj_str_footer.default <- function(x, ..., indent.str = "", nest.lev = 0) { attr <- attributes(x) attr[["class"]] <- NULL attr[["names"]] <- NULL if (length(attr) == 0) return(invisible(x)) if (!is.list(x)) { indent.str <- paste0(" ", indent.str) } utils::str( attr, no.list = TRUE, ..., comp.str = "@ ", nest.lev = nest.lev + 1L, indent.str = indent.str ) invisible(x) } vctrs/R/type-factor.R0000644000176200001440000001370314362266120014202 0ustar liggesusers#' Factor/ordered factor S3 class #' #' A [factor] is an integer with attribute `levels`, a character vector. There #' should be one level for each integer between 1 and `max(x)`. #' An [ordered] factor has the same properties as a factor, but possesses #' an extra class that marks levels as having a total ordering. #' #' These functions help the base factor and ordered factor classes fit in to #' the vctrs type system by providing constructors, coercion functions, #' and casting functions. `new_factor()` and `new_ordered()` are low-level #' constructors - they only check that types, but not values, are valid, so #' are for expert use only. #' #' @param x Integer values which index in to `levels`. #' @param levels Character vector of labels. #' @param ...,class Used to for subclasses. #' @keywords internal #' @export new_factor <- function(x = integer(), levels = character(), ..., class = character()) { stopifnot(is.integer(x)) stopifnot(is.character(levels)) structure( x, levels = levels, ..., class = c(class, "factor") ) } #' @export #' @rdname new_factor new_ordered <- function(x = integer(), levels = character()) { new_factor(x = x, levels = levels, class = "ordered") } #' @export vec_proxy.factor <- function(x, ...) { x } #' @export vec_proxy.ordered <- function(x, ...) { x } #' @export vec_restore.factor <- function(x, to, ...) { NextMethod() } #' @export vec_restore.ordered <- function(x, to, ...) { NextMethod() } # Print ------------------------------------------------------------------- #' @export vec_ptype_full.factor <- function(x, ...) { paste0("factor<", hash_label(levels(x)), ">", vec_ptype_shape(x)) } #' @export vec_ptype_abbr.factor <- function(x, ...) { "fct" } #' @export vec_ptype_full.ordered <- function(x, ...) { paste0("ordered<", hash_label(levels(x)), ">", vec_ptype_shape(x)) } #' @export vec_ptype_abbr.ordered <- function(x, ...) { "ord" } # Coerce ------------------------------------------------------------------ #' @rdname new_factor #' @export vec_ptype2.factor #' @method vec_ptype2 factor #' @export vec_ptype2.factor <- function(x, y, ...) { UseMethod("vec_ptype2.factor") } #' @export vec_ptype2.factor.factor <- function(x, y, ...) { stop_native_implementation("vec_ptype2.factor.factor") } #' @export vec_ptype2.character.factor <- function(x, y, ...) { stop_native_implementation("vec_ptype2.character.factor") } #' @export vec_ptype2.factor.character <- function(x, y, ...) { stop_native_implementation("vec_ptype2.factor.character") } #' @rdname new_factor #' @export vec_ptype2.ordered #' @method vec_ptype2 ordered #' @export vec_ptype2.ordered <- function(x, y, ...) { UseMethod("vec_ptype2.ordered") } #' @export vec_ptype2.ordered.ordered <- function(x, y, ...) { stop_native_implementation("vec_ptype2.ordered.ordered") } #' @export vec_ptype2.ordered.character <- function(x, y, ...) { stop_native_implementation("vec_ptype2.ordered.character") } #' @export vec_ptype2.character.ordered <- function(x, y, ...) { stop_native_implementation("vec_ptype2.character.ordered") } #' @export vec_ptype2.ordered.factor <- function(x, y, ...) { vec_default_ptype2(x, y, ...) } #' @export vec_ptype2.factor.ordered <- function(x, y, ...) { vec_default_ptype2(x, y, ...) } # Cast -------------------------------------------------------------------- #' @rdname new_factor #' @export vec_cast.factor #' @method vec_cast factor #' @export vec_cast.factor <- function(x, to, ...) { UseMethod("vec_cast.factor") } fct_cast <- function(x, to, ..., call = caller_env()) { fct_cast_impl(x, to, ..., ordered = FALSE, call = call) } fct_cast_impl <- function(x, to, ..., x_arg = "", to_arg = "", ordered = FALSE, call = caller_env()) { if (length(levels(to)) == 0L) { levels <- levels(x) if (is.null(levels)) { exclude <- NA levels <- unique(x) } else { exclude <- NULL } factor( as.character(x), levels = levels, ordered = ordered, exclude = exclude ) } else { lossy <- !(x %in% levels(to) | is.na(x)) out <- factor( x, levels = levels(to), ordered = ordered, exclude = NULL ) maybe_lossy_cast( out, x, to, lossy, loss_type = "generality", x_arg = x_arg, to_arg = to_arg, call = call ) } } #' @export vec_cast.factor.factor <- function(x, to, ...) { fct_cast(x, to, ...) } #' @export vec_cast.factor.character <-function(x, to, ...) { fct_cast(x, to, ...) } #' @export vec_cast.character.factor <- function(x, to, ...) { stop_native_implementation("vec_cast.character.factor") } #' @rdname new_factor #' @export vec_cast.ordered #' @method vec_cast ordered #' @export vec_cast.ordered <- function(x, to, ...) { UseMethod("vec_cast.ordered") } ord_cast <- function(x, to, ..., call = caller_env()) { fct_cast_impl(x, to, ..., ordered = TRUE, call = call) } #' @export vec_cast.ordered.ordered <- function(x, to, ...) { ord_cast(x, to, ...) } #' @export vec_cast.ordered.character <-function(x, to, ...) { ord_cast(x, to, ...) } #' @export vec_cast.character.ordered <- function(x, to, ...) { stop_native_implementation("vec_cast.character.ordered") } # Math and arithmetic ----------------------------------------------------- #' @export vec_math.factor <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_arith.factor <- function(op, x, y, ...) { stop_unsupported(x, op) } # Helpers ----------------------------------------------------------------- hash_label <- function(x, length = 5) { if (length(x) == 0) { "" } else { # Can't use obj_hash() because it hashes the string pointers # for performance, so the values in the test change each time substr(rlang::hash(x), 1, length) } } levels_union <- function(x, y) { union(levels(x), levels(y)) } vctrs/R/faq-internal.R0000644000176200001440000000026314276722575014342 0ustar liggesusers#' Internal FAQ - `vec_ptype2()`, `NULL`, and unspecified vectors #' #' @includeRmd man/faq/internal/ptype2-identity.Rmd description #' #' @name internal-faq-ptype2-identity NULL vctrs/R/interval.R0000644000176200001440000002355014315060307013567 0ustar liggesusers#' Group overlapping intervals #' #' @description #' These functions are used to group together any overlaps that are present #' within a set of vector intervals. When multiple overlapping intervals are #' grouped together they result in a wider interval containing the smallest #' `start` and the largest `end` of the overlaps. #' #' - `vec_interval_groups()` merges all overlapping intervals found within #' `start` and `end`. The resulting intervals are known as the interval #' "groups". #' #' - `vec_interval_locate_groups()` returns a two column data frame with a `key` #' column containing the result of `vec_interval_groups()` and a `loc` #' list-column containing integer vectors that map each interval in `start` and #' `end` to the group that it falls in. #' #' These functions require that `start < end`. Additionally, intervals are #' treated as if they are right-open, i.e. `[start, end)`. #' #' @section Assumptions: #' For performance and simplicity, these functions make a few assumptions about #' `start` and `end` that are not checked internally: #' #' - `start < end` must be true, with an exception for missing intervals. #' #' - If the i-th observation of `start` is missing, then the i-th observation #' of `end` must also be missing. #' #' - Each observation of `start` and `end` must be either #' [complete][vec_detect_complete] or [missing][vec_detect_missing]. Partially #' complete values such as `start = data_frame(x = 1, y = NA)` are not allowed. #' #' If any of these assumptions are invalid, then the result is undefined. #' #' Developer note: These assumptions stem from the idea that if these functions #' were in ivs itself, then we could safely make these assumptions in the C #' code, because the `iv()` helper would assert them for us ahead of time. #' Trying to re-assert these checks in the C code here is wasteful and makes the #' code more complex. #' #' @inheritParams rlang::args_dots_empty #' #' @param start,end #' A pair of vectors representing the starts and ends of the intervals. #' #' It is required that `start < end`. #' #' `start` and `end` will be cast to their common type, and must have the same #' size. #' #' @param abutting #' A single logical controlling whether or not abutting intervals should be #' grouped together. If `TRUE`, `[a, b)` and `[b, c)` will be grouped. #' #' @param missing #' Handling of missing intervals. #' #' - `"group"`: Group all missing intervals together. #' #' - `"drop"`: Drop all missing intervals from the result. #' #' @return #' - `vec_interval_groups()` returns a data frame with two columns, `start` and #' `end`, which contain vectors matching the types of `start` and `end`. #' #' - `vec_interval_locate_groups()` returns a data frame with two columns, `key` #' and `loc`. `key` contains the result of `vec_interval_groups()` and `loc` is #' a list of integer vectors. #' #' @name interval-groups #' #' @examples #' bounds <- data_frame( #' start = c(1, 2, NA, 5, NA, 9, 12), #' end = c(5, 3, NA, 6, NA, 12, 14) #' ) #' bounds #' #' # Group overlapping intervals together #' vec_interval_groups(bounds$start, bounds$end) #' #' # You can choose not to group abutting intervals if you want to retain #' # those boundaries #' vec_interval_groups(bounds$start, bounds$end, abutting = FALSE) #' #' # You can also choose to drop all missing intervals if you don't consider #' # them part of the result #' vec_interval_groups(bounds$start, bounds$end, missing = "drop") #' #' # You can also locate the groups, which allows you to map each original #' # interval to its corresponding group #' vec_interval_locate_groups(bounds$start, bounds$end) #' #' @noRd vec_interval_groups <- function(start, end, ..., abutting = TRUE, missing = "group") { check_dots_empty0(...) .Call(ffi_interval_groups, start, end, abutting, missing) } #' @noRd #' @rdname interval-groups vec_interval_locate_groups <- function(start, end, ..., abutting = TRUE, missing = "group") { check_dots_empty0(...) .Call(ffi_interval_locate_groups, start, end, abutting, missing) } # ------------------------------------------------------------------------------ #' Interval complement #' #' @description #' `vec_interval_complement()` takes the complement of the intervals defined by #' `start` and `end`. The complement can also be thought of as the "gaps" #' between the intervals. By default, the minimum of `start` and the maximum of #' `end` define the bounds to take the complement over, but this can be adjusted #' with `lower` and `upper`. Missing intervals are always dropped from the #' complement. #' #' These functions require that `start < end`. Additionally, intervals are #' treated as if they are right-open, i.e. `[start, end)`. #' #' @inheritSection interval-groups Assumptions #' #' @inheritParams rlang::args_dots_empty #' #' @param start,end #' A pair of vectors representing the starts and ends of the intervals. #' #' It is required that `start < end`. #' #' `start` and `end` will be cast to their common type, and must have the same #' size. #' #' @param lower,upper #' Bounds for the universe over which to compute the complement. These should #' be singular values with the same type as `start` and `end`. #' #' @return #' A two column data frame with a `start` column containing a vector of the #' same type as `start` and an `end` column containing a vector of the same #' type as `end`. #' #' @examples #' x <- data_frame( #' start = c(10, 0, NA, 3, -5, NA), #' end = c(12, 5, NA, 6, -2, NA) #' ) #' x #' #' # The complement contains any values from `[-5, 12)` that aren't represented #' # in these intervals. Missing intervals are dropped. #' vec_interval_complement(x$start, x$end) #' #' # Expand out the "universe" of possible values #' vec_interval_complement(x$start, x$end, lower = -Inf) #' vec_interval_complement(x$start, x$end, lower = -Inf, upper = Inf) #' #' @noRd vec_interval_complement <- function(start, end, ..., lower = NULL, upper = NULL) { check_dots_empty0(...) .Call(ffi_interval_complement, start, end, lower, upper) } # ------------------------------------------------------------------------------ #' Interval containers #' #' @description #' `vec_interval_locate_containers()` locates interval _containers_. Containers #' are defined as the widest intervals that aren't contained by any other #' interval. The returned locations will arrange the containers in ascending #' order. #' #' For example, with the following vector of intervals: `[1, 5), [2, 6), [3, 4), #' [5, 9), [5, 8)`, the containers are: `[1, 5), [2, 6), [5, 9)`. The intervals #' `[3, 4)` and `[5, 8)` aren't containers because they are completely contained #' within at least one other interval. Note that containers can partially #' overlap, i.e. `[1, 5)` and `[2, 6)`, and multiple containers can contain the #' same intervals, i.e. both `[1, 5)` and `[2, 6)` contain `[3, 4)`. #' #' Missing intervals are placed into their own container at the end, separate #' from all other intervals. #' #' These functions require that `start < end`. Additionally, intervals are #' treated as if they are right-open, i.e. `[start, end)`. #' #' @inheritSection interval-groups Assumptions #' #' @param start,end #' A pair of vectors representing the starts and ends of the intervals. #' #' It is required that `start < end`. #' #' `start` and `end` will be cast to their common type, and must have the same #' size. #' #' @return #' An integer vector that represents the locations of the containers in `start` #' and `end`. #' #' @examples #' x <- data_frame( #' start = c(10, 0, NA, 3, 2, 2, NA, 11), #' end = c(12, 5, NA, 5, 6, 6, NA, 12) #' ) #' x #' #' loc <- vec_interval_locate_containers(x$start, x$end) #' loc #' #' vec_slice(x, loc) #' #' @noRd vec_interval_locate_containers <- function(start, end) { .Call(ffi_interval_locate_containers, start, end) } # ------------------------------------------------------------------------------ # Experimental shims of interval functions used by other packages (mainly, ivs). # # This gives us the freedom to experiment with the signature of these functions # while being backwards compatible with ivs in the meantime. # # We can remove these after: # - The interval functions are exported # - ivs updates to use them directly # - A short deprecation period goes by that allows users time to update their # version of ivs exp_vec_interval_groups <- function(start, end, ..., abutting = TRUE, missing = "group") { vec_interval_groups( start = start, end = end, ..., abutting = abutting, missing = missing ) } exp_vec_interval_locate_groups <- function(start, end, ..., abutting = TRUE, missing = "group") { vec_interval_locate_groups( start = start, end = end, ..., abutting = abutting, missing = missing ) } exp_vec_interval_complement <- function(start, end, ..., lower = NULL, upper = NULL) { vec_interval_complement( start = start, end = end, ..., lower = lower, upper = upper ) } exp_vec_interval_locate_containers <- function(start, end) { vec_interval_locate_containers( start = start, end = end ) } vctrs/R/runs.R0000644000176200001440000000370114363767360012746 0ustar liggesusers#' Runs #' #' @description #' - `vec_identify_runs()` returns a vector of identifiers for the elements of #' `x` that indicate which run of repeated values they fall in. The number of #' runs is also returned as an attribute, `n`. #' #' - `vec_run_sizes()` returns an integer vector corresponding to the size of #' each run. This is identical to the `times` column from `vec_unrep()`, but #' is faster if you don't need the run keys. #' #' - [vec_unrep()] is a generalized [base::rle()]. It is documented alongside #' the "repeat" functions of [vec_rep()] and [vec_rep_each()]; look there for #' more information. #' #' @details #' Unlike [base::rle()], adjacent missing values are considered identical when #' constructing runs. For example, `vec_identify_runs(c(NA, NA))` will return #' `c(1, 1)`, not `c(1, 2)`. #' #' @param x A vector. #' #' @return #' - For `vec_identify_runs()`, an integer vector with the same size as `x`. A #' scalar integer attribute, `n`, is attached. #' #' - For `vec_run_sizes()`, an integer vector with size equal to the number of #' runs in `x`. #' #' @seealso #' [vec_unrep()] for a generalized [base::rle()]. #' #' @name runs #' @examples #' x <- c("a", "z", "z", "c", "a", "a") #' #' vec_identify_runs(x) #' vec_run_sizes(x) #' vec_unrep(x) #' #' y <- c(1, 1, 1, 2, 2, 3) #' #' # With multiple columns, the runs are constructed rowwise #' df <- data_frame( #' x = x, #' y = y #' ) #' #' vec_identify_runs(df) #' vec_run_sizes(df) #' vec_unrep(df) NULL #' @rdname runs #' @export vec_identify_runs <- function(x) { .Call(ffi_vec_identify_runs, x, environment()) } #' @rdname runs #' @export vec_run_sizes <- function(x) { .Call(ffi_vec_run_sizes, x, environment()) } vec_locate_run_bounds <- function(x, which = c("start", "end")) { .Call(ffi_vec_locate_run_bounds, x, which, environment()) } vec_detect_run_bounds <- function(x, which = c("start", "end")) { .Call(ffi_vec_detect_run_bounds, x, which, environment()) } vctrs/R/bind.R0000644000176200001440000001666414511524374012676 0ustar liggesusers#' Combine many data frames into one data frame #' #' This pair of functions binds together data frames (and vectors), either #' row-wise or column-wise. Row-binding creates a data frame with common type #' across all arguments. Column-binding creates a data frame with common length #' across all arguments. #' #' @section Invariants: #' #' All inputs are first converted to a data frame. The conversion for #' 1d vectors depends on the direction of binding: #' #' * For `vec_rbind()`, each element of the vector becomes a column in #' a single row. #' * For `vec_cbind()`, each element of the vector becomes a row in a #' single column. #' #' Once the inputs have all become data frames, the following #' invariants are observed for row-binding: #' #' * `vec_size(vec_rbind(x, y)) == vec_size(x) + vec_size(y)` #' * `vec_ptype(vec_rbind(x, y)) = vec_ptype_common(x, y)` #' #' Note that if an input is an empty vector, it is first converted to #' a 1-row data frame with 0 columns. Despite being empty, its #' effective size for the total number of rows is 1. #' #' For column-binding, the following invariants apply: #' #' * `vec_size(vec_cbind(x, y)) == vec_size_common(x, y)` #' * `vec_ptype(vec_cbind(x, y)) == vec_cbind(vec_ptype(x), vec_ptype(x))` #' #' @inheritParams vec_c #' @inheritParams rlang::args_error_context #' #' @param ... Data frames or vectors. #' #' When the inputs are named: #' * `vec_rbind()` assigns names to row names unless `.names_to` is #' supplied. In that case the names are assigned in the column #' defined by `.names_to`. #' * `vec_cbind()` creates packed data frame columns with named #' inputs. #' #' `NULL` inputs are silently ignored. Empty (e.g. zero row) inputs #' will not appear in the output, but will affect the derived `.ptype`. #' @param .names_to #' This controls what to do with input names supplied in `...`. #' * By default, input names are [zapped][rlang::zap]. #' #' * If a string, specifies a column where the input names will be #' copied. These names are often useful to identify rows with #' their original input. If a column name is supplied and `...` is #' not named, an integer column is used instead. #' #' * If `NULL`, the input names are used as row names. #' @param .name_repair One of `"unique"`, `"universal"`, `"check_unique"`, #' `"unique_quiet"`, or `"universal_quiet"`. See [vec_as_names()] for the #' meaning of these options. #' #' With `vec_rbind()`, the repair function is applied to all inputs #' separately. This is because `vec_rbind()` needs to align their #' columns before binding the rows, and thus needs all inputs to #' have unique names. On the other hand, `vec_cbind()` applies the #' repair function after all inputs have been concatenated together #' in a final data frame. Hence `vec_cbind()` allows the more #' permissive minimal names repair. #' #' @return A data frame, or subclass of data frame. #' #' If `...` is a mix of different data frame subclasses, `vec_ptype2()` #' will be used to determine the output type. For `vec_rbind()`, this #' will determine the type of the container and the type of each column; #' for `vec_cbind()` it only determines the type of the output container. #' If there are no non-`NULL` inputs, the result will be `data.frame()`. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_cast_common()] #' - [vec_proxy()] #' - [vec_init()] #' - [vec_assign()] #' - [vec_restore()] #' #' #' ## base dependencies of `vec_rbind()` #' #' - [base::c()] #' #' If columns to combine inherit from a common class, #' `vec_rbind()` falls back to `base::c()` if there exists a `c()` #' method implemented for this class hierarchy. #' #' @seealso [vec_c()] for combining 1d vectors. #' @examples #' # row binding ----------------------------------------- #' #' # common columns are coerced to common class #' vec_rbind( #' data.frame(x = 1), #' data.frame(x = FALSE) #' ) #' #' # unique columns are filled with NAs #' vec_rbind( #' data.frame(x = 1), #' data.frame(y = "x") #' ) #' #' # null inputs are ignored #' vec_rbind( #' data.frame(x = 1), #' NULL, #' data.frame(x = 2) #' ) #' #' # bare vectors are treated as rows #' vec_rbind( #' c(x = 1, y = 2), #' c(x = 3) #' ) #' #' # default names will be supplied if arguments are not named #' vec_rbind( #' 1:2, #' 1:3, #' 1:4 #' ) #' #' # column binding -------------------------------------- #' #' # each input is recycled to have common length #' vec_cbind( #' data.frame(x = 1), #' data.frame(y = 1:3) #' ) #' #' # bare vectors are treated as columns #' vec_cbind( #' data.frame(x = 1), #' y = letters[1:3] #' ) #' #' # if you supply a named data frame, it is packed in a single column #' data <- vec_cbind( #' x = data.frame(a = 1, b = 2), #' y = 1 #' ) #' data #' #' # Packed data frames are nested in a single column. This makes it #' # possible to access it through a single name: #' data$x #' #' # since the base print method is suboptimal with packed data #' # frames, it is recommended to use tibble to work with these: #' if (rlang::is_installed("tibble")) { #' vec_cbind(x = tibble::tibble(a = 1, b = 2), y = 1) #' } #' #' # duplicate names are flagged #' vec_cbind(x = 1, x = 2) #' #' @name vec_bind NULL #' @export #' @param .name_spec A name specification (as documented in [vec_c()]) #' for combining the outer inputs names in `...` and the inner row #' names of the inputs. This only has an effect when `.names_to` is #' set to `NULL`, which causes the input names to be assigned as row #' names. #' @rdname vec_bind vec_rbind <- function(..., .ptype = NULL, .names_to = rlang::zap(), .name_repair = c("unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), .name_spec = NULL, .error_call = current_env()) { .External2(ffi_rbind, .ptype, .names_to, .name_repair, .name_spec) } vec_rbind <- fn_inline_formals(vec_rbind, ".name_repair") #' @export #' @rdname vec_bind #' @param .size If, `NULL`, the default, will determine the number of rows in #' `vec_cbind()` output by using the tidyverse [recycling #' rules][theory-faq-recycling]. #' #' Alternatively, specify the desired number of rows, and any inputs of length #' 1 will be recycled appropriately. vec_cbind <- function(..., .ptype = NULL, .size = NULL, .name_repair = c("unique", "universal", "check_unique", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env()) { .External2(ffi_cbind, .ptype, .size, .name_repair) } vec_cbind <- fn_inline_formals(vec_cbind, ".name_repair") as_df_row <- function(x, quiet = FALSE) { .Call(ffi_as_df_row, x, quiet, environment()) } as_df_col <- function(x, outer_name) { .Call(ffi_as_df_col, x, outer_name, environment()) } #' Frame prototype #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' This is an experimental generic that returns zero-columns variants #' of a data frame. It is needed for [vec_cbind()], to work around the #' lack of colwise primitives in vctrs. Expect changes. #' #' @param x A data frame. #' @inheritParams rlang::args_dots_empty #' #' @keywords internal #' @export vec_cbind_frame_ptype <- function(x, ...) { UseMethod("vec_cbind_frame_ptype") } #' @export vec_cbind_frame_ptype.default <- function(x, ...) { x[0] } #' @export vec_cbind_frame_ptype.sf <- function(x, ...) { data.frame() } vctrs/R/slice-chop.R0000644000176200001440000001473614402367170014004 0ustar liggesusers#' Chopping #' #' @description #' - `vec_chop()` provides an efficient method to repeatedly slice a vector. It #' captures the pattern of `map(indices, vec_slice, x = x)`. When no indices #' are supplied, it is generally equivalent to [as.list()]. #' #' - `list_unchop()` combines a list of vectors into a single vector, placing #' elements in the output according to the locations specified by `indices`. #' It is similar to [vec_c()], but gives greater control over how the elements #' are combined. When no indices are supplied, it is identical to `vec_c()`, #' but typically a little faster. #' #' If `indices` selects every value in `x` exactly once, in any order, then #' `list_unchop()` is the inverse of `vec_chop()` and the following invariant #' holds: #' #' ``` #' list_unchop(vec_chop(x, indices = indices), indices = indices) == x #' ``` #' #' @inheritParams rlang::args_dots_empty #' @inheritParams vec_c #' #' @param x A vector #' @param indices For `vec_chop()`, a list of positive integer vectors to #' slice `x` with, or `NULL`. Can't be used if `sizes` is already specified. #' If both `indices` and `sizes` are `NULL`, `x` is split into its individual #' elements, equivalent to using an `indices` of `as.list(vec_seq_along(x))`. #' #' For `list_unchop()`, a list of positive integer vectors specifying the #' locations to place elements of `x` in. Each element of `x` is recycled to #' the size of the corresponding index vector. The size of `indices` must #' match the size of `x`. If `NULL`, `x` is combined in the order it is #' provided in, which is equivalent to using [vec_c()]. #' @param sizes An integer vector of non-negative sizes representing sequential #' indices to slice `x` with, or `NULL`. Can't be used if `indices` is already #' specified. #' #' For example, `sizes = c(2, 4)` is equivalent to `indices = list(1:2, 3:6)`, #' but is typically faster. #' #' `sum(sizes)` must be equal to `vec_size(x)`, i.e. `sizes` must completely #' partition `x`, but an individual size is allowed to be `0`. #' @param ptype If `NULL`, the default, the output type is determined by #' computing the common type across all elements of `x`. Alternatively, you #' can supply `ptype` to give the output a known type. #' @return #' - `vec_chop()`: A list where each element has the same type as `x`. The size #' of the list is equal to `vec_size(indices)`, `vec_size(sizes)`, or #' `vec_size(x)` depending on whether or not `indices` or `sizes` is provided. #' #' - `list_unchop()`: A vector of type `vec_ptype_common(!!!x)`, or `ptype`, if #' specified. The size is computed as `vec_size_common(!!!indices)` unless #' the indices are `NULL`, in which case the size is `vec_size_common(!!!x)`. #' #' @section Dependencies of `vec_chop()`: #' - [vec_slice()] #' #' @section Dependencies of `list_unchop()`: #' - [vec_c()] #' #' @export #' @examples #' vec_chop(1:5) #' #' # These two are equivalent #' vec_chop(1:5, indices = list(1:2, 3:5)) #' vec_chop(1:5, sizes = c(2, 3)) #' #' # Can also be used on data frames #' vec_chop(mtcars, indices = list(1:3, 4:6)) #' #' # If `indices` selects every value in `x` exactly once, #' # in any order, then `list_unchop()` inverts `vec_chop()` #' x <- c("a", "b", "c", "d") #' indices <- list(2, c(3, 1), 4) #' vec_chop(x, indices = indices) #' list_unchop(vec_chop(x, indices = indices), indices = indices) #' #' # When unchopping, size 1 elements of `x` are recycled #' # to the size of the corresponding index #' list_unchop(list(1, 2:3), indices = list(c(1, 3, 5), c(2, 4))) #' #' # Names are retained, and outer names can be combined with inner #' # names through the use of a `name_spec` #' lst <- list(x = c(a = 1, b = 2), y = 1) #' list_unchop(lst, indices = list(c(3, 2), c(1, 4)), name_spec = "{outer}_{inner}") #' #' # An alternative implementation of `ave()` can be constructed using #' # `vec_chop()` and `list_unchop()` in combination with `vec_group_loc()` #' ave2 <- function(.x, .by, .f, ...) { #' indices <- vec_group_loc(.by)$loc #' chopped <- vec_chop(.x, indices = indices) #' out <- lapply(chopped, .f, ...) #' list_unchop(out, indices = indices) #' } #' #' breaks <- warpbreaks$breaks #' wool <- warpbreaks$wool #' #' ave2(breaks, wool, mean) #' #' identical( #' ave2(breaks, wool, mean), #' ave(breaks, wool, FUN = mean) #' ) #' #' # If you know your input is sorted and you'd like to split on the groups, #' # `vec_run_sizes()` can be efficiently combined with `sizes` #' df <- data_frame( #' g = c(2, 5, 5, 6, 6, 6, 6, 8, 9, 9), #' x = 1:10 #' ) #' vec_chop(df, sizes = vec_run_sizes(df$g)) #' #' # If you have a list of homogeneous vectors, sometimes it can be useful to #' # unchop, apply a function to the flattened vector, and then rechop according #' # to the original indices. This can be done efficiently with `list_sizes()`. #' x <- list(c(1, 2, 1), c(3, 1), 5, double()) #' x_flat <- list_unchop(x) #' x_flat <- x_flat + max(x_flat) #' vec_chop(x_flat, sizes = list_sizes(x)) vec_chop <- function(x, ..., indices = NULL, sizes = NULL) { if (!missing(...)) { indices <- check_dots_chop(..., indices = indices) } .Call(ffi_vec_chop, x, indices, sizes) } check_dots_chop <- function(..., indices = NULL, call = caller_env()) { if (!is_null(indices)) { # Definitely can't supply both `indices` and `...` check_dots_empty0(..., call = call) } if (dots_n(...) != 1L) { # Backwards compatible case doesn't allow for length >1 `...`. # This must be an error case. check_dots_empty0(..., call = call) } # TODO: Soft-deprecate this after dplyr/tidyr have updated all `vec_chop()` # calls to be explicit about `indices =` # Assume this is an old style `vec_chop(x, indices)` call, before we # added the `...` indices <- list(...)[[1L]] indices } #' @rdname vec_chop #' @export list_unchop <- function(x, ..., indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), error_arg = "x", error_call = current_env()) { check_dots_empty0(...) .Call(ffi_list_unchop, x, indices, ptype, name_spec, name_repair, environment()) } # Exposed for testing (`starts` is 0-based) vec_chop_seq <- function(x, starts, sizes, increasings = TRUE) { args <- vec_recycle_common(starts, sizes, increasings) .Call(ffi_vec_chop_seq, x, args[[1]], args[[2]], args[[3]]) } vctrs/R/fill.R0000644000176200001440000000257214276722575012714 0ustar liggesusers#' Fill in missing values with the previous or following value #' #' @description #' `r lifecycle::badge("experimental")` #' #' `vec_fill_missing()` fills gaps of missing values with the previous or #' following non-missing value. #' #' @param x A vector #' @param direction Direction in which to fill missing values. Must be either #' `"down"`, `"up"`, `"downup"`, or `"updown"`. #' @param max_fill A single positive integer specifying the maximum number of #' sequential missing values that will be filled. If `NULL`, there is #' no limit. #' #' @export #' @examples #' x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) #' #' # Filling down replaces missing values with the previous non-missing value #' vec_fill_missing(x, direction = "down") #' #' # To also fill leading missing values, use `"downup"` #' vec_fill_missing(x, direction = "downup") #' #' # Limit the number of sequential missing values to fill with `max_fill` #' vec_fill_missing(x, max_fill = 1) #' #' # Data frames are filled rowwise. Rows are only considered missing #' # if all elements of that row are missing. #' y <- c(1, NA, 2, NA, NA, 3, 4, NA, 5) #' df <- data_frame(x = x, y = y) #' df #' #' vec_fill_missing(df) vec_fill_missing <- function(x, direction = c("down", "up", "downup", "updown"), max_fill = NULL) { .Call(vctrs_fill_missing, x, direction, max_fill) } vctrs/R/type.R0000644000176200001440000001555514362266120012735 0ustar liggesusers#' Find the prototype of a set of vectors #' #' `vec_ptype()` returns the unfinalised prototype of a single vector. #' `vec_ptype_common()` finds the common type of multiple vectors. #' `vec_ptype_show()` nicely prints the common type of any number of #' inputs, and is designed for interactive exploration. #' #' @inheritParams rlang::args_error_context #' #' @param x A vector #' @param ... For `vec_ptype()`, these dots are for future extensions and must #' be empty. #' #' For `vec_ptype_common()` and `vec_ptype_show()`, vector inputs. #' @param x_arg Argument name for `x`. This is used in error messages to inform #' the user about the locations of incompatible types. #' @param .ptype If `NULL`, the default, the output type is determined by #' computing the common type across all elements of `...`. #' #' Alternatively, you can supply `.ptype` to give the output known type. #' If `getOption("vctrs.no_guessing")` is `TRUE` you must supply this value: #' this is a convenient way to make production code demand fixed types. #' @return `vec_ptype()` and `vec_ptype_common()` return a prototype #' (a size-0 vector) #' #' @section `vec_ptype()`: #' `vec_ptype()` returns [size][vec_size] 0 vectors potentially #' containing attributes but no data. Generally, this is just #' `vec_slice(x, 0L)`, but some inputs require special #' handling. #' #' * While you can't slice `NULL`, the prototype of `NULL` is #' itself. This is because we treat `NULL` as an identity value in #' the `vec_ptype2()` monoid. #' #' * The prototype of logical vectors that only contain missing values #' is the special [unspecified] type, which can be coerced to any #' other 1d type. This allows bare `NA`s to represent missing values #' for any 1d vector type. #' #' See [internal-faq-ptype2-identity] for more information about #' identity values. #' #' `vec_ptype()` is a _performance_ generic. It is not necessary to implement it #' because the default method will work for any vctrs type. However the default #' method builds around other vctrs primitives like `vec_slice()` which incurs #' performance costs. If your class has a static prototype, you might consider #' implementing a custom `vec_ptype()` method that returns a constant. This will #' improve the performance of your class in many cases ([common #' type][vec_ptype2] imputation in particular). #' #' Because it may contain unspecified vectors, the prototype returned #' by `vec_ptype()` is said to be __unfinalised__. Call #' [vec_ptype_finalise()] to finalise it. Commonly you will need the #' finalised prototype as returned by `vec_slice(x, 0L)`. #' #' @section `vec_ptype_common()`: #' `vec_ptype_common()` first finds the prototype of each input, then #' successively calls [vec_ptype2()] to find a common type. It returns #' a [finalised][vec_ptype_finalise] prototype. #' #' @section Dependencies of `vec_ptype()`: #' - [vec_slice()] for returning an empty slice #' #' @section Dependencies of `vec_ptype_common()`: #' - [vec_ptype2()] #' - [vec_ptype_finalise()] #' #' @export #' @examples #' # Unknown types ------------------------------------------ #' vec_ptype_show() #' vec_ptype_show(NA) #' vec_ptype_show(NULL) #' #' # Vectors ------------------------------------------------ #' vec_ptype_show(1:10) #' vec_ptype_show(letters) #' vec_ptype_show(TRUE) #' #' vec_ptype_show(Sys.Date()) #' vec_ptype_show(Sys.time()) #' vec_ptype_show(factor("a")) #' vec_ptype_show(ordered("a")) #' #' # Matrices ----------------------------------------------- #' # The prototype of a matrix includes the number of columns #' vec_ptype_show(array(1, dim = c(1, 2))) #' vec_ptype_show(array("x", dim = c(1, 2))) #' #' # Data frames -------------------------------------------- #' # The prototype of a data frame includes the prototype of #' # every column #' vec_ptype_show(iris) #' #' # The prototype of multiple data frames includes the prototype #' # of every column that in any data frame #' vec_ptype_show( #' data.frame(x = TRUE), #' data.frame(y = 2), #' data.frame(z = "a") #' ) vec_ptype <- function(x, ..., x_arg = "", call = caller_env()) { check_dots_empty0(...) return(.Call(ffi_ptype, x, x_arg, environment())) UseMethod("vec_ptype") } #' @export #' @rdname vec_ptype vec_ptype_common <- function(..., .ptype = NULL, .arg = "", .call = caller_env()) { .External2(ffi_ptype_common, .ptype) } vec_ptype_common_opts <- function(..., .ptype = NULL, .opts = fallback_opts(), .call = caller_env()) { .External2(ffi_ptype_common_opts, .ptype, .opts) } vec_ptype_common_params <- function(..., .ptype = NULL, .s3_fallback = NULL, .call = caller_env()) { opts <- fallback_opts( s3_fallback = .s3_fallback ) vec_ptype_common_opts( ..., .ptype = .ptype, .opts = opts, .call = .call ) } vec_ptype_common_fallback <- function(..., .ptype = NULL, .call = caller_env()) { vec_ptype_common_opts( ..., .ptype = .ptype, .opts = full_fallback_opts(), .call = .call ) } #' @export #' @rdname vec_ptype vec_ptype_show <- function(...) { args <- compact(list2(...)) n <- length(args) if (n == 0) { cat_line("Prototype: NULL") } else if (n == 1) { cat_line("Prototype: ", vec_ptype_full(args[[1]])) } else { in_types <- map(args, vec_ptype) out_types <- vector("list", length(in_types)) out_types[[1]] <- in_types[[1]] for (i in seq2(2, n)) { out_types[[i]] <- vec_ptype2(out_types[[i - 1]], in_types[[i]]) } in_full <- paste0("<", map_chr(in_types, vec_ptype_full), ">") out_full <- paste0("<", map_chr(out_types, vec_ptype_full), ">") out <- cbind( n = paste0(seq(0, n - 1), ". "), lhs = c("", out_full[-n]), comma = " , ", rhs = in_full, equals = " = ", res = c(in_full[[1]], out_full[-1]) ) out <- t(apply(out, 1, pad_height)) out <- apply(out, 2, pad_width) out[, "lhs"] <- parens(out[, "lhs"]) out[, "rhs"] <- parens(out[, "rhs"], FALSE) lines <- strsplit(out, "\n") dim(lines) <- dim(out) steps <- apply(lines, 1, function(x) do.call(cbind, x)) if (is.list(steps)) { step_lines <- unlist(lapply(steps, function(x) apply(x, 1, paste0, collapse = ""))) } else { step_lines <- apply(steps, 2, paste0, collapse = "") } cat_line("Prototype: ", out_full[[n]]) cat_line(step_lines) } invisible() } vec_typeof <- function(x) { .Call(vctrs_typeof, x, TRUE) } vec_typeof_bare <- function(x) { .Call(vctrs_typeof, x, FALSE) } vec_type_info <- function(x) { .Call(ffi_type_info, x) } vec_proxy_info <- function(x) { .Call(ffi_proxy_info, x) } vctrs/R/conditions.R0000644000176200001440000005474514512001764014127 0ustar liggesusers#' Custom conditions for vctrs package #' #' These functions are called for their side effect of raising #' errors and warnings. #' These conditions have custom classes and structures to make #' testing easier. #' #' @inheritParams rlang::args_error_context #' @param x,y,to Vectors #' @param ...,class Only use these fields when creating a subclass. #' @param x_arg,y_arg,to_arg Argument names for `x`, `y`, and `to`. Used in #' error messages to inform the user about the locations of incompatible #' types. #' @param action An option to customize the incompatible type message depending #' on the context. Errors thrown from [vec_ptype2()] use `"combine"` and #' those thrown from [vec_cast()] use `"convert"`. #' @param details Any additional human readable details. #' @param message An overriding message for the error. `details` and #' `message` are mutually exclusive, supplying both is an error. #' #' @examples #' #' # Most of the time, `maybe_lossy_cast()` returns its input normally: #' maybe_lossy_cast( #' c("foo", "bar"), #' NA, #' "", #' lossy = c(FALSE, FALSE), #' x_arg = "", #' to_arg = "" #' ) #' #' # If `lossy` has any `TRUE`, an error is thrown: #' try(maybe_lossy_cast( #' c("foo", "bar"), #' NA, #' "", #' lossy = c(FALSE, TRUE), #' x_arg = "", #' to_arg = "" #' )) #' #' # Unless lossy casts are allowed: #' allow_lossy_cast( #' maybe_lossy_cast( #' c("foo", "bar"), #' NA, #' "", #' lossy = c(FALSE, TRUE), #' x_arg = "", #' to_arg = "" #' ) #' ) #' #' @keywords internal #' @name vctrs-conditions NULL stop_vctrs <- function(message = NULL, class = NULL, ..., call = caller_env()) { abort( message, class = c(class, "vctrs_error"), ..., call = call ) } warn_vctrs <- function(message = NULL, class = NULL, ..., call = caller_env()) { warn( message, class = c(class, "vctrs_warning"), ..., call = call ) } stop_incompatible <- function(x, y, ..., details = NULL, message = NULL, class = NULL, call = caller_env()) { stop_vctrs( message, class = c(class, "vctrs_error_incompatible"), x = x, y = y, details = details, ..., call = call ) } #' @return #' `stop_incompatible_*()` unconditionally raise an error of class #' `"vctrs_error_incompatible_*"` and `"vctrs_error_incompatible"`. #' #' @rdname vctrs-conditions #' @export stop_incompatible_type <- function(x, y, ..., x_arg, y_arg, action = c("combine", "convert"), details = NULL, message = NULL, class = NULL, call = caller_env()) { obj_check_vector(x, arg = x_arg) obj_check_vector(y, arg = y_arg) action <- arg_match(action) message <- cnd_type_message( x, y, x_arg, y_arg, details, action, message, from_dispatch = match_from_dispatch(...) ) subclass <- switch( action, combine = "vctrs_error_ptype2", convert = "vctrs_error_cast" ) stop_incompatible( x, y, x_arg = x_arg, y_arg = y_arg, details = details, ..., message = message, class = c(class, subclass, "vctrs_error_incompatible_type"), call = call ) } #' @rdname vctrs-conditions #' @export stop_incompatible_cast <- function(x, to, ..., x_arg, to_arg, details = NULL, message = NULL, class = NULL, call = caller_env()) { stop_incompatible_type( x = x, y = to, to = to, ..., x_arg = x_arg, y_arg = to_arg, to_arg = to_arg, action = "convert", details = details, message = message, class = class, call = call ) } stop_incompatible_shape <- function(x, y, x_size, y_size, axis, x_arg, y_arg, call = caller_env()) { details <- format_error_bullets(c( x = glue::glue("Incompatible sizes {x_size} and {y_size} along axis {axis}.") )) stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = details, call = call ) } type_actions <- c( "combine", "convert" ) cnd_type_separator <- function(action) { if (identical(action, "combine")) { "and" } else if (identical(action, "convert")) { "to" } else { abort("Internal error: Unknown `action`.") } } cnd_type_message <- function(x, y, x_arg, y_arg, details, action, message, from_dispatch = FALSE, fallback = NULL) { if (!is_null(message)) { if (!is_null(details)) { abort("Can't supply both `message` and `details`.") } return(message) } x_arg <- arg_as_string(x_arg) y_arg <- arg_as_string(y_arg) if (nzchar(x_arg)) { x_name <- paste0(" `", x_arg, "` ") } else { x_name <- " " } if (nzchar(y_arg)) { y_name <- paste0(" `", y_arg, "` ") } else { y_name <- " " } separator <- cnd_type_separator(action) if (is.data.frame(x) && is.data.frame(y)) { if (vec_is_coercible(new_data_frame(x), new_data_frame(y))) { x_type <- cnd_type_message_df_label(x) y_type <- cnd_type_message_df_label(y) } else { x_type <- vec_ptype_full(x) y_type <- vec_ptype_full(y) } } else { x_type <- cnd_type_message_type_label(x) y_type <- cnd_type_message_type_label(y) } converting <- action == "convert" # If we are here directly from dispatch, this means there is no # ptype2 method implemented and the is-same-class fallback has # failed because of diverging attributes. The author of the class # should implement a ptype2 method as documented in the FAQ # indicated below. if (from_dispatch && !converting && identical(class(x)[[1]], class(y)[[1]])) { details <- c(incompatible_attrib_bullets(), details) details <- format_error_bullets(details) } if (is_null(fallback)) { end <- "." } else { end <- glue::glue("; falling back to {fallback}.") } if (converting && nzchar(y_arg)) { header <- glue::glue("Can't convert{x_name}<{x_type}> to match type of{y_name}<{y_type}>{end}") } else { header <- glue::glue("Can't {action}{x_name}<{x_type}> {separator}{y_name}<{y_type}>{end}") } paste_line(header, details) } cnd_type_message_type_label <- function(x) { if (is.data.frame(x)) { class(x)[[1]] } else { vec_ptype_full(x) } } incompatible_attrib_bullets <- function() { c( x = "Some attributes are incompatible.", i = "The author of the class should implement vctrs methods.", i = "See ." ) } cnd_type_message_df_label <- function(x) { x <- class(x)[[1]] if (identical(x, "tbl_df")) { "tibble" } else { x } } #' @rdname vctrs-conditions #' @export stop_incompatible_op <- function(op, x, y, details = NULL, ..., message = NULL, class = NULL, call = caller_env()) { message <- message %||% glue_lines( "<{vec_ptype_full(x)}> {op} <{vec_ptype_full(y)}> is not permitted", details ) stop_incompatible( x, y, op = op, details = details, ..., message = message, class = c(class, "vctrs_error_incompatible_op"), call = call ) } #' @rdname vctrs-conditions #' @export stop_incompatible_size <- function(x, y, x_size, y_size, ..., x_arg, y_arg, details = NULL, message = NULL, class = NULL, call = caller_env()) { stop_incompatible( x, y, x_size = x_size, y_size = y_size, ..., x_arg = x_arg, y_arg = y_arg, details = details, message = message, class = c(class, "vctrs_error_incompatible_size"), call = call ) } #' @export cnd_header.vctrs_error_incompatible_size <- function(cnd, ...) { if (is_string(cnd$message) && nzchar(cnd$message)) { return(cnd$message) } x_size <- vec_cast(cnd$x_size, int()) y_size <- vec_cast(cnd$y_size, int()) stopifnot( length(x_size) == 1, length(y_size) == 1 ) x_arg <- arg_as_string(cnd$x_arg) y_arg <- arg_as_string(cnd$y_arg) if (nzchar(x_arg)) { x_tag <- glue::glue("`{x_arg}` (size {x_size})") } else { x_tag <- glue::glue("input of size {x_size}") } if (nzchar(y_arg)) { y_tag <- glue::glue("to match `{y_arg}` (size {y_size})") } else { y_tag <- glue::glue("to size {y_size}") } glue::glue("Can't recycle {x_tag} {y_tag}.") } #' @export cnd_body.vctrs_error_incompatible_size <- function(cnd, ...) { cnd$details } #' Lossy cast error #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' By default, lossy casts are an error. Use `allow_lossy_cast()` to #' silence these errors and continue with the partial results. In this #' case the lost values are typically set to `NA` or to a lower value #' resolution, depending on the type of cast. #' #' Lossy cast errors are thrown by `maybe_lossy_cast()`. Unlike #' functions prefixed with `stop_`, `maybe_lossy_cast()` usually #' returns a result. If a lossy cast is detected, it throws an error, #' unless it's been wrapped in `allow_lossy_cast()`. In that case, it #' returns the result silently. #' #' @inheritParams stop_incompatible_cast #' @inheritParams vec_cast #' @inheritParams rlang::args_error_context #' @param result The result of a potentially lossy cast. #' @param to Type to cast to. #' @param lossy A logical vector indicating which elements of `result` #' were lossy. #' #' Can also be a single `TRUE`, but note that `locations` picks up #' locations from this vector by default. In this case, supply your #' own location vector, possibly empty. #' @param loss_type The kind of lossy cast to be mentioned in error #' messages. Can be loss of precision (for instance from double to #' integer) or loss of generality (from character to factor). #' @param locations An optional integer vector giving the #' locations where `x` lost information. #' @param .deprecation If `TRUE`, the error is downgraded to a #' deprecation warning. This is useful for transitioning your class #' to a stricter conversion scheme. The warning advises your users #' to wrap their code with `allow_lossy_cast()`. #' @keywords internal #' @export maybe_lossy_cast <- function(result, x, to, lossy = NULL, locations = NULL, ..., loss_type = c("precision", "generality"), x_arg, to_arg, call = caller_env(), details = NULL, message = NULL, class = NULL, .deprecation = FALSE) { if (!any(lossy)) { return(result) } if (.deprecation) { maybe_warn_deprecated_lossy_cast(x, to, loss_type, x_arg, to_arg) return(result) } locations <- locations %||% which(lossy) withRestarts( vctrs_restart_error_cast_lossy = function() result, stop_lossy_cast( x = x, to = to, result = result, locations = locations, ..., loss_type = loss_type, x_arg = x_arg, to_arg = to_arg, details = details, message = message, class = class, call = call ) ) } stop_lossy_cast <- function(x, to, result, locations = NULL, ..., loss_type, x_arg, to_arg, details = NULL, message = NULL, class = NULL, call = caller_env()) { stop_incompatible_cast( x = x, to = to, result = result, locations = locations, ..., loss_type = loss_type, x_arg = x_arg, to_arg = to_arg, details = details, class = c(class, "vctrs_error_cast_lossy"), call = call ) } #' @export cnd_header.vctrs_error_cast_lossy <- function(cnd, ...) { x_label <- format_arg_label(vec_ptype_full(cnd$x), cnd$x_arg) to_label <- format_arg_label(vec_ptype_full(cnd$y), cnd$y_arg) loss_type <- loss_type(cnd$loss_type) glue::glue("Can't convert from {x_label} to {to_label} due to loss of {loss_type}.") } #' @export cnd_body.vctrs_error_cast_lossy <- function(cnd, ...) { if (length(cnd$locations)) { format_error_bullets(inline_list("Locations: ", cnd$locations)) } else { character() } } loss_type <- function(x) { stopifnot( is_character(x), all(x %in% c("precision", "generality")) ) x[[1]] } # Used in maybe_warn_deprecated_lossy_cast() new_error_cast_lossy <- function(x, to, loss_type, x_arg = "", to_arg = "") { error_cnd( c("vctrs_error_cast_lossy", "vctrs_error_incompatible_type"), x = x, y = to, loss_type = loss_type, x_arg = x_arg, y_arg = to_arg ) } #' @rdname vctrs-conditions #' @param x_ptype,to_ptype Suppress only the casting errors where `x` #' or `to` match these [prototypes][vec_ptype]. #' @export allow_lossy_cast <- function(expr, x_ptype = NULL, to_ptype = NULL) { withCallingHandlers( vctrs_error_cast_lossy = function(err) { if (!is_null(x_ptype) && !vec_is(err$x, x_ptype)) { return() } if (!is_null(to_ptype) && !vec_is(err$y, to_ptype)) { return() } invokeRestart("vctrs_restart_error_cast_lossy") }, expr ) } maybe_warn_deprecated_lossy_cast <- function(x, to, loss_type, x_arg, to_arg, user_env = caller_env(2)) { # Returns `TRUE` if `allow_lossy_cast()` is on the stack and accepts # to handle the condition handled <- withRestarts( vctrs_restart_error_cast_lossy = function() TRUE, { # Signal fully formed condition but strip the error classes in # case someone is catching: This is not an abortive condition. cnd <- new_error_cast_lossy( x, to, loss_type = loss_type, x_arg = x_arg, to_arg = to_arg ) class(cnd) <- setdiff(class(cnd), c("error", "rlang_error")) signalCondition(cnd) FALSE } ) if (handled) { return(invisible()) } from <- format_arg_label(vec_ptype_abbr(x), x_arg) to <- format_arg_label(vec_ptype_abbr(to), to_arg) lifecycle::deprecate_warn( when = "0.2.0", what = I("Coercion with lossy casts"), with = "allow_lossy_cast()", details = paste0( glue::glue("We detected a lossy transformation from { from } to { to }. "), "The result will contain lower-resolution values or missing values. ", "To suppress this warning, wrap your code with `allow_lossy_cast()`." ), always = TRUE, user_env = user_env ) invisible() } stop_unsupported <- function(x, method, call = caller_env()) { msg <- glue::glue("`{method}.{class(x)[[1]]}()` not supported.") stop_vctrs( "vctrs_error_unsupported", message = msg, x = x, method = method, call = call ) } stop_unimplemented <- function(x, method, call = caller_env()) { msg <- glue::glue("`{method}.{class(x)[[1]]}()` not implemented.") stop_vctrs( "vctrs_error_unimplemented", message = msg, x = x, method = method, call = call ) } stop_scalar_type <- function(x, arg = NULL, call = caller_env()) { if (is_null(arg) || !nzchar(arg)) { arg <- "Input" } else { arg <- glue::backtick(arg) } msg <- glue::glue("{arg} must be a vector, not {obj_type_friendly(x)}.") stop_vctrs( msg, "vctrs_error_scalar_type", actual = x, call = call ) } stop_corrupt_factor_levels <- function(x, arg = "x", call = caller_env()) { msg <- glue::glue("`{arg}` is a corrupt factor with non-character levels") abort(msg, call = call) } stop_corrupt_ordered_levels <- function(x, arg = "x", call = caller_env()) { msg <- glue::glue("`{arg}` is a corrupt ordered factor with non-character levels") abort(msg, call = call) } stop_recycle_incompatible_size <- function(x_size, size, x_arg = "x", call = caller_env()) { stop_vctrs( x_size = x_size, y_size = size, x_arg = x_arg, # FIXME: tibble is the only package that uses `vctrs_error_recycle_incompatible_size` class = c("vctrs_error_incompatible_size", "vctrs_error_recycle_incompatible_size"), call = call ) } # Names ------------------------------------------------------------------- stop_names <- function(class = NULL, ..., call = caller_env()) { stop_vctrs( class = c(class, "vctrs_error_names"), ..., call = call ) } stop_names_cannot_be_empty <- function(names, call = caller_env()) { stop_names( class = "vctrs_error_names_cannot_be_empty", names = names, call = call ) } #' @export cnd_header.vctrs_error_names_cannot_be_empty <- function(cnd, ...) { "Names can't be empty." } #' @export cnd_body.vctrs_error_names_cannot_be_empty <- function(cnd, ...) { locations <- detect_empty_names(cnd$names) if (length(locations) == 1) { bullet <- glue::glue("Empty name found at location {locations}.") } else { bullet <- glue::glue("Empty names found at locations {ensure_full_stop(enumerate(locations))}") } bullet <- c(x = bullet) format_error_bullets(bullet) } stop_names_cannot_be_dot_dot <- function(names, call = caller_env()) { stop_names( class = "vctrs_error_names_cannot_be_dot_dot", names = names, call = call ) } #' @export cnd_header.vctrs_error_names_cannot_be_dot_dot <- function(cnd, ...) { "Names can't be of the form `...` or `..j`." } #' @export cnd_body.vctrs_error_names_cannot_be_dot_dot <- function(cnd, ...) { names <- cnd$names locations <- detect_dot_dot(names) names <- names[locations] split <- vec_group_loc(names) info <- map2_chr(split$key, split$loc, make_names_loc_bullet) header <- "These names are invalid:" header <- c(x = header) header <- format_error_bullets(header) message <- bullets(info, header = header) message <- indent(message, 2) message } stop_names_must_be_unique <- function(names, arg = "", call = caller_env()) { stop_names( class = "vctrs_error_names_must_be_unique", arg = arg, names = names, call = call ) } #' @export cnd_header.vctrs_error_names_must_be_unique <- function(cnd, ...) { "Names must be unique." } #' @export cnd_body.vctrs_error_names_must_be_unique <- function(cnd, ...) { names <- cnd$names dups <- vec_group_loc(names) dup_indicator <- map_lgl(dups$loc, function(x) length(x) != 1L) dups <- vec_slice(dups, dup_indicator) header <- "These names are duplicated:" header <- c(x = header) header <- format_error_bullets(header) info <- map2_chr(dups$key, dups$loc, make_names_loc_bullet) message <- bullets(info, header = header) message <- indent(message, 2) arg <- arg_as_string(cnd$arg) if (arg != "") { hint <- c(i = glue::glue("Use argument `{cnd$arg}` to specify repair strategy.")) message <- c(message, format_error_bullets(hint)) } message } make_names_loc_bullet <- function(x, loc) { if (length(loc) == 1) { glue::glue("{glue::double_quote(x)} at location {loc}.") } else { glue::glue("{glue::double_quote(x)} at locations {ensure_full_stop(enumerate(loc))}") } } enumerate <- function(x, max = 5L, allow_empty = FALSE) { n <- length(x) if (n == 0L && !allow_empty) { abort("Internal error: Enumeration can't be empty.") } if (n > max) { paste0(glue::glue_collapse(x[seq2(1, max)], ", "), ", etc.") } else { if (n == 2) { last <- " and " } else { last <- ", and " } glue::glue_collapse(x, ", ", last = last) } } ensure_full_stop <- function(x) { n <- nchar(x) if (substr(x, n, n) == ".") { x } else { paste0(x, ".") } } stop_native_implementation <- function(fn) { cli::cli_abort( c( "{.fn {fn}} is implemented at C level.", " " = "This R function is purely indicative and should never be called." ), .internal = TRUE ) } # Helpers ----------------------------------------------------------------- glue_lines <- function(..., env = parent.frame()) { out <- map_chr(chr(...), glue::glue, .envir = env) paste(out, collapse = "\n") } format_arg_label <- function(type, arg = "") { type <- paste0("<", type, ">") if (nzchar(arg)) { paste0("`", arg, "` ", type) } else { type } } arg_as_string <- function(arg) { if (is_null(arg)) { "" } else if (is_string(arg)) { arg } else { as_label(arg) } } append_arg <- function(x, arg) { if (is_null(arg)) { return(x) } arg <- arg_as_string(arg) if (nzchar(arg)) { glue::glue("{x} `{arg}`") } else { x } } vctrs/R/match.R0000644000176200001440000005227614525444107013055 0ustar liggesusers#' Locate observations matching specified conditions #' #' @description #' `r lifecycle::badge("experimental")` #' #' `vec_locate_matches()` is a more flexible version of [vec_match()] used to #' identify locations where each value of `needles` matches one or multiple #' values in `haystack`. Unlike `vec_match()`, `vec_locate_matches()` returns #' all matches by default, and can match on binary conditions other than #' equality, such as `>`, `>=`, `<`, and `<=`. #' #' @details #' [vec_match()] is identical to (but often slightly faster than): #' #' ``` #' vec_locate_matches( #' needles, #' haystack, #' condition = "==", #' multiple = "first", #' nan_distinct = TRUE #' ) #' ``` #' #' `vec_locate_matches()` is extremely similar to a SQL join between `needles` #' and `haystack`, with the default being most similar to a left join. #' #' Be very careful when specifying match `condition`s. If a condition is #' misspecified, it is very easy to accidentally generate an exponentially #' large number of matches. #' #' @section Dependencies of `vec_locate_matches()`: #' - [vec_order_radix()] #' - [vec_detect_complete()] #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' @inheritParams order-radix #' #' @param needles,haystack Vectors used for matching. #' #' - `needles` represents the vector to search for. #' #' - `haystack` represents the vector to search in. #' #' Prior to comparison, `needles` and `haystack` are coerced to the same type. #' #' @param condition Condition controlling how `needles` should be compared #' against `haystack` to identify a successful match. #' #' - One of: `"=="`, `">"`, `">="`, `"<"`, or `"<="`. #' #' - For data frames, a length `1` or `ncol(needles)` character vector #' containing only the above options, specifying how matching is determined #' for each column. #' #' @param filter Filter to be applied to the matched results. #' #' - `"none"` doesn't apply any filter. #' #' - `"min"` returns only the minimum haystack value matching the current #' needle. #' #' - `"max"` returns only the maximum haystack value matching the current #' needle. #' #' - For data frames, a length `1` or `ncol(needles)` character vector #' containing only the above options, specifying a filter to apply to #' each column. #' #' Filters don't have any effect on `"=="` conditions, but are useful for #' computing "rolling" matches with other conditions. #' #' A filter can return multiple haystack matches for a particular needle #' if the maximum or minimum haystack value is duplicated in `haystack`. These #' can be further controlled with `multiple`. #' #' @param incomplete Handling of missing and [incomplete][vec_detect_complete] #' values in `needles`. #' #' - `"compare"` uses `condition` to determine whether or not a missing value #' in `needles` matches a missing value in `haystack`. If `condition` is #' `==`, `>=`, or `<=`, then missing values will match. #' #' - `"match"` always allows missing values in `needles` to match missing #' values in `haystack`, regardless of the `condition`. #' #' - `"drop"` drops incomplete values in `needles` from the result. #' #' - `"error"` throws an error if any `needles` are incomplete. #' #' - If a single integer is provided, this represents the value returned #' in the `haystack` column for values of `needles` that are incomplete. If #' `no_match = NA`, setting `incomplete = NA` forces incomplete values in #' `needles` to be treated like unmatched values. #' #' `nan_distinct` determines whether a `NA` is allowed to match a `NaN`. #' #' @param no_match Handling of `needles` without a match. #' #' - `"drop"` drops `needles` with zero matches from the result. #' #' - `"error"` throws an error if any `needles` have zero matches. #' #' - If a single integer is provided, this represents the value returned in #' the `haystack` column for values of `needles` that have zero matches. The #' default represents an unmatched needle with `NA`. #' #' @param remaining Handling of `haystack` values that `needles` never matched. #' #' - `"drop"` drops remaining `haystack` values from the result. #' Typically, this is the desired behavior if you only care when `needles` #' has a match. #' #' - `"error"` throws an error if there are any remaining `haystack` #' values. #' #' - If a single integer is provided (often `NA`), this represents the value #' returned in the `needles` column for the remaining `haystack` values #' that `needles` never matched. Remaining `haystack` values are always #' returned at the end of the result. #' #' @param multiple Handling of `needles` with multiple matches. For each needle: #' #' - `"all"` returns all matches detected in `haystack`. #' #' - `"any"` returns any match detected in `haystack` with no guarantees on #' which match will be returned. It is often faster than `"first"` and #' `"last"` if you just need to detect if there is at least one match. #' #' - `"first"` returns the first match detected in `haystack`. #' #' - `"last"` returns the last match detected in `haystack`. #' #' @param relationship Handling of the expected relationship between #' `needles` and `haystack`. If the expectations chosen from the list below #' are invalidated, an error is thrown. #' #' - `"none"` doesn't perform any relationship checks. #' #' - `"one-to-one"` expects: #' - Each value in `needles` matches at most 1 value in `haystack`. #' - Each value in `haystack` matches at most 1 value in `needles`. #' #' - `"one-to-many"` expects: #' - Each value in `needles` matches any number of values in `haystack`. #' - Each value in `haystack` matches at most 1 value in `needles`. #' #' - `"many-to-one"` expects: #' - Each value in `needles` matches at most 1 value in `haystack`. #' - Each value in `haystack` matches any number of values in `needles`. #' #' - `"many-to-many"` expects: #' - Each value in `needles` matches any number of values in `haystack`. #' - Each value in `haystack` matches any number of values in `needles`. #' #' This performs no checks, and is identical to `"none"`, but is provided to #' allow you to be explicit about this relationship if you know it exists. #' #' - `"warn-many-to-many"` doesn't assume there is any known relationship, but #' will warn if `needles` and `haystack` have a many-to-many relationship #' (which is typically unexpected), encouraging you to either take a closer #' look at your inputs or make this relationship explicit by specifying #' `"many-to-many"`. #' #' `relationship` is applied after `filter` and `multiple` to allow potential #' multiple matches to be filtered out first. #' #' `relationship` doesn't handle cases where there are zero matches. For that, #' see `no_match` and `remaining`. #' #' @param needles_arg,haystack_arg Argument tags for `needles` and `haystack` #' used in error messages. #' #' @return A two column data frame containing the locations of the matches. #' #' - `needles` is an integer vector containing the location of #' the needle currently being matched. #' #' - `haystack` is an integer vector containing the location of the #' corresponding match in the haystack for the current needle. #' #' @export #' @examples #' x <- c(1, 2, NA, 3, NaN) #' y <- c(2, 1, 4, NA, 1, 2, NaN) #' #' # By default, for each value of `x`, all matching locations in `y` are #' # returned #' matches <- vec_locate_matches(x, y) #' matches #' #' # The result can be used to slice the inputs to align them #' data_frame( #' x = vec_slice(x, matches$needles), #' y = vec_slice(y, matches$haystack) #' ) #' #' # If multiple matches are present, control which is returned with `multiple` #' vec_locate_matches(x, y, multiple = "first") #' vec_locate_matches(x, y, multiple = "last") #' vec_locate_matches(x, y, multiple = "any") #' #' # Use `relationship` to add constraints and error on multiple matches if #' # they aren't expected #' try(vec_locate_matches(x, y, relationship = "one-to-one")) #' #' # In this case, the `NA` in `y` matches two rows in `x` #' try(vec_locate_matches(x, y, relationship = "one-to-many")) #' #' # By default, `NA` is treated as being identical to `NaN`. #' # Using `nan_distinct = TRUE` treats `NA` and `NaN` as different values, so #' # `NA` can only match `NA`, and `NaN` can only match `NaN`. #' vec_locate_matches(x, y, nan_distinct = TRUE) #' #' # If you never want missing values to match, set `incomplete = NA` to return #' # `NA` in the `haystack` column anytime there was an incomplete value #' # in `needles`. #' vec_locate_matches(x, y, incomplete = NA) #' #' # Using `incomplete = NA` allows us to enforce the one-to-many relationship #' # that we couldn't before #' vec_locate_matches(x, y, relationship = "one-to-many", incomplete = NA) #' #' # `no_match` allows you to specify the returned value for a needle with #' # zero matches. Note that this is different from an incomplete value, #' # so specifying `no_match` allows you to differentiate between incomplete #' # values and unmatched values. #' vec_locate_matches(x, y, incomplete = NA, no_match = 0L) #' #' # If you want to require that every `needle` has at least 1 match, set #' # `no_match` to `"error"`: #' try(vec_locate_matches(x, y, incomplete = NA, no_match = "error")) #' #' # By default, `vec_locate_matches()` detects equality between `needles` and #' # `haystack`. Using `condition`, you can detect where an inequality holds #' # true instead. For example, to find every location where `x[[i]] >= y`: #' matches <- vec_locate_matches(x, y, condition = ">=") #' #' data_frame( #' x = vec_slice(x, matches$needles), #' y = vec_slice(y, matches$haystack) #' ) #' #' # You can limit which matches are returned with a `filter`. For example, #' # with the above example you can filter the matches returned by `x[[i]] >= y` #' # down to only the ones containing the maximum `y` value of those matches. #' matches <- vec_locate_matches(x, y, condition = ">=", filter = "max") #' #' # Here, the matches for the `3` needle value have been filtered down to #' # only include the maximum haystack value of those matches, `2`. This is #' # often referred to as a rolling join. #' data_frame( #' x = vec_slice(x, matches$needles), #' y = vec_slice(y, matches$haystack) #' ) #' #' # In the very rare case that you need to generate locations for a #' # cross match, where every value of `x` is forced to match every #' # value of `y` regardless of what the actual values are, you can #' # replace `x` and `y` with integer vectors of the same size that contain #' # a single value and match on those instead. #' x_proxy <- vec_rep(1L, vec_size(x)) #' y_proxy <- vec_rep(1L, vec_size(y)) #' nrow(vec_locate_matches(x_proxy, y_proxy)) #' vec_size(x) * vec_size(y) #' #' # By default, missing values will match other missing values when using #' # `==`, `>=`, or `<=` conditions, but not when using `>` or `<` conditions. #' # This is similar to how `vec_compare(x, y, na_equal = TRUE)` works. #' x <- c(1, NA) #' y <- c(NA, 2) #' #' vec_locate_matches(x, y, condition = "<=") #' vec_locate_matches(x, y, condition = "<") #' #' # You can force missing values to match regardless of the `condition` #' # by using `incomplete = "match"` #' vec_locate_matches(x, y, condition = "<", incomplete = "match") #' #' # You can also use data frames for `needles` and `haystack`. The #' # `condition` will be recycled to the number of columns in `needles`, or #' # you can specify varying conditions per column. In this example, we take #' # a vector of date `values` and find all locations where each value is #' # between lower and upper bounds specified by the `haystack`. #' values <- as.Date("2019-01-01") + 0:9 #' needles <- data_frame(lower = values, upper = values) #' #' set.seed(123) #' lower <- as.Date("2019-01-01") + sample(10, 10, replace = TRUE) #' upper <- lower + sample(3, 10, replace = TRUE) #' haystack <- data_frame(lower = lower, upper = upper) #' #' # (values >= lower) & (values <= upper) #' matches <- vec_locate_matches(needles, haystack, condition = c(">=", "<=")) #' #' data_frame( #' lower = vec_slice(lower, matches$haystack), #' value = vec_slice(values, matches$needle), #' upper = vec_slice(upper, matches$haystack) #' ) vec_locate_matches <- function(needles, haystack, ..., condition = "==", filter = "none", incomplete = "compare", no_match = NA_integer_, remaining = "drop", multiple = "all", relationship = "none", nan_distinct = FALSE, chr_proxy_collate = NULL, needles_arg = "needles", haystack_arg = "haystack", error_call = current_env()) { check_dots_empty0(...) frame <- environment() .Call( ffi_locate_matches, needles, haystack, condition, filter, incomplete, no_match, remaining, multiple, relationship, nan_distinct, chr_proxy_collate, needles_arg, haystack_arg, frame ) } # ------------------------------------------------------------------------------ #' Internal FAQ - Implementation of `vec_locate_matches()` #' #' ```{r, child = "man/faq/internal/matches-algorithm.Rmd"} #' ``` #' #' @name internal-faq-matches-algorithm NULL # ------------------------------------------------------------------------------ # Helper used for testing and in the internal FAQ. # It needs to live in R/ to be usable by the FAQ Rmd. compute_nesting_container_info <- function(x, condition) { .Call(ffi_compute_nesting_container_info, x, condition) } # ------------------------------------------------------------------------------ stop_matches <- function(message = NULL, class = NULL, ..., call = caller_env()) { stop_vctrs( message = message, class = c(class, "vctrs_error_matches"), ..., call = call ) } warn_matches <- function(message, class = NULL, ..., call = caller_env()) { warn_vctrs( message = message, class = c(class, "vctrs_warning_matches"), ..., call = call ) } # ------------------------------------------------------------------------------ stop_matches_overflow <- function(size, call) { size <- format(size, scientific = FALSE) # Pre-generating the message in this case because we want to use # `.internal = TRUE` and that doesn't work with lazy messages message <- c( "Match procedure results in an allocation larger than 2^31-1 elements.", i = glue::glue("Attempted allocation size was {size}.") ) stop_matches( message = message, class = "vctrs_error_matches_overflow", size = size, call = call, .internal = TRUE ) } # ------------------------------------------------------------------------------ stop_matches_nothing <- function(i, needles_arg, haystack_arg, call) { stop_matches( class = "vctrs_error_matches_nothing", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_nothing <- function(cnd, ...) { glue::glue("Each value of `{cnd$needles_arg}` must have a match in `{cnd$haystack_arg}`.") } #' @export cnd_body.vctrs_error_matches_nothing <- function(cnd, ...) { bullet <- glue::glue("Location {cnd$i} of `{cnd$needles_arg}` does not have a match.") bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ stop_matches_remaining <- function(i, needles_arg, haystack_arg, call) { stop_matches( class = "vctrs_error_matches_remaining", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_remaining <- function(cnd, ...) { glue::glue("Each value of `{cnd$haystack_arg}` must be matched by `{cnd$needles_arg}`.") } #' @export cnd_body.vctrs_error_matches_remaining <- function(cnd, ...) { bullet <- glue::glue("Location {cnd$i} of `{cnd$haystack_arg}` was not matched.") bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ stop_matches_incomplete <- function(i, needles_arg, call) { stop_matches( class = "vctrs_error_matches_incomplete", i = i, needles_arg = needles_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_incomplete <- function(cnd, ...) { glue::glue("`{cnd$needles_arg}` can't contain missing values.") } #' @export cnd_body.vctrs_error_matches_incomplete <- function(cnd, ...) { bullet <- glue::glue("Location {cnd$i} contains missing values.") bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ stop_matches_multiple <- function(i, needles_arg, haystack_arg, call) { stop_matches( class = "vctrs_error_matches_multiple", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_multiple <- function(cnd, ...) { cnd_matches_multiple_header(cnd$needles_arg, cnd$haystack_arg) } #' @export cnd_body.vctrs_error_matches_multiple <- function(cnd, ...) { cnd_matches_multiple_body(cnd$i, cnd$needles_arg) } # ------------------------------------------------------------------------------ warn_matches_multiple <- function(i, needles_arg, haystack_arg, call) { message <- paste( cnd_matches_multiple_header(needles_arg, haystack_arg), cnd_matches_multiple_body(i, needles_arg), sep = "\n" ) warn_matches( message = message, class = "vctrs_warning_matches_multiple", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } # ------------------------------------------------------------------------------ stop_matches_relationship_one_to_one <- function(i, which, needles_arg, haystack_arg, call) { stop_matches_relationship( class = "vctrs_error_matches_relationship_one_to_one", i = i, which = which, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_relationship_one_to_one <- function(cnd, ...) { if (cnd$which == "needles") { cnd_matches_multiple_header(cnd$needles_arg, cnd$haystack_arg) } else { cnd_matches_multiple_header(cnd$haystack_arg, cnd$needles_arg) } } #' @export cnd_body.vctrs_error_matches_relationship_one_to_one <- function(cnd, ...) { if (cnd$which == "needles") { cnd_matches_multiple_body(cnd$i, cnd$needles_arg) } else { cnd_matches_multiple_body(cnd$i, cnd$haystack_arg) } } stop_matches_relationship_one_to_many <- function(i, needles_arg, haystack_arg, call) { stop_matches_relationship( class = "vctrs_error_matches_relationship_one_to_many", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_relationship_one_to_many <- function(cnd, ...) { cnd_matches_multiple_header(cnd$haystack_arg, cnd$needles_arg) } #' @export cnd_body.vctrs_error_matches_relationship_one_to_many <- function(cnd, ...) { cnd_matches_multiple_body(cnd$i, cnd$haystack_arg) } stop_matches_relationship_many_to_one <- function(i, needles_arg, haystack_arg, call) { stop_matches_relationship( class = "vctrs_error_matches_relationship_many_to_one", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_relationship_many_to_one <- function(cnd, ...) { cnd_matches_multiple_header(cnd$needles_arg, cnd$haystack_arg) } #' @export cnd_body.vctrs_error_matches_relationship_many_to_one <- function(cnd, ...) { cnd_matches_multiple_body(cnd$i, cnd$needles_arg) } stop_matches_relationship <- function(class = NULL, ..., call = caller_env()) { stop_matches( class = c(class, "vctrs_error_matches_relationship"), ..., call = call ) } cnd_matches_multiple_header <- function(x_arg, y_arg) { glue::glue("Each value of `{x_arg}` can match at most 1 value from `{y_arg}`.") } cnd_matches_multiple_body <- function(i, name) { bullet <- glue::glue("Location {i} of `{name}` matches multiple values.") bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ warn_matches_relationship_many_to_many <- function(i, j, needles_arg, haystack_arg, call) { message <- paste( glue::glue("Detected an unexpected many-to-many relationship between `{needles_arg}` and `{haystack_arg}`."), cnd_matches_multiple_body(i, needles_arg), cnd_matches_multiple_body(j, haystack_arg), sep = "\n" ) warn_matches_relationship( message = message, class = "vctrs_warning_matches_relationship_many_to_many", i = i, j = j, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } warn_matches_relationship <- function(message, class = NULL, ..., call = caller_env()) { warn_matches( message = message, class = c(class, "vctrs_warning_matches_relationship"), ..., call = call ) } vctrs/R/proxy.R0000644000176200001440000001701214376223321013124 0ustar liggesusers#' Proxy and restore #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' `vec_proxy()` returns the data structure containing the values of a #' vector. This data structure is usually the vector itself. In this #' case the proxy is the [identity function][base::identity], which is #' the default `vec_proxy()` method. #' #' Only experts should implement special `vec_proxy()` methods, for #' these cases: #' #' - A vector has vectorised attributes, i.e. metadata for #' each element of the vector. These _record types_ are implemented #' in vctrs by returning a data frame in the proxy method. If you're #' starting your class from scratch, consider deriving from the #' [`rcrd`][new_rcrd] class. It implements the appropriate data #' frame proxy and is generally the preferred way to create a record #' class. #' #' - When you're implementing a vector on top of a non-vector type, #' like an environment or an S4 object. This is currently only #' partially supported. #' #' - S3 lists are considered scalars by default. This is the safe #' choice for list objects such as returned by `stats::lm()`. To #' declare that your S3 list class is a vector, you normally add #' `"list"` to the right of your class vector. Explicit inheritance #' from list is generally the preferred way to declare an S3 list in #' R, for instance it makes it possible to dispatch on #' `generic.list` S3 methods. #' #' If you can't modify your class vector, you can implement an #' identity proxy (i.e. a proxy method that just returns its input) #' to let vctrs know this is a vector list and not a scalar. #' #' `vec_restore()` is the inverse operation of `vec_proxy()`. It #' should only be called on vector proxies. #' #' - It undoes the transformations of `vec_proxy()`. #' #' - It restores attributes and classes. These may be lost when the #' memory values are manipulated. For example slicing a subset of a #' vector's proxy causes a new proxy to be allocated. #' #' By default vctrs restores all attributes and classes #' automatically. You only need to implement a `vec_restore()` method #' if your class has attributes that depend on the data. #' #' @param x A vector. #' @inheritParams rlang::args_dots_empty #' #' @section Proxying: #' #' You should only implement `vec_proxy()` when your type is designed #' around a non-vector class. I.e. anything that is not either: #' #' * An atomic vector #' * A bare list #' * A data frame #' #' In this case, implement `vec_proxy()` to return such a vector #' class. The vctrs operations such as [vec_slice()] are applied on #' the proxy and `vec_restore()` is called to restore the original #' representation of your type. #' #' The most common case where you need to implement `vec_proxy()` is #' for S3 lists. In vctrs, S3 lists are treated as scalars by #' default. This way we don't treat objects like model fits as #' vectors. To prevent vctrs from treating your S3 list as a scalar, #' unclass it in the `vec_proxy()` method. For instance, here is the #' definition for `list_of`: #' #' ``` #' vec_proxy.vctrs_list_of <- function(x) { #' unclass(x) #' } #' ``` #' #' Another case where you need to implement a proxy is [record #' types][new_rcrd]. Record types should return a data frame, as in #' the `POSIXlt` method: #' #' ``` #' vec_proxy.POSIXlt <- function(x) { #' new_data_frame(unclass(x)) #' } #' ``` #' #' Note that you don't need to implement `vec_proxy()` when your class #' inherits from `vctrs_vctr` or `vctrs_rcrd`. #' #' #' @section Restoring: #' #' A restore is a specialised type of cast, primarily used in #' conjunction with `NextMethod()` or a C-level function that works on #' the underlying data structure. A `vec_restore()` method can make #' the following assumptions about `x`: #' #' * It has the correct type. #' * It has the correct names. #' * It has the correct `dim` and `dimnames` attributes. #' * It is unclassed. This way you can call vctrs generics with `x` #' without triggering an infinite loop of restoration. #' #' The length may be different (for example after [vec_slice()] has #' been called), and all other attributes may have been lost. The #' method should restore all attributes so that after restoration, #' `vec_restore(vec_data(x), x)` yields `x`. #' #' To understand the difference between `vec_cast()` and `vec_restore()` #' think about factors: it doesn't make sense to cast an integer to a factor, #' but if `NextMethod()` or another low-level function has stripped attributes, #' you still need to be able to restore them. #' #' The default method copies across all attributes so you only need to #' provide your own method if your attributes require special care #' (i.e. they are dependent on the data in some way). When implementing #' your own method, bear in mind that many R users add attributes to track #' additional metadata that is important to them, so you should preserve any #' attributes that don't require special handling for your class. #' #' @section Dependencies: #' - `x` must be a vector in the vctrs sense (see [vec_is()]) #' - By default the underlying data is returned as is (identity proxy) #' #' All vector classes have a proxy, even those who don't implement any #' vctrs methods. The exception is S3 lists that don't inherit from #' `"list"` explicitly. These might have to implement an identity #' proxy for compatibility with vctrs (see discussion above). #' #' @keywords internal #' @export vec_proxy <- function(x, ...) { check_dots_empty0(...) return(.Call(ffi_vec_proxy, x)) UseMethod("vec_proxy") } #' @export vec_proxy.default <- function(x, ...) { x } #' @rdname vec_proxy #' @param to The original vector to restore to. #' @export vec_restore <- function(x, to, ...) { check_dots_empty0(...) return(.Call(ffi_vec_restore, x, to)) UseMethod("vec_restore", to) } vec_restore_dispatch <- function(x, to, ...) { UseMethod("vec_restore", to) } #' @export vec_restore.default <- function(x, to, ...) { .Call(ffi_vec_restore_default, x, to) } vec_restore_default <- function(x, to, ...) { .Call(ffi_vec_restore_default, x, to) } vec_proxy_recurse <- function(x, ...) { .Call(ffi_vec_proxy_recurse, x) } vec_restore_recurse <- function(x, to, ...) { .Call(ffi_vec_restore_recurse, x, to) } #' Extract underlying data #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' Extract the data underlying an S3 vector object, i.e. the underlying #' (named) atomic vector, data frame, or list. #' #' @param x A vector or object implementing `vec_proxy()`. #' @return The data underlying `x`, free from any attributes except the names. #' #' @section Difference with `vec_proxy()`: #' #' * `vec_data()` returns unstructured data. The only attributes #' preserved are names, dims, and dimnames. #' #' Currently, due to the underlying memory architecture of R, this #' creates a full copy of the data for atomic vectors. #' #' * `vec_proxy()` may return structured data. This generic is the #' main customisation point for accessing memory values in vctrs, #' along with [vec_restore()]. #' #' Methods must return a vector type. Records and data frames will #' be processed rowwise. #' #' @keywords internal #' @export vec_data <- function(x) { obj_check_vector(x) x <- vec_proxy(x) if (is.data.frame(x)) { return(new_data_frame(x, row.names = .row_names_info(x, 0L))) } if (has_dim(x)) { x <- vec_set_attributes(x, list(dim = dim(x), dimnames = dimnames(x))) } else { x <- vec_set_attributes(x, list(names = names(x))) } # Reset S4 bit in vector-like S4 objects unset_s4(x) } unset_s4 <- function(x) { .Call(ffi_unset_s4, x) } vctrs/R/group.R0000644000176200001440000000600314376223321013075 0ustar liggesusers#' Identify groups #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' * `vec_group_id()` returns an identifier for the group that each element of #' `x` falls in, constructed in the order that they appear. The number of #' groups is also returned as an attribute, `n`. #' #' * `vec_group_loc()` returns a data frame containing a `key` column with the #' unique groups, and a `loc` column with the locations of each group in `x`. #' #' * `vec_group_rle()` locates groups in `x` and returns them run length #' encoded in the order that they appear. The return value is a rcrd object #' with fields for the `group` identifiers and the run `length` of the #' corresponding group. The number of groups is also returned as an #' attribute, `n`. #' #' @param x A vector #' @return #' * `vec_group_id()`: An integer vector with the same size as `x`. #' * `vec_group_loc()`: A two column data frame with size equal to #' `vec_size(vec_unique(x))`. #' * A `key` column of type `vec_ptype(x)` #' * A `loc` column of type list, with elements of type integer. #' * `vec_group_rle()`: A `vctrs_group_rle` rcrd object with two integer #' vector fields: `group` and `length`. #' #' Note that when using `vec_group_loc()` for complex types, the default #' `data.frame` print method will be suboptimal, and you will want to coerce #' into a tibble to better understand the output. #' @name vec_group #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @keywords internal #' @examples #' purrr <- c("p", "u", "r", "r", "r") #' vec_group_id(purrr) #' vec_group_rle(purrr) #' #' groups <- mtcars[c("vs", "am")] #' vec_group_id(groups) #' #' group_rle <- vec_group_rle(groups) #' group_rle #' #' # Access fields with `field()` #' field(group_rle, "group") #' field(group_rle, "length") #' #' # `vec_group_id()` is equivalent to #' vec_match(groups, vec_unique(groups)) #' #' vec_group_loc(mtcars$vs) #' vec_group_loc(mtcars[c("vs", "am")]) #' #' if (require("tibble")) { #' as_tibble(vec_group_loc(mtcars[c("vs", "am")])) #' } NULL #' @rdname vec_group #' @export vec_group_id <- function(x) { .Call(vctrs_group_id, x) } #' @rdname vec_group #' @export vec_group_loc <- function(x) { .Call(vctrs_group_loc, x) } #' @rdname vec_group #' @export vec_group_rle <- function(x) { .Call(vctrs_group_rle, x) } #' @export format.vctrs_group_rle <- function(x, ...) { group <- field(x, "group") length <- field(x, "length") paste0(group, "x", length) } #' @export obj_print_header.vctrs_group_rle <- function(x, ...) { size <- vec_size(x) n <- attr(x, "n") cat_line("<", vec_ptype_full(x), "[", size, "][n = ", n, "]>") invisible(x) } # For testing new_group_rle <- function(group, length, n) { stopifnot(is_integer(group)) stopifnot(is_integer(length)) stopifnot(is_integer(n)) vec_check_size(n, size = 1L) if (vec_size(group) != vec_size(length)) { abort("`group` and `length` must have the same size.") } new_rcrd(list(group = group, length = length), n = n, class = "vctrs_group_rle") } vctrs/R/type-vctr.R0000644000176200001440000004340514401377400013702 0ustar liggesusers#' vctr (vector) S3 class #' #' @description #' This abstract class provides a set of useful default methods that makes it #' considerably easier to get started with a new S3 vector class. See #' `vignette("s3-vector")` to learn how to use it to create your own S3 #' vector classes. #' #' @details #' List vctrs are special cases. When created through `new_vctr()`, the #' resulting list vctr should always be recognized as a list by #' `obj_is_list()`. Because of this, if `inherit_base_type` is `FALSE` #' an error is thrown. #' #' @section Base methods: #' The vctr class provides methods for many base generics using a smaller #' set of generics defined by this package. Generally, you should think #' carefully before overriding any of the methods that vctrs implements for #' you as they've been carefully planned to be internally consistent. #' #' * `[[` and `[` use `NextMethod()` dispatch to the underlying base function, #' then restore attributes with `vec_restore()`. #' `rep()` and `length<-` work similarly. #' #' * `[[<-` and `[<-` cast `value` to same type as `x`, then call #' `NextMethod()`. #' #' * `as.logical()`, `as.integer()`, `as.numeric()`, `as.character()`, #' `as.Date()` and `as.POSIXct()` methods call `vec_cast()`. #' The `as.list()` method calls `[[` repeatedly, and the `as.data.frame()` #' method uses a standard technique to wrap a vector in a data frame. #' #' * `as.factor()`, `as.ordered()` and `as.difftime()` are not generic functions #' in base R, but have been reimplemented as generics in the `generics` #' package. `vctrs` extends these and calls `vec_cast()`. To inherit this #' behaviour in a package, import and re-export the generic of interest #' from `generics`. #' #' * `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()` use #' [vec_proxy()]. #' #' * `<`, `<=`, `>=`, `>`, `min()`, `max()`, `range()`, `median()`, #' `quantile()`, and `xtfrm()` methods use [vec_proxy_compare()]. #' #' * `+`, `-`, `/`, `*`, `^`, `%%`, `%/%`, `!`, `&`, and `|` operators #' use [vec_arith()]. #' #' * Mathematical operations including the Summary group generics (`prod()`, #' `sum()`, `any()`, `all()`), the Math group generics (`abs()`, `sign()`, #' etc), `mean()`, `is.nan()`, `is.finite()`, and `is.infinite()` #' use [vec_math()]. #' #' * `dims()`, `dims<-`, `dimnames()`, `dimnames<-`, `levels()`, and #' `levels<-` methods throw errors. #' #' @param .data Foundation of class. Must be a vector #' @param ... Name-value pairs defining attributes #' @param class Name of subclass. #' @param inherit_base_type `r lifecycle::badge("experimental")` #' A single logical, or `NULL`. Does this class extend the base type of #' `.data`? i.e. does the resulting object extend the behaviour of the #' underlying type? Defaults to `FALSE` for all types except lists, which #' are required to inherit from the base type. #' @export #' @keywords internal #' @aliases vctr new_vctr <- function(.data, ..., class = character(), inherit_base_type = NULL) { if (!is_vector(.data)) { abort("`.data` must be a vector type.") } if (is_list(.data)) { if (is.data.frame(.data)) { abort("`.data` can't be a data frame.") } if (is.null(inherit_base_type)) { inherit_base_type <- TRUE } else if (is_false(inherit_base_type)) { abort("List `.data` must inherit from the base type.") } } # Default to `FALSE` in all cases except lists if (is.null(inherit_base_type)) { inherit_base_type <- FALSE } names <- names(.data) names <- names_repair_missing(names) class <- c(class, "vctrs_vctr", if (inherit_base_type) typeof(.data)) attrib <- list(names = names, ..., class = class) vec_set_attributes(.data, attrib) } names_repair_missing <- function(x) { if (is.null(x)) { return(x) } if (vec_any_missing(x)) { # We never want to allow `NA_character_` names to slip through, but # erroring on them has caused issues. Instead, we repair them to the # empty string (#784). missing <- vec_detect_missing(x) x <- vec_assign(x, missing, "") } x } #' @export vec_proxy.vctrs_vctr <- function(x, ...) { if (is_list(x)) { unclass(x) } else { x } } #' @export vec_restore.vctrs_vctr <- function(x, to, ..., i = NULL) { if (typeof(x) != typeof(to)) { stop_incompatible_cast(x, to, x_arg = "", to_arg = "") } NextMethod() } #' @method vec_cast vctrs_vctr #' @export vec_cast.vctrs_vctr <- function(x, to, ...) { UseMethod("vec_cast.vctrs_vctr") } vctr_cast <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { # These are not strictly necessary, but make bootstrapping a new class # a bit simpler if (is.object(x)) { if (is_same_type(x, to)) { x } else { stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, call = call ) } } else { # FIXME: `vec_restore()` should only be called on proxies vec_restore(x, to) } } #' @export c.vctrs_vctr <- function(..., recursive = FALSE, use.names = TRUE) { if (!is_false(recursive)) { abort("`recursive` must be `FALSE` when concatenating vctrs classes.") } if (!is_true(use.names)) { abort("`use.names` must be `TRUE` when concatenating vctrs classes.") } vec_c(...) } # Printing ---------------------------------------------------------------- #' @export print.vctrs_vctr <- function(x, ...) { obj_print(x, ...) invisible(x) } #' @export str.vctrs_vctr <- function(object, ...) { obj_str(object, ...) } #' @export format.vctrs_vctr <- function(x, ...) { format(vec_data(x), ...) } # Subsetting -------------------------------------------------------------- #' @export `[.vctrs_vctr` <- function(x, i, ...) { vec_index(x, i, ...) } #' @export `[[.vctrs_vctr` <- function(x, i, ...) { if (is.list(x)) { NextMethod() } else { vec_restore(NextMethod(), x) } } #' @export `$.vctrs_vctr` <- function(x, i) { if (is.list(x)) { NextMethod() } else { vec_restore(NextMethod(), x) } } #' @export rep.vctrs_vctr <- function(x, ...) { vec_restore(NextMethod(), x) } #' @export `length<-.vctrs_vctr` <- function(x, value) { vec_restore(NextMethod(), x) } #' @export diff.vctrs_vctr <- function(x, lag = 1L, differences = 1L, ...) { stopifnot(length(lag) == 1L, lag >= 1L) stopifnot(length(differences) == 1L, differences >= 1L) n <- vec_size(x) if (lag * differences >= n) return(vec_slice(x, 0L)) out <- x for (i in seq_len(differences)) { n <- vec_size(out) lhs <- (1L + lag):n rhs <- 1L:(n - lag) out <- vec_slice(out, lhs) - vec_slice(out, rhs) } out } # Modification ------------------------------------------------------------- #' @export `[[<-.vctrs_vctr` <- function(x, ..., value) { if (!is.list(x)) { value <- vec_cast(value, x) } NextMethod() } #' @export `$<-.vctrs_vctr` <- function(x, i, value) { if (is.list(x)) { NextMethod() } else { # Default behaviour is to cast LHS to a list abort("$ operator is invalid for atomic vectors.") } } #' @export `[<-.vctrs_vctr` <- function(x, i, value) { value <- vec_cast(value, x) NextMethod() } #' @export `names<-.vctrs_vctr` <- function(x, value) { if (length(value) != 0 && length(value) != length(x)) { abort("`names()` must be the same length as x.") } value <- names_repair_missing(value) NextMethod() } # Coercion ---------------------------------------------------------------- #' @export as.logical.vctrs_vctr <- function(x, ...) { vec_cast(x, logical()) } #' @export as.integer.vctrs_vctr <- function(x, ...) { vec_cast(x, integer()) } #' @export as.double.vctrs_vctr <- function(x, ...) { vec_cast(x, double()) } #' @export as.character.vctrs_vctr <- function(x, ...) { vec_cast(x, character()) } #' @export as.list.vctrs_vctr <- function(x, ...) { out <- vec_chop(x) if (obj_is_list(x)) { out <- lapply(out, `[[`, 1) } out } #' @export as.Date.vctrs_vctr <- function(x, ...) { vec_cast(x, new_date()) } #' @export as.POSIXct.vctrs_vctr <- function(x, tz = "", ...) { vec_cast(x, new_datetime(tzone = tz)) } #' @export as.POSIXlt.vctrs_vctr <- function(x, tz = "", ...) { to <- as.POSIXlt(new_datetime(), tz = tz) vec_cast(x, to) } # Work around inconsistencies in as.data.frame() as.data.frame2 <- function(x) { # Unclass to avoid dispatching on `as.data.frame()` methods that break size # invariants, like `as.data.frame.table()` (#913). This also prevents infinite # recursion with shaped vctrs in `as.data.frame.vctrs_vctr()`. x <- unclass(x) out <- as.data.frame(x) if (vec_dim_n(x) == 1) { # 1D arrays are not stripped from their dimensions out[[1]] <- as.vector(out[[1]]) # 1D arrays are auto-labelled with substitute() names(out) <- "V1" } out } #' @export as.data.frame.vctrs_vctr <- function(x, row.names = NULL, optional = FALSE, ..., nm = paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ")) { force(nm) if (has_dim(x)) { return(as.data.frame2(x)) } cols <- list(x) if (!optional) { names(cols) <- nm } new_data_frame(cols, n = vec_size(x)) } # Dynamically registered in .onLoad() as.factor.vctrs_vctr <- function(x, levels = character(), ...) { vec_cast(x, new_factor(levels = levels)) } # Dynamically registered in .onLoad() as.ordered.vctrs_vctr <- function(x, levels = character(), ...) { vec_cast(x, new_ordered(levels = levels)) } # Dynamically registered in .onLoad() as.difftime.vctrs_vctr <- function(x, units = "secs", ...) { vec_cast(x, new_duration(units = units)) } # Equality ---------------------------------------------------------------- #' @export `==.vctrs_vctr` <- function(e1, e2) { vec_equal(e1, e2) } #' @export `!=.vctrs_vctr` <- function(e1, e2) { !vec_equal(e1, e2) } #' @export is.na.vctrs_vctr <- function(x) { vec_detect_missing(x) } #' @importFrom stats na.fail #' @export na.fail.vctrs_vctr <- function(object, ...) { if (vec_any_missing(object)) { # Return the same error as `na.fail.default()` abort("missing values in object") } object } #' @importFrom stats na.omit #' @export na.omit.vctrs_vctr <- function(object, ...) { na_remove(object, "omit") } #' @importFrom stats na.exclude #' @export na.exclude.vctrs_vctr <- function(object, ...) { na_remove(object, "exclude") } na_remove <- function(x, type) { # The only difference between `na.omit()` and `na.exclude()` is the class # of the `na.action` attribute if (!vec_any_missing(x)) { return(x) } # `na.omit/exclude()` attach the locations of the omitted values to the result missing <- vec_detect_missing(x) loc <- which(missing) names <- vec_names(x) if (!is_null(names)) { # `na.omit/exclude()` retain the original names, if applicable names <- vec_slice(names, loc) loc <- vec_set_names(loc, names) } attr(loc, "class") <- type out <- vec_slice(x, !missing) attr(out, "na.action") <- loc out } #' @export anyNA.vctrs_vctr <- function(x, recursive = FALSE) { if (recursive && obj_is_list(x)) { any(map_lgl(x, anyNA, recursive = recursive)) } else { any(is.na(x)) } } #' @export unique.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_unique(x) } #' @export duplicated.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_duplicate_id(x) != seq_along(x) } #' @export anyDuplicated.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_duplicate_any(x) } # Comparison ---------------------------------------------------------------- #' @export `<=.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) <= 0 } #' @export `<.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) < 0 } #' @export `>=.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) >= 0 } #' @export `>.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) > 0 } #' @export xtfrm.vctrs_vctr <- function(x) { proxy <- vec_proxy_order(x) type <- typeof(proxy) if (type == "logical") { proxy <- unstructure(proxy) proxy <- as.integer(proxy) return(proxy) } if (type %in% c("integer", "double")) { proxy <- unstructure(proxy) return(proxy) } vec_rank(proxy, ties = "dense", incomplete = "na") } #' @importFrom stats median #' @export median.vctrs_vctr <- function(x, ..., na.rm = FALSE) { # nocov start stop_unimplemented(x, "median") # nocov end } #' @importFrom stats quantile #' @export quantile.vctrs_vctr <- function(x, ..., type = 1, na.rm = FALSE) { # nocov start stop_unimplemented(x, "quantile") # nocov end } vec_cast_or_na <- function(x, to, ...) { tryCatch( vctrs_error_incompatible_type = function(...) vec_init(to, length(x)), vec_cast(x, to) ) } #' @export min.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast_or_na(Inf, x)) } # TODO: implement to do vec_arg_min() rank <- xtfrm(x) if (isTRUE(na.rm)) { idx <- which.min(rank) if (vec_is_empty(idx)) { return(vec_cast_or_na(Inf, x)) } } else { idx <- which(vec_equal(rank, min(rank), na_equal = TRUE)) } x[[idx[[1]]]] } #' @export max.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast_or_na(-Inf, x)) } # TODO: implement to do vec_arg_max() rank <- xtfrm(x) if (isTRUE(na.rm)) { idx <- which.max(rank) if (vec_is_empty(idx)) { return(vec_cast_or_na(-Inf, x)) } } else { idx <- which(vec_equal(rank, max(rank), na_equal = TRUE)) } x[[idx[[1]]]] } #' @export range.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast_or_na(c(Inf, -Inf), x)) } # Inline `min()` / `max()` to only call `xtfrm()` once rank <- xtfrm(x) if (isTRUE(na.rm)) { idx_min <- which.min(rank) idx_max <- which.max(rank) if (vec_is_empty(idx_min) && vec_is_empty(idx_max)) { return(vec_cast_or_na(c(Inf, -Inf), x)) } } else { idx_min <- which(vec_equal(rank, min(rank), na_equal = TRUE)) idx_max <- which(vec_equal(rank, max(rank), na_equal = TRUE)) } c(x[[idx_min[[1]]]], x[[idx_max[[1]]]]) } # Numeric ----------------------------------------------------------------- #' @export Math.vctrs_vctr <- function(x, ...) { vec_math(.Generic, x, ...) } #' @export Summary.vctrs_vctr <- function(..., na.rm = FALSE) { vec_math(.Generic, vec_c(...), na.rm = na.rm) } #' @export mean.vctrs_vctr <- function(x, ..., na.rm = FALSE) { vec_math("mean", x, na.rm = na.rm) } #' @export is.finite.vctrs_vctr <- function(x) { vec_math("is.finite", x) } #' @export is.infinite.vctrs_vctr <- function(x) { vec_math("is.infinite", x) } #' @export is.nan.vctrs_vctr <- function(x) { vec_math("is.nan", x) } # Arithmetic -------------------------------------------------------------- #' @export `+.vctrs_vctr` <- function(e1, e2) { if (missing(e2)) { vec_arith("+", e1, MISSING()) } else { vec_arith("+", e1, e2) } } #' @export `-.vctrs_vctr` <- function(e1, e2) { if (missing(e2)) { vec_arith("-", e1, MISSING()) } else { vec_arith("-", e1, e2) } } #' @export `*.vctrs_vctr` <- function(e1, e2) { vec_arith("*", e1, e2) } #' @export `/.vctrs_vctr` <- function(e1, e2) { vec_arith("/", e1, e2) } #' @export `^.vctrs_vctr` <- function(e1, e2) { vec_arith("^", e1, e2) } #' @export `%%.vctrs_vctr` <- function(e1, e2) { vec_arith("%%", e1, e2) } #' @export `%/%.vctrs_vctr` <- function(e1, e2) { vec_arith("%/%", e1, e2) } #' @export `!.vctrs_vctr` <- function(x) { vec_arith("!", x, MISSING()) } #' @export `&.vctrs_vctr` <- function(e1, e2) { vec_arith("&", e1, e2) } #' @export `|.vctrs_vctr` <- function(e1, e2) { vec_arith("|", e1, e2) } # Unimplemented ------------------------------------------------------------ #' @export summary.vctrs_vctr <- function(object, ...) { # nocov start stop_unimplemented(object, "summary") # nocov end } # Unsupported -------------------------------------------------------------- #' @export `dim<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "dim<-") } #' @export `dimnames<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "dimnames<-") } #' @export levels.vctrs_vctr <- function(x) { NULL } #' @export `levels<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "levels<-") } #' @export `t.vctrs_vctr` <- function(x) { stop_unsupported(x, "t") } #' @export `is.na<-.vctrs_vctr` <- function(x, value) { vec_assign(x, value, vec_init(x)) } # Helpers ----------------------------------------------------------------- # This simple class is used for testing as defining methods inside # a test does not work (because the lexical scope is lost) # nocov start new_hidden <- function(x = double()) { stopifnot(is.numeric(x)) new_vctr(vec_cast(x, double()), class = "hidden", inherit_base_type = FALSE) } format.hidden <- function(x, ...) rep("xxx", length(x)) local_hidden <- function(frame = caller_env()) { local_bindings(.env = global_env(), .frame = frame, vec_ptype2.hidden.hidden = function(x, y, ...) new_hidden(), vec_ptype2.hidden.double = function(x, y, ...) new_hidden(), vec_ptype2.double.hidden = function(x, y, ...) new_hidden(), vec_ptype2.hidden.logical = function(x, y, ...) new_hidden(), vec_ptype2.logical.hidden = function(x, y, ...) new_hidden(), vec_cast.hidden.hidden = function(x, to, ...) x, vec_cast.hidden.double = function(x, to, ...) new_hidden(vec_data(x)), vec_cast.double.hidden = function(x, to, ...) vec_data(x), vec_cast.hidden.logical = function(x, to, ...) new_hidden(as.double(x)), vec_cast.logical.hidden = function(x, to, ...) as.logical(vec_data(x)) ) } # nocov end vctrs/R/type-rcrd.R0000644000176200001440000000700614401377400013653 0ustar liggesusers# Constructor and basic methods --------------------------------------------- #' rcrd (record) S3 class #' #' The rcrd class extends [vctr]. A rcrd is composed of 1 or more [field]s, #' which must be vectors of the same length. Is designed specifically for #' classes that can naturally be decomposed into multiple vectors of the same #' length, like [POSIXlt], but where the organisation should be considered #' an implementation detail invisible to the user (unlike a [data.frame]). #' #' @param fields A list or a data frame. Lists must be rectangular #' (same sizes), and contain uniquely named vectors (at least #' one). `fields` is validated with [df_list()] to ensure uniquely #' named vectors. #' @param ... Additional attributes #' @param class Name of subclass. #' @export #' @aliases ses rcrd #' @keywords internal new_rcrd <- function(fields, ..., class = character()) { if (obj_is_list(fields) && length(vec_unique(list_sizes(fields))) > 1L) { abort("All fields must be the same size.") } fields <- df_list(!!!fields) if (!length(fields)) { abort("`fields` must be a list of length 1 or greater.") } structure(fields, ..., class = c(class, "vctrs_rcrd", "vctrs_vctr")) } #' @export vec_proxy.vctrs_rcrd <- function(x, ...) { new_data_frame(x) } #' @export vec_restore.vctrs_rcrd <- function(x, to, ...) { x <- NextMethod() attr(x, "row.names") <- NULL x } #' @export length.vctrs_rcrd <- function(x) { vec_size(x) } #' @export names.vctrs_rcrd <- function(x) { NULL } #' @export `names<-.vctrs_rcrd` <- function(x, value) { if (is_null(value)) { x } else { abort("Can't assign names to a .") } } #' @export format.vctrs_rcrd <- function(x, ...) { if (inherits(x, "vctrs_foobar")) { # For unit tests exec("paste", !!!vec_data(x), sep = ":") } else { stop_unimplemented(x, "format") } } #' @export obj_str_data.vctrs_rcrd <- function(x, ...) { obj_str_leaf(x, ...) } #' @method vec_cast vctrs_rcrd #' @export vec_cast.vctrs_rcrd <- function(x, to, ...) UseMethod("vec_cast.vctrs_rcrd") #' @export vec_cast.vctrs_rcrd.vctrs_rcrd <- function(x, to, ...) { out <- vec_cast(vec_data(x), vec_data(to), ...) new_rcrd(out) } # Subsetting -------------------------------------------------------------- #' @export `[.vctrs_rcrd` <- function(x, i, ...) { if (!missing(...)) { abort("Can't index record vectors on dimensions greater than 1.") } vec_slice(x, maybe_missing(i)) } #' @export `[[.vctrs_rcrd` <- function(x, i, ...) { out <- vec_slice(vec_data(x), i) vec_restore(out, x) } #' @export `$.vctrs_rcrd` <- function(x, i, ...) { stop_unsupported(x, "subsetting with $") } #' @export rep.vctrs_rcrd <- function(x, ...) { out <- lapply(vec_data(x), base_vec_rep, ...) vec_restore(out, x) } #' @export `length<-.vctrs_rcrd` <- function(x, value) { out <- vec_size_assign(vec_data(x), value) vec_restore(out, x) } # Replacement ------------------------------------------------------------- #' @export `[[<-.vctrs_rcrd` <- function(x, i, value) { force(i) x[i] <- value x } #' @export `$<-.vctrs_rcrd` <- function(x, i, value) { stop_unsupported(x, "subset assignment with $") } #' @export `[<-.vctrs_rcrd` <- function(x, i, value) { i <- maybe_missing(i, TRUE) value <- vec_cast(value, x) out <- vec_assign(vec_data(x), i, vec_data(value)) vec_restore(out, x) } # Equality and ordering --------------------------------------------------- #' @export vec_math.vctrs_rcrd <- function(.fn, .x, ...) { stop_unsupported(.x, "vec_math") } vctrs/R/type2.R0000644000176200001440000002416114362266120013010 0ustar liggesusers#' Find the common type for a pair of vectors #' #' @description #' #' `vec_ptype2()` defines the coercion hierarchy for a set of related #' vector types. Along with [vec_cast()], this generic forms the #' foundation of type coercions in vctrs. #' #' `vec_ptype2()` is relevant when you are implementing vctrs methods #' for your class, but it should not usually be called directly. If #' you need to find the common type of a set of inputs, call #' [vec_ptype_common()] instead. This function supports multiple #' inputs and [finalises][vec_ptype_finalise] the common type. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' @param x,y Vector types. #' @param x_arg,y_arg Argument names for `x` and `y`. These are used #' in error messages to inform the user about the locations of #' incompatible types (see [stop_incompatible_type()]). #' #' @seealso [stop_incompatible_type()] when you determine from the #' attributes that an input can't be cast to the target type. #' #' @section Dependencies: #' - [vec_ptype()] is applied to `x` and `y` #' #' @export vec_ptype2 <- function(x, y, ..., x_arg = caller_arg(x), y_arg = caller_arg(y), call = caller_env()) { if (!missing(...)) { check_ptype2_dots_empty(...) return(vec_ptype2_opts( x, y, opts = match_fallback_opts(...), x_arg = x_arg, y_arg = y_arg, call = call )) } return(.Call(ffi_ptype2, x, y, environment())) UseMethod("vec_ptype2") } vec_ptype2_dispatch_s3 <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { UseMethod("vec_ptype2") } vec_ptype2_dispatch_native <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { fallback_opts <- match_fallback_opts(...) .Call( ffi_ptype2_dispatch_native, x, y, fallback_opts, frame = environment() ) } #' Default cast and ptype2 methods #' #' @description #' #' These functions are automatically called when no [vec_ptype2()] or #' [vec_cast()] method is implemented for a pair of types. #' #' * They apply special handling if one of the inputs is of type #' `AsIs` or `sfc`. #' #' * They attempt a number of fallbacks in cases where it would be too #' inconvenient to be strict: #' #' - If the class and attributes are the same they are considered #' compatible. `vec_default_cast()` returns `x` in this case. #' #' - In case of incompatible data frame classes, they fall back to #' `data.frame`. If an incompatible subclass of tibble is #' involved, they fall back to `tbl_df`. #' #' * Otherwise, an error is thrown with [stop_incompatible_type()] or #' [stop_incompatible_cast()]. #' #' @keywords internal #' @export vec_default_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { if (is_asis(x)) { return(vec_ptype2_asis_left( x, y, x_arg = x_arg, y_arg = y_arg, call = call )) } if (is_asis(y)) { return(vec_ptype2_asis_right( x, y, x_arg = x_arg, y_arg = y_arg, call = call )) } opts <- match_fallback_opts(...) if (opts$s3_fallback && can_fall_back_2(x, y)) { common <- common_class_suffix(x, y) if (length(common)) { return(new_common_class_fallback(x, common)) } } if (is.data.frame(x) && is.data.frame(y)) { out <- vec_ptype2_df_fallback( x, y, opts, x_arg = x_arg, y_arg = y_arg, call = call ) if (identical(non_df_attrib(x), non_df_attrib(y))) { attributes(out) <- c(df_attrib(out), non_df_attrib(x)) } return(out) } if (is_same_type(x, y)) { return(vec_ptype(x, x_arg = x_arg)) } # The from-dispatch parameter is set only when called from our S3 # dispatch mechanism, when no method is found to dispatch to. It # indicates whether the error message should provide advice about # diverging attributes. withRestarts( stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, `vctrs:::from_dispatch` = match_from_dispatch(...), call = call ), vctrs_restart_ptype2 = function(ptype) { ptype } ) } # This wrapper for `stop_incompatible_type()` matches error context # arguments. It is useful to pass ptype2 arguments through dots # without risking unknown arguments getting stored as condition fields. vec_incompatible_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, call = call ) } # We can't check for a proxy or ptype2 method to determine whether a # class is foreign, because we implement these generics for many base # classes and we still need to allow base fallbacks with subclasses. can_fall_back_2 <- function(x, y) { if (!identical(typeof(x), typeof(y))) { return(FALSE) } if (!can_fall_back(x) || !can_fall_back(y)) { return(FALSE) } TRUE } can_fall_back <- function(x) { UseMethod("can_fall_back") } #' @export can_fall_back.vctrs_vctr <- function(x) { # Work around bad interaction when `c()` method calls back into `vec_c()` FALSE } #' @export can_fall_back.ts <- function(x) { # Work around bug with hard-coded `tsp` attribute in Rf_setAttrib() FALSE } #' @export can_fall_back.data.frame <- function(x) { # The `c()` fallback is only for 1D vectors FALSE } #' @export `can_fall_back.vctrs:::common_class_fallback` <- function(x) { TRUE } #' @export can_fall_back.default <- function(x) { # Don't fall back for classes that directly implement a proxy. # # NOTE: That's suboptimal. For instance this forces us to override # `can_fall_back()` for `vctrs_vctr` to avoid recursing into # `vec_c()` through `c()`. Maybe we want to avoid falling back for # any vector that inherits a `vec_proxy()` method implemented # _outside_ of vctrs, i.e. not for a base class? is_null(s3_get_method(class(x)[[1]], "vec_proxy", ns = "vctrs")) } new_common_class_fallback <- function(x, fallback_class) { structure( vec_ptype(x), class = "vctrs:::common_class_fallback", fallback_class = fallback_class ) } #' @export `vec_proxy.vctrs:::common_class_fallback` <- function(x, ...) { x } is_common_class_fallback <- function(x) { inherits(x, "vctrs:::common_class_fallback") } common_class_suffix <- function(x, y) { vec_common_suffix(fallback_class(x), fallback_class(y)) } fallback_class <- function(x) { if (is_common_class_fallback(x)) { attr(x, "fallback_class") } else { class(x) } } check_ptype2_dots_empty <- function(..., `vctrs:::from_dispatch`, `vctrs:::s3_fallback`) { check_dots_empty0(...) } match_fallback_opts <- function(..., `vctrs:::s3_fallback` = NULL) { fallback_opts( s3_fallback = `vctrs:::s3_fallback` ) } match_from_dispatch <- function(..., `vctrs:::from_dispatch` = FALSE) { `vctrs:::from_dispatch` } fallback_opts <- function(s3_fallback = NULL) { # Order is important for the C side list( s3_fallback = s3_fallback %||% s3_fallback_default() ) } full_fallback_opts <- function() { fallback_opts( s3_fallback = S3_FALLBACK_true ) } vec_ptype2_opts <- function(x, y, ..., opts, x_arg = "", y_arg = "", call = caller_env()) { .Call(ffi_ptype2_opts, x, y, opts, environment()) } vec_ptype2_params <- function(x, y, ..., s3_fallback = NULL, x_arg = "", y_arg = "", call = caller_env()) { opts <- fallback_opts( s3_fallback = s3_fallback ) vec_ptype2_opts( x, y, opts = opts, x_arg = x_arg, y_arg = y_arg, call = call ) } vec_ptype2_no_fallback <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { opts <- fallback_opts( s3_fallback = S3_FALLBACK_false ) vec_ptype2_opts( x, y, ..., , opts = opts, x_arg = x_arg, y_arg = y_arg, call = call ) } s3_fallback_default <- function() 0L S3_FALLBACK_false <- 0L S3_FALLBACK_true <- 1L vec_typeof2 <- function(x, y) { .Call(ffi_typeof2, x, y) } vec_typeof2_s3 <- function(x, y) { .Call(ffi_typeof2_s3, x, y) } # https://github.com/r-lib/vctrs/issues/571 vec_is_coercible <- function(x, y, ..., opts = fallback_opts(), x_arg = "", y_arg = "", call = caller_env()) { check_dots_empty0(...) .Call( ffi_is_coercible, x, y, opts, environment() ) } vec_is_subtype <- function(x, super, ..., x_arg = "", super_arg = "") { tryCatch( vctrs_error_incompatible_type = function(...) FALSE, { common <- vctrs::vec_ptype2(x, super, ..., x_arg = x_arg, y_arg = super_arg) vec_is(common, super) } ) } vec_implements_ptype2 <- function(x) { .Call(vctrs_implements_ptype2, x) } vctrs/R/hash.R0000644000176200001440000000042513634425744012677 0ustar liggesusers # These return raw vectors of hashes. Vector elements are coded with # 32 bit hashes. Thus, the size of the raw vector of hashes is 4 times # the size of the input. vec_hash <- function(x) { .Call(vctrs_hash, x) } obj_hash <- function(x) { .Call(vctrs_hash_object, x) } vctrs/R/translate.R0000644000176200001440000000011714276722575013754 0ustar liggesusersvec_normalize_encoding <- function(x) { .Call(vctrs_normalize_encoding, x) } vctrs/R/import-standalone-purrr.R0000644000176200001440000001277714377215505016576 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-purrr.R # last-updated: 2023-02-23 # license: https://unlicense.org # --- # # This file provides a minimal shim to provide a purrr-like API on top of # base R functions. They are not drop-in replacements but allow a similar style # of programming. # # ## Changelog # # 2023-02-23: # * Added `list_c()` # # 2022-06-07: # * `transpose()` is now more consistent with purrr when inner names # are not congruent (#1346). # # 2021-12-15: # * `transpose()` now supports empty lists. # # 2021-05-21: # * Fixed "object `x` not found" error in `imap()` (@mgirlich) # # 2020-04-14: # * Removed `pluck*()` functions # * Removed `*_cpl()` functions # * Used `as_function()` to allow use of `~` # * Used `.` prefix for helpers # # nocov start map <- function(.x, .f, ...) { .f <- as_function(.f, env = global_env()) lapply(.x, .f, ...) } walk <- function(.x, .f, ...) { map(.x, .f, ...) invisible(.x) } map_lgl <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, logical(1), ...) } map_int <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, integer(1), ...) } map_dbl <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, double(1), ...) } map_chr <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, character(1), ...) } .rlang_purrr_map_mold <- function(.x, .f, .mold, ...) { .f <- as_function(.f, env = global_env()) out <- vapply(.x, .f, .mold, ..., USE.NAMES = FALSE) names(out) <- names(.x) out } map2 <- function(.x, .y, .f, ...) { .f <- as_function(.f, env = global_env()) out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE) if (length(out) == length(.x)) { set_names(out, names(.x)) } else { set_names(out, NULL) } } map2_lgl <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "logical") } map2_int <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "integer") } map2_dbl <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "double") } map2_chr <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "character") } imap <- function(.x, .f, ...) { map2(.x, names(.x) %||% seq_along(.x), .f, ...) } pmap <- function(.l, .f, ...) { .f <- as.function(.f) args <- .rlang_purrr_args_recycle(.l) do.call("mapply", c( FUN = list(quote(.f)), args, MoreArgs = quote(list(...)), SIMPLIFY = FALSE, USE.NAMES = FALSE )) } .rlang_purrr_args_recycle <- function(args) { lengths <- map_int(args, length) n <- max(lengths) stopifnot(all(lengths == 1L | lengths == n)) to_recycle <- lengths == 1L args[to_recycle] <- map(args[to_recycle], function(x) rep.int(x, n)) args } keep <- function(.x, .f, ...) { .x[.rlang_purrr_probe(.x, .f, ...)] } discard <- function(.x, .p, ...) { sel <- .rlang_purrr_probe(.x, .p, ...) .x[is.na(sel) | !sel] } map_if <- function(.x, .p, .f, ...) { matches <- .rlang_purrr_probe(.x, .p) .x[matches] <- map(.x[matches], .f, ...) .x } .rlang_purrr_probe <- function(.x, .p, ...) { if (is_logical(.p)) { stopifnot(length(.p) == length(.x)) .p } else { .p <- as_function(.p, env = global_env()) map_lgl(.x, .p, ...) } } compact <- function(.x) { Filter(length, .x) } transpose <- function(.l) { if (!length(.l)) { return(.l) } inner_names <- names(.l[[1]]) if (is.null(inner_names)) { fields <- seq_along(.l[[1]]) } else { fields <- set_names(inner_names) .l <- map(.l, function(x) { if (is.null(names(x))) { set_names(x, inner_names) } else { x } }) } # This way missing fields are subsetted as `NULL` instead of causing # an error .l <- map(.l, as.list) map(fields, function(i) { map(.l, .subset2, i) }) } every <- function(.x, .p, ...) { .p <- as_function(.p, env = global_env()) for (i in seq_along(.x)) { if (!rlang::is_true(.p(.x[[i]], ...))) return(FALSE) } TRUE } some <- function(.x, .p, ...) { .p <- as_function(.p, env = global_env()) for (i in seq_along(.x)) { if (rlang::is_true(.p(.x[[i]], ...))) return(TRUE) } FALSE } negate <- function(.p) { .p <- as_function(.p, env = global_env()) function(...) !.p(...) } reduce <- function(.x, .f, ..., .init) { f <- function(x, y) .f(x, y, ...) Reduce(f, .x, init = .init) } reduce_right <- function(.x, .f, ..., .init) { f <- function(x, y) .f(y, x, ...) Reduce(f, .x, init = .init, right = TRUE) } accumulate <- function(.x, .f, ..., .init) { f <- function(x, y) .f(x, y, ...) Reduce(f, .x, init = .init, accumulate = TRUE) } accumulate_right <- function(.x, .f, ..., .init) { f <- function(x, y) .f(y, x, ...) Reduce(f, .x, init = .init, right = TRUE, accumulate = TRUE) } detect <- function(.x, .f, ..., .right = FALSE, .p = is_true) { .p <- as_function(.p, env = global_env()) .f <- as_function(.f, env = global_env()) for (i in .rlang_purrr_index(.x, .right)) { if (.p(.f(.x[[i]], ...))) { return(.x[[i]]) } } NULL } detect_index <- function(.x, .f, ..., .right = FALSE, .p = is_true) { .p <- as_function(.p, env = global_env()) .f <- as_function(.f, env = global_env()) for (i in .rlang_purrr_index(.x, .right)) { if (.p(.f(.x[[i]], ...))) { return(i) } } 0L } .rlang_purrr_index <- function(x, right = FALSE) { idx <- seq_along(x) if (right) { idx <- rev(idx) } idx } list_c <- function(x) { inject(c(!!!x)) } # nocov end vctrs/R/import-standalone-obj-type.R0000644000176200001440000002026714377215505017146 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-obj-type.R # last-updated: 2022-10-04 # license: https://unlicense.org # --- # # ## Changelog # # 2022-10-04: # - `obj_type_friendly(value = TRUE)` now shows numeric scalars # literally. # - `stop_friendly_type()` now takes `show_value`, passed to # `obj_type_friendly()` as the `value` argument. # # 2022-10-03: # - Added `allow_na` and `allow_null` arguments. # - `NULL` is now backticked. # - Better friendly type for infinities and `NaN`. # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Prefixed usage of rlang functions with `rlang::`. # # 2022-06-22: # - `friendly_type_of()` is now `obj_type_friendly()`. # - Added `obj_type_oo()`. # # 2021-12-20: # - Added support for scalar values and empty vectors. # - Added `stop_input_type()` # # 2021-06-30: # - Added support for missing arguments. # # 2021-04-19: # - Added support for matrices and arrays (#141). # - Added documentation. # - Added changelog. # # nocov start #' Return English-friendly type #' @param x Any R object. #' @param value Whether to describe the value of `x`. Special values #' like `NA` or `""` are always described. #' @param length Whether to mention the length of vectors and lists. #' @return A string describing the type. Starts with an indefinite #' article, e.g. "an integer vector". #' @noRd obj_type_friendly <- function(x, value = TRUE) { if (is_missing(x)) { return("absent") } if (is.object(x)) { if (inherits(x, "quosure")) { type <- "quosure" } else { type <- paste(class(x), collapse = "/") } return(sprintf("a <%s> object", type)) } if (!is_vector(x)) { return(.rlang_as_friendly_type(typeof(x))) } n_dim <- length(dim(x)) if (!n_dim) { if (!is_list(x) && length(x) == 1) { if (is_na(x)) { return(switch( typeof(x), logical = "`NA`", integer = "an integer `NA`", double = if (is.nan(x)) { "`NaN`" } else { "a numeric `NA`" }, complex = "a complex `NA`", character = "a character `NA`", .rlang_stop_unexpected_typeof(x) )) } show_infinites <- function(x) { if (x > 0) { "`Inf`" } else { "`-Inf`" } } str_encode <- function(x, width = 30, ...) { if (nchar(x) > width) { x <- substr(x, 1, width - 3) x <- paste0(x, "...") } encodeString(x, ...) } if (value) { if (is.numeric(x) && is.infinite(x)) { return(show_infinites(x)) } if (is.numeric(x) || is.complex(x)) { number <- as.character(round(x, 2)) what <- if (is.complex(x)) "the complex number" else "the number" return(paste(what, number)) } return(switch( typeof(x), logical = if (x) "`TRUE`" else "`FALSE`", character = { what <- if (nzchar(x)) "the string" else "the empty string" paste(what, str_encode(x, quote = "\"")) }, raw = paste("the raw value", as.character(x)), .rlang_stop_unexpected_typeof(x) )) } return(switch( typeof(x), logical = "a logical value", integer = "an integer", double = if (is.infinite(x)) show_infinites(x) else "a number", complex = "a complex number", character = if (nzchar(x)) "a string" else "\"\"", raw = "a raw value", .rlang_stop_unexpected_typeof(x) )) } if (length(x) == 0) { return(switch( typeof(x), logical = "an empty logical vector", integer = "an empty integer vector", double = "an empty numeric vector", complex = "an empty complex vector", character = "an empty character vector", raw = "an empty raw vector", list = "an empty list", .rlang_stop_unexpected_typeof(x) )) } } vec_type_friendly(x) } vec_type_friendly <- function(x, length = FALSE) { if (!is_vector(x)) { abort("`x` must be a vector.") } type <- typeof(x) n_dim <- length(dim(x)) add_length <- function(type) { if (length && !n_dim) { paste0(type, sprintf(" of length %s", length(x))) } else { type } } if (type == "list") { if (n_dim < 2) { return(add_length("a list")) } else if (is.data.frame(x)) { return("a data frame") } else if (n_dim == 2) { return("a list matrix") } else { return("a list array") } } type <- switch( type, logical = "a logical %s", integer = "an integer %s", numeric = , double = "a double %s", complex = "a complex %s", character = "a character %s", raw = "a raw %s", type = paste0("a ", type, " %s") ) if (n_dim < 2) { kind <- "vector" } else if (n_dim == 2) { kind <- "matrix" } else { kind <- "array" } out <- sprintf(type, kind) if (n_dim >= 2) { out } else { add_length(out) } } .rlang_as_friendly_type <- function(type) { switch( type, list = "a list", NULL = "`NULL`", environment = "an environment", externalptr = "a pointer", weakref = "a weak reference", S4 = "an S4 object", name = , symbol = "a symbol", language = "a call", pairlist = "a pairlist node", expression = "an expression vector", char = "an internal string", promise = "an internal promise", ... = "an internal dots object", any = "an internal `any` object", bytecode = "an internal bytecode object", primitive = , builtin = , special = "a primitive function", closure = "a function", type ) } .rlang_stop_unexpected_typeof <- function(x, call = caller_env()) { abort( sprintf("Unexpected type <%s>.", typeof(x)), call = call ) } #' Return OO type #' @param x Any R object. #' @return One of `"bare"` (for non-OO objects), `"S3"`, `"S4"`, #' `"R6"`, or `"R7"`. #' @noRd obj_type_oo <- function(x) { if (!is.object(x)) { return("bare") } class <- inherits(x, c("R6", "R7_object"), which = TRUE) if (class[[1]]) { "R6" } else if (class[[2]]) { "R7" } else if (isS4(x)) { "S4" } else { "S3" } } #' @param x The object type which does not conform to `what`. Its #' `obj_type_friendly()` is taken and mentioned in the error message. #' @param what The friendly expected type as a string. Can be a #' character vector of expected types, in which case the error #' message mentions all of them in an "or" enumeration. #' @param show_value Passed to `value` argument of `obj_type_friendly()`. #' @param ... Arguments passed to [abort()]. #' @inheritParams args_error_context #' @noRd stop_input_type <- function(x, what, ..., allow_na = FALSE, allow_null = FALSE, show_value = TRUE, arg = caller_arg(x), call = caller_env()) { # From standalone-cli.R cli <- env_get_list( nms = c("format_arg", "format_code"), last = topenv(), default = function(x) sprintf("`%s`", x), inherit = TRUE ) if (allow_na) { what <- c(what, cli$format_code("NA")) } if (allow_null) { what <- c(what, cli$format_code("NULL")) } if (length(what)) { what <- oxford_comma(what) } message <- sprintf( "%s must be %s, not %s.", cli$format_arg(arg), what, obj_type_friendly(x, value = show_value) ) abort(message, ..., call = call, arg = arg) } oxford_comma <- function(chr, sep = ", ", final = "or") { n <- length(chr) if (n < 2) { return(chr) } head <- chr[seq_len(n - 1)] last <- chr[n] head <- paste(head, collapse = sep) # Write a or b. But a, b, or c. if (n > 2) { paste0(head, sep, final, " ", last) } else { paste0(head, " ", final, " ", last) } } # nocov end vctrs/R/compare.R0000644000176200001440000001060514376223321013372 0ustar liggesusers# proxies ----------------------------------------------------------------- #' Comparison and order proxy #' #' @description #' `vec_proxy_compare()` and `vec_proxy_order()` return proxy objects, i.e. #' an atomic vector or data frame of atomic vectors. #' #' For [`vctrs_vctr`][vctr] objects: #' #' - `vec_proxy_compare()` determines the behavior of `<`, `>`, `>=` #' and `<=` (via [vec_compare()]); and [min()], [max()], [median()], and #' [quantile()]. #' #' - `vec_proxy_order()` determines the behavior of `order()` and `sort()` #' (via `xtfrm()`). #' #' @details #' The default method of `vec_proxy_compare()` assumes that all classes built #' on top of atomic vectors or records are comparable. Internally the default #' calls [vec_proxy_equal()]. If your class is not comparable, you will need #' to provide a `vec_proxy_compare()` method that throws an error. #' #' The behavior of `vec_proxy_order()` is identical to `vec_proxy_compare()`, #' with the exception of lists. Lists are not comparable, as comparing #' elements of different types is undefined. However, to allow ordering of #' data frames containing list-columns, the ordering proxy of a list is #' generated as an integer vector that can be used to order list elements #' by first appearance. #' #' If a class implements a `vec_proxy_compare()` method, it usually doesn't need #' to provide a `vec_proxy_order()` method, because the latter is implemented #' by forwarding to `vec_proxy_compare()` by default. Classes inheriting from #' list are an exception: due to the default `vec_proxy_order()` implementation, #' `vec_proxy_compare()` and `vec_proxy_order()` should be provided for such #' classes (with identical implementations) to avoid mismatches between #' comparison and sorting. #' #' @inheritSection vec_proxy_equal Data frames #' #' @param x A vector x. #' @inheritParams rlang::args_dots_empty #' @return A 1d atomic vector or a data frame. #' #' @section Dependencies: #' - [vec_proxy_equal()] called by default in `vec_proxy_compare()` #' - [vec_proxy_compare()] called by default in `vec_proxy_order()` #' #' @keywords internal #' @export #' @examples #' # Lists are not comparable #' x <- list(1:2, 1, 1:2, 3) #' try(vec_compare(x, x)) #' #' # But lists are orderable by first appearance to allow for #' # ordering data frames with list-cols #' df <- new_data_frame(list(x = x)) #' vec_sort(df) vec_proxy_compare <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_proxy_compare, x)) UseMethod("vec_proxy_compare") } #' @export vec_proxy_compare.default <- function(x, ...) { stop_native_implementation("vec_proxy_compare.default") } #' @rdname vec_proxy_compare #' @export vec_proxy_order <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_proxy_order, x)) UseMethod("vec_proxy_order") } #' @export vec_proxy_order.default <- function(x, ...) { stop_native_implementation("vec_proxy_order.default") } # compare ----------------------------------------------------------------- #' Compare two vectors #' #' @section S3 dispatch: #' `vec_compare()` is not generic for performance; instead it uses #' [vec_proxy_compare()] to create a proxy that is used in the comparison. #' #' @param x,y Vectors with compatible types and lengths. #' @param na_equal Should `NA` values be considered equal? #' @param .ptype Override to optionally specify common type #' @return An integer vector with values -1 for `x < y`, 0 if `x == y`, #' and 1 if `x > y`. If `na_equal` is `FALSE`, the result will be `NA` #' if either `x` or `y` is `NA`. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_recycle_common()] #' - [vec_proxy_compare()] #' #' @export #' @examples #' vec_compare(c(TRUE, FALSE, NA), FALSE) #' vec_compare(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) #' #' vec_compare(1:10, 5) #' vec_compare(runif(10), 0.5) #' vec_compare(letters[1:10], "d") #' #' df <- data.frame(x = c(1, 1, 1, 2), y = c(0, 1, 2, 1)) #' vec_compare(df, data.frame(x = 1, y = 1)) vec_compare <- function(x, y, na_equal = FALSE, .ptype = NULL) { obj_check_vector(x) obj_check_vector(y) check_bool(na_equal) args <- vec_recycle_common(x, y) args <- vec_cast_common_params(!!!args, .to = .ptype) .Call(ffi_vec_compare, vec_proxy_compare(args[[1]]), vec_proxy_compare(args[[2]]), na_equal) } # Helpers ----------------------------------------------------------------- # Used for testing cmp <- function(x, y) (x > y) - (x < y) vctrs/R/type-sf.R0000644000176200001440000000657114420030332013326 0ustar liggesusers # Imported at load-time in `sf_env` st_crs = function(...) stop_sf() st_precision = function(...) stop_sf() st_as_sf = function(...) stop_sf() stop_sf = function() abort("Internal error: Failed sf import.") sf_deps = c( "st_crs", "st_precision", "st_as_sf" ) sf_env = env() # sf namespace local(envir = sf_env, { # Registered at load-time (same for all other methods) vec_proxy_sf = function(x, ...) { x } vec_restore_sf = function(x, to, ...) { sfc_name = attr(to, "sf_column") crs = st_crs(to) prec = st_precision(to) st_as_sf( x, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } sf_ptype2 = function(x, y, ...) { data = vctrs::df_ptype2(x, y, ...) # Workaround for `c()` fallback sentinels. Must be fixed before # moving the methods downstream. opts <- match_fallback_opts(...) if (identical(opts$s3_fallback, S3_FALLBACK_true)) { return(data) } x_sf <- inherits(x, "sf") y_sf <- inherits(y, "sf") if (x_sf && y_sf) { # Take active geometry from left-hand side sfc_name = attr(x, "sf_column") # CRS and precision must match crs = common_crs(x, y) prec = common_prec(x, y) } else if (x_sf) { sfc_name = attr(x, "sf_column") crs = st_crs(x) prec = st_precision(x) } else if (y_sf) { sfc_name = attr(y, "sf_column") crs = st_crs(y) prec = st_precision(y) } else { stop("Internal error: Expected at least one `sf` input.") } st_as_sf( data, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } vec_ptype2_sf_sf = function(x, y, ...) { sf_ptype2(x, y, ...) } vec_ptype2_sf_data.frame = function(x, y, ...) { sf_ptype2(x, y, ...) } vec_ptype2_data.frame_sf = function(x, y, ...) { sf_ptype2(x, y, ...) } # Maybe we should not have these methods, but they are currently # required to avoid the base-df fallback vec_ptype2_sf_tbl_df = function(x, y, ...) { new_data_frame(sf_ptype2(x, y, ...)) } vec_ptype2_tbl_df_sf = function(x, y, ...) { new_data_frame(sf_ptype2(x, y, ...)) } sf_cast = function(x, to, ...) { data = vctrs::df_cast(x, to, ...) # Workaround for `c()` fallback sentinels. Must be fixed before # moving the methods downstream. opts <- match_fallback_opts(...) if (identical(opts$s3_fallback, S3_FALLBACK_true)) { return(data) } sfc_name = attr(to, "sf_column") crs = st_crs(to) prec = st_precision(to) st_as_sf( data, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } vec_cast_sf_sf = function(x, to, ...) { sf_cast(x, to, ...) } vec_cast_sf_data.frame = function(x, to, ...) { sf_cast(x, to, ...) } vec_cast_data.frame_sf = function(x, to, ...) { df_cast(x, to, ...) } vec_proxy_order_sfc <- function(x, ...) { # These are list columns, so they need to use the order-by-appearance proxy # that is defined by `vec_proxy_order.list()` x <- unstructure(x) vec_proxy_order(x) } # take conservative approach of requiring equal CRS and precision common_crs = function(x, y) { lhs = st_crs(x) rhs = st_crs(y) if (lhs != rhs) stop("coordinate reference systems not equal: use st_transform() first?") lhs } common_prec = function(x, y) { lhs = st_precision(x) rhs = st_precision(y) if (lhs != rhs) stop("precisions not equal") lhs } }) # local(envir = sf_env) env_bind(ns_env("vctrs"), !!!as.list(sf_env)) # Local Variables: # indent-tabs-mode: t # ess-indent-offset: 4 # tab-width: 4 # End: vctrs/R/slice-interleave.R0000644000176200001440000000334114511524374015201 0ustar liggesusers#' Interleave many vectors into one vector #' #' @description #' `vec_interleave()` combines multiple vectors together, much like [vec_c()], #' but does so in such a way that the elements of each vector are interleaved #' together. #' #' It is a more efficient equivalent to the following usage of `vec_c()`: #' #' ``` #' vec_interleave(x, y) == vec_c(x[1], y[1], x[2], y[2], ..., x[n], y[n]) #' ``` #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [list_unchop()] #' #' @inheritParams vec_c #' #' @param ... Vectors to interleave. These will be #' [recycled][theory-faq-recycling] to a common size. #' #' @export #' @examples #' # The most common case is to interleave two vectors #' vec_interleave(1:3, 4:6) #' #' # But you aren't restricted to just two #' vec_interleave(1:3, 4:6, 7:9, 10:12) #' #' # You can also interleave data frames #' x <- data_frame(x = 1:2, y = c("a", "b")) #' y <- data_frame(x = 3:4, y = c("c", "d")) #' #' vec_interleave(x, y) vec_interleave <- function(..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet")) { args <- list2(...) # `NULL`s must be dropped up front to generate appropriate indices if (vec_any_missing(args)) { missing <- vec_detect_missing(args) args <- vec_slice(args, !missing) } n <- length(args) size <- vec_size_common(!!!args) indices <- vec_interleave_indices(n, size) list_unchop( x = args, indices = indices, ptype = .ptype, name_spec = .name_spec, name_repair = .name_repair ) } vec_interleave_indices <- function(n, size) { .Call(ffi_interleave_indices, n, size) } vctrs/R/partial.R0000644000176200001440000000316414315060307013376 0ustar liggesusers#' Partial type #' #' @description #' `r lifecycle::badge("experimental")` #' #' Use `new_partial()` when constructing a new partial type subclass; #' and use `is_partial()` to test if a type is partial. All subclasses #' need to provide a `vec_ptype_finalise()` method. #' #' @details #' As the name suggests, a partial type _partially_ specifies a type, and #' it must be combined with data to yield a full type. A useful example #' of a partial type is [partial_frame()], which makes it possible to #' specify the type of just a few columns in a data frame. Use this constructor #' if you're making your own partial type. #' #' @param ... Attributes of the partial type #' @param class Name of subclass. #' @export #' @keywords internal new_partial <- function(..., class = character()) { new_sclr(..., class = c(class, "vctrs_partial")) } #' @export obj_print_header.vctrs_partial <- function(x, ...) { NULL invisible(x) } #' @export obj_print_data.vctrs_partial <- function(x, ...) { cat_line(vec_ptype_full(x)) invisible(x) } #' @rdname new_partial #' @export is_partial <- function(x) { .Call(ffi_is_partial, x) } #' @rdname new_partial #' @inheritParams rlang::args_dots_empty #' @export vec_ptype_finalise <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_ptype_finalise, x)) UseMethod("vec_ptype_finalise") } vec_ptype_finalise_dispatch <- function(x, ...) { UseMethod("vec_ptype_finalise") } #' @export vec_ptype_finalise.vctrs_partial <- function(x, ...) { # nocov start stop_unimplemented(x, "vec_ptype_finalise") # nocov end } #' @export vec_ptype_finalise.default <- function(x, ...) { x } vctrs/R/numeric.R0000644000176200001440000000432514276722575013426 0ustar liggesusers#' Mathematical operations #' #' This generic provides a common dispatch mechanism for all regular unary #' mathematical functions. It is used as a common wrapper around many of the #' Summary group generics, the Math group generics, and a handful of other #' mathematical functions like `mean()` (but not `var()` or `sd()`). #' #' `vec_math_base()` is provided as a convenience for writing methods. It #' calls the base `.fn` on the underlying [vec_data()]. #' #' @section Included functions: #' #' * From the [Summary] group generic: #' `prod()`, `sum()`, `any()`, `all()`. #' #' * From the [Math] group generic: #' `abs()`, `sign()`, `sqrt()`, `ceiling()`, `floor()`, `trunc()`, `cummax()`, #' `cummin()`, `cumprod()`, `cumsum()`, `log()`, `log10()`, `log2()`, #' `log1p()`, `acos()`, `acosh()`, `asin()`, `asinh()`, `atan()`, `atanh()`, #' `exp()`, `expm1()`, `cos()`, `cosh()`, `cospi()`, `sin()`, `sinh()`, #' `sinpi()`, `tan()`, `tanh()`, `tanpi()`, `gamma()`, `lgamma()`, #' `digamma()`, `trigamma()`. #' #' * Additional generics: `mean()`, `is.nan()`, `is.finite()`, `is.infinite()`. #' #' Note that `median()` is currently not implemented, and `sd()` and #' `var()` are currently not generic and so do not support custom #' classes. #' #' @seealso [vec_arith()] for the equivalent for the arithmetic infix operators. #' @param .fn A mathematical function from the base package, as a string. #' @param .x A vector. #' @param ... Additional arguments passed to `.fn`. #' @keywords internal #' @export #' @examples #' x <- new_vctr(c(1, 2.5, 10)) #' x #' #' abs(x) #' sum(x) #' cumsum(x) vec_math <- function(.fn, .x, ...) { UseMethod("vec_math", .x) } #' @export vec_math.default <- function(.fn, .x, ...) { if (!is_double(.x) && !is_logical_dispatch(.fn, .x)) { stop_unimplemented(.x, "vec_math") } out <- vec_math_base(.fn, .x, ...) # Don't restore output of logical predicates like `any()`, # `is.finite()`, or `is.nan()` if (is_double(out)) { out <- vec_restore(out, .x) } out } is_logical_dispatch <- function(fn, x) { is_logical(x) && fn %in% c("any", "all") } #' @export #' @rdname vec_math vec_math_base <- function(.fn, .x, ...) { .fn <- getExportedValue("base", .fn) .fn(vec_data(.x), ...) } vctrs/R/expand.R0000644000176200001440000000410114362266120013214 0ustar liggesusers#' Create a data frame from all combinations of the inputs #' #' @description #' `vec_expand_grid()` creates a new data frame by creating a grid of all #' possible combinations of the input vectors. It is inspired by #' [expand.grid()]. Compared with `expand.grid()`, it: #' #' - Produces sorted output by default by varying the first column the slowest, #' rather than the fastest. Control this with `.vary`. #' #' - Never converts strings to factors. #' #' - Does not add additional attributes. #' #' - Drops `NULL` inputs. #' #' - Can expand any vector type, including data frames and [records][new_rcrd]. #' #' @details #' If any input is empty (i.e. size 0), then the result will have 0 rows. #' #' If no inputs are provided, the result is a 1 row data frame with 0 columns. #' This is consistent with the fact that `prod()` with no inputs returns `1`. #' #' @inheritParams rlang::args_error_context #' @inheritParams df_list #' #' @param ... Name-value pairs. The name will become the column name in the #' resulting data frame. #' #' @param .vary One of: #' #' - `"slowest"` to vary the first column slowest. This produces sorted #' output and is generally the most useful. #' #' - `"fastest"` to vary the first column fastest. This matches the behavior #' of [expand.grid()]. #' #' @returns #' A data frame with as many columns as there are inputs in `...` and as many #' rows as the [prod()] of the sizes of the inputs. #' #' @export #' @examples #' vec_expand_grid(x = 1:2, y = 1:3) #' #' # Use `.vary` to match `expand.grid()`: #' vec_expand_grid(x = 1:2, y = 1:3, .vary = "fastest") #' #' # Can also expand data frames #' vec_expand_grid( #' x = data_frame(a = 1:2, b = 3:4), #' y = 1:4 #' ) vec_expand_grid <- function(..., .vary = "slowest", .name_repair = "check_unique", .error_call = current_env()) { .vary <- arg_match0( arg = .vary, values = c("slowest", "fastest"), error_call = .error_call ) .Call(ffi_vec_expand_grid, list2(...), .vary, .name_repair, environment()) } vctrs/R/altrep.R0000644000176200001440000000007114315060307013223 0ustar liggesusersis_altrep <- function(x) { .Call(vctrs_is_altrep, x) } vctrs/R/faq.R0000644000176200001440000000150614276722575012531 0ustar liggesusers#' FAQ - How is the compatibility of vector types decided? #' #' @includeRmd man/faq/user/faq-compatibility-types.Rmd description #' #' @name faq-compatibility-types NULL #' FAQ - Error/Warning: Some attributes are incompatible #' #' @description #' #' This error occurs when [vec_ptype2()] or [vec_cast()] are supplied #' vectors of the same classes with different attributes. In this #' case, vctrs doesn't know how to combine the inputs. #' #' To fix this error, the maintainer of the class should implement #' self-to-self coercion methods for [vec_ptype2()] and [vec_cast()]. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @name faq-error-incompatible-attributes NULL #' FAQ - Error: Input must be a vector #' #' @includeRmd man/faq/user/faq-error-scalar-type.Rmd description #' #' @name faq-error-scalar-type NULL vctrs/R/c.R0000644000176200001440000000764414362266120012176 0ustar liggesusers#' Combine many vectors into one vector #' #' Combine all arguments into a new vector of common type. #' #' @section Invariants: #' * `vec_size(vec_c(x, y)) == vec_size(x) + vec_size(y)` #' * `vec_ptype(vec_c(x, y)) == vec_ptype_common(x, y)`. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_cast_common()] with fallback #' - [vec_proxy()] #' - [vec_restore()] #' #' #' ## base dependencies #' #' - [base::c()] #' #' If inputs inherit from a common class hierarchy, `vec_c()` falls #' back to `base::c()` if there exists a `c()` method implemented for #' this class hierarchy. #' #' @inheritParams rlang::args_error_context #' @inheritParams vec_ptype_show #' @inheritParams name_spec #' @inheritParams vec_as_names #' #' @param ... Vectors to coerce. #' @param .name_repair How to repair names, see `repair` options in #' [vec_as_names()]. #' @return A vector with class given by `.ptype`, and length equal to the #' sum of the `vec_size()` of the contents of `...`. #' #' The vector will have names if the individual components have names #' (inner names) or if the arguments are named (outer names). If both #' inner and outer names are present, an error is thrown unless a #' `.name_spec` is provided. #' #' @seealso [vec_cbind()]/[vec_rbind()] for combining data frames by rows #' or columns. #' @export #' @examples #' vec_c(FALSE, 1L, 1.5) #' #' # Date/times -------------------------- #' c(Sys.Date(), Sys.time()) #' c(Sys.time(), Sys.Date()) #' #' vec_c(Sys.Date(), Sys.time()) #' vec_c(Sys.time(), Sys.Date()) #' #' # Factors ----------------------------- #' c(factor("a"), factor("b")) #' vec_c(factor("a"), factor("b")) #' #' #' # By default, named inputs must be length 1: #' vec_c(name = 1) #' try(vec_c(name = 1:3)) #' #' # Pass a name specification to work around this: #' vec_c(name = 1:3, .name_spec = "{outer}_{inner}") #' #' # See `?name_spec` for more examples of name specifications. vec_c <- function(..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), .error_arg = "", .error_call = current_env()) { .External2(ffi_vec_c, .ptype, .name_spec, .name_repair) } vec_c <- fn_inline_formals(vec_c, ".name_repair") base_c <- function(xs) { # Dispatch in the base namespace which inherits from the global env exec("c", !!!xs, .env = ns_env("base")) } base_c_invoke <- function(xs) { local_options("vctrs:::base_c_in_progress" = TRUE) # Remove all `NULL` arguments which prevent dispatch if in first # position and might not be handled correctly by methods xs <- compact(xs) unspecified <- map_lgl(xs, fallback_is_unspecified) if (!any(unspecified)) { return(base_c(xs)) } # First concatenate without the unspecified chunks. This way the # `c()` method doesn't need to handle unspecified inputs correctly, # and we're guaranteed to dispatch on the correct class even if the # first input is unspecified. out <- base_c(xs[!unspecified]) # Create index vector with `NA` locations for unspecified chunks locs <- c_locs(xs) locs[unspecified] <- map(locs[unspecified], rep_along, na_int) locs[!unspecified] <- c_locs(xs[!unspecified]) locs <- vec_c(!!!locs, .ptype = int()) # Expand the concatenated vector with unspecified chunks out[locs] } # FIXME: Should be unnecessary in the future. We currently attach an # attribute to unspecified columns initialised in `df_cast()`. We # can't use an unspecified vector because we (unnecessarily but for # convenience) go through `vec_assign()` before falling back in # `vec_rbind()`. fallback_is_unspecified <- function(x) { is_unspecified(x) || is_true(attr(x, "vctrs:::unspecified")) } c_locs <- function(xs) { locs <- reduce(lengths(xs), .init = list(0), function(output, input) { n <- last(last(output)) c(output, list(seq(n + 1, n + input))) }) locs[-1] } vctrs/NEWS.md0000644000176200001440000015207214532374152012526 0ustar liggesusers# vctrs 0.6.5 * Internal changes requested by CRAN around C level format strings (#1896). * Fixed tests related to changes to `dim<-()` in R-devel (#1889). # vctrs 0.6.4 * Fixed a performance issue with `vec_c()` and ALTREP vectors (in particular, the new ALTREP list vectors in R-devel) (#1884). * Fixed an issue with complex vector tests related to changes in R-devel (#1883). * Added a class to the `vec_locate_matches()` error that is thrown when an overflow would otherwise occur (#1845). * Fixed an issue with `vec_rank()` and 0-column data frames (#1863). # vctrs 0.6.3 * Fixed an issue where certain ALTREP row names were being materialized when passed to `new_data_frame()`. We've fixed this by removing a safeguard in `new_data_frame()` that performed a compatibility check when both `n` and `row.names` were provided. Because this is a low level function designed for performance, it is up to the caller to ensure these inputs are compatible (tidyverse/dplyr#6596). * Fixed an issue where `vec_set_*()` used with data frames could accidentally return an object with the type of the proxy rather than the type of the original inputs (#1837). * Fixed a rare `vec_locate_matches()` bug that could occur when using a max/min `filter` (tidyverse/dplyr#6835). # vctrs 0.6.2 * Fixed conditional S3 registration to avoid a CRAN check NOTE that appears in R >=4.3.0 (#1832). * Fixed tests to maintain compatibility with the next version of waldo (#1829). # vctrs 0.6.1 * Fixed a test related to `c.sfc()` changes in sf 1.0-10 (#1817). # vctrs 0.6.0 * New `vec_run_sizes()` for computing the size of each run within a vector. It is identical to the `times` column from `vec_unrep()`, but is faster if you don't need the run key (#1210). * New `sizes` argument to `vec_chop()` which allows you to partition a vector using an integer vector describing the size of each expected slice. It is particularly useful in combination with `vec_run_sizes()` and `list_sizes()` (#1210, #1598). * New `obj_is_vector()`, `obj_check_vector()`, and `vec_check_size()` validation helpers. We believe these are a better approach to vector validation than `vec_assert()` and `vec_is()`, which have been marked as questioning because the semantics of their `ptype` arguments are hard to define and can often be replaced by `vec_cast()` or a type predicate function like `rlang::is_logical()` (#1784). * `vec_is_list()` and `vec_check_list()` have been renamed to `obj_is_list()` and `obj_check_list()`, in line with the new `obj_is_vector()` helper. The old functions have been silently deprecated, but an official deprecation process will start in the next vctrs release (#1803). * `vec_locate_matches()` gains a new `relationship` argument that holistically handles multiple matches between `needles` and `haystack`. In particular, `relationship = "many-to-one"` replaces `multiple = "error"` and `multiple = "warning"`, which have been removed from the documentation and silently soft-deprecated. Official deprecation for those options will start in a future release (#1791). * `vec_locate_matches()` has changed its default `needles_arg` and `haystack_arg` values from `""` to `"needles"` and `"haystack"`, respectively. This generally generates more informative error messages (#1792). * `vec_chop()` has gained empty `...` between `x` and the optional `indices` argument. For backwards compatibility, supplying `vec_chop(x, indices)` without naming `indices` still silently works, but will be deprecated in a future release (#1813). * `vec_slice()` has gained an `error_call` argument (#1785). * The `numeric_version` type from base R is now better supported in equality, comparison, and order based operations (tidyverse/dplyr#6680). * R >=3.5.0 is now explicitly required. This is in line with the tidyverse policy of supporting the [5 most recent versions of R](https://www.tidyverse.org/blog/2019/04/r-version-support/). # vctrs 0.5.2 * New `vec_expand_grid()`, which is a lower level helper that is similar to `tidyr::expand_grid()` (#1325). * New `vec_set_intersect()`, `vec_set_difference()`, `vec_set_union()`, and `vec_set_symmetric_difference()` which compute set operations like `intersect()`, `setdiff()`, and `union()`, but the vctrs variants don't strip attributes and work with data frames (#1755, #1765). * `vec_identify_runs()` is now faster when used with data frames (#1684). * The maximum load factor of the internal dictionary was reduced from 77% to 50%, which improves performance of functions like `vec_match()`, `vec_set_intersect()`, and `vec_unique()` in some cases (#1760). * Fixed a bug with the internal `vec_order_radix()` function related to matrix columns (#1753). # vctrs 0.5.1 * Fix for CRAN checks. # vctrs 0.5.0 * vctrs is now compliant with `-Wstrict-prototypes` as requested by CRAN (#1729). * `vec_ptype2()` now consistently falls back to bare data frame in case of incompatible data frame subclasses. This is part of a general move towards relaxed coercion rules. * Common type and cast errors now inherit from `"vctrs_error_ptype2"` and `"vctrs_error_cast"` respectively. They are still both subclasses from `"vctrs_error_incompatible_type"` (which used to be their most specific class and is now a parent class). * New `list_all_size()` and `list_check_all_size()` to quickly determine if a list contains elements of a particular `size` (#1582). * `list_unchop()` has gained empty `...` to force optional arguments to be named (#1715). * `vec_rep_each(times = 0)` now works correctly with logical vectors that are considered unspecified and with named vectors (#1673). * `list_of()` was relaxed to make it easier to combine. It is now coercible with `list()` (#1161). When incompatible `list_of()` types are combined, the result is now a bare `list()`. Following this change, the role of `list_of()` is mainly to carry type information for potential optimisations, rather than to guarantee a certain type throughout an analysis. * `validate_list_of()` has been removed. It hasn't proven to be practically useful, and isn't used by any packages on CRAN (#1697). * Directed calls to `vec_c()`, like `vec_c(.ptype = )`, now mention the position of the problematic argument when there are cast errors (#1690). * `list_unchop()` no longer drops names in some cases when `indices` were supplied (#1689). * `"unique_quiet"` and `"universal_quiet"` are newly accepted by `vec_as_names(repair =)` and `vec_names2(repair =)`. These options exist to help users who call these functions indirectly, via another function which only exposes `repair` but not `quiet`. Specifying `repair = "unique_quiet"` is like specifying `repair = "unique", quiet = TRUE`. When the `"*_quiet"` options are used, any setting of `quiet` is silently overridden (@jennybc, #1629). `"unique_quiet"` and `"universal_quiet"` are also newly accepted for the name repair argument of several other functions that do not expose a `quiet` argument: `data_frame()`, `df_list()`, `vec_c()`, `list_unchop()`, `vec_interleave()`, `vec_rbind()`, and `vec_cbind()` (@jennybc, #1716). * `list_unchop()` has gained `error_call` and `error_arg` arguments (#1641, #1692). * `vec_c()` has gained `.error_call` and `.error_arg` arguments (#1641, #1692). * Improved the performance of list-of common type methods (#1686, #875). * The list-of method for `as_list_of()` now places the optional `.ptype` argument after the `...` (#1686). * `vec_rbind()` now applies `base::c()` fallback recursively within packed df-cols (#1331, #1462, #1640). * `vec_c()`, `vec_unchop()`, and `vec_rbind()` now proxy and restore recursively (#1107). This prevents `vec_restore()` from being called with partially filled vectors and improves performance (#1217, #1496). * New `vec_any_missing()` for quickly determining if a vector has any missing values (#1672). * `vec_equal_na()` has been renamed to `vec_detect_missing()` to align better with vctrs naming conventions. `vec_equal_na()` will stick around for a few minor versions, but has been formally soft-deprecated (#1672). * `vec_c(outer = c(inner = 1))` now produces correct error messages (#522). * If a data frame is returned as the proxy from `vec_proxy_equal()`, `vec_proxy_compare()`, or `vec_proxy_order()`, then the corresponding proxy function is now automatically applied recursively along all of the columns. Additionally, packed data frame columns will be unpacked, and 1 column data frames will be unwrapped. This ensures that the simplest possible types are provided to the native C algorithms, improving both correctness and performance (#1664). * When used with record vectors, `vec_proxy_compare()` and `vec_proxy_order()` now call the correct proxy function while recursing over the fields (#1664). * The experimental function `vec_list_cast()` has been removed from the package (#1382). * Native classes like dates and datetimes now accept dimensions (#1290, #1329). * `vec_compare()` now throws a more informative error when attempting to compare complex vectors (#1655). * `vec_rep()` and friends gain `error_call`, `x_arg`, and `times_arg` arguments so they can be embedded in frontends (#1303). * Record vectors now fail as expected when indexed along dimensions greater than 1 (#1295). * `vec_order()` and `vec_sort()` now have `...` between the required and optional arguments to make them easier to extend (#1647). * S3 vignette was extended to show how to make the polynomial class atomic instead of a list (#1030). * The experimental `n` argument of `vec_restore()` has been removed. It was only used to inform on the size of data frames in case a bare list is restored. It is now expected that bare lists be initialised to data frame so that the size is carried through row attributes. This makes the generic simpler and fixes some performance issues (#650). * The `anyNA()` method for `vctrs_vctr` (and thus `vctrs_list_of`) now supports the `recursive` argument (#1278). * `vec_as_location()` and `num_as_location()` have gained a `missing = "remove"` option (#1595). * `vec_as_location()` no longer matches `NA_character_` and `""` indices if those invalid names appear in `names` (#1489). * `vec_unchop()` has been renamed to `list_unchop()` to better indicate that it requires list input. `vec_unchop()` will stick around for a few minor versions, but has been formally soft-deprecated (#1209). * Lossy cast errors during scalar subscript validation now have the correct message (#1606). * Fixed confusing error message with logical `[[` subscripts (#1608). * New `vec_rank()` to compute various types of sample ranks (#1600). * `num_as_location()` now throws the right error when there are out-of-bounds negative values and `oob = "extend"` and `negative = "ignore"` are set (#1614, #1630). * `num_as_location()` now works correctly when a combination of `zero = "error"` and `negative = "invert"` are used (#1612). * `data_frame()` and `df_list()` have gained `.error_call` arguments (#1610). * `vec_locate_matches()` has gained an `error_call` argument (#1611). * `"select"` and `"relocate"` have been added as valid subscript actions to support tidyselect and dplyr (#1596). * `num_as_location()` has a new `oob = "remove"` argument to remove out-of-bounds locations (#1595). * `vec_rbind()` and `vec_cbind()` now have `.error_call` arguments (#1597). * `df_list()` has gained a new `.unpack` argument to optionally disable data frame unpacking (#1616). * `vec_check_list(arg = "")` now throws the correct error (#1604). * The `difftime` to `difftime` `vec_cast()` method now standardizes the internal storage type to double, catching potentially corrupt integer storage `difftime` vectors (#1602). * `vec_as_location2()` and `vec_as_subscript2()` more correctly utilize their `call` arguments (#1605). * `vec_count(sort = "count")` now uses a stable sorting method. This ensures that different keys with the same count are sorted in the order that they originally appeared in (#1588). * Lossy cast error conditions now show the correct message when `conditionMessage()` is called on them (#1592). * Fixed inconsistent reporting of conflicting inputs in `vec_ptype_common()` (#1570). * `vec_ptype_abbr()` and `vec_ptype_full()` now suffix 1d arrays with `[1d]`. * `vec_ptype_abbr()` and `vec_ptype_full()` methods are no longer inherited (#1549). * `vec_cast()` now throws the correct error when attempting to cast a subclassed data frame to a non-data frame type (#1568). * `vec_locate_matches()` now uses a more conservative heuristic when taking the joint ordering proxy. This allows it to work correctly with sf's sfc vectors and the classes from the bignum package (#1558). * An sfc method for `vec_proxy_order()` was added to better support the sf package. These vectors are generally treated like list-columns even though they don't explicitly have a `"list"` class, and the `vec_proxy_order()` method now forwards to the list method to reflect that (#1558). * `vec_proxy_compare()` now works correctly for raw vectors wrapped in `I()`. `vec_proxy_order()` now works correctly for raw and list vectors wrapped in `I()` (#1557). # vctrs 0.4.2 * HTML documentation fixes for CRAN checks. # vctrs 0.4.1 * OOB errors with `character()` indexes use "that don't exist" instead of "past the end" (#1543). * Fixed memory protection issues related to common type determination (#1551, tidyverse/tidyr#1348). # vctrs 0.4.0 * New experimental `vec_locate_sorted_groups()` for returning the locations of groups in sorted order. This is equivalent to, but faster than, calling `vec_group_loc()` and then sorting by the `key` column of the result. * New experimental `vec_locate_matches()` for locating where each observation in one vector matches one or more observations in another vector. It is similar to `vec_match()`, but returns all matches by default (rather than just the first), and can match on binary conditions other than equality. The algorithm is inspired by data.table's very fast binary merge procedure. * The `vec_proxy_equal()`, `vec_proxy_compare()`, and `vec_proxy_order()` methods for `vctrs_rcrd` are now applied recursively over the fields (#1503). * Lossy cast errors now inherit from incompatible type errors. * `vec_is_list()` now returns `TRUE` for `AsIs` lists (#1463). * `vec_assert()`, `vec_ptype2()`, `vec_cast()`, and `vec_as_location()` now use `caller_arg()` to infer a default `arg` value from the caller. This may result in unhelpful arguments being mentioned in error messages. In general, you should consider snapshotting vctrs error messages thrown in your package and supply `arg` and `call` arguments if the error context is not adequately reported to your users. * `vec_ptype_common()`, `vec_cast_common()`, `vec_size_common()`, and `vec_recycle_common()` gain `call` and `arg` arguments for specifying an error context. * `vec_compare()` can now compare zero column data frames (#1500). * `new_data_frame()` now errors on negative and missing `n` values (#1477). * `vec_order()` now correctly orders zero column data frames (#1499). * vctrs now depends on cli to help with error message generation. * New `vec_check_list()` and `list_check_all_vectors()` input checkers, and an accompanying `list_all_vectors()` predicate. * New `vec_interleave()` for combining multiple vectors together, interleaving their elements in the process (#1396). * `vec_equal_na(NULL)` now returns `logical(0)` rather than erroring (#1494). * `vec_as_location(missing = "error")` now fails with `NA` and `NA_character_` in addition to `NA_integer_` (#1420, @krlmlr). * Starting with rlang 1.0.0, errors are displayed with the contextual function call. Several vctrs operations gain a `call` argument that makes it possible to report the correct context in error messages. This concerns: - `vec_cast()` and `vec_ptype2()` - `vec_default_cast()` and `vec_default_ptype2()` - `vec_assert()` - `vec_as_names()` - `stop_` constructors like `stop_incompatible_type()` Note that default `vec_cast()` and `vec_ptype2()` methods automatically support this if they pass `...` to the corresponding `vec_default_` functions. If you throw a non-internal error from a non-default method, add a `call = caller_env()` argument in the method and pass it to `rlang::abort()`. * If `NA_character_` is specified as a name for `vctrs_vctr` objects, it is now automatically repaired to `""` (#780). * `""` is now an allowed name for `vctrs_vctr` objects and all its subclasses (`vctrs_list_of` in particular) (#780). * `list_of()` is now much faster when many values are provided. * `vec_as_location()` evaluates `arg` only in case of error, for performance (#1150, @krlmlr). * `levels.vctrs_vctr()` now returns `NULL` instead of failing (#1186, @krlmlr). * `vec_assert()` produces a more informative error when `size` is invalid (#1470). * `vec_duplicate_detect()` is a bit faster when there are many unique values. * `vec_proxy_order()` is described in `vignette("s3-vectors")` (#1373, @krlmlr). * `vec_chop()` now materializes ALTREP vectors before chopping, which is more efficient than creating many small ALTREP pieces (#1450). * New `list_drop_empty()` for removing empty elements from a list (#1395). * `list_sizes()` now propagates the names of the list onto the result. * Name repair messages are now signaled by `rlang::names_inform_repair()`. This means that the messages are now sent to stdout by default rather than to stderr, resulting in prettier messages. Additionally, name repair messages can now be silenced through the global option `rlib_name_repair_verbosity`, which is useful for testing purposes. See `?names_inform_repair` for more information (#1429). * `vctrs_vctr` methods for `na.omit()`, `na.exclude()`, and `na.fail()` have been added (#1413). * `vec_init()` is now slightly faster (#1423). * `vec_set_names()` no longer corrupts `vctrs_rcrd` types (#1419). * `vec_detect_complete()` now computes completeness for `vctrs_rcrd` types in the same way as data frames, which means that if any field is missing, the entire record is considered incomplete (#1386). * The `na_value` argument of `vec_order()` and `vec_sort()` now correctly respect missing values in lists (#1401). * `vec_rep()` and `vec_rep_each()` are much faster for `times = 0` and `times = 1` (@mgirlich, #1392). * `vec_equal_na()` and `vec_fill_missing()` now work with integer64 vectors (#1304). * The `xtfrm()` method for vctrs_vctr objects no longer accidentally breaks ties (#1354). * `min()`, `max()` and `range()` no longer throw an error if `na.rm = TRUE` is set and all values are `NA` (@gorcha, #1357). In this case, and where an empty input is given, it will return `Inf`/`-Inf`, or `NA` if `Inf` can't be cast to the input type. * `vec_group_loc()`, used for grouping in dplyr, now correctly handles vectors with billions of elements (up to `.Machine$integer.max`) (#1133). # vctrs 0.3.8 * Compatibility with next version of rlang. # vctrs 0.3.7 * `vec_ptype_abbr()` gains arguments to control whether to indicate named vectors with a prefix (`prefix_named`) and indicate shaped vectors with a suffix (`suffix_shape`) (#781, @krlmlr). * `vec_ptype()` is now an optional _performance_ generic. It is not necessary to implement, but if your class has a static prototype, you might consider implementing a custom `vec_ptype()` method that returns a constant to improve performance in some cases (such as common type imputation). * New `vec_detect_complete()`, inspired by `stats::complete.cases()`. For most vectors, this is identical to `!vec_equal_na()`. For data frames and matrices, this detects rows that only contain non-missing values. * `vec_order()` can now order complex vectors (#1330). * Removed dependency on digest in favor of `rlang::hash()`. * Fixed an issue where `vctrs_rcrd` objects were not being proxied correctly when used as a data frame column (#1318). * `register_s3()` is now licensed with the "unlicense" which makes it very clear that it's fine to copy and paste into your own package (@maxheld83, #1254). # vctrs 0.3.6 * Fixed an issue with tibble 3.0.0 where removing column names with `names(x) <- NULL` is now deprecated (#1298). * Fixed a GCC 11 issue revealed by CRAN checks. # vctrs 0.3.5 * New experimental `vec_fill_missing()` for filling in missing values with the previous or following value. It is similar to `tidyr::fill()`, but also works with data frames and has an additional `max_fill` argument to limit the number of sequential missing values to fill. * New `vec_unrep()` to compress a vector with repeated values. It is very similar to run length encoding, and works nicely alongside `vec_rep_each()` as a way to invert the compression. * `vec_cbind()` with only empty data frames now preserves the common size of the inputs in the result (#1281). * `vec_c()` now correctly returns a named result with named empty inputs (#1263). * vctrs has been relicensed as MIT (#1259). * Functions that make comparisons within a single vector, such as `vec_unique()`, or between two vectors, such as `vec_match()`, now convert all character input to UTF-8 before making comparisons (#1246). * New `vec_identify_runs()` which returns a vector of identifiers for the elements of `x` that indicate which run of repeated values they fall in (#1081). * Fixed an encoding translation bug with lists containing data frames which have columns where `vec_size()` is different from the low level `Rf_length()` (#1233). # vctrs 0.3.4 * Fixed a GCC sanitiser error revealed by CRAN checks. # vctrs 0.3.3 * The `table` class is now implemented as a wrapper type that delegates its coercion methods. It used to be restricted to integer tables (#1190). * Named one-dimensional arrays now behave consistently with simple vectors in `vec_names()` and `vec_rbind()`. * `new_rcrd()` now uses `df_list()` to validate the fields. This makes it more flexible as the fields can now be of any type supported by vctrs, including data frames. * Thanks to the previous change the `[[` method of records now preserves list fields (#1205). * `vec_data()` now preserves data frames. This is consistent with the notion that data frames are a primitive vector type in vctrs. This shouldn't affect code that uses `[[` and `length()` to manipulate the data. On the other hand, the vctrs primitives like `vec_slice()` will now operate rowwise when `vec_data()` returns a data frame. * `outer` is now passed unrecycled to name specifications. Instead, the return value is recycled (#1099). * Name specifications can now return `NULL`. The names vector will only be allocated if the spec function returns non-`NULL` during the concatenation. This makes it possible to ignore outer names without having to create an empty names vector when there are no inner names: ``` zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner # `NULL` names rather than a vector of "" names(vec_c(a = 1:2, .name_spec = zap_outer_spec)) #> NULL # Names are allocated when inner names exist names(vec_c(a = 1:2, c(b = 3L), .name_spec = zap_outer_spec)) #> [1] "" "" "b" ``` * Fixed several performance issues in `vec_c()` and `vec_unchop()` with named vectors. * The restriction that S3 lists must have a list-based proxy to be considered lists by `vec_is_list()` has been removed (#1208). * New performant `data_frame()` constructor for creating data frames in a way that follows tidyverse semantics. Among other things, inputs are recycled using tidyverse recycling rules, strings are never converted to factors, list-columns are easier to create, and unnamed data frame input is automatically spliced. * New `df_list()` for safely and consistently constructing the data structure underlying a data frame, a named list of equal-length vectors. It is useful in combination with `new_data_frame()` for creating user-friendly constructors for data frame subclasses that use the tidyverse rules for recycling and determining types. * Fixed performance issue with `vec_order()` on classed vectors which affected `dplyr::group_by()` (tidyverse/dplyr#5423). * `vec_set_names()` no longer alters the input in-place (#1194). * New `vec_proxy_order()` that provides an ordering proxy for use in `vec_order()` and `vec_sort()`. The default method falls through to `vec_proxy_compare()`. Lists are special cased, and return an integer vector proxy that orders by first appearance. * List columns in data frames are no longer comparable through `vec_compare()`. * The experimental `relax` argument has been removed from `vec_proxy_compare()`. # vctrs 0.3.2 * Fixed a performance issue in `bind_rows()` with S3 columns (#1122, #1124, #1151, tidyverse/dplyr#5327). * `vec_slice()` now checks sizes of data frame columns in case the data structure is corrupt (#552). * The native routines in vctrs now dispatch and evaluate in the vctrs namespace. This improves the continuity of evaluation in backtraces. * `new_data_frame()` is now twice as fast when `class` is supplied. * New `vec_names2()`, `vec_names()` and `vec_set_names()` (#1173). # vctrs 0.3.1 * `vec_slice()` no longer restores attributes of foreign objects for which a `[` method exist. This fixes an issue with `ts` objects which were previously incorrectly restored. * The `as.list()` method for `vctrs_rcrd` objects has been removed in favor of directly using the method for `vctrs_vctr`, which calls `vec_chop()`. * `vec_c()` and `vec_rbind()` now fall back to `base::c()` if the inputs have a common class hierarchy for which a `c()` method is implemented but no self-to-self `vec_ptype2()` method is implemented. * `vec_rbind()` now internally calls `vec_proxy()` and `vec_restore()` on the data frame common type that is used to create the output (#1109). * `vec_as_location2("0")` now works correctly (#1131). * `?reference-faq-compatibility` is a new reference guide on vctrs primitives. It includes an overview of the fallbacks to base R generics implemented in vctrs for compatibility with existing classes. * The documentation of vctrs functions now includes a Dependencies section to reference which other vctrs operations are called from that function. By following the dependencies links recursively, you will find the vctrs primitives on which an operation relies. ## CRAN results * Fixed type declaration mismatches revealed by LTO build. * Fixed r-devel issue with new `c.factor()` method. # vctrs 0.3.0 This version features an overhaul of the coercion system to make it more consistent and easier to implement. See the _Breaking changes_ and _Type system_ sections for details. There are three new documentation topics if you'd like to learn how to implement coercion methods to make your class compatible with tidyverse packages like dplyr: * https://vctrs.r-lib.org/reference/theory-faq-coercion.html for an overview of the coercion mechanism in vctrs. * https://vctrs.r-lib.org/reference/howto-faq-coercion.html for a practical guide about implementing methods for vectors. * https://vctrs.r-lib.org/reference/howto-faq-coercion-data-frame.html for a practical guide about implementing methods for data frames. ## Reverse dependencies troubleshooting The following errors are caused by breaking changes. * `"Can't convert to ."` `vec_cast()` no longer converts to list. Use `vec_chop()` or `as.list()` instead. * `"Can't convert to ."` `vec_cast()` no longer converts to character. Use `as.character()`to deparse objects. * `"names for target but not for current"` Names of list-columns are now preserved by `vec_rbind()`. Adjust tests accordingly. ## Breaking changes * Double-dispatch methods for `vec_ptype2()` and `vec_cast()` are no longer inherited (#710). Class implementers must implement one set of methods for each compatible class. For example, a tibble subclass no longer inherits from the `vec_ptype2()` methods between `tbl_df` and `data.frame`. This means that you explicitly need to implement `vec_ptype2()` methods with `tbl_df` and `data.frame`. This change requires a bit more work from class maintainers but is safer because the coercion hierarchies are generally different from class hierarchies. See the S3 dispatch section of `?vec_ptype2` for more information. * `vec_cast()` is now restricted to the same conversions as `vec_ptype2()` methods (#606, #741). This change is motivated by safety and performance: - It is generally sloppy to generically convert arbitrary inputs to one type. Restricted coercions are more predictable and allow your code to fail earlier when there is a type issue. - When unrestricted conversions are useful, this is generally towards a known type. For example, `glue::glue()` needs to convert arbitrary inputs to the known character type. In this case, using double dispatch instead of a single dispatch generic like `as.character()` is wasteful. - To implement the useful semantics of coercible casts (already used in `vec_assign()`), two double dispatch were needed. Now it can be done with one double dispatch by calling `vec_cast()` directly. * `stop_incompatible_cast()` now throws an error of class `vctrs_error_incompatible_type` rather than `vctrs_error_incompatible_cast`. This means that `vec_cast()` also throws errors of this class, which better aligns it with `vec_ptype2()` now that they are restricted to the same conversions. * The `y` argument of `stop_incompatible_cast()` has been renamed to `to` to better match `to_arg`. ## Type system * Double-dispatch methods for `vec_ptype2()` and `vec_cast()` are now easier to implement. They no longer need any the boiler plate. Implementing a method for classes `foo` and `bar` is now as simple as: ``` #' @export vec_ptype2.foo.bar <- function(x, y, ...) new_foo() ``` vctrs also takes care of implementing the default and unspecified methods. If you have implemented these methods, they are no longer called and can now be removed. One consequence of the new dispatch mechanism is that `NextMethod()` is now completely unsupported. This is for the best as it never worked correctly in a double-dispatch setting. Parent methods must now be called manually. * `vec_ptype2()` methods now get zero-size prototypes as inputs. This guarantees that methods do not peek at the data to determine the richer type. * `vec_is_list()` no longer allows S3 lists that implement a `vec_proxy()` method to automatically be considered lists. A S3 list must explicitly inherit from `"list"` in the base class to be considered a list. * `vec_restore()` no longer restores row names if the target is not a data frame. This fixes an issue where `POSIXlt` objects would carry a `row.names` attribute after a proxy/restore roundtrip. * `vec_cast()` to and from data frames preserves the row names of inputs. * The internal function `vec_names()` now returns row names if the input is a data frame. Similarly, `vec_set_names()` sets row names on data frames. This is part of a general effort at making row names the vector names of data frames in vctrs. If necessary, the row names are repaired verbosely but without error to make them unique. This should be a mostly harmless change for users, but it could break unit tests in packages if they make assumptions about the row names. ## Compatibility and fallbacks * With the double dispatch changes, the coercion methods are no longer inherited from parent classes. This is because the coercion hierarchy is in principle different from the S3 hierarchy. A consequence of this change is that subclasses that don't implement coercion methods are now in principle incompatible. This is particularly problematic with subclasses of data frames for which throwing incompatible errors would be too incovenient for users. To work around this, we have implemented a fallback to the relevant base data frame class (either `data.frame` or `tbl_df`) in coercion methods (#981). This fallback is silent unless you set the `vctrs:::warn_on_fallback` option to `TRUE`. In the future we may extend this fallback principle to other base types when they are explicitly included in the class vector (such as `"list"`). * Improved support for foreign classes in the combining operations `vec_c()`, `vec_rbind()`, and `vec_unchop()`. A foreign class is a class that doesn't implement `vec_ptype2()`. When all the objects to combine have the same foreign class, one of these fallbacks is invoked: - If the class implements a `base::c()` method, the method is used for the combination. (FIXME: `vec_rbind()` currently doesn't use this fallback.) - Otherwise if the objects have identical attributes and the same base type, we consider them to be compatible. The vectors are concatenated and the attributes are restored (#776). These fallbacks do not make your class completely compatible with vctrs-powered packages, but they should help in many simple cases. * `vec_c()` and `vec_unchop()` now fall back to `base::c()` for S4 objects if the object doesn't implement `vec_ptype2()` but sets an S4 `c()` method (#919). ## Vector operations * `vec_rbind()` and `vec_c()` with data frame inputs now consistently preserve the names of list-columns, df-columns, and matrix-columns (#689). This can cause some false positives in unit tests, if they are sensitive to internal names (#1007). * `vec_rbind()` now repairs row names silently to avoid confusing messages when the row names are not informative and were not created on purpose. * `vec_rbind()` gains option to treat input names as row names. This is disabled by default (#966). * New `vec_rep()` and `vec_rep_each()` for repeating an entire vector and elements of a vector, respectively. These two functions provide a clearer interface for the functionality of `vec_repeat()`, which is now deprecated. * `vec_cbind()` now calls `vec_restore()` on inputs emptied of their columns before computing the common type. This has consequences for data frame classes with special columns that devolve into simpler classes when the columns are subsetted out. These classes are now always simplified by `vec_cbind()`. For instance, column-binding a grouped data frame with a data frame now produces a tibble (the simplified class of a grouped data frame). * `vec_match()` and `vec_in()` gain parameters for argument tags (#944). * The internal version of `vec_assign()` now has support for assigning names and inner names. For data frames, the names are assigned recursively. * `vec_assign()` gains `x_arg` and `value_arg` parameters (#918). * `vec_group_loc()`, which powers `dplyr::group_by()`, now has more efficient vector access (#911). * `vec_ptype()` gained an `x_arg` argument. * New `list_sizes()` for computing the size of every element in a list. `list_sizes()` is to `vec_size()` as `lengths()` is to `length()`, except that it only supports lists. Atomic vectors and data frames result in an error. * `new_data_frame()` infers size from row names when `n = NULL` (#894). * `vec_c()` now accepts `rlang::zap()` as `.name_spec` input. The returned vector is then always unnamed, and the names do not cause errors when they can't be combined. They are still used to create more informative messages when the inputs have incompatible types (#232). ## Classes * vctrs now supports the `data.table` class. The common type of a data frame and a data table is a data table. * `new_vctr()` now always appends a base `"list"` class to list `.data` to be compatible with changes to `vec_is_list()`. This affects `new_list_of()`, which now returns an object with a base class of `"list"`. * dplyr methods are now implemented for `vec_restore()`, `vec_ptype2()`, and `vec_cast()`. The user-visible consequence (and breaking change) is that row-binding a grouped data frame and a data frame or tibble now returns a grouped data frame. It would previously return a tibble. * The `is.na<-()` method for `vctrs_vctr` now supports numeric and character subscripts to indicate where to insert missing values (#947). * Improved support for vector-like S4 objects (#550, #551). * The base classes `AsIs` and `table` have vctrs methods (#904, #906). * `POSIXlt` and `POSIXct` vectors are handled more consistently (#901). * Ordered factors that do not have identical levels are now incompatible. They are now incompatible with all factors. ## Indexing and names * `vec_as_subscript()` now fails when the subscript is a matrix or an array, consistently with `vec_as_location()`. * Improved error messages in `vec_as_location()` when subscript is a matrix or array (#936). * `vec_as_location2()` properly picks up `subscript_arg` (tidyverse/tibble#735). * `vec_as_names()` now has more informative error messages when names are not unique (#882). * `vec_as_names()` gains a `repair_arg` argument that when set will cause `repair = "check_unique"` to generate an informative hint (#692). ## Conditions * `stop_incompatible_type()` now has an `action` argument for customizing whether the coercion error came from `vec_ptype2()` or `vec_cast()`. `stop_incompatible_cast()` is now a thin wrapper around `stop_incompatible_type(action = "convert")`. * `stop_` functions now take `details` after the dots. This argument can no longer be passed by position. * Supplying both `details` and `message` to the `stop_` functions is now an internal error. * `x_arg`, `y_arg`, and `to_arg` are now compulsory arguments in `stop_` functions like `stop_incompatible_type()`. * Lossy cast errors are now considered internal. Please don't test for the class or explicitly handle them. * New argument `loss_type` for the experimental function `maybe_lossy_cast()`. It can take the values "precision" or "generality" to indicate in the error message which kind of loss is the error about (double to integer loses precision, character to factor loses generality). * Coercion and recycling errors are now more consistent. ## CRAN results * Fixed clang-UBSAN error "nan is outside the range of representable values of type 'int'" (#902). * Fixed compilation of stability vignette following the date conversion changes on R-devel. # vctrs 0.2.4 * Factors and dates methods are now implemented in C for efficiency. * `new_data_frame()` now correctly updates attributes and supports merging of the `"names"` and `"row.names"` arguments (#883). * `vec_match()` gains an `na_equal` argument (#718). * `vec_chop()`'s `indices` argument has been restricted to positive integer vectors. Character and logical subscripts haven't proven useful, and this aligns `vec_chop()` with `vec_unchop()`, for which only positive integer vectors make sense. * New `vec_unchop()` for combining a list of vectors into a single vector. It is similar to `vec_c()`, but gives greater control over how the elements are placed in the output through the use of a secondary `indices` argument. * Breaking change: When `.id` is supplied, `vec_rbind()` now creates the identifier column at the start of the data frame rather than at the end. * `numeric_version` and `package_version` lists are now treated as vectors (#723). * `vec_slice()` now properly handles symbols and S3 subscripts. * `vec_as_location()` and `vec_as_subscript()` are now fully implemented in C for efficiency. * `num_as_location()` gains a new argument, `zero`, for controlling whether to `"remove"`, `"ignore"`, or `"error"` on zero values (#852). # vctrs 0.2.3 * The main feature of this release is considerable performance improvements with factors and dates. * `vec_c()` now falls back to `base::c()` if the vector doesn't implement `vec_ptype2()` but implements `c()`. This should improve the compatibility of vctrs-based functions with foreign classes (#801). * `new_data_frame()` is now faster. * New `vec_is_list()` for detecting if a vector is a list in the vctrs sense. For instance, objects of class `lm` are not lists. In general, classes need to explicitly inherit from `"list"` to be considered as lists by vctrs. * Unspecified vectors of `NA` can now be assigned into a list (#819). ``` x <- list(1, 2) vec_slice(x, 1) <- NA x #> [[1]] #> NULL #> #> [[2]] #> 2 ``` * `vec_ptype()` now errors on scalar inputs (#807). * `vec_ptype_finalise()` is now recursive over all data frame types, ensuring that unspecified columns are correctly finalised to logical (#800). * `vec_ptype()` now correctly handles unspecified columns in data frames, and will always return an unspecified column type (#800). * `vec_slice()` and `vec_chop()` now work correctly with `bit64::integer64()` objects when an `NA` subscript is supplied. By extension, this means that `vec_init()` now works with these objects as well (#813). * `vec_rbind()` now binds row names. When named inputs are supplied and `names_to` is `NULL`, the names define row names. If `names_to` is supplied, they are assigned in the column name as before. * `vec_cbind()` now uses the row names of the first named input. * The `c()` method for `vctrs_vctr` now throws an error when `recursive` or `use.names` is supplied (#791). # vctrs 0.2.2 * New `vec_as_subscript()` function to cast inputs to the base type of a subscript (logical, numeric, or character). `vec_as_index()` has been renamed to `vec_as_location()`. Use `num_as_location()` if you need more options to control how numeric subscripts are converted to a vector of locations. * New `vec_as_subscript2()`, `vec_as_location2()`, and `num_as_location2()` variants for validating scalar subscripts and locations (e.g. for indexing with `[[`). * `vec_as_location()` now preserves names of its inputs if possible. * `vec_ptype2()` methods for base classes now prevent inheritance. This makes sense because the subtyping graph created by `vec_ptype2()` methods is generally not the same as the inheritance relationships defined by S3 classes. For instance, subclasses are often a richer type than their superclasses, and should often be declared as supertypes (e.g. `vec_ptype2()` should return the subclass). We introduced this breaking change in a patch release because `new_vctr()` now adds the base type to the class vector by default, which caused `vec_ptype2()` to dispatch erroneously to the methods for base types. We'll finish switching to this approach in vctrs 0.3.0 for the rest of the base S3 classes (dates, data frames, ...). * `vec_equal_na()` now works with complex vectors. * `vctrs_vctr` class gains an `as.POSIXlt()` method (#717). * `vec_is()` now ignores names and row names (#707). * `vec_slice()` now support Altvec vectors (@jimhester, #696). * `vec_proxy_equal()` is now applied recursively across the columns of data frames (#641). * `vec_split()` no longer returns the `val` column as a `list_of`. It is now returned as a bare list (#660). * Complex numbers are now coercible with integer and double (#564). * zeallot has been moved from Imports to Suggests, meaning that `%<-%` is no longer re-exported from vctrs. * `vec_equal()` no longer propagates missing values when comparing list elements. This means that `vec_equal(list(NULL), list(NULL))` will continue to return `NA` because `NULL` is the missing element for a list, but now `vec_equal(list(NA), list(NA))` returns `TRUE` because the `NA` values are compared directly without checking for missingness. * Lists of expressions are now supported in `vec_equal()` and functions that compare elements, such as `vec_unique()` and `vec_match()`. This ensures that they work with the result of modeling functions like `glm()` and `mgcv::gam()` which store "family" objects containing expressions (#643). * `new_vctr()` gains an experimental `inherit_base_type` argument which determines whether or not the class of the underlying type will be included in the class. * `list_of()` now inherits explicitly from "list" (#593). * `vec_ptype()` has relaxed default behaviour for base types; now if two vectors both inherit from (e.g.) "character", the common type is also "character" (#497). * `vec_equal()` now correctly treats `NULL` as the missing value element for lists (#653). * `vec_cast()` now casts data frames to lists rowwise, i.e. to a list of data frames of size 1. This preserves the invariant of `vec_size(vec_cast(x, to)) == vec_size(x)` (#639). * Positive and negative 0 are now considered equivalent by all functions that check for equality or uniqueness (#637). * New experimental functions `vec_group_rle()` for returning run length encoded groups; `vec_group_id()` for constructing group identifiers from a vector; `vec_group_loc()` for computing the locations of unique groups in a vector (#514). * New `vec_chop()` for repeatedly slicing a vector. It efficiently captures the pattern of `map(indices, vec_slice, x = x)`. * Support for multiple character encodings has been added to functions that compare elements within a single vector, such as `vec_unique()`, and across multiple vectors, such as `vec_match()`. When multiple encodings are encountered, a translation to UTF-8 is performed before any comparisons are made (#600, #553). * Equality and ordering methods are now implemented for raw and complex vectors (@romainfrancois). # vctrs 0.2.1 Maintenance release for CRAN checks. # vctrs 0.2.0 With the 0.2.0 release, many vctrs functions have been rewritten with native C code to improve performance. Functions like `vec_c()` and `vec_rbind()` should now be fast enough to be used in packages. This is an ongoing effort, for instance the handling of factors and dates has not been rewritten yet. These classes still slow down vctrs primitives. The API in 0.2.0 has been updated, please see a list of breaking changes below. vctrs has now graduated from experimental to a maturing package. Please note that API changes are still planned for future releases, for instance `vec_ptype2()` and `vec_cast()` might need to return a sentinel instead of failing with an error when there is no common type or possible cast. ## Breaking changes * Lossy casts now throw errors of type `vctrs_error_cast_lossy`. Previously these were warnings. You can suppress these errors selectively with `allow_lossy_cast()` to get the partial cast results. To implement your own lossy cast operation, call the new exported function `maybe_lossy_cast()`. * `vec_c()` now fails when an input is supplied with a name but has internal names or is length > 1: ``` vec_c(foo = c(a = 1)) #> Error: Can't merge the outer name `foo` with a named vector. #> Please supply a `.name_spec` specification. vec_c(foo = 1:3) #> Error: Can't merge the outer name `foo` with a vector of length > 1. #> Please supply a `.name_spec` specification. ``` You can supply a name specification that describes how to combine the external name of the input with its internal names or positions: ``` # Name spec as glue string: vec_c(foo = c(a = 1), .name_spec = "{outer}_{inner}") # Name spec as a function: vec_c(foo = c(a = 1), .name_spec = function(outer, inner) paste(outer, inner, sep = "_")) vec_c(foo = c(a = 1), .name_spec = ~ paste(.x, .y, sep = "_")) ``` * `vec_empty()` has been renamed to `vec_is_empty()`. * `vec_dim()` and `vec_dims()` are no longer exported. * `vec_na()` has been renamed to `vec_init()`, as the primary use case is to initialize an output container. * `vec_slice<-` is now type stable (#140). It always returns the same type as the LHS. If needed, the RHS is cast to the correct type, but only if both inputs are coercible. See examples in `?vec_slice`. * We have renamed the `type` particle to `ptype`: - `vec_type()` => `vec_ptype()` - `vec_type2()` => `vec_ptype2()` - `vec_type_common()` => `vec_ptype_common()` Consequently, `vec_ptype()` was renamed to `vec_ptype_show()`. ## New features * New `vec_proxy()` generic. This is the main customisation point in vctrs along with `vec_restore()`. You should only implement it when your type is designed around a non-vector class (atomic vectors, bare lists, data frames). In this case, `vec_proxy()` should return such a vector class. The vctrs operations will be applied on the proxy and `vec_restore()` is called to restore the original representation of your type. The most common case where you need to implement `vec_proxy()` is for S3 lists. In vctrs, S3 lists are treated as scalars by default. This way we don't treat objects like model fits as vectors. To prevent vctrs from treating your S3 list as a scalar, unclass it from the `vec_proxy()` method. For instance here is the definition for `list_of`: ``` #' @export vec_proxy.vctrs_list_of <- function(x) { unclass(x) } ``` If you inherit from `vctrs_vctr` or `vctrs_rcrd` you don't need to implement `vec_proxy()`. * `vec_c()`, `vec_rbind()`, and `vec_cbind()` gain a `.name_repair` argument (#227, #229). * `vec_c()`, `vec_rbind()`, `vec_cbind()`, and all functions relying on `vec_ptype_common()` now have more informative error messages when some of the inputs have nested data frames that are not convergent: ``` df1 <- tibble(foo = tibble(bar = tibble(x = 1:3, y = letters[1:3]))) df2 <- tibble(foo = tibble(bar = tibble(x = 1:3, y = 4:6))) vec_rbind(df1, df2) #> Error: No common type for `..1$foo$bar$y` and `..2$foo$bar$y` . ``` * `vec_cbind()` now turns named data frames to packed columns. ```r data <- tibble::tibble(x = 1:3, y = letters[1:3]) data <- vec_cbind(data, packed = data) data # A tibble: 3 x 3 x y packed$x $y 1 1 a 1 a 2 2 b 2 b 3 3 c 3 c ``` Packed data frames are nested in a single column. This makes it possible to access it through a single name: ```r data$packed # A tibble: 3 x 2 x y 1 1 a 2 2 b 3 3 c ``` We are planning to use this syntax more widely in the tidyverse. * New `vec_is()` function to check whether a vector conforms to a prototype and/or a size. Unlike `vec_assert()`, it doesn't throw errors but returns `TRUE` or `FALSE` (#79). Called without a specific type or size, `vec_assert()` tests whether an object is a data vector or a scalar. S3 lists are treated as scalars by default. Implement a `vec_is_vector()` for your class to override this property (or derive from `vctrs_vctr`). * New `vec_order()` and `vec_sort()` for ordering and sorting generalised vectors. * New `.names_to` parameter for `vec_rbind()`. If supplied, this should be the name of a column where the names of the inputs are copied. This is similar to the `.id` parameter of `dplyr::bind_rows()`. * New `vec_seq_along()` and `vec_init_along()` create useful sequences (#189). * `vec_slice()` now preserves character row names, if present. * New `vec_split(x, by)` is a generalisation of `split()` that can divide a vector into groups formed by the unique values of another vector. Returns a two-column data frame containing unique values of `by` aligned with matching `x` values (#196). ## Other features and bug fixes * Using classed errors of class `"vctrs_error_assert"` for failed assertions, and of class `"vctrs_error_incompatible"` (with subclasses `_type`, `_cast` and `_op`) for errors on incompatible types (#184). * Character indexing is now only supported for named objects, an error is raised for unnamed objects (#171). * Predicate generics now consistently return logical vectors when passed a `vctrs_vctr` class. They used to restore the output to their input type (#251). * `list_of()` now has an `as.character()` method. It uses `vec_ptype_abbr()` to collapse complex objects into their type representation (tidyverse/tidyr#654). * New `stop_incompatible_size()` to signal a failure due to mismatched sizes. * New `validate_list_of()` (#193). * `vec_arith()` is consistent with base R when combining `difftime` and `date`, with a warning if casts are lossy (#192). * `vec_c()` and `vec_rbind()` now handle data.frame columns properly (@yutannihilation, #182). * `vec_cast(x, data.frame())` preserves the number of rows in `x`. * `vec_equal()` now handles missing values symmetrically (#204). * `vec_equal_na()` now returns `TRUE` for data frames and records when every component is missing, not when _any_ component is missing (#201). * `vec_init()` checks input is a vector. * `vec_proxy_compare()` gains an experimental `relax` argument, which allows data frames to be orderable even if all their columns are not (#210). * `vec_size()` now works with positive short row names. This fixes issues with data frames created with jsonlite (#220). * `vec_slice<-` now has a `vec_assign()` alias. Use `vec_assign()` when you don't want to modify the original input. * `vec_slice()` now calls `vec_restore()` automatically. Unlike the default `[` method from base R, attributes are preserved by default. * `vec_slice()` can correct slice 0-row data frames (#179). * New `vec_repeat()` for repeating each element of a vector the same number of times. * `vec_type2(x, data.frame())` ensures that the returned object has names that are a length-0 character vector. vctrs/MD50000644000176200001440000010407214532470452011735 0ustar liggesusers4c9bb4bc268079cd278de5c3375264d7 *DESCRIPTION bb38d60fdc799f700b0f8537f9fc4110 *LICENSE 26e8ea3522f7f470d41408ce931449af *LICENSE.note b29d26a3acc8fc3be7b570b00281774d *NAMESPACE 44c69d677da4b4ca0ded4029fe6412a1 *NEWS.md aa8a25ae369276c1dab0b1a900b9365e *R/aaa.R 0bbbaba064d71671123f99018084f9de *R/altrep-lazy-character.R d0e98a3acb7a27924a7fac1435a02047 *R/altrep.R 5fa87a23bec0d23042ba59613fa1f61d *R/arith.R 91e62177e2ced57ecea18e239635266b *R/assert.R 9705883ebfe8038e5f6038713c2a2944 *R/bind.R 17dc3ace9d1aab98e288acc8cf2c1a80 *R/c.R dd3daedb4aa78711f36c4eaf6aac3718 *R/cast.R 62e9623c7d6d84360869399a393c93d6 *R/compare.R 03af8886c6365797bedd4aadfbf72680 *R/complete.R 439d9df74ccc01c7165ffd257dd728eb *R/conditions.R 7794355c0c13ee2a519b47c3995e045a *R/dictionary.R ecc8ced30047498f466cc3777bce6b2d *R/dim.R 376d5a1ea32d07e820ac1be281f5a8e9 *R/empty.R 4fe059e591f5b7aaff7e71d3b6d1765f *R/equal.R 8754a184250f7cc5f456720989f67f51 *R/expand.R c10342207b21e63f4ac3df29b72f337d *R/faq-developer.R 23e1082e791534d5f3d88a606451fb86 *R/faq-internal.R 35aaea2ead8b231a5122bf8441cbaca7 *R/faq.R 4086555f3ef197850db758371ae41269 *R/fields.R a769a65ebacdd8fb319b46649522e274 *R/fill.R 628039d8ef2b27f5e12f10ef77313006 *R/group.R a87a8c656a676bfb84d33e1a0f5e824e *R/hash.R 4e9bd17a652cb4314c1fc3a00647077f *R/import-standalone-linked-version.R 8c5f08458cfd4e7f75a27924b2789c83 *R/import-standalone-obj-type.R a6dfe5a5049fb897fec1dd3352a45b51 *R/import-standalone-purrr.R 09c045dfb7be704d32aec050f68a17a3 *R/import-standalone-types-check.R 5436356727d426e9bb2495d7fc076a3b *R/interval.R ca1bc5b34931ab20ba04dbf284711850 *R/match.R 19b436887a2e7e055505a220313faf93 *R/missing.R 454185496b35061b1a0ce6cbae9c689c *R/names.R 3a821b1c8551b93b103d03e3c472128e *R/numeric.R 8f69a8a386545c05cca548fc612fea2b *R/order.R f4a0ef3887560c5e0657d6c36edb722b *R/partial-factor.R 193f0f1abca2342cb18f524b4b810b34 *R/partial-frame.R 447ab18b6ef849d242d7a25c17a8b7c5 *R/partial.R f505e15a583469413be4b578bd67cf3f *R/print-str.R 18b62a8ebf843d64c3609a5aa77034d7 *R/proxy.R f7d62503fb9768b812bf0011b27dee08 *R/ptype-abbr-full.R 6503621c5b573b95a1afda313064b716 *R/rank.R d8bae2aa4d174070c7c84873c946cb0f *R/recycle.R 8a2a641d41bcb7bc1185b0adb50c166e *R/register-s3.R f760d51e624c62c788ed45685ac157de *R/rep.R 4017b5a8feabc7f1704da270e20378b3 *R/runs.R 2d25c2068268d055bf329d265b8a9aa3 *R/set.R 77e928b1c4a5f8019db16072538f38a0 *R/shape.R 04466beca87fd0cce1cb93d9ee279a4c *R/size.R 015b9d8a417711bb5d6f229c854cdc2e *R/slice-chop.R 79c49323959706ea1adba492043910d8 *R/slice-interleave.R 6199cef87d2919a9a899c57e4d4e816a *R/slice.R 8b848a6338284986af45a15584abc639 *R/split.R f68c5b74828f3ea0fc09b6469c536db3 *R/subscript-loc.R c6c29172a3f211420e2f5049924b8b4a *R/subscript.R 8df766ac76d37fb6d47979756441c6de *R/translate.R ee6f53133e7f63ffad088a9fa70fd20a *R/type-asis.R 0a988558ff6cdce41f6addf188133df5 *R/type-bare.R a3f3e3f2c584bde717e683fa81cd8163 *R/type-data-frame.R ebec573d77d615cc3f6e262791a89960 *R/type-data-table.R da512af73114614d1311a0998fc10be1 *R/type-date-time.R 6c5b781dd535f55c378c80a90c6fb0ef *R/type-dplyr.R 1bc366ed2f22e61411c9023903952a4f *R/type-explore.R 399a0a75b614f7e058bdc882159e94da *R/type-factor.R c669b26ac679c09f4a7943a143605846 *R/type-integer64.R f3c3a2ef0c5e2a64d7bef7a92392aeef *R/type-list-of.R a55979094e5110c87ed5dff7254d0873 *R/type-misc.R 008c234418619729f84e8beedaa7b214 *R/type-rcrd.R 5f3abd93050255499756203ed4b8a068 *R/type-sclr.R 5ddf33997ee2b1f3c38e56d68d6c14fb *R/type-sf.R 99282f06db3b453ce22d6cbef59c82e7 *R/type-table.R a70a7ac8cb811426bc7477a8e74128aa *R/type-tibble.R f436fec5de5f7708e5672ce7accc05a8 *R/type-unspecified.R aaccef295cb95aef915e25b775824cde *R/type-vctr.R 6f0bea2d57fcfd4f739fa4aa3b8dbd69 *R/type.R 485175b5607e1ea5eea8022c2b868386 *R/type2.R 6137f151be3f347f7585f08fe722db83 *R/utils-cli.R ec4ee23ee6dbf4ddb33f852ce0f5cbdd *R/utils.R eed40820275df16170e6021546e446a6 *R/vctrs-deprecated.R ef1cdf3aa2406d681fa626c8ba5cb6c5 *R/vctrs-package.R 07a9acfd1abc7bf8a500a741c6098966 *R/zzz.R fc5feb396ea9d90f995373458df361dd *README.md 5ae3013a5bcc115e7832c3bf993e6ce6 *build/vignette.rds 43cc81e569685f0ede8af83150d4f016 *inst/WORDLIST 91b665fdcc1e53bdb3313409cc1b80d7 *inst/doc/pillar.R 7dec628565582a504f5aa9744f865cd9 *inst/doc/pillar.Rmd accd5671c19c7c18ef5c7c31265f9a6e *inst/doc/pillar.html 3ca34e986554454ab5a425f27931c9e6 *inst/doc/s3-vector.R e9656a0f57974885a1cf61ebfba88fcb *inst/doc/s3-vector.Rmd 51fade1f0675b96b81bf7c7fd583ca5f *inst/doc/s3-vector.html dc96a0b7cee74c68fb1d1c9dc5d7998d *inst/doc/stability.R 4839ddd2a359470622d6edcca6c49edf *inst/doc/stability.Rmd ea2f3ad3bb33363f7940691c15144e7e *inst/doc/stability.html 98d334b47a42c08133df1013daacc5bf *inst/doc/type-size.R 317b475c17b8b2a70f68cca0402f9eb9 *inst/doc/type-size.Rmd 9124361676524fe75d9d624d85946a20 *inst/doc/type-size.html 23d206cd5b3ad3c4a357ddb6551fcd35 *inst/include/vctrs.c 67efd743e1f714d4fdb6a27898469147 *inst/include/vctrs.h 6b865e241282842dfe6ebf1861cf5d5b *man/as-is.Rd 7f8848a4407442328bc8f4a931294ff7 *man/data_frame.Rd 15a8baf0db9014ad118f860c62f960bc *man/df_list.Rd bbc1254fac87c05a95ecb28846a893cd *man/df_ptype2.Rd 8129dc2228cc653f7f8cc89402adf503 *man/faq-compatibility-types.Rd c8f030b331b929901ad27ef08a3a953f *man/faq-error-incompatible-attributes.Rd 7716dcd53adc4a3e771991e14704d198 *man/faq-error-scalar-type.Rd 853adb55a5e326ffce89148289ed888c *man/faq/developer/howto-coercion-data-frame.Rmd 1d2b404542db0d4e3917841f49381bdb *man/faq/developer/howto-coercion.Rmd abaaebf8139cdad9598b2d57eec4b3a3 *man/faq/developer/howto-faq-fix-scalar-type-error.Rmd ae1e2d8343daa530961f5fff4e31d454 *man/faq/developer/links-coercion.Rmd 0af8c2fcec3117584270e0f5d91b0611 *man/faq/developer/reference-compatibility.Rmd 7e11e2d99450d1d6c4a8677c12b6d43c *man/faq/developer/snippet-roxy-workflow.Rmd e2e62755d3bdb05a31548998c611f233 *man/faq/developer/theory-coercion.Rmd 34f562c8bdfb9397d8400b4241d5196e *man/faq/developer/theory-recycling.Rmd fb7ded0ec15e59cc621f0b881b053c38 *man/faq/internal/matches-algorithm.Rmd 4b524b24fa740228c5aec8fdcf1c0917 *man/faq/internal/ptype2-identity.Rmd 1f872537bbe23063c93840d93ad7a542 *man/faq/setup.Rmd b061365fbea176c543525a5f395653a5 *man/faq/user/faq-compatibility-types.Rmd b18af578ec9a68627e7b91a62de9abd5 *man/faq/user/faq-error-scalar-type.Rmd 856c670ccf80ab0a433a5a1a172b313d *man/fields.Rd 69eb638d262c60d5bd5ed2aa4717190c *man/figures/cast.png f5627d4a9d3071aad2dec7dbd3d33eca *man/figures/coerce.png e01d20997a9c8d069ef28e3c6013bd8c *man/figures/combined.png cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg 865c1e8576113b430bcb2d69bfdb0e76 *man/figures/logo.png 25e2f330cc7176f22ad3cc5a3ba4dc22 *man/figures/sizes-recycling.png ff7dec2d166e669c6b6abf085030f369 *man/figures/sizes.graffle 793b22af8f7b1648699748172cab8af7 *man/figures/types.graffle ecb1898f0d3232051d9a1969c0bc8570 *man/figures/vec-count-deps.graffle 0c3de09d34b6b7e94e7671c08e9df3f8 *man/figures/vec-count-deps.png 2995e7508f8f053600f8e21cec6b60c3 *man/figures/vec-count-deps.svg 71032aac5a63960168c62208653faf78 *man/howto-faq-coercion-data-frame.Rd ba1da74b9301c190b1370c3c20c6bb9f *man/howto-faq-coercion.Rd 566c147b0ac3d696ac534da66a92631d *man/howto-faq-fix-scalar-type-error.Rd adb03fe5026bdbf0027002f12f8792d5 *man/int64.Rd 025067de2ede9eb33d7ef832ed1f7355 *man/internal-faq-matches-algorithm.Rd 633f0627270e9a7585976ed30a6100b5 *man/internal-faq-ptype2-identity.Rd 4354e70c6d6ce4afa68f9cdcfd7ab71c *man/list_drop_empty.Rd 341deb91f3970a931b1046b9c7ca7435 *man/list_of.Rd 96052f38ddd3ccff3dd2e6b657aedeae *man/maybe_lossy_cast.Rd 135c38fc7e19016453eafc6f609a052a *man/missing.Rd 51cc0cbcd6d2dddb05f661f82320cc71 *man/name_spec.Rd af444cb8c20bd85d2ee2877ae42a65ce *man/new_data_frame.Rd 96a6f14e3014f3b7e3e9db90d0516fc4 *man/new_date.Rd 75dcc37c27e47ab214a3fb275e49365a *man/new_factor.Rd 4b83a08a6ed1055066d2860e552b8b46 *man/new_list_of.Rd 075149d44a907c86d691f73d36fd4008 *man/new_partial.Rd 55d7a6b660343c6c2ed6785475b5b433 *man/new_rcrd.Rd 055e37a6ce35ad98995b28f9202480fd *man/new_vctr.Rd 3a05c58cc1d371587bfb2e1fbc3faef8 *man/obj_is_list.Rd 0432b9bc77e37a36a76ba1f65697ea6e *man/obj_print.Rd 4bd8924e7585671c5f2995ca1fcb8578 *man/op-empty-default.Rd 5725105cbca9da7c11bc0e6dd8c0c799 *man/order-radix.Rd ec4886ff03bc5790d46a56a9e718dea0 *man/partial_factor.Rd 9aab73d2dab1a252ddc23a457dfe798e *man/partial_frame.Rd 15020f537e44b4e8d6b5824773f8f2d9 *man/reference-faq-compatibility.Rd 6cc6bb80b7b76a85fc455520fd6f9d81 *man/runs.Rd 189eee68fe149af920039bea657e0563 *man/s3_register.Rd febea111fcbfa3fadeb25ea4a5239008 *man/table.Rd c5808ec0a28a1a0a2d0ea946a2e8107e *man/theory-faq-coercion.Rd eb50d76bc6f40753af13ee3d0eead436 *man/theory-faq-recycling.Rd 679d46a041225b72bf80ad526513cc47 *man/unspecified.Rd 160b76db439f9a485c412da17e680549 *man/vctrs-conditions.Rd 00d4712f39f895035e899b48827887d0 *man/vctrs-data-frame.Rd a543f5ac20d91093b4b9044ab7893be1 *man/vctrs-package.Rd a67939fd9d6798d77c6410fdee060590 *man/vec-rep.Rd 0731d68f2ae4e1cd419ced8e5785d005 *man/vec-set.Rd 41aec7be7b1c7722937983d2fdf1b8e3 *man/vec_arith.Rd be5ef09fa278a0d5e5818d1e466abf31 *man/vec_as_index.Rd 399c863b0fdb4dee4e78a7862952f705 *man/vec_as_location.Rd 1cd97290bb613faa6826ee2aff7d2687 *man/vec_as_names.Rd 4751d9e1734abce229836322b3cd18ec *man/vec_as_names_legacy.Rd 5699d4892ebe1a70f9184180d0478afb *man/vec_as_subscript.Rd ef0f4d8cc4e9bc4786dbc6f7c8b11c8c *man/vec_assert.Rd d1cca9634fcfb0af4bc871e6c7661ebe *man/vec_bind.Rd f4782edc8c475cce80671c05933bdddd *man/vec_c.Rd da8a64be2f72048945150d170c50f049 *man/vec_cast.Rd 6497ea65f7a25c6b57f7da8868a969b7 *man/vec_cbind_frame_ptype.Rd 359853c74f5a500e85df2eeba36e2b07 *man/vec_chop.Rd f6a9055845c6ced7d866638412ff4a63 *man/vec_compare.Rd 8e6770ca84e71b3575b983f3eb347240 *man/vec_count.Rd ed716990b6a50620562e744ee927ab75 *man/vec_data.Rd d5da420b8a8686cf2e0910970d326b24 *man/vec_default_ptype2.Rd 0c73ae7aa558931401b22ba50c5a7faa *man/vec_detect_complete.Rd e1817b7f472d5eb862fae6e02081941c *man/vec_duplicate.Rd cf78fee622d53673b7e08d5c4406c222 *man/vec_empty.Rd cc74e2fa82c8103d5fab0ba127f5ed62 *man/vec_equal.Rd fd36dc7754da38f3882e56d83a55095c *man/vec_equal_na.Rd 73575c3ace93171061dccf6fe4cf8e0a *man/vec_expand_grid.Rd ab5db56ce9cf88567ef1544dc58a0fb4 *man/vec_fill_missing.Rd 8ba05e321b26a23b2e52d76a07dc625f *man/vec_group.Rd 6f8697155e5275ab798e6a430f020197 *man/vec_init.Rd e89be03cec8a023e3f0882147a7029eb *man/vec_interleave.Rd f8e6735b44208198a78e209a62a06e55 *man/vec_is_list.Rd ca47c6794895d64355333182409e37ed *man/vec_locate_matches.Rd 08318af9e975aa8b9680fc77ddf5906a *man/vec_locate_sorted_groups.Rd b4a957dc485dcfa8a4edd03827f3e85e *man/vec_match.Rd 6a0ece01d92f2d27a52fa1fc6ec99d36 *man/vec_math.Rd f930bd4d1e373a74ae18e09aa5ac01c0 *man/vec_names.Rd 711d2d36bffdf9c60781c993fd087b5f *man/vec_order.Rd effec5ea0d7340ceff0b6a784fe4e3f3 *man/vec_proxy.Rd f93b5ac3e9244b22bc44c0c1707b98ef *man/vec_proxy_compare.Rd f4f71559cb455487816c1bda66e8c168 *man/vec_proxy_equal.Rd 3a14dd23274237bfbe696a91f3a89bb2 *man/vec_ptype.Rd 61b0865d2076f200909eeab97143bace *man/vec_ptype2.Rd f2e5b287c46b120025e0974e5e136089 *man/vec_ptype_full.Rd 2ed2a803f564a69008048054cb981fda *man/vec_rank.Rd 43d99ffe2d453416bc8bd46c36684cd3 *man/vec_recycle.Rd c3c863bc5fe6d0bb6466b176a66c3548 *man/vec_repeat.Rd 998136cc73dfd5148c60699abd8424f0 *man/vec_seq_along.Rd 57ef40dec90ececb810274f2e7f703d7 *man/vec_size.Rd c147bc9a05d0ce51df56ab00f1e79d85 *man/vec_slice.Rd 45bfe942611373e1b51f27bd83a535c9 *man/vec_split.Rd a1c483af2aa4664b8757c8447bf03c59 *man/vec_type.Rd fbfda16f107405d27db7c4183f77823b *man/vec_unchop.Rd 2b6faf07f1ecd9709c37d9ac6c14d6a0 *man/vec_unique.Rd 47ace24cf648d9009dd6a38895bf1224 *man/vector-checks.Rd 1ac3ccfefdee618d4e30507fd0e26932 *src/Makevars 623f395690c6470bb705b34c358b4038 *src/altrep-lazy-character.c f5d8ca66a18afad3ecdfd469be7b437e *src/altrep-rle.c c5b1a3e04c67fd31fdfd169c66b17047 *src/altrep-rle.h 90da5d638848476a43c1fae8c367202e *src/altrep.c ae4239e8b147156aafd859f0060c1b65 *src/altrep.h 084428aa2c6d23761042f921764e48f7 *src/arg-counter.c 6f32383938594d64fb4fe0f28c5e7b01 *src/arg-counter.h abdddee57273dd215dafb217f99045cc *src/arg.c ea458752e721b2e759bb995d5820db82 *src/arg.h f11b41576a3e43d3c18cbd5973f7d96e *src/assert.c cb073fb2fd8690f8f1bff561adce43b0 *src/assert.h 4793657af8efc4895f1b7f1182d34b90 *src/bind.c 2f703ff3a31b46550575ae7a00a2cae8 *src/c-unchop.c 248395db06f076a1012f77bc6f4222cd *src/c-unchop.h e65bc7ee3d98f35aba372cb238db5ef4 *src/c.c 87189634453e4ed83aac2d1d8eac07fa *src/c.h 673159346d9fa10a892c17e220552154 *src/callables.c 0d0a0aa28807538419bf29611b1535cb *src/cast-bare.c 4786b323c411d0a502cc5088c12790aa *src/cast-bare.h 3dd63d5d5aa247dcee4611fad60c8d6c *src/cast-dispatch.c 97f9ea6c3e03726f4f3c7e01f9efa672 *src/cast-dispatch.h ed236bf67c830f09485ecd19ece3fc8f *src/cast.c 7b15cc6fd481249584248a2eb53c6842 *src/cast.h 0d46287107abe9592b1c481c5ff66ea7 *src/compare.c 90317d3845bc14fd9b5da4a9e71c88cd *src/compare.h b36813b77310f9617e3f3db4d0479229 *src/complete.c e1ebe187a40acce72660350d75d574c1 *src/complete.h c1e25ae50801e47eaa36b6f649d7acae *src/conditions.c 17504589099ef304415c104625bf74af *src/conditions.h d0aeaece74641ae3824b756d35efacc5 *src/decl/arg-counter-decl.h eec1742d9716cc9e97a25ff9295ba555 *src/decl/arg-decl.h f44cc3bbc4670083adfd80bf7ccc3853 *src/decl/assert-decl.h 929b7e226ecd665392655c87a0b53b14 *src/decl/bind-decl.h 5423dab3067333b2b9ff478d040d28ea *src/decl/c-decl.h d594a9adc4c40cdd9483273bbac77ee0 *src/decl/c-unchop-decl.h f5840400de6cf0f29d23ccd1f5624e67 *src/decl/cast-decl.h 97ffd65ddd0467ca608b64b070822e82 *src/decl/compare-decl.h e7374a0f8f3bb88fb6a5078f5d83cfff *src/decl/dictionary-decl.h 293d3e4c8453bfc07ba7323197171d76 *src/decl/empty-decl.h 37c2cafc0d795af35bfcd4ad0d0fb800 *src/decl/expand-decl.h 7affcadacd58c51956e2c12051becee2 *src/decl/interval-decl.h 2819da1b40a204cf28a881723b9365e4 *src/decl/match-decl.h 040c94470b1078814a75c0f7192b2e35 *src/decl/match-joint-decl.h 0b9a1f6068eb1a52be11cce018ab1731 *src/decl/missing-decl.h 8183bd4349e17260837f84b4b63fc863 *src/decl/names-decl.h 2548a6c3ecce772a393e164e5c1f0e31 *src/decl/poly-op-decl.h 4e373d103e3d9724f36303b08618dc8a *src/decl/proxy-decl.h 5f88129884b240af9b5f3a91fcb895b7 *src/decl/proxy-restore-decl.h 550ef255c5d7b10d37199d2cab8ce9a5 *src/decl/ptype-common-decl.h 6698f260b89f6ecf62d0e58625345ffb *src/decl/ptype-decl.h 330a19e0935a53c9bf7109bc68bd8b46 *src/decl/ptype2-decl.h f9ee2140c89a3b46d21a57a4cc40dd91 *src/decl/ptype2-dispatch-decl.h 39f9dfa970d529d829aafecbdaeb311d *src/decl/rank-decl.h dee83a2f55e2571233017bee6c1baefe *src/decl/rep-decl.h d41d8cd98f00b204e9800998ecf8427e *src/decl/rlang-dev-decl.h ccc116f10919ee64555f6fefe906107a *src/decl/runs-decl.h d41d8cd98f00b204e9800998ecf8427e *src/decl/set-decl.h 94d2833d4fec9b6dcb196d1450447566 *src/decl/shape-decl.h 87ac6ff14d37628705de89e601e05eeb *src/decl/size-common-decl.h c2d271519244c44dde211fb077505502 *src/decl/size-decl.h cc90e086f3344104f18c52234d27032d *src/decl/slice-assign-decl.h fa32dd09cfed1e5108349d6abca484ac *src/decl/slice-chop-decl.h e6acffa66af158b07f1539bb5b475189 *src/decl/slice-interleave-decl.h 433555dc73a06e3feee90942b96007c3 *src/decl/subscript-decl.h 1c4e53904d5ed995cae8864bfb363c24 *src/decl/subscript-loc-decl.h a533376e96a5d92d10dbde4ca664b6b3 *src/decl/type-data-frame-decl.h f68cd5eba5e27490973538366080e5f4 *src/decl/type-info-decl.h 7680470962f71a166726dcd0f737fc25 *src/decl/type-integer64-decl.h 373619649f37a04eca81cd284b208c85 *src/decl/typeof2-s3-decl.h ef7f38c88e5730928fb0be4e2e2a57de *src/decl/utils-dispatch-decl.h 9e453b11d1b08912c7fe50301fa3d119 *src/dictionary.c 90e0c6f48f7a6e2d9c664792fb7f8ba9 *src/dictionary.h 2b3cf083097c5a1877b4afe04521abde *src/dim.c 2631256187463c3754674c43dc395fbc *src/dim.h 950a06022f25325773266094620ced0b *src/empty.c b5c12009b15cd3149bd4b694bcd7c742 *src/equal.c 22a2319d197d1bcffdf2e31c5afa8203 *src/equal.h e1b3199d781ae0e5b5a8af3ca3f707cf *src/expand.c ede349cc5a6b749e0e8a7fb4bc23bc4e *src/expand.h 52b688cc38a5591baec273c76ae704a9 *src/fields.c 2df4677e0457d9778f94944d89a15655 *src/fill.c ed38937cc81ce53e8b8da52e62778d5e *src/globals.c 609991ab548c62b67a934deeee4beecb *src/globals.h 2e5fae216a6f8071d4d74a123c3ec836 *src/group.c 32b9453f80cb11322c6d39586d5483d6 *src/growable.c 048b74ac43be2e6c2fe16fc02ab9302e *src/hash.c 4604e7db87456d74586450eef43c6ff6 *src/hash.h 0d973d6a48a0a0b560e2a14309b0efe0 *src/init.c 61595259254e2ddd646234670fb1fecc *src/interval.c 3b257e25e40f98525eb76dbb7a1b3bd9 *src/lazy.h 60e3f13f2150fcd5e6ef005b19da59d9 *src/match-compare.h 63143432d5951a31b85f7e121a0eeaf5 *src/match-joint.c 3c3253e8f0505f5fbebf757fd4029abe *src/match-joint.h 574c0e2e66b45d270e754359e9b5fea7 *src/match.c f2b1208dbea45d1a9399e378f85ae1cf *src/missing.c b42039098e67e5741f5c9405a543506b *src/missing.h 2fe7fef1f1c9bc68b033d6e74afced21 *src/names.c 551f192655f01e7eb9052275f46b85ea *src/names.h baa9b1251c6689540fe992d9c524d40c *src/order-collate.c d930812f3ef3f9b36d979d5a1cd209a3 *src/order-collate.h 5424e3243c187ec2843b476e58016299 *src/order-groups.c b62509f182818d16384a7b7940aa53d0 *src/order-groups.h 5494153d07d833d95a46a21945453810 *src/order-sortedness.c d656801606a75be171d4c0506b529633 *src/order-sortedness.h dd885d6e6015140fcef07933bfb72da5 *src/order-truelength.c c0dcc693c8a874d9e7a1f394059e427e *src/order-truelength.h 8e24ac7125686675e5e2022622dd8d5f *src/order.c d81d297f96c182e3d8674771b475f2d0 *src/order.h 45858d16c7963c8dce6d5ecf1ec16035 *src/owned.h 7b07c110dd581c4cceb6f652a895ba89 *src/poly-op.c 4b410e5028e060a372e23694ec0d4731 *src/poly-op.h 8cb351004bc46087dbcdb7f5a9ee6aad *src/proxy-restore.c eb97b901a499fa9a94ffe1377c40e369 *src/proxy-restore.h 6e1409609f8496a7221e44571e247532 *src/proxy.c 17d8d3155bcf4dbb6b3d800bd35b65b4 *src/proxy.h 4150f286d20c0c7b3b2bfee768827f91 *src/ptype-common.c 0477f638def3427bce5d6c60063d2374 *src/ptype-common.h 1298f52d5c77f5de015ff3b8c3531ad2 *src/ptype.c d895283d57471e72a427a7dbe3ad5b1f *src/ptype.h 4cd9ce3eb6eec4d322d7eabdfd80e178 *src/ptype2-dispatch.c 538af418709ee744569b8e012b68b123 *src/ptype2-dispatch.h be30783937fe307a4247c673f7420e9b *src/ptype2.c b32d16fe48598bf66273af9aaf475968 *src/ptype2.h 583e1a55709d16ffe108651391b5c55a *src/rank.c a93ddd0ba9d57c65ecf27c16a7a53255 *src/rep.c 3abb34ab8a63d0c5171b13026088c330 *src/rep.h 81455781e011851726af965360b8635f *src/rlang-dev.c 76880e6ab9b473cc6502de3571116e03 *src/rlang-dev.h f675812d8927e9b13fce5f1e8ed31f1b *src/rlang.c 2ac29d4e778ca5cd4ede63936aceec6a *src/rlang/altrep.h 0608535fc0aaf4efdf5a51912e6a6c84 *src/rlang/arg.c 516c28cc1b7d182ce35425ddd1aeec92 *src/rlang/arg.h 51c833ea77aaa0b98613ea09327a4285 *src/rlang/attrib.c 91b9e07c242164935f070acd1906dc85 *src/rlang/attrib.h 9c9b7d12443672c8dee0a4615b56c2fc *src/rlang/c-utils.c 958e57eea0ecc2a59584284d13e74ac6 *src/rlang/c-utils.h 22e5d851558dcb490b87b5ffebbcb6f8 *src/rlang/call.c 8b7f8d124612d68d51fd8c1d1131674e *src/rlang/call.h 334cf2bf146b2092fd5ae90f0063bb46 *src/rlang/cnd.c beb3cae7fa9aaabc41ce9a4baa9919c9 *src/rlang/cnd.h b176a262a2ea2e884f4914289466de7b *src/rlang/cpp/rlang.cpp 29bd3e7d88cdb8a4a641f05ee089069c *src/rlang/cpp/vec.cpp 6641e807daa143b432f19ec466be377d *src/rlang/debug.c 6cfc4c254c05bb83ded1ccfcfdd73135 *src/rlang/debug.h c4c0b92858e604f1e651c5aa247ac5b0 *src/rlang/decl/cnd-decl.h c42446da8c4955c10b5acc67ec1ee130 *src/rlang/decl/df-decl.h 1aa66d903f3a4cb578ca16d7188511c6 *src/rlang/decl/dict-decl.h bcd3be7be1e7d4c9056adc78687c9f71 *src/rlang/decl/dyn-list-of-decl.h 4faf5ad969081c60feecc51bd0dc81b2 *src/rlang/decl/env-decl.h bf8df189e2ff68b0c0fe62d076814545 *src/rlang/decl/obj-decl.h 5cbb39aebac52f777c30ab65c94b2461 *src/rlang/decl/stack-decl.h 5be592253e1434fb98c28029abc7b5f2 *src/rlang/decl/walk-decl.h 1da9bd6ded0503adf71366ca9157ca33 *src/rlang/df.c b60f69df26d28f1549b7af6a743220d1 *src/rlang/df.h 71cb9741795db383b500a5df30ccac58 *src/rlang/dict.c 65f78ebe4befef95942b4ec7d809f37e *src/rlang/dict.h 6e469204c177ce2654c64ab1a4657518 *src/rlang/dyn-array.c 23a50dfa859e75379846ff54248765f5 *src/rlang/dyn-array.h 1a0756c1c50f978b84035395ccce3cb1 *src/rlang/dyn-list-of.c a0f33ea79f34c196989f738968e0e1d9 *src/rlang/dyn-list-of.h f33d928d13ae05dbc3ecb6e46ec5cf10 *src/rlang/env-binding.c 21089db32f310a24b72b0128faf10065 *src/rlang/env-binding.h d8dbbca7e94e0554eec92c097b5e5d7e *src/rlang/env.c e5e77cabfb2cf22230e7e11fd9660b2f *src/rlang/env.h ca30eb74e8fdf150423d871ae72818cc *src/rlang/eval.c 88c2128a2e83789a7908b28a140c469d *src/rlang/eval.h 47ced9fa82505197086534c254580b43 *src/rlang/export.c fa2eb194b1c5969ab31898499b10ae4b *src/rlang/export.h ec36cc014954270b6b827f3844f5cf1b *src/rlang/fn.c 1eae432b32bb6ac5533515d570ee50fe *src/rlang/fn.h 676b90d434ca8455b5306a86a73ec080 *src/rlang/formula.c db5097eb19dae675d9523431fe7a7b85 *src/rlang/formula.h 97d8e16a62839f989c53127d4339efb6 *src/rlang/globals.c ba2c20c5c3596a89f9c9b40c8eb242d7 *src/rlang/globals.h 240370964fa57c63867b60d3a66bb0ad *src/rlang/node.c 04013de2090e7a49f594111532d8dbe8 *src/rlang/node.h 4688cfc8593a2b4e293f4de1d29c0684 *src/rlang/obj.c 3c9d4c44e50870bb284b9fb618bf24fc *src/rlang/obj.h 4576f3bc9e1dafe198460d4c2b0e88f4 *src/rlang/parse.c 7816d9d5ca0a5c7173a7e94de9b3d369 *src/rlang/parse.h a5e865607e4e717784f5f951533d372c *src/rlang/quo.c 0e9752fcf2462f0639f6fff5ba35526c *src/rlang/quo.h 9a59495aec0fd237423215b6aacf66ed *src/rlang/rlang-types.h 6f7ae5669e60bbf2e910f5a81501e69e *src/rlang/rlang.c 7e0cfe3ef93b50bb6aebc281594ad12e *src/rlang/rlang.h a24cd0a56d6993488d143ed8f5ffb7d7 *src/rlang/rlang.hpp 3115b50ab84dff920d09f95b4f73e395 *src/rlang/session.c b66e2877a29b59aca668b26defc1004f *src/rlang/session.h 6225f1c2d3182e4bb957b8f1aba27937 *src/rlang/stack.c 569bf6966ce2f97b72d79717c070ab41 *src/rlang/stack.h 10e3dea942472e0b20dfd804dd3bdf71 *src/rlang/state.h 267ea357e68ff19af4f151c76b8c21c3 *src/rlang/sym.c 595f09b9b51c33a8e2a864b9d4a92d75 *src/rlang/sym.h 6ec4b806d0cad8eddae9250f2cbf3231 *src/rlang/vec-chr.c e267a560c64a208fcc7a4bab77b93bc9 *src/rlang/vec-chr.h bf006c89dcffdc61228a385ee5ad342f *src/rlang/vec-lgl.c e509790fe4a79d6943c93d96dbbe83c3 *src/rlang/vec-lgl.h bf1c072d4e6af784afd362c8f77aef00 *src/rlang/vec.c 13953fa3199b0cc7aaa63e7fb5a55df0 *src/rlang/vec.h aad9cb50156b9ab0f02ea91fb31db1b7 *src/rlang/vendor.c 139544f5576475c395dc8601655c7948 *src/rlang/vendor.h 4b8fd26a08a137a90c8d1323faa286ec *src/rlang/walk.c 05a1bc12017a29dd10726ce3a2320dcb *src/rlang/walk.h 43e3e8ae29364d6f5d03e2a6ad78eb17 *src/runs.c d1ba3e476de9acbc56add4a5f05be91f *src/runs.h aecceeb3b6d88ba80ed8b671c22778b9 *src/set.c 911c713ec8ef667d54a935105c5c15b4 *src/set.h ef8f06ea4ad59688369ff38ed38cc62b *src/shape.c aaf830568e37c230bec92dfec46d9379 *src/shape.h ac557e0a9b1253729de5483772d6949a *src/size-common.c 28e0b87891826699028c6d2dcf98ca14 *src/size-common.h 606a5d7e310c4c1288dedbfeb5dee2a0 *src/size.c 76824686ff0983cc5034e3b1e4ec01ae *src/size.h 3ed6bc529091fbe0ffd2721ad37aeca2 *src/slice-array.c 7ea4e2854c63007ac5a9461ee9431eb0 *src/slice-assign-array.c 71a2d24476cd4cc42f7f8b906d804e44 *src/slice-assign.c b62bb792fb3b7d8a84824dcc99b46bed *src/slice-assign.h 2fabb47fec2d771e6fb151eba886002f *src/slice-chop.c 31658c8cf058e0180b2b57bc44669375 *src/slice-chop.h 038673a62dbab145d291a9fca1f6eaaf *src/slice-interleave.c 0a71e18eca7e4588c52406b69611f52f *src/slice.c baea26d0931096b94a9f8188d79a4888 *src/slice.h 3ee26e1d25d8d496c04742ffb4397354 *src/split.c b04dfaae02340e9cf1992ca5588c3afe *src/strides.h 0258f7e83799e5f89b8dee53b63240cb *src/subscript-loc.c 352f1d55194156d9f7cf45f58119af53 *src/subscript-loc.h 8ecb54207240cc2d3946d6f1ac56bd6b *src/subscript.c 617f4f6807023f20d8556896e5e9f3a2 *src/subscript.h a207b17397e8887f94851554736ec0ff *src/translate.c a22c51f8acb72248cd47da10a7c37b27 *src/translate.h 2ed6cb1fe158181955e0d1173db382cf *src/type-complex.h deac45deba3b5b63eafaae3b2f7e3db5 *src/type-data-frame.c 5abc5a1bde13711cf58d81be207980dd *src/type-data-frame.h 0839a5563c902122093eb6892dc0b677 *src/type-date-time.c 9b0f4d12a4ca52dc2198939fcbaab6c8 *src/type-factor.c 7fdcdf1a5e42e21402e4d626393b7600 *src/type-factor.h ab4905a03248914607a5a06edb5b732f *src/type-info.c 6a1b605c60bfd2199c11f0d41fb3643a *src/type-info.h b84ff44169a9872fd1d249e75ac3eb26 *src/type-integer64.c 4d3a3015d3cf96e3105da13fb4fc0ef6 *src/type-tibble.c 774dd2a8572b9b793fdb285048ae8e4f *src/type-tibble.h d3623e72c54d9f737854bfeabc2b9c48 *src/typeof2-s3.c e74459d774e495cb76aa341db3105dc8 *src/typeof2-s3.h d801f5b1745be1548adb5fae28290080 *src/typeof2.c f0cad8b72a42ea05f555e1f2a6ad2f82 *src/typeof2.h 51a58f904763c67da7905b1786a49c16 *src/unspecified.c 69a4e18a58b3d160918b3491e0cb4b70 *src/utils-dispatch.c 75194d8e26fdf07af28f7b93ad6b0a9e *src/utils-dispatch.h 1822770b2fa46b42759d2c7695508518 *src/utils.c 7d065cd30b501db7c628489ea12a32b1 *src/utils.h 07bcb4acb7eff02dbd36fba1d2a8c0f9 *src/vctrs-core.c f640916d70276e1a3b6c02ea2ea0e993 *src/vctrs-core.h 92c0cd678d2d54b5e3936653f4153b93 *src/vctrs.h bae3061fb0a426f520601485ffdafba6 *src/vec-bool.h 94009183c1736b75433ae50cbe9e0f64 *src/version.c 8ed8f6a8bd8f35f801d615323a1830a6 *tests/testthat.R 6b203eef9d423c321c3b4c42fae12d76 *tests/testthat/_snaps/assert.md d1b8e10db72f4fa7acc9e9a28663572a *tests/testthat/_snaps/bind.md d8f005f161f1bc7d762e516311a5ef2c *tests/testthat/_snaps/c.md f427a10f8d202ed4b9d7d3cde931b21c *tests/testthat/_snaps/cast.md 095a349d8d4687659ebee35cff31e16d *tests/testthat/_snaps/compare.md 87a27d6076dfff20ef616112bf6ef151 *tests/testthat/_snaps/conditions.md 8a8b789f2314fa86c9f6c7c680d1a9fa *tests/testthat/_snaps/dictionary.md 113ebc0aee3442fa85fa8fc66ea5e1b8 *tests/testthat/_snaps/equal.md a0186d4baa38fb882db2c8107b3fc6b7 *tests/testthat/_snaps/error-call.md 99838287c3fb9982872ea92118afc408 *tests/testthat/_snaps/expand.md 1c8913fc381a13e1e87f68340c8ec71a *tests/testthat/_snaps/group.md 3e9f1f3efa867cf57cbfd59d80985427 *tests/testthat/_snaps/hash.md 3244f798510f053b0c0253ef3baa7c73 *tests/testthat/_snaps/interval.md 5eb46ec1f853d51dd0232bc6731941ed *tests/testthat/_snaps/lifecycle-deprecated.md 0d0ea2eb109702fb053445d1ae9fa590 *tests/testthat/_snaps/match.md 3c2be4ef8e796d64260b595a5f85ff22 *tests/testthat/_snaps/names.md 376e5c53749aabd269289e8470b4dddd *tests/testthat/_snaps/order.md c1c42a5bc8b55f7fb15079756893faba *tests/testthat/_snaps/partial-factor.md d60e707bc8349602cc9087d122cc3869 *tests/testthat/_snaps/partial-frame.md 1f836fe52827822a61873a2066086f97 *tests/testthat/_snaps/print-str.md be55f35d347fc4444929098fc4dec7b0 *tests/testthat/_snaps/ptype-abbr-full.md b8335026819390f410899ca4cce877b0 *tests/testthat/_snaps/rank.md a978222d0aa84caa72dac427a19c1f0d *tests/testthat/_snaps/recycle.md 43f83d4c8e4b909f5090237053d6ccb7 *tests/testthat/_snaps/rep.md dd09a8e90a830764c0655b14bf396817 *tests/testthat/_snaps/runs.md 9216d41bbcd072e7c70eed3355326984 *tests/testthat/_snaps/set.md 0dd03b8a4ec7d22cbaff17e8792887d6 *tests/testthat/_snaps/shape.md 5ac529d16617ed2208bfed986a597403 *tests/testthat/_snaps/size.md ea4a1409ab2df396835d39bc6a21fd18 *tests/testthat/_snaps/slice-assign.md 8faf974622c774d381f6ef4f775ba3f0 *tests/testthat/_snaps/slice-chop.md de61afd5b080bb09b3aa9e1a692bc4e1 *tests/testthat/_snaps/slice-interleave.md 0b57c320f923ac38785942ccfbc29c7f *tests/testthat/_snaps/slice.md c4961acb4782c454d9dc0d5545628819 *tests/testthat/_snaps/subscript-loc.md ca0504fb5ae7d131dd3e80700e961a77 *tests/testthat/_snaps/subscript.md 03ccb96343f8abbd5da6f1dc8f3e85c2 *tests/testthat/_snaps/type-asis.md a2160e562be4ebd1e519f070a8f5ebd5 *tests/testthat/_snaps/type-data-frame.md ea3a59312ad55942deadec7b6708e2cd *tests/testthat/_snaps/type-data-table.md e90f78c52769831ea831cb590d2a6fb0 *tests/testthat/_snaps/type-date-time.md d8c3e6040f0ac0aa9ba06bcdf97545a1 *tests/testthat/_snaps/type-factor.md 798a0c55de77284d93e051ed185b8520 *tests/testthat/_snaps/type-list-of.md d3b2f0b00f269145e8dc7e9fd4e1a3da *tests/testthat/_snaps/type-misc.md 7a96b1e292942be1722196424e8b924b *tests/testthat/_snaps/type-rcrd.md 42ceb2100a3ba4e0935e78a14af5ba1a *tests/testthat/_snaps/type-sf.md e83d5684f802acdfd5b1b26d903a640f *tests/testthat/_snaps/type-table.md 7dac22d321c2d7963974dee2bc498afc *tests/testthat/_snaps/type-tibble.md 4070cfc552068f834bf164dc7bb6c98c *tests/testthat/_snaps/type-unspecified.md 27d77098d05716a4a24928a504ffabe7 *tests/testthat/_snaps/type-vctr.md 67f8e861e0ddbeb3d0310ce48c9e2756 *tests/testthat/_snaps/type.md 6d6360a6bbbe36e327d03ced94f2a9a1 *tests/testthat/_snaps/type2.md 77de33e8c8693d37f99a280ea599a7b0 *tests/testthat/helper-c.R cab609efdc70e227e331646c2b3b99e5 *tests/testthat/helper-cast.R f7b1708b9e6ed3488af8ffca2fd68cb7 *tests/testthat/helper-conditions.R 76c635db233174b0c228e30b12b09ce3 *tests/testthat/helper-encoding.R c9b8bce2e5e7cced93267450126f473c *tests/testthat/helper-expectations.R ead65126fe8184d9cddc7f63cff8e786 *tests/testthat/helper-memory.R 87aa31dfdb729ace70d16bcafeb1d67d *tests/testthat/helper-names.R 01768424b3f2525728e0a2d92b52c5a0 *tests/testthat/helper-order.R acccb888200b46aee1cbccb3b196dfe9 *tests/testthat/helper-performance.R e10c358d67d972e77d4c7d95f3c67182 *tests/testthat/helper-rational.R 6c7c09bb9ba81f9d4b8f122792b26656 *tests/testthat/helper-restart.R 001f8d9af72df89bd170bd91fd96bd2b *tests/testthat/helper-s3.R f1a6538c32d7adb0b67d38196afee201 *tests/testthat/helper-s4.R 1a6c1bf6b317385ea29d194695ceaf3d *tests/testthat/helper-shape.R dc794d40870fcdfc375b93ad91a6854d *tests/testthat/helper-size.R a195f504649598e6e9cd5b41ac922300 *tests/testthat/helper-type-dplyr.R 4e96c9aba5d25277c76f2eafbf4797a0 *tests/testthat/helper-types.R 98f3fab4e026dd81b5a86ca88489e26a *tests/testthat/helper-vctrs.R 66aed74f1596d63d18733b1fe5bf6607 *tests/testthat/test-arith.R 2f0b8c143d230c11a57179183731e573 *tests/testthat/test-assert.R d5968e181ac4a8a7ac7526084bc0a613 *tests/testthat/test-bind.R 7e5eb2322359ef97c2341c153212163d *tests/testthat/test-c.R 42a3cbf0d70e9b48172a96c6f6db7cb4 *tests/testthat/test-cast.R 3629c836a533e5c073c04b7a5897bc59 *tests/testthat/test-compare.R ad4e987c949a69f30b2f0f1c3f85f778 *tests/testthat/test-complete.R 80ae503aac29e26041285cacf9c0be12 *tests/testthat/test-conditions.R 9e799c591b82086f5d987205509a2dbf *tests/testthat/test-dictionary.R d67c0927e0bbee60571e58452b9c0b5a *tests/testthat/test-dim.R f03ff11ea06f572230237862ddaa49c9 *tests/testthat/test-empty.R 094f0d070677c9a64ac42b958b2a898e *tests/testthat/test-equal.R ccb96b6fd97df3d72459bf79b3b7e7b3 *tests/testthat/test-error-call.R 2df17ce1c1a8aea406a2e51c6a2e114e *tests/testthat/test-expand.R 82c226106468a9b9b0194c25b73faf69 *tests/testthat/test-fields.R bd48cd5bcd6b514e65ee591d83638329 *tests/testthat/test-fill.R 5a39c2b8041d7e1c11b7e1f202d19394 *tests/testthat/test-group.R 1a56091941b82c85ad0eceffd903253e *tests/testthat/test-hash.R 82221732b8a1647127363b10c8d912c9 *tests/testthat/test-interval.R 63e526afb54d81d35ea6d175692b6d8f *tests/testthat/test-lifecycle-deprecated.R 976ede266f5951c35f563a385f368471 *tests/testthat/test-match.R 14f9a4218e41e303dd2c826c138763ca *tests/testthat/test-missing.R 64fbe5396f2e56767923df38e953eff3 *tests/testthat/test-names.R f9a852e45ef31fb67fdfb72642957442 *tests/testthat/test-order.R c8a3b00c0e0da572d862d05009333f95 *tests/testthat/test-partial-factor.R faa59af2a13c60d0a6c414d8b5b26665 *tests/testthat/test-partial-frame.R 96b15045d0a9d189bd8a0faeae2bd0a9 *tests/testthat/test-print-str.R 98bc21dc6338ff1e77d517f81c485898 *tests/testthat/test-proxy-restore.R 6e85ad163eed935a1bc2ab2c761e23e3 *tests/testthat/test-proxy.R f6d1a19c960c09ea8353bc391ed0712d *tests/testthat/test-ptype-abbr-full.R 965b0d8eb104c8768491bc18f0466047 *tests/testthat/test-rank.R 800eea6357ba92d60f8e6201c1bdaa4a *tests/testthat/test-recycle.R cd3fafb9f7c74eab2477b6bd83a17779 *tests/testthat/test-rep.R 3034696516dfa719dc9b5fb538971de7 *tests/testthat/test-runs.R 240acbbf684c59e1be0a64cb362ecea2 *tests/testthat/test-s4.R 401fd12bdb3e7d26099ccfbc45bf8819 *tests/testthat/test-set.R a1284c1c014465bf31c2a0aef958e1b9 *tests/testthat/test-shape.R 9a392de979d068fd3f45412952abf149 *tests/testthat/test-size.R 98e2c708b19a9f48bccf32fb6ce37b32 *tests/testthat/test-slice-assign.R a80a57ca94e050f8c8d867878c522a11 *tests/testthat/test-slice-chop.R 67da07de8d4f61664ecc036bd805ec3e *tests/testthat/test-slice-interleave.R a01db827914ec7f1edfefbbe07193405 *tests/testthat/test-slice.R ade7ef62ecdbac77c01275c67ba1bc0b *tests/testthat/test-split.R 503fc3bc7438d595cd74eca9b7cfc45c *tests/testthat/test-subscript-loc.R b4088244c50a2bc053c95a872351fd46 *tests/testthat/test-subscript.R 8eaae25fb4ec6dbb832ddb714a3d2636 *tests/testthat/test-translate.R 6cadf712fbe378068dca8f15edd8edc1 *tests/testthat/test-type-asis.R 20b4c5a4f7d6d117236157491dc573d8 *tests/testthat/test-type-bare.R 5fee6cb3a9b7069a2b0c28b2f286ca6e *tests/testthat/test-type-data-frame.R 051c2a8ab161e9f2ded850a1331e5275 *tests/testthat/test-type-data-table.R 0b9a4018ea47f41170eeafdf7d11212d *tests/testthat/test-type-date-time.R 96ce90b3dccf9e644e40b964191ec6a7 *tests/testthat/test-type-dplyr.R aedac30767993b9ecbb775b1f77d8f64 *tests/testthat/test-type-factor.R a711733b6c258bc68260e5c3e51dcef2 *tests/testthat/test-type-integer64.R eefc89c26f7e9c3f55ed4ea1763006e7 *tests/testthat/test-type-list-of.R be6bade47d0fd2ee3c09b686ef2379ff *tests/testthat/test-type-misc.R d7d2157a1dfd6e48b8a737e61b603da9 *tests/testthat/test-type-rational.R 757155cf3e27c766b316d7fedd3858d5 *tests/testthat/test-type-rcrd.R eabcddd56636ae5f54d21f0d2c9296f3 *tests/testthat/test-type-sclr.R eb37056819cf6b1a2c48ed62b29593c2 *tests/testthat/test-type-sf.R 94cbf6fb9de023b52bf5dcc2775fdf74 *tests/testthat/test-type-table.R ac76245dae6636f5163c09fe926f477f *tests/testthat/test-type-tibble.R d37c99880490b0a72bbe897f6e123f85 *tests/testthat/test-type-unspecified.R 18b995b0bdf00f82928ed8b32f55c041 *tests/testthat/test-type-vctr.R 07f570c509cba433a5f97419f9c6e9b3 *tests/testthat/test-type.R 47d48f7df0feebcd44bc6d8c8ae9db8e *tests/testthat/test-type2.R 197c30fe0d707de9b6e50ca89f25b917 *tests/testthat/test-utils.R 9c1793ea42fd231a18fd557062f59736 *tests/testthat/test-vctrs.R 7dec628565582a504f5aa9744f865cd9 *vignettes/pillar.Rmd e9656a0f57974885a1cf61ebfba88fcb *vignettes/s3-vector.Rmd 4839ddd2a359470622d6edcca6c49edf *vignettes/stability.Rmd 317b475c17b8b2a70f68cca0402f9eb9 *vignettes/type-size.Rmd vctrs/inst/0000755000176200001440000000000014532404540012371 5ustar liggesusersvctrs/inst/doc/0000755000176200001440000000000014532404540013136 5ustar liggesusersvctrs/inst/doc/pillar.R0000644000176200001440000001027214532404535014552 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ----setup-------------------------------------------------------------------- library(vctrs) library(pillar) ## ----eval = FALSE------------------------------------------------------------- # usethis::use_package("vctrs") # usethis::use_package("pillar") ## ----------------------------------------------------------------------------- #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ## ----------------------------------------------------------------------------- library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ## ----------------------------------------------------------------------------- #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ## ----------------------------------------------------------------------------- deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ## ----------------------------------------------------------------------------- data ## ----------------------------------------------------------------------------- print(data, width = 30) ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ## ----------------------------------------------------------------------------- deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ## ----------------------------------------------------------------------------- #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ## ----eval = FALSE------------------------------------------------------------- # expect_snapshot(pillar_shaft(data$loc)) vctrs/inst/doc/s3-vector.html0000644000176200001440000044672614532404537015702 0ustar liggesusers S3 vectors

S3 vectors

This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you’ll also need to provide methods that actually make the vector useful.

I assume that you’re already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of the S3 chapter of Advanced R.

This article refers to “vectors of numbers” as double vectors. Here, “double” stands for “double precision floating point number”, see also double().

library(vctrs)
library(rlang)
library(zeallot)

This vignette works through five big topics:

  • The basics of creating a new vector class with vctrs.
  • The coercion and casting system.
  • The record and list-of types.
  • Equality and comparison proxies.
  • Arithmetic operators.

They’re collectively demonstrated with a number of simple S3 classes:

  • Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting.

  • Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions.

  • Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care.

  • Rational: a pair of integer vectors that defines a rational number like 2 / 3. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for +, -, and friends.

  • Polynomial: a list of integer vectors that define polynomials like 1 + x - x^3. Sorting such vectors correctly requires a custom equality method.

  • Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties.

  • Period and frequency: a pair of classes represent a period, or its inverse, frequency. This allows us to explore more arithmetic operators.

Basics

In this section you’ll learn how to create a new vctrs class by calling new_vctr(). This creates an object with class vctrs_vctr which has a number of methods. These are designed to make your life as easy as possible. For example:

  • The print() and str() methods are defined in terms of format() so you get a pleasant, consistent display as soon as you’ve made your format() method.

  • You can immediately put your new vector class in a data frame because as.data.frame.vctrs_vctr() does the right thing.

  • Subsetting ([, [[, and $), length<-, and rep() methods automatically preserve attributes because they use vec_restore(). A default vec_restore() works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data.

  • Default subset-assignment methods ([<-, [[<-, and $<-) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages.

Percent class

In this section, I’ll show you how to make a percent class, i.e., a double vector that is printed as a percentage. We start by defining a low-level constructor to check types and/or sizes and call new_vctr().

percent is built on a double vector of any length and doesn’t have any attributes.

new_percent <- function(x = double()) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  new_vctr(x, class = "vctrs_percent")
}

x <- new_percent(c(seq(0, 1, length.out = 4), NA))
x
#> <vctrs_percent[5]>
#> [1] 0.0000000 0.3333333 0.6666667 1.0000000        NA

str(x)
#>  vctrs_pr [1:5] 0.0000000, 0.3333333, 0.6666667, 1.0000000,        NA

Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as blob), it’s fine to use the package name without prefix as the class name.

We then follow up with a user friendly helper. Here we’ll use vec_cast() to allow it to accept anything coercible to a double:

percent <- function(x = double()) {
  x <- vec_cast(x, double())
  new_percent(x)
}

Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype.

new_percent()
#> <vctrs_percent[0]>
percent()
#> <vctrs_percent[0]>

For the convenience of your users, consider implementing an is_percent() function:

is_percent <- function(x) {
  inherits(x, "vctrs_percent")
}

format() method

The first method for every class should almost always be a format() method. This should return a character vector the same length as x. The easiest way to do this is to rely on one of R’s low-level formatting functions like formatC():

format.vctrs_percent <- function(x, ...) {
  out <- formatC(signif(vec_data(x) * 100, 3))
  out[is.na(x)] <- NA
  out[!is.na(x)] <- paste0(out[!is.na(x)], "%")
  out
}
x
#> <vctrs_percent[5]>
#> [1] 0%    33.3% 66.7% 100%  <NA>

(Note the use of vec_data() so format() doesn’t get stuck in an infinite loop, and that I take a little care to not convert NA to "NA"; this leads to better printing.)

The format method is also used by data frames, tibbles, and str():

data.frame(x)
#>       x
#> 1    0%
#> 2 33.3%
#> 3 66.7%
#> 4  100%
#> 5  <NA>

For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in str():

vec_ptype_abbr.vctrs_percent <- function(x, ...) {
  "prcnt"
}

tibble::tibble(x)
#> # A tibble: 5 × 1
#>         x
#>   <prcnt>
#> 1      0%
#> 2   33.3%
#> 3   66.7%
#> 4    100%
#> 5      NA

str(x)
#>  prcnt [1:5] 0%, 33.3%, 66.7%, 100%, <NA>

If you need more control over printing in tibbles, implement a method for pillar::pillar_shaft(). See vignette("pillar", package = "vctrs") for details.

Casting and coercion

The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens implicitly (e.g in c()) we call it coercion; when the change happens explicitly (e.g. with as.integer(x)), we call it casting.

One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it’s possible to make accurate predictions about what (e.g.) c(x, y) should do when x and y have different prototypes. vctrs achieves this goal through two generics:

  • vec_ptype2(x, y) defines possible set of coercions. It returns a prototype if x and y can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes.

  • vec_cast(x, to) defines the possible sets of casts. It returns x translated to have prototype to, or throws an error if the conversion isn’t possible. The set of possible casts is a superset of possible coercions because they’re requested explicitly.

Double dispatch

Both generics use double dispatch which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means:

  • You end up with method names with two classes, like vec_ptype2.foo.bar().

  • You don’t need to implement default methods (they would never be called if you do).

  • You can’t call NextMethod().

Percent class

We’ll make our percent class coercible back and forth with double vectors.

vec_ptype2() provides a user friendly error message if the coercion doesn’t exist and makes sure NA is handled in a standard way. NA is technically a logical vector, but we want to stand in for a missing value of any type.

vec_ptype2("bogus", percent())
#> Error:
#> ! Can't combine `"bogus"` <character> and `percent()` <vctrs_percent>.
vec_ptype2(percent(), NA)
#> <vctrs_percent[0]>
vec_ptype2(NA, percent())
#> <vctrs_percent[0]>

By default and in simple cases, an object of the same class is compatible with itself:

vec_ptype2(percent(), percent())
#> <vctrs_percent[0]>

However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we’ll start by saying that a vctrs_percent combined with a vctrs_percent yields a vctrs_percent, which we indicate by returning a prototype generated by the constructor.

vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent()

Next we define methods that say that combining a percent and double should yield a double. We avoid returning a percent here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers.

Because double dispatch is a bit of a hack, we need to provide two methods. It’s your responsibility to ensure that each member of the pair returns the same result: if they don’t you will get weird and unpredictable behaviour.

The double dispatch mechanism requires us to refer to the underlying type, double, in the method name. If we implemented vec_ptype2.vctrs_percent.numeric(), it would never be called.

vec_ptype2.vctrs_percent.double <- function(x, y, ...) double()
vec_ptype2.double.vctrs_percent <- function(x, y, ...) double()

We can check that we’ve implemented this correctly with vec_ptype_show():

vec_ptype_show(percent(), double(), percent())
#> Prototype: <double>
#> 0. (                 , <vctrs_percent> ) = <vctrs_percent>
#> 1. ( <vctrs_percent> , <double>        ) = <double>       
#> 2. ( <double>        , <vctrs_percent> ) = <double>

The vec_ptype2() methods define which input is the richer type that vctrs should coerce to. However, they don’t perform any conversion. This is the job of vec_cast(), which we implement next. We’ll provide a method to cast a percent to a percent:

vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x

And then for converting back and forth between doubles. To convert a double to a percent we use the percent() helper (not the constructor; this is unvalidated user input). To convert a percent to a double, we strip the attributes.

Note that for historical reasons the order of argument in the signature is the opposite as for vec_ptype2(). The class for to comes first, and the class for x comes second.

Again, the double dispatch mechanism requires us to refer to the underlying type, double, in the method name. Implementing vec_cast.vctrs_percent.numeric() has no effect.

vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x)
vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x)

Then we can check this works with vec_cast():

vec_cast(0.5, percent())
#> <vctrs_percent[1]>
#> [1] 50%
vec_cast(percent(0.5), double())
#> [1] 0.5

Once you’ve implemented vec_ptype2() and vec_cast(), you get vec_c(), [<-, and [[<- implementations for free.

vec_c(percent(0.5), 1)
#> [1] 0.5 1.0
vec_c(NA, percent(0.5))
#> <vctrs_percent[2]>
#> [1] <NA> 50%
# but
vec_c(TRUE, percent(0.5))
#> Error in `vec_c()`:
#> ! Can't combine `..1` <logical> and `..2` <vctrs_percent>.

x <- percent(c(0.5, 1, 2))
x[1:2] <- 2:1
#> Error in `vec_restore_dispatch()`:
#> ! Can't convert <integer> to <vctrs_percent>.
x[[3]] <- 0.5
x
#> <vctrs_percent[3]>
#> [1] 50%  100% 50%

You’ll also get mostly correct behaviour for c(). The exception is when you use c() with a base R class:

# Correct
c(percent(0.5), 1)
#> [1] 0.5 1.0
c(percent(0.5), factor(1))
#> Error in `vec_c()`:
#> ! Can't combine `..1` <vctrs_percent> and `..2` <factor<25c7e>>.

# Incorrect
c(factor(1), percent(0.5))
#> [1] 1.0 0.5

Unfortunately there’s no way to fix this problem with the current design of c().

Again, as a convenience, consider providing an as_percent() function that makes use of the casts defined in your vec_cast.vctrs_percent() methods:

as_percent <- function(x) {
  vec_cast(x, new_percent())
}

Occasionally, it is useful to provide conversions that go beyond what’s allowed in casting. For example, we could offer a parsing method for character vectors. In this case, as_percent() should be generic, the default method should cast, and then additional methods should implement more flexible conversion:

as_percent <- function(x, ...) {
  UseMethod("as_percent")
}

as_percent.default <- function(x, ...) {
  vec_cast(x, new_percent())
}

as_percent.character <- function(x) {
  value <- as.numeric(gsub(" *% *$", "", x)) / 100
  new_percent(value)
}

Decimal class

Now that you’ve seen the basics with a very simple S3 class, we’ll gradually explore more complicated scenarios. This section creates a decimal class that prints with the specified number of decimal places. This is very similar to percent but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1).

We start off as before, defining a low-level constructor, a user-friendly constructor, a format() method, and a vec_ptype_abbr(). Note that additional object attributes are simply passed along to new_vctr():

new_decimal <- function(x = double(), digits = 2L) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  if (!is_integer(digits)) {
    abort("`digits` must be an integer vector.")
  }
  vec_check_size(digits, size = 1L)

  new_vctr(x, digits = digits, class = "vctrs_decimal")
}

decimal <- function(x = double(), digits = 2L) {
  x <- vec_cast(x, double())
  digits <- vec_recycle(vec_cast(digits, integer()), 1L)

  new_decimal(x, digits = digits)
}

digits <- function(x) attr(x, "digits")

format.vctrs_decimal <- function(x, ...) {
  sprintf(paste0("%-0.", digits(x), "f"), x)
}

vec_ptype_abbr.vctrs_decimal <- function(x, ...) {
  "dec"
}

x <- decimal(runif(10), 1L)
x
#> <vctrs_decimal[10]>
#>  [1] 0.1 0.8 0.6 0.2 0.0 0.5 0.5 0.3 0.7 0.8

Note that I provide a little helper to extract the digits attribute. This makes the code a little easier to read and should not be exported.

By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You’ll see what to do if the attributes are data dependent in the next section.

x[1:2]
#> <vctrs_decimal[2]>
#> [1] 0.1 0.8
x[[1]]
#> <vctrs_decimal[1]>
#> [1] 0.1

For the sake of exposition, we’ll assume that digits is an important attribute of the class and should be included in the full type:

vec_ptype_full.vctrs_decimal <- function(x, ...) {
  paste0("decimal<", digits(x), ">")
}

x
#> <decimal<1>[10]>
#>  [1] 0.1 0.8 0.6 0.2 0.0 0.5 0.5 0.3 0.7 0.8

Now consider vec_cast() and vec_ptype2(). Casting and coercing from one decimal to another requires a little thought as the values of the digits attribute might be different, and we need some way to reconcile them. Here I’ve decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error.

vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) {
  new_decimal(digits = max(digits(x), digits(y)))
}
vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) {
  new_decimal(vec_data(x), digits = digits(to))
}

vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2))
#> <decimal<3>[2]>
#> [1] 0.010 0.020

Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal).

vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x
vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y

vec_cast.vctrs_decimal.double  <- function(x, to, ...) new_decimal(x, digits = digits(to))
vec_cast.double.vctrs_decimal  <- function(x, to, ...) vec_data(x)

vec_c(decimal(1, digits = 1), pi)
#> <decimal<1>[2]>
#> [1] 1.0 3.1
vec_c(pi, decimal(1, digits = 1))
#> <decimal<1>[2]>
#> [1] 3.1 1.0

If type x has greater resolution than y, there will be some inputs that lose precision. These should generate errors using stop_lossy_cast(). You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution.

vec_cast(c(1, 2, 10), to = integer())
#> [1]  1  2 10

vec_cast(c(1.5, 2, 10.5), to = integer())
#> Error:
#> ! Can't convert from `c(1.5, 2, 10.5)` <double> to <integer> due to loss of precision.
#> • Locations: 1, 3

Cached sum class

The next level up in complexity is an object that has data-dependent attributes. To explore this idea we’ll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors:

new_cached_sum <- function(x = double(), sum = 0L) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  if (!is_double(sum)) {
    abort("`sum` must be a double vector.")
  }
  vec_check_size(sum, size = 1L)

  new_vctr(x, sum = sum, class = "vctrs_cached_sum")
}

cached_sum <- function(x) {
  x <- vec_cast(x, double())
  new_cached_sum(x, sum(x))
}

For this class, we can use the default format() method, and instead, we’ll customise the obj_print_footer() method. This is a good place to display user facing attributes.

obj_print_footer.vctrs_cached_sum <- function(x, ...) {
  cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "")
}

x <- cached_sum(runif(10))
x
#> <vctrs_cached_sum[10]>
#>  [1] 0.87460066 0.17494063 0.03424133 0.32038573 0.40232824 0.19566983
#>  [7] 0.40353812 0.06366146 0.38870131 0.97554784
#> # Sum: 3.83

We’ll also override sum() and mean() to use the attribute. This is easiest to do with vec_math(), which you’ll learn about later.

vec_math.vctrs_cached_sum <- function(.fn, .x, ...) {
  cat("Using cache\n")
  switch(.fn,
    sum = attr(.x, "sum"),
    mean = attr(.x, "sum") / length(.x),
    vec_math_base(.fn, .x, ...)
  )
}

sum(x)
#> Using cache
#> [1] 3.833615

As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they’ll work, but return the incorrect result:

x[1:2]
#> <vctrs_cached_sum[2]>
#> [1] 0.8746007 0.1749406
#> # Sum: 3.83

To fix this, you need to provide a vec_restore() method. Note that this method dispatches on the to argument.

vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) {
  new_cached_sum(x, sum(x))
}

x[1]
#> <vctrs_cached_sum[1]>
#> [1] 0.8746007
#> # Sum: 0.875

This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with vec_data() and then reapplying them again with vec_restore(). The default vec_restore() method copies over all attributes, which is not appropriate when the attributes depend on the data.

Note that vec_restore.class is subtly different from vec_cast.class.class(). vec_restore() is used when restoring attributes that have been lost; vec_cast() is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with new_vctr(). vec_restore.factor() would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with vec_cast().

Record-style objects

Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is POSIXlt, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override length() and subsetting methods to conceal this implementation detail.

x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3))
x
#> [1] "2020-01-01 00:00:01 EST" "2020-01-01 00:00:02 EST"
#> [3] "2020-01-01 00:00:03 EST"

length(x)
#> [1] 3
length(unclass(x))
#> [1] 11

x[[1]] # the first date time
#> [1] "2020-01-01 00:00:01 EST"
unclass(x)[[1]] # the first component, the number of seconds
#> [1] 1 2 3

vctrs makes it easy to create new record-style classes using new_rcrd(), which has a wide selection of default methods.

Rational class

A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short).

As usual we start with low-level and user-friendly constructors. The low-level constructor calls new_rcrd(), which needs a named list of equal-length vectors.

new_rational <- function(n = integer(), d = integer()) {
  if (!is_integer(n)) {
    abort("`n` must be an integer vector.")
  }
  if (!is_integer(d)) {
    abort("`d` must be an integer vector.")
  }

  new_rcrd(list(n = n, d = d), class = "vctrs_rational")
}

Our user friendly constructor casts n and d to integers and recycles them to the same length.

rational <- function(n = integer(), d = integer()) {
  c(n, d) %<-% vec_cast_common(n, d, .to = integer())
  c(n, d) %<-% vec_recycle_common(n, d)

  new_rational(n, d)
}

x <- rational(1, 1:10)

Behind the scenes, x is a named list with two elements. But those details are hidden so that it behaves like a vector:

names(x)
#> NULL
length(x)
#> [1] 10

To access the underlying fields we need to use field() and fields():

fields(x)
#> [1] "n" "d"
field(x, "n")
#>  [1] 1 1 1 1 1 1 1 1 1 1

Notice that we can’t print() or str() the new rational vector x yet. Printing causes an error:

x
#> <vctrs_rational[10]>
#> Error in `format()`:
#> ! `format.vctrs_rational()` not implemented.

str(x)
#> Error in `format()`:
#> ! `format.vctrs_rational()` not implemented.

This is because we haven’t defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call vec_data(x).

vec_data(x)
#>    n  d
#> 1  1  1
#> 2  1  2
#> 3  1  3
#> 4  1  4
#> 5  1  5
#> 6  1  6
#> 7  1  7
#> 8  1  8
#> 9  1  9
#> 10 1 10

str(vec_data(x))
#> 'data.frame':    10 obs. of  2 variables:
#>  $ n: int  1 1 1 1 1 1 1 1 1 1
#>  $ d: int  1 2 3 4 5 6 7 8 9 10

It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way:

format.vctrs_rational <- function(x, ...) {
  n <- field(x, "n")
  d <- field(x, "d")

  out <- paste0(n, "/", d)
  out[is.na(n) | is.na(d)] <- NA

  out
}

vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl"
vec_ptype_full.vctrs_rational <- function(x, ...) "rational"

x
#> <rational[10]>
#>  [1] 1/1  1/2  1/3  1/4  1/5  1/6  1/7  1/8  1/9  1/10

vctrs uses the format() method in str(), hiding the underlying implementation details from the user:

str(x)
#>  rtnl [1:10] 1/1, 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10

For rational, vec_ptype2() and vec_cast() follow the same pattern as percent(). We allow coercion from integer and to doubles.

vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational()
vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational()
vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational()

vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x
vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d")
vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1)

vec_c(rational(1, 2), 1L, NA)
#> <rational[3]>
#> [1] 1/2  1/1  <NA>

Decimal2 class

The previous implementation of decimal was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems.

A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a scale). The following code is a very quick sketch of how you might start creating such a class:

new_decimal2 <- function(l, r, scale = 2L) {
  if (!is_integer(l)) {
    abort("`l` must be an integer vector.")
  }
  if (!is_integer(r)) {
    abort("`r` must be an integer vector.")
  }
  if (!is_integer(scale)) {
    abort("`scale` must be an integer vector.")
  }
  vec_check_size(scale, size = 1L)

  new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2")
}

decimal2 <- function(l, r, scale = 2L) {
  l <- vec_cast(l, integer())
  r <- vec_cast(r, integer())
  c(l, r) %<-% vec_recycle_common(l, r)
  scale <- vec_cast(scale, integer())

  # should check that r < 10^scale
  new_decimal2(l = l, r = r, scale = scale)
}

format.vctrs_decimal2 <- function(x, ...) {
  val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale")
  sprintf(paste0("%.0", attr(x, "scale"), "f"), val)
}

decimal2(10, c(0, 5, 99))
#> <vctrs_decimal2[3]>
#> [1] 10.00 10.05 10.99

Equality and comparison

vctrs provides four “proxy” generics. Two of these let you control how your class determines equality and comparison:

  • vec_proxy_equal() returns a data vector suitable for comparison. It underpins ==, !=, unique(), anyDuplicated(), and is.na().

  • vec_proxy_compare() specifies how to compare the elements of your vector. This proxy is used in <, <=, >=, >, min(), max(), median(), and quantile().

Two other proxy generic are used for sorting for unordered data types and for accessing the raw data for exotic storage formats:

  • vec_proxy_order() specifies how to sort the elements of your vector. It is used in xtfrm(), which in turn is called by the order() and sort() functions.

    This proxy was added to implement the behaviour of lists, which are sortable (their order proxy sorts by first occurrence) but not comparable (comparison operators cause an error). Its default implementation for other classes calls vec_proxy_compare() and you normally don’t need to implement this proxy.

  • vec_proxy() returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn’t need to implement vec_proxy().

The default behavior is as follows:

  • vec_proxy_equal() calls vec_proxy()
  • vec_proxy_compare() calls vec_proxy_equal()
  • vec_proxy_order() calls vec_proxy_compare()

You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work.

These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C.

Rational class

Let’s explore these ideas by with the rational class we started on above. By default, vec_proxy() converts a record to a data frame, and the default comparison works column by column:

x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2))
x
#> <rational[4]>
#> [1] 1/1 2/1 1/2 2/2

vec_proxy(x)
#>   n d
#> 1 1 1
#> 2 2 1
#> 3 1 2
#> 4 2 2

x == rational(1, 1)
#> [1]  TRUE FALSE FALSE FALSE

This makes sense as a default but isn’t correct here because rational(1, 1) represents the same number as rational(2, 2), so they should be equal. We can fix that by implementing a vec_proxy_equal() method that divides n and d by their greatest common divisor:

# Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632
gcd <- function(x, y) {
  r <- x %% y
  ifelse(r, gcd(y, r), y)
}

vec_proxy_equal.vctrs_rational <- function(x, ...) {
  n <- field(x, "n")
  d <- field(x, "d")
  gcd <- gcd(n, d)

  data.frame(n = n / gcd, d = d / gcd)
}
vec_proxy_equal(x)
#>   n d
#> 1 1 1
#> 2 2 1
#> 3 1 2
#> 4 1 1

x == rational(1, 1)
#> [1]  TRUE FALSE FALSE  TRUE

vec_proxy_equal() is also used by unique():

unique(x)
#> <rational[3]>
#> [1] 1/1 2/1 1/2

We now need to fix the comparison operations similarly, since comparison currently happens lexicographically by n, then by d:

rational(1, 2) < rational(2, 3)
#> [1] TRUE
rational(2, 4) < rational(2, 3)
#> [1] TRUE

The easiest fix is to convert the fraction to a floating point number and use this as a proxy:

vec_proxy_compare.vctrs_rational <- function(x, ...) {
  field(x, "n") / field(x, "d")
}

rational(2, 4) < rational(2, 3)
#> [1] TRUE

This also fixes sort(), because the default implementation of vec_proxy_order() calls vec_proxy_compare().

sort(x)
#> <rational[4]>
#> [1] 1/2 1/1 2/2 2/1

(We could have used the same approach in vec_proxy_equal(), but when working with floating point numbers it’s not necessarily true that x == y implies that d * x == d * y.)

Polynomial class

A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like 1 + 3x - 2x^2) using a list of integer vectors (like c(1, 3, -2)). Note the use of new_list_of() in the constructor.

poly <- function(...) {
  x <- vec_cast_common(..., .to = integer())
  new_poly(x)
}
new_poly <- function(x) {
  new_list_of(x, ptype = integer(), class = "vctrs_poly_list")
}

vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial"
vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly"

format.vctrs_poly_list <- function(x, ...) {
  format_one <- function(x) {
    if (length(x) == 0) {
      return("")
    }

    if (length(x) == 1) {
      format(x)
    } else {
      suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "")
      out <- paste0(x, suffix)
      out <- out[x != 0L]
      paste0(out, collapse = " + ")
    }
  }

  vapply(x, format_one, character(1))
}

obj_print_data.vctrs_poly_list <- function(x, ...) {
  if (length(x) != 0) {
    print(format(x), quote = FALSE)
  }
}

p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1))
p
#> <polynomial[3]>
#> [1] 1         1⋅x^4 + 2 1⋅x^2 + 1

The resulting objects will inherit from the vctrs_list_of class, which provides tailored methods for $, [[, the corresponding assignment operators, and other methods.

class(p)
#> [1] "vctrs_poly_list" "vctrs_list_of"   "vctrs_vctr"      "list"
p[2]
#> <polynomial[1]>
#> [1] 1⋅x^4 + 2
p[[2]]
#> [1] 1 0 0 0 2

The class implements the list interface:

obj_is_list(p)
#> [1] TRUE

This is fine for the internal implementation of this class but it would be more appropriate if it behaved like an atomic vector rather than a list.

Make an atomic polynomial vector

An atomic vector is a vector like integer or character for which [[ returns the same type. Unlike lists, you can’t reach inside an atomic vector.

To make the polynomial class an atomic vector, we’ll wrap the internal list_of() class within a record vector. Usually records are used because they can store several fields of data for each observation. Here we have only one, but we use the class anyway to inherit its atomicity.

poly <- function(...) {
  x <- vec_cast_common(..., .to = integer())
  x <- new_poly(x)
  new_rcrd(list(data = x), class = "vctrs_poly")
}
format.vctrs_poly <- function(x, ...) {
  format(field(x, "data"))
}

The new format() method delegates to the one we wrote for the internal list. The vector looks just like before:

p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1))
p
#> <vctrs_poly[3]>
#> [1] 1         1⋅x^4 + 2 1⋅x^2 + 1

Making the class atomic means that obj_is_list() now returns FALSE. This prevents recursive algorithms that traverse lists from reaching too far inside the polynomial internals.

obj_is_list(p)
#> [1] FALSE

Most importantly, it prevents users from reaching into the internals with [[:

p[[2]]
#> <vctrs_poly[1]>
#> [1] 1⋅x^4 + 2

Implementing equality and comparison

Equality works out of the box because we can tell if two integer vectors are equal:

p == poly(c(1, 0, 1))
#> [1] FALSE FALSE  TRUE

We can’t compare individual elements, because the data is stored in a list and by default lists are not comparable:

p < p[2]
#> Error in `vec_proxy_compare()`:
#> ! `vec_proxy_compare.vctrs_poly_list()` not supported.

To enable comparison, we implement a vec_proxy_compare() method:

vec_proxy_compare.vctrs_poly <- function(x, ...) {
  # Get the list inside the record vector
  x_raw <- vec_data(field(x, "data"))

  # First figure out the maximum length
  n <- max(vapply(x_raw, length, integer(1)))

  # Then expand all vectors to this length by filling in with zeros
  full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x))

  # Then turn into a data frame
  as.data.frame(do.call(rbind, full))
}

p < p[2]
#> [1]  TRUE FALSE  TRUE

Often, this is sufficient to also implement sort(). However, for lists, there is already a default vec_proxy_order() method that sorts by first occurrence:

sort(p)
#> <vctrs_poly[3]>
#> [1] 1         1⋅x^2 + 1 1⋅x^4 + 2
sort(p[c(1:3, 1:2)])
#> <vctrs_poly[5]>
#> [1] 1         1         1⋅x^2 + 1 1⋅x^4 + 2 1⋅x^4 + 2

To ensure consistency between ordering and comparison, we forward vec_proxy_order() to vec_proxy_compare():

vec_proxy_order.vctrs_poly <- function(x, ...) {
  vec_proxy_compare(x, ...)
}

sort(p)
#> <vctrs_poly[3]>
#> [1] 1         1⋅x^2 + 1 1⋅x^4 + 2

Arithmetic

vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once:

  • vec_math(fn, x, ...) specifies the behaviour of mathematical functions like abs(), sum(), and mean(). (Note that var() and sd() can’t be overridden, see ?vec_math() for the complete list supported by vec_math().)

  • vec_arith(op, x, y) specifies the behaviour of the arithmetic operations like +, -, and %%. (See ?vec_arith() for the complete list.)

Both generics define the behaviour for multiple functions because sum.vctrs_vctr(x) calls vec_math.vctrs_vctr("sum", x), and x + y calls vec_math.x_class.y_class("+", x, y). They’re accompanied by vec_math_base() and vec_arith_base() which make it easy to call the underlying base R functions.

vec_arith() uses double dispatch and needs the following standard boilerplate:

vec_arith.MYCLASS <- function(op, x, y, ...) {
  UseMethod("vec_arith.MYCLASS", y)
}
vec_arith.MYCLASS.default <- function(op, x, y, ...) {
  stop_incompatible_op(op, x, y)
}

Correctly exporting vec_arith() methods from a package is currently a little awkward. See the instructions in the Arithmetic section of the “Implementing a vctrs S3 class in a package” section below.

Cached sum class

I showed an example of vec_math() to define sum() and mean() methods for cached_sum. Now let’s talk about exactly how it works. Most vec_math() functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to vec_math_base() for those that you don’t care about.

vec_math.vctrs_cached_sum <- function(.fn, .x, ...) {
  switch(.fn,
    sum = attr(.x, "sum"),
    mean = attr(.x, "sum") / length(.x),
    vec_math_base(.fn, .x, ...)
  )
}

Meter class

To explore the infix arithmetic operators exposed by vec_arith() I’ll create a new class that represents a measurement in meters:

new_meter <- function(x) {
  stopifnot(is.double(x))
  new_vctr(x, class = "vctrs_meter")
}

format.vctrs_meter <- function(x, ...) {
  paste0(format(vec_data(x)), " m")
}

meter <- function(x) {
  x <- vec_cast(x, double())
  new_meter(x)
}

x <- meter(1:10)
x
#> <vctrs_meter[10]>
#>  [1]  1 m  2 m  3 m  4 m  5 m  6 m  7 m  8 m  9 m 10 m

Because meter is built on top of a double vector, basic mathematic operations work:

sum(x)
#> <vctrs_meter[1]>
#> [1] 55 m
mean(x)
#> <vctrs_meter[1]>
#> [1] 5.5 m

But we can’t do arithmetic:

x + 1
#> Error in `vec_arith()`:
#> ! <vctrs_meter> + <double> is not permitted
meter(10) + meter(1)
#> Error in `vec_arith()`:
#> ! <vctrs_meter> + <vctrs_meter> is not permitted
meter(10) * 3
#> Error in `vec_arith()`:
#> ! <vctrs_meter> * <double> is not permitted

To allow these infix functions to work, we’ll need to provide vec_arith() generic. But before we do that, let’s think about what combinations of inputs we should support:

  • It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can’t multiply meters (because that would yield an area).

  • For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don’t make much sense as we, strictly speaking, are dealing with objects of different nature.

vec_arith() is another function that uses double dispatch, so as usual we start with a template.

vec_arith.vctrs_meter <- function(op, x, y, ...) {
  UseMethod("vec_arith.vctrs_meter", y)
}
vec_arith.vctrs_meter.default <- function(op, x, y, ...) {
  stop_incompatible_op(op, x, y)
}

Then write the method for two meter objects. We use a switch statement to cover the cases we care about and stop_incompatible_op() to throw an informative error message for everything else.

vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) {
  switch(
    op,
    "+" = ,
    "-" = new_meter(vec_arith_base(op, x, y)),
    "/" = vec_arith_base(op, x, y),
    stop_incompatible_op(op, x, y)
  )
}

meter(10) + meter(1)
#> <vctrs_meter[1]>
#> [1] 11 m
meter(10) - meter(1)
#> <vctrs_meter[1]>
#> [1] 9 m
meter(10) / meter(1)
#> [1] 10
meter(10) * meter(1)
#> Error in `vec_arith()`:
#> ! <vctrs_meter> * <vctrs_meter> is not permitted

Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while meter(10) / 2 makes sense, 2 / meter(10) does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over numeric here instead of double.

vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) {
  switch(
    op,
    "/" = ,
    "*" = new_meter(vec_arith_base(op, x, y)),
    stop_incompatible_op(op, x, y)
  )
}
vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) {
  switch(
    op,
    "*" = new_meter(vec_arith_base(op, x, y)),
    stop_incompatible_op(op, x, y)
  )
}

meter(2) * 10
#> <vctrs_meter[1]>
#> [1] 20 m
meter(2) * as.integer(10)
#> <vctrs_meter[1]>
#> [1] 20 m
10 * meter(2)
#> <vctrs_meter[1]>
#> [1] 20 m
meter(20) / 10
#> <vctrs_meter[1]>
#> [1] 2 m
10 / meter(20)
#> Error in `vec_arith()`:
#> ! <double> / <vctrs_meter> is not permitted
meter(20) + 10
#> Error in `vec_arith()`:
#> ! <vctrs_meter> + <double> is not permitted

For completeness, we also need vec_arith.vctrs_meter.MISSING for the unary + and - operators:

vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) {
  switch(op,
    `-` = x * -1,
    `+` = x,
    stop_incompatible_op(op, x, y)
  )
}
-meter(1)
#> <vctrs_meter[1]>
#> [1] -1 m
+meter(1)
#> <vctrs_meter[1]>
#> [1] 1 m

Implementing a vctrs S3 class in a package

Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things:

  • Register the S3 methods by listing them in the NAMESPACE file.

  • Create documentation around your methods, for the sake of your user and to satisfy R CMD check.

Let’s assume that the percent class is implemented in the pizza package in the file R/percent.R. Here we walk through the major sections of this hypothetical file. You’ve seen all of this code before, but now it’s augmented by the roxygen2 directives that produce the correct NAMESPACE entries and help topics.

Getting started

First, the pizza package needs to include vctrs in the Imports section of its DESCRIPTION (perhaps by calling usethis::use_package("vctrs"). While vctrs is under very active development, it probably makes sense to state a minimum version.

Imports:
    a_package,
    another_package,
    ...
    vctrs (>= x.y.z),
    ...

Then we make all vctrs functions available within the pizza package by including the directive #' @import vctrs somewhere. Usually, it’s not good practice to @import the entire namespace of a package, but vctrs is deliberately designed with this use case in mind.

Where should we put #' @import vctrs? There are two natural locations:

  • With package-level docs in R/pizza-doc.R. You can use usethis::use_package_doc() to initiate this package-level documentation.

  • In R/percent.R. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package.

We also must use one of these locations to dump some internal documentation that’s needed to avoid R CMD check complaints. We don’t expect any human to ever read this documentation. Here’s how this dummy documentation should look, combined with the #' @import vctrs directive described above.

#' Internal vctrs methods
#'
#' @import vctrs
#' @keywords internal
#' @name pizza-vctrs
NULL

This should appear in R/pizza-doc.R (package-level docs) or in R/percent.R (class-focused file).

Remember to call devtools::document() regularly, as you develop, to regenerate NAMESPACE and the .Rd files.

From this point on, the code shown is expected to appear in R/percent.R.

Low-level and user-friendly constructors

Next we add our constructor:

new_percent <- function(x = double()) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  new_vctr(x, class = "pizza_percent")
}

Note that the name of the package must be included in the class name (pizza_percent), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class.

We can also add a call to setOldClass() for compatibility with S4:

# for compatibility with the S4 system
methods::setOldClass(c("pizza_percent", "vctrs_vctr"))

Because we’ve used a function from the methods package, you’ll also need to add methods to Imports, with (e.g.) usethis::use_package("methods"). This is a “free” dependency because methods is bundled with every R install.

Next we implement, export, and document a user-friendly helper: percent().

#' `percent` vector
#'
#' This creates a double vector that represents percentages so when it is
#' printed, it is multiplied by 100 and suffixed with `%`.
#'
#' @param x A numeric vector
#' @return An S3 vector of class `pizza_percent`.
#' @export
#' @examples
#' percent(c(0.25, 0.5, 0.75))
percent <- function(x = double()) {
  x <- vec_cast(x, double())
  new_percent(x)
}

(Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do pizza::percent(); it would be redundant to have pizza::pizza_percent().)

Other helpers

It’s a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor percent():

#' @export
#' @rdname percent
is_percent <- function(x) {
  inherits(x, "pizza_percent")
}

You’ll also need to update the percent() documentation to reflect that x now means two different things:

#' @param x
#'  * For `percent()`: A numeric vector
#'  * For `is_percent()`: An object to test.

Next we provide the key methods to make printing work. These are S3 methods, so they don’t need to be documented, but they do need to be exported.

#' @export
format.pizza_percent <- function(x, ...) {
  out <- formatC(signif(vec_data(x) * 100, 3))
  out[is.na(x)] <- NA
  out[!is.na(x)] <- paste0(out[!is.na(x)], "%")
  out
}

#' @export
vec_ptype_abbr.pizza_percent <- function(x, ...) {
  "prcnt"
}

Finally, we implement methods for vec_ptype2() and vec_cast().

#' @export
vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent()
#' @export
vec_ptype2.double.vctrs_percent <- function(x, y, ...) double()

#' @export
vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x
#' @export
vec_cast.pizza_percent.double <- function(x, to, ...) percent(x)
#' @export
vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x)

Arithmetic

Writing double dispatch methods for vec_arith() is currently more awkward than writing them for vec_ptype2() or vec_cast(). We plan to improve this in the future. For now, you can use the following instructions.

If you define a new type and want to write vec_arith() methods for it, you’ll need to provide a new single dispatch S3 generic for it of the following form:

#' @export
#' @method vec_arith my_type
vec_arith.my_type <- function(op, x, y, ...) {
  UseMethod("vec_arith.my_type", y)
}

Note that this actually functions as both an S3 method for vec_arith() and an S3 generic called vec_arith.my_type() that dispatches off y. roxygen2 only recognizes it as an S3 generic, so you have to register the S3 method part of this with an explicit @method call.

After that, you can define double dispatch methods, but you still need an explicit @method tag to ensure it is registered with the correct generic:

#' @export
#' @method vec_arith.my_type my_type
vec_arith.my_type.my_type <- function(op, x, y, ...) {
  # implementation here
}

#' @export
#' @method vec_arith.my_type integer
vec_arith.my_type.integer <- function(op, x, y, ...) {
  # implementation here
}

#' @export
#' @method vec_arith.integer my_type
vec_arith.integer.my_type <- function(op, x, y, ...) {
  # implementation here
}

vctrs provides the hybrid S3 generics/methods for most of the base R types, like vec_arith.integer(). If you don’t fully import vctrs with @import vctrs, then you will need to explicitly import the generic you are registering double dispatch methods for with @importFrom vctrs vec_arith.integer.

Testing

It’s good practice to test your new class. Specific recommendations:

  • R/percent.R is the type of file where you really do want 100% test coverage. You can use devtools::test_coverage_file() to check this.

  • Make sure to test behaviour with zero-length inputs and missing values.

  • Use testthat::verify_output() to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about verify_output() in the testthat v2.3.0 blog post; it’s an example of a so-called golden test.

  • Check for method symmetry; use expect_s3_class(), probably with exact = TRUE, to ensure that vec_c(x, y) and vec_c(y, x) return the same type of output for the important xs and ys in your domain.

  • Use testthat::expect_error() to check that inputs that can’t be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include vctrs_error_assert_ptype, vctrs_error_assert_size, and vctrs_error_incompatible_type.

    expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type")

If your tests pass when run by devtools::test(), but fail when run in R CMD check, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated NAMESPACE.

Existing classes

Before you build your own class, you might want to consider using, or subclassing existing classes. You can check awesome-vctrs for a curated list of R vector classes, some of which are built with vctrs.

If you’ve built or extended a class, consider adding it to that list so other people can use it.

vctrs/inst/doc/s3-vector.R0000644000176200001440000004705714532404536015130 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ## ----setup-------------------------------------------------------------------- library(vctrs) library(rlang) library(zeallot) ## ----------------------------------------------------------------------------- new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ## ----------------------------------------------------------------------------- percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ## ----------------------------------------------------------------------------- new_percent() percent() ## ----------------------------------------------------------------------------- is_percent <- function(x) { inherits(x, "vctrs_percent") } ## ----------------------------------------------------------------------------- format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ## ----include = FALSE---------------------------------------------------------- # As of R 3.5, print.vctr can not find format.percent since it's not in # its lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ## ----------------------------------------------------------------------------- x ## ----------------------------------------------------------------------------- data.frame(x) ## ----------------------------------------------------------------------------- vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ## ----error = TRUE------------------------------------------------------------- vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ## ----------------------------------------------------------------------------- vec_ptype2(percent(), percent()) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ## ----------------------------------------------------------------------------- vec_ptype_show(percent(), double(), percent()) ## ----------------------------------------------------------------------------- vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ## ----------------------------------------------------------------------------- vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ## ----------------------------------------------------------------------------- vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ## ----error = TRUE------------------------------------------------------------- vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ## ----error = TRUE------------------------------------------------------------- # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ## ----------------------------------------------------------------------------- as_percent <- function(x) { vec_cast(x, new_percent()) } ## ----------------------------------------------------------------------------- as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ## ----------------------------------------------------------------------------- new_decimal <- function(x = double(), digits = 2L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_integer(digits)) { abort("`digits` must be an integer vector.") } vec_check_size(digits, size = 1L) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ## ----------------------------------------------------------------------------- x[1:2] x[[1]] ## ----------------------------------------------------------------------------- vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ## ----error = TRUE------------------------------------------------------------- vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ## ----------------------------------------------------------------------------- new_cached_sum <- function(x = double(), sum = 0L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_double(sum)) { abort("`sum` must be a double vector.") } vec_check_size(sum, size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ## ----------------------------------------------------------------------------- obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ## ----------------------------------------------------------------------------- vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ## ----------------------------------------------------------------------------- x[1:2] ## ----------------------------------------------------------------------------- vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ## ----------------------------------------------------------------------------- x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ## ----------------------------------------------------------------------------- new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer vector.") } if (!is_integer(d)) { abort("`d` must be an integer vector.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ## ----------------------------------------------------------------------------- rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ## ----------------------------------------------------------------------------- names(x) length(x) ## ----------------------------------------------------------------------------- fields(x) field(x, "n") ## ----error = TRUE------------------------------------------------------------- x str(x) ## ----------------------------------------------------------------------------- vec_data(x) str(vec_data(x)) ## ----------------------------------------------------------------------------- format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ## ----------------------------------------------------------------------------- str(x) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ## ----------------------------------------------------------------------------- new_decimal2 <- function(l, r, scale = 2L) { if (!is_integer(l)) { abort("`l` must be an integer vector.") } if (!is_integer(r)) { abort("`r` must be an integer vector.") } if (!is_integer(scale)) { abort("`scale` must be an integer vector.") } vec_check_size(scale, size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ## ----------------------------------------------------------------------------- x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ## ----------------------------------------------------------------------------- # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ## ----------------------------------------------------------------------------- unique(x) ## ----------------------------------------------------------------------------- rational(1, 2) < rational(2, 3) rational(2, 4) < rational(2, 3) ## ----------------------------------------------------------------------------- vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational(2, 4) < rational(2, 3) ## ----------------------------------------------------------------------------- sort(x) ## ----------------------------------------------------------------------------- poly <- function(...) { x <- vec_cast_common(..., .to = integer()) new_poly(x) } new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly_list") } vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly" format.vctrs_poly_list <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly_list <- function(x, ...) { if (length(x) != 0) { print(format(x), quote = FALSE) } } p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ## ----------------------------------------------------------------------------- class(p) p[2] p[[2]] ## ----------------------------------------------------------------------------- obj_is_list(p) ## ----------------------------------------------------------------------------- poly <- function(...) { x <- vec_cast_common(..., .to = integer()) x <- new_poly(x) new_rcrd(list(data = x), class = "vctrs_poly") } format.vctrs_poly <- function(x, ...) { format(field(x, "data")) } ## ----------------------------------------------------------------------------- p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ## ----------------------------------------------------------------------------- obj_is_list(p) ## ----------------------------------------------------------------------------- p[[2]] ## ----------------------------------------------------------------------------- p == poly(c(1, 0, 1)) ## ----error = TRUE------------------------------------------------------------- p < p[2] ## ----------------------------------------------------------------------------- vec_proxy_compare.vctrs_poly <- function(x, ...) { # Get the list inside the record vector x_raw <- vec_data(field(x, "data")) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } p < p[2] ## ----------------------------------------------------------------------------- sort(p) sort(p[c(1:3, 1:2)]) ## ----------------------------------------------------------------------------- vec_proxy_order.vctrs_poly <- function(x, ...) { vec_proxy_compare(x, ...) } sort(p) ## ----------------------------------------------------------------------------- vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ## ----------------------------------------------------------------------------- vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ## ----------------------------------------------------------------------------- new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ## ----------------------------------------------------------------------------- sum(x) mean(x) ## ----error = TRUE------------------------------------------------------------- x + 1 meter(10) + meter(1) meter(10) * 3 ## ----------------------------------------------------------------------------- vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ## ----error = TRUE------------------------------------------------------------- vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ## ----error = TRUE------------------------------------------------------------- vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ## ----------------------------------------------------------------------------- vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ## ----eval = FALSE------------------------------------------------------------- # #' Internal vctrs methods # #' # #' @import vctrs # #' @keywords internal # #' @name pizza-vctrs # NULL ## ----------------------------------------------------------------------------- new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "pizza_percent") } ## ----------------------------------------------------------------------------- # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ## ----------------------------------------------------------------------------- #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ## ----------------------------------------------------------------------------- #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ## ----------------------------------------------------------------------------- #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ## ----eval = FALSE------------------------------------------------------------- # #' @export # format.pizza_percent <- function(x, ...) { # out <- formatC(signif(vec_data(x) * 100, 3)) # out[is.na(x)] <- NA # out[!is.na(x)] <- paste0(out[!is.na(x)], "%") # out # } # # #' @export # vec_ptype_abbr.pizza_percent <- function(x, ...) { # "prcnt" # } ## ----eval = FALSE------------------------------------------------------------- # #' @export # vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() # #' @export # vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() # # #' @export # vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x # #' @export # vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) # #' @export # vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ## ----eval=FALSE--------------------------------------------------------------- # #' @export # #' @method vec_arith my_type # vec_arith.my_type <- function(op, x, y, ...) { # UseMethod("vec_arith.my_type", y) # } ## ----eval=FALSE--------------------------------------------------------------- # #' @export # #' @method vec_arith.my_type my_type # vec_arith.my_type.my_type <- function(op, x, y, ...) { # # implementation here # } # # #' @export # #' @method vec_arith.my_type integer # vec_arith.my_type.integer <- function(op, x, y, ...) { # # implementation here # } # # #' @export # #' @method vec_arith.integer my_type # vec_arith.integer.my_type <- function(op, x, y, ...) { # # implementation here # } ## ----eval = FALSE------------------------------------------------------------- # expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") vctrs/inst/doc/type-size.Rmd0000644000176200001440000003155214511320527015540 0ustar liggesusers--- title: "Prototypes and sizes" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Prototypes and sizes} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Rather than using `class()` and `length()`, vctrs has notions of prototype (`vec_ptype_show()`) and size (`vec_size()`). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules. Size and prototype are motivated by thinking about the optimal behaviour for `c()` and `rbind()`, particularly inspired by data frames with columns that are matrices or data frames. ```{r} library(vctrs) ``` ## Prototype The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the `class()` of an object is inadequate for this purpose: * The `class()` doesn't include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a `POSIXct`. You cannot combine two factors or two `POSIXct`s without thinking about the attributes. * The `class()` of a matrix is "matrix" and doesn't include the type of the underlying vector or the dimensionality. Instead, vctrs takes advantage of R's vectorised nature and uses a __prototype__, a 0-observation slice of the vector (this is basically `x[0]` but with some subtleties we'll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data. Conveniently, you can create many prototypes using existing base functions (e.g, `double()` and `factor(levels = c("a", "b"))`). vctrs provides a few helpers (e.g. `new_date()`, `new_datetime()`, and `new_duration()`) where the equivalents in base R are missing. ### Base prototypes `vec_ptype()` creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides `vec_ptype_show()`, which prints the prototype in a friendly way (and returns nothing). Using `vec_ptype_show()` allows us to see the prototypes base R classes: * Atomic vectors have no attributes and just display the underlying `typeof()`: ```{r} vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ``` * The prototype of matrices and arrays include the base type and the dimensions after the first: ```{r} vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ``` * The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it's highly likely that their levels are also equal. ```{r} vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ``` While `vec_ptype_show()` prints only the hash, the prototype object itself does contain all levels: ```{r} vec_ptype(factor("a")) ``` * Base R has three key date time classes: dates, date-times (`POSIXct`), and durations (`difftime)`. Date-times have a timezone, and durations have a unit. ```{r} vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ``` * Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column: ```{r} vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ``` Data frames can have columns that are themselves data frames, making this a "recursive" type: ```{r} df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ``` ### Coercing to common type It's often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via `vec_ptype_common()`. `vec_ptype_common()` possesses the following invariants: * `class(vec_ptype_common(x, y))` equals `class(vec_ptype_common(y, x))`. * `class(vec_ptype_common(x, vec_ptype_common(y, z))` equals `class(vec_ptype_common(vec_ptype_common(x, y), z))`. * `vec_ptype_common(x, NULL) == vec_ptype(x)`. i.e., `vec_ptype_common()` is both commutative and associative (with respect to class) and has an identity element, `NULL`; i.e., it's a __commutative monoid__. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects. Like with `vec_ptype()`, the easiest way to explore `vec_ptype_common()` is with `vec_ptype_show()`: when given multiple inputs, it will print their common prototype. (In other words: program with `vec_ptype_common()` but play with `vec_ptype_show()`.) * The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically: ```{r, error = TRUE} vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ``` * Matrices and arrays are automatically broadcast to higher dimensions: ```{r} vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ``` Provided that the dimensions follow the vctrs recycling rules: ```{r, error = TRUE} vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ``` * Factors combine levels in the order in which they appear. ```{r} fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ``` * Combining a date and date-time yields a date-time: ```{r} vec_ptype_show(new_date(), new_datetime()) ``` When combining two date times, the timezone is taken from the first input: ```{r} vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ``` Unless it's the local timezone, in which case any explicit time zone will win: ```{r} vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ``` * The common type of two data frames is the common type of each column that occurs in both data frames: ```{r} vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ``` And the union of the columns that only occur in one: ```{r} vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ``` Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled. ### Casting to specified type `vec_ptype_common()` finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That's the job of `vec_cast_common()`: ```{r} str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ``` Alternatively, you can cast to a specific prototype using `vec_cast()`: ```{r, error = TRUE} # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ``` If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error. ```{r, error = TRUE} vec_cast(c(1.5, 2), integer()) ``` You can suppress the lossy cast errors with `allow_lossy_cast()`: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ``` This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ``` The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound. ## Size `vec_size()` was motivated by the need to have an invariant that describes the number of "observations" in a data structure. This is particularly important for data frames, as it's useful to have some function such that `f(data.frame(x))` equals `f(x)`. No base function has this property: * `length(data.frame(x))` equals `1` because the length of a data frame is the number of columns. * `nrow(data.frame(x))` does not equal `nrow(x)` because `nrow()` of a vector is `NULL`. * `NROW(data.frame(x))` equals `NROW(x)` for vector `x`, so is almost what we want. But because `NROW()` is defined in terms of `length()`, it returns a value for every object, even types that can't go in a data frame, e.g. `data.frame(mean)` errors even though `NROW(mean)` is `1`. We define `vec_size()` as follows: * It is the length of 1d vectors. * It is the number of rows of data frames, matrices, and arrays. * It throws error for non vectors. Given `vec_size()`, we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns. ### Slicing `vec_slice()` is to `vec_size()` as `[` is to `length()`; i.e., it allows you to select observations regardless of the dimensionality of the underlying object. `vec_slice(x, i)` is equivalent to: * `x[i]` when `x` is a vector. * `x[i, , drop = FALSE]` when `x` is a data frame. * `x[i, , , drop = FALSE]` when `x` is a 3d array. ```{r} x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ``` `vec_slice(data.frame(x), i)` equals `data.frame(vec_slice(x, i))` (modulo variable and row names). Prototypes are generated with `vec_slice(x, 0L)`; given a prototype, you can initialize a vector of given size (filled with `NA`s) with `vec_init()`. ### Common sizes: recycling rules Closely related to the definition of size are the __recycling rules__. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in `vec_size_common()`, which gives the common size of a set of vectors: ```{r} vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ``` vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like `dest == c("IAH", "HOU"))`, at the cost of occasionally requiring an explicit calls to `rep()`. ```{r, echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates an error"} knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ``` You can apply the recycling rules in two ways: * If you have a vector and desired size, use `vec_recycle()`: ```{r} vec_recycle(1:3, 3) vec_recycle(1, 10) ``` * If you have multiple vectors and you want to recycle them to the same size, use `vec_recycle_common()`: ```{r} vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ``` ## Appendix: recycling in base R The recycling rules in base R are described in [The R Language Definition](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Recycling-rules) but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions. Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer: ```{r} rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ``` If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning: ```{r} invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ``` But some functions recycle silently: ```{r} length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ``` And `data.frame()` throws an error: ```{r, error = TRUE} data.frame(1:2, 1:3) ``` The R language definition states that "any arithmetic operation involving a zero-length vector has a zero-length result". But outside of arithmetic, this rule is not consistently followed: ```{r, error = TRUE} # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) ``` vctrs/inst/doc/pillar.Rmd0000644000176200001440000002126014315060310015056 0ustar liggesusers--- title: "Printing vectors nicely in tibbles" author: "Kirill Müller, Hadley Wickham" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Printing vectors nicely in tibbles} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` You can get basic control over how a vector is printed in a tibble by providing a `format()` method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics: * `vctrs::vec_ptype_abbr()` determines what goes into the column header. * `pillar::pillar_shaft()` determines what goes into the body, or the shaft, of the column. Technically a [*pillar*](https://en.wikipedia.org/wiki/Column#Nomenclature) is composed of a *shaft* (decorated with an *ornament*), with a *capital* above and a *base* below. Multiple pillars form a *colonnade*, which can be stacked in multiple *tiers*. This is the motivation behind the names in our API. This short vignette shows the basics of column styling using a `"latlon"` vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you'll need to create documentation and the `NAMESPACE` file. In this vignette, we'll attach pillar and vctrs: ```{r setup} library(vctrs) library(pillar) ``` You don't need to do this in a package. Instead, you'll need to _import_ the packages by then to the `Imports:` section of your `DESCRIPTION`. The following helper does this for you: ```{r, eval = FALSE} usethis::use_package("vctrs") usethis::use_package("pillar") ``` ## Prerequisites To illustrate the basic ideas we're going to create a `"latlon"` class that encodes geographic coordinates in a record. We'll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using `vctrs_rcrd()`, we already get the infrastructure to make this class fully compatible with data frames for free. See `vignette("s3-vector", package = "vctrs")` for details on the record data type. ```{r} #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ``` ## Using in a tibble Columns of this class can be used in a tibble right away because we've made a class using the vctrs infrastructure and have provided a `format()` method: ```{r} library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ``` This output is ok, but we could improve it by: 1. Using a more description type abbreviation than ``. 1. Using a dash of colour to highlight the most important parts of the value. 1. Providing a narrower view when horizontal space is at a premium. The following sections show how to enhance the rendering. ## Fixing the data type Instead of `` we'd prefer to use ``. We can do that by implementing the `vec_ptype_abbr()` method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that's under 6 characters. ```{r} #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ``` ## Custom rendering The `format()` method is used by default for rendering. For custom formatting you need to implement the `pillar_shaft()` method. This function should always return a pillar shaft object, created by `new_pillar_shaft_simple()` or similar. `new_pillar_shaft_simple()` accepts ANSI escape codes for colouring, and pillar includes some built in styles like `style_subtle()`. We can use subtle style for the degree and minute separators to make the data more obvious. First we define a degree formatter that makes use of `style_subtle()`: ```{r} deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ``` And then we pass that to our `format()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ``` Currently, ANSI escapes are not rendered in vignettes, so this result doesn't look any different, but if you run the code yourself you'll see an improved display. ```{r} data ``` As well as the functions in pillar, the [cli](https://cli.r-lib.org/) package provides a variety of tools for styling text. ## Truncation Tibbles can automatically compacts columns when there's no enough horizontal space to display everything: ```{r} print(data, width = 30) ``` Currently the latlon class isn't ever compacted because we haven't specified a minimum width when constructing the shaft. Let's fix that and re-print the data: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ``` ## Adaptive rendering Truncation may be useful for character data, but for lat-lon data it'd be nicer to show full degrees and remove the minutes. We'll first write a function that does this: ```{r} deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ``` Then use it as part of more sophisticated implementation of the `pillar_shaft()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ``` Now the `pillar_shaft()` method returns an object of class `"pillar_shaft_latlon"` created by `new_pillar_shaft()`. This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (`get_max_extent()` is a helper that computes the maximum display width occupied by the values in a character vector.) All that's left to do is to implement a `format()` method for our new `"pillar_shaft_latlon"` class. This method will be called with a `width` argument, which then determines which of the formats to choose. The formatting of our choice is passed to the `new_ornament()` function: ```{r} #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ``` ## Testing If you want to test the output of your code, you can compare it with a known state recorded in a text file. The `testthat::expect_snapshot()` function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won't make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN. Use this testthat expectation in one of your test files to create a snapshot test: ```{r eval = FALSE} expect_snapshot(pillar_shaft(data$loc)) ``` See for more information. vctrs/inst/doc/stability.R0000644000176200001440000001174714532404537015305 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(vctrs) library(rlang) library(zeallot) ## ----------------------------------------------------------------------------- vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ## ----------------------------------------------------------------------------- vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ## ----------------------------------------------------------------------------- vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ## ----------------------------------------------------------------------------- env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ## ----------------------------------------------------------------------------- vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ## ----------------------------------------------------------------------------- c(FALSE, 1L, 2.5) ## ----------------------------------------------------------------------------- vec_c(FALSE, 1L, 2.5) ## ----error = TRUE------------------------------------------------------------- c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ## ----------------------------------------------------------------------------- c(10.5, factor("x")) ## ----------------------------------------------------------------------------- c(mean, globalenv()) ## ----error = TRUE------------------------------------------------------------- c(getRversion(), "x") c("x", getRversion()) ## ----error = TRUE------------------------------------------------------------- vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ## ----------------------------------------------------------------------------- fa <- factor("a") fb <- factor("b") c(fa, fb) ## ----------------------------------------------------------------------------- vec_c(fa, fb) vec_c(fb, fa) ## ----------------------------------------------------------------------------- datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ## ----------------------------------------------------------------------------- vec_c(datetime_nz) ## ----------------------------------------------------------------------------- datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ## ----------------------------------------------------------------------------- date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ## ----------------------------------------------------------------------------- vec_c(date, datetime) vec_c(date, datetime_nz) ## ----------------------------------------------------------------------------- c(NA, fa) c(NA, date) c(NA, datetime) ## ----------------------------------------------------------------------------- vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ## ----------------------------------------------------------------------------- df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ## ----------------------------------------------------------------------------- vec_c(df1, df2) ## ----------------------------------------------------------------------------- m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ## ----------------------------------------------------------------------------- c(m, 1) vec_c(m, 1) ## ----eval = FALSE------------------------------------------------------------- # vec_c <- function(...) { # args <- compact(list2(...)) # # ptype <- vec_ptype_common(!!!args) # if (is.null(ptype)) # return(NULL) # # ns <- map_int(args, vec_size) # out <- vec_init(ptype, sum(ns)) # # pos <- 1 # for (i in seq_along(ns)) { # n <- ns[[i]] # # x <- vec_cast(args[[i]], to = ptype) # vec_slice(out, pos:(pos + n - 1)) <- x # pos <- pos + n # } # # out # } ## ----------------------------------------------------------------------------- if_else <- function(test, yes, no) { if (!is_logical(test)) { abort("`test` must be a logical vector.") } c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ## ----------------------------------------------------------------------------- if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) vctrs/inst/doc/s3-vector.Rmd0000644000176200001440000013076514511320527015442 0ustar liggesusers--- title: "S3 vectors" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{S3 vectors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ``` This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you'll also need to provide methods that actually make the vector useful. I assume that you're already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of [the S3 chapter](https://adv-r.hadley.nz/s3.html) of *Advanced R*. This article refers to "vectors of numbers" as *double vectors*. Here, "double" stands for ["double precision floating point number"](https://en.wikipedia.org/wiki/Double-precision_floating-point_format), see also `double()`. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` This vignette works through five big topics: - The basics of creating a new vector class with vctrs. - The coercion and casting system. - The record and list-of types. - Equality and comparison proxies. - Arithmetic operators. They're collectively demonstrated with a number of simple S3 classes: - Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting. - Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions. - Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care. - Rational: a pair of integer vectors that defines a rational number like `2 / 3`. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for `+`, `-`, and friends. - Polynomial: a list of integer vectors that define polynomials like `1 + x - x^3`. Sorting such vectors correctly requires a custom equality method. - Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties. - Period and frequency: a pair of classes represent a period, or its inverse, frequency. This allows us to explore more arithmetic operators. ## Basics In this section you'll learn how to create a new vctrs class by calling `new_vctr()`. This creates an object with class `vctrs_vctr` which has a number of methods. These are designed to make your life as easy as possible. For example: - The `print()` and `str()` methods are defined in terms of `format()` so you get a pleasant, consistent display as soon as you've made your `format()` method. - You can immediately put your new vector class in a data frame because `as.data.frame.vctrs_vctr()` does the right thing. - Subsetting (`[`, `[[`, and `$`), `length<-`, and `rep()` methods automatically preserve attributes because they use `vec_restore()`. A default `vec_restore()` works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data. - Default subset-assignment methods (`[<-`, `[[<-`, and `$<-`) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages. ### Percent class In this section, I'll show you how to make a `percent` class, i.e., a double vector that is printed as a percentage. We start by defining a low-level [constructor](https://adv-r.hadley.nz/s3.html#s3-constrcutor) to check types and/or sizes and call `new_vctr()`. `percent` is built on a double vector of any length and doesn't have any attributes. ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ``` Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as [blob](https://blob.tidyverse.org/)), it's fine to use the package name without prefix as the class name. We then follow up with a user friendly [helper](https://adv-r.hadley.nz/s3.html#helpers). Here we'll use `vec_cast()` to allow it to accept anything coercible to a double: ```{r} percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype. ```{r} new_percent() percent() ``` For the convenience of your users, consider implementing an `is_percent()` function: ```{r} is_percent <- function(x) { inherits(x, "vctrs_percent") } ``` ### `format()` method The first method for every class should almost always be a `format()` method. This should return a character vector the same length as `x`. The easiest way to do this is to rely on one of R's low-level formatting functions like `formatC()`: ```{r} format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ``` ```{r, include = FALSE} # As of R 3.5, print.vctr can not find format.percent since it's not in # its lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ``` ```{r} x ``` (Note the use of `vec_data()` so `format()` doesn't get stuck in an infinite loop, and that I take a little care to not convert `NA` to `"NA"`; this leads to better printing.) The format method is also used by data frames, tibbles, and `str()`: ```{r} data.frame(x) ``` For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in `str()`: ```{r} vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ``` If you need more control over printing in tibbles, implement a method for `pillar::pillar_shaft()`. See `vignette("pillar", package = "vctrs")` for details. ## Casting and coercion The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens *implicitly* (e.g in `c()`) we call it **coercion**; when the change happens *explicitly* (e.g. with `as.integer(x)`), we call it **casting**. One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it's possible to make accurate predictions about what (e.g.) `c(x, y)` should do when `x` and `y` have different prototypes. vctrs achieves this goal through two generics: - `vec_ptype2(x, y)` defines possible set of coercions. It returns a prototype if `x` and `y` can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes. - `vec_cast(x, to)` defines the possible sets of casts. It returns `x` translated to have prototype `to`, or throws an error if the conversion isn't possible. The set of possible casts is a superset of possible coercions because they're requested explicitly. ### Double dispatch Both generics use [**double dispatch**](https://en.wikipedia.org/wiki/Double_dispatch) which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means: - You end up with method names with two classes, like `vec_ptype2.foo.bar()`. - You don't need to implement default methods (they would never be called if you do). - You can't call `NextMethod()`. ### Percent class {#percent} We'll make our percent class coercible back and forth with double vectors. `vec_ptype2()` provides a user friendly error message if the coercion doesn't exist and makes sure `NA` is handled in a standard way. `NA` is technically a logical vector, but we want to stand in for a missing value of any type. ```{r, error = TRUE} vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ``` By default and in simple cases, an object of the same class is compatible with itself: ```{r} vec_ptype2(percent(), percent()) ``` However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we'll start by saying that a `vctrs_percent` combined with a `vctrs_percent` yields a `vctrs_percent`, which we indicate by returning a prototype generated by the constructor. ```{r} vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ``` Next we define methods that say that combining a `percent` and double should yield a `double`. We avoid returning a `percent` here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers. Because double dispatch is a bit of a hack, we need to provide two methods. It's your responsibility to ensure that each member of the pair returns the same result: if they don't you will get weird and unpredictable behaviour. The double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. If we implemented `vec_ptype2.vctrs_percent.numeric()`, it would never be called. ```{r} vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ``` We can check that we've implemented this correctly with `vec_ptype_show()`: ```{r} vec_ptype_show(percent(), double(), percent()) ``` The `vec_ptype2()` methods define which input is the richer type that vctrs should coerce to. However, they don't perform any conversion. This is the job of `vec_cast()`, which we implement next. We'll provide a method to cast a percent to a percent: ```{r} vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ``` And then for converting back and forth between doubles. To convert a double to a percent we use the `percent()` helper (not the constructor; this is unvalidated user input). To convert a `percent` to a double, we strip the attributes. Note that for historical reasons the order of argument in the signature is the opposite as for `vec_ptype2()`. The class for `to` comes first, and the class for `x` comes second. Again, the double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. Implementing `vec_cast.vctrs_percent.numeric()` has no effect. ```{r} vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ``` Then we can check this works with `vec_cast()`: ```{r} vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ``` Once you've implemented `vec_ptype2()` and `vec_cast()`, you get `vec_c()`, `[<-`, and `[[<-` implementations for free. ```{r, error = TRUE} vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ``` You'll also get mostly correct behaviour for `c()`. The exception is when you use `c()` with a base R class: ```{r, error = TRUE} # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ``` Unfortunately there's no way to fix this problem with the current design of `c()`. Again, as a convenience, consider providing an `as_percent()` function that makes use of the casts defined in your `vec_cast.vctrs_percent()` methods: ```{r} as_percent <- function(x) { vec_cast(x, new_percent()) } ``` Occasionally, it is useful to provide conversions that go beyond what's allowed in casting. For example, we could offer a parsing method for character vectors. In this case, `as_percent()` should be generic, the default method should cast, and then additional methods should implement more flexible conversion: ```{r} as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ``` ### Decimal class Now that you've seen the basics with a very simple S3 class, we'll gradually explore more complicated scenarios. This section creates a `decimal` class that prints with the specified number of decimal places. This is very similar to `percent` but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1). We start off as before, defining a low-level constructor, a user-friendly constructor, a `format()` method, and a `vec_ptype_abbr()`. Note that additional object attributes are simply passed along to `new_vctr()`: ```{r} new_decimal <- function(x = double(), digits = 2L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_integer(digits)) { abort("`digits` must be an integer vector.") } vec_check_size(digits, size = 1L) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ``` Note that I provide a little helper to extract the `digits` attribute. This makes the code a little easier to read and should not be exported. By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You'll see what to do if the attributes are data dependent in the next section. ```{r} x[1:2] x[[1]] ``` For the sake of exposition, we'll assume that `digits` is an important attribute of the class and should be included in the full type: ```{r} vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ``` Now consider `vec_cast()` and `vec_ptype2()`. Casting and coercing from one decimal to another requires a little thought as the values of the `digits` attribute might be different, and we need some way to reconcile them. Here I've decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error. ```{r} vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ``` Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal). ```{r} vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ``` If type `x` has greater resolution than `y`, there will be some inputs that lose precision. These should generate errors using `stop_lossy_cast()`. You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution. ```{r, error = TRUE} vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ``` ### Cached sum class {#cached-sum} The next level up in complexity is an object that has data-dependent attributes. To explore this idea we'll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors: ```{r} new_cached_sum <- function(x = double(), sum = 0L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_double(sum)) { abort("`sum` must be a double vector.") } vec_check_size(sum, size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ``` For this class, we can use the default `format()` method, and instead, we'll customise the `obj_print_footer()` method. This is a good place to display user facing attributes. ```{r} obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ``` We'll also override `sum()` and `mean()` to use the attribute. This is easiest to do with `vec_math()`, which you'll learn about later. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ``` As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they'll work, but return the incorrect result: ```{r} x[1:2] ``` To fix this, you need to provide a `vec_restore()` method. Note that this method dispatches on the `to` argument. ```{r} vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ``` This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with `vec_data()` and then reapplying them again with `vec_restore()`. The default `vec_restore()` method copies over all attributes, which is not appropriate when the attributes depend on the data. Note that `vec_restore.class` is subtly different from `vec_cast.class.class()`. `vec_restore()` is used when restoring attributes that have been lost; `vec_cast()` is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with `new_vctr()`. `vec_restore.factor()` would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with `vec_cast()`. ## Record-style objects Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is `POSIXlt`, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override `length()` and subsetting methods to conceal this implementation detail. ```{r} x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ``` vctrs makes it easy to create new record-style classes using `new_rcrd()`, which has a wide selection of default methods. ### Rational class A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short). As usual we start with low-level and user-friendly constructors. The low-level constructor calls `new_rcrd()`, which needs a named list of equal-length vectors. ```{r} new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer vector.") } if (!is_integer(d)) { abort("`d` must be an integer vector.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ``` Our user friendly constructor casts `n` and `d` to integers and recycles them to the same length. ```{r} rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ``` Behind the scenes, `x` is a named list with two elements. But those details are hidden so that it behaves like a vector: ```{r} names(x) length(x) ``` To access the underlying fields we need to use `field()` and `fields()`: ```{r} fields(x) field(x, "n") ``` Notice that we can't `print()` or `str()` the new rational vector `x` yet. Printing causes an error: ```{r, error = TRUE} x str(x) ``` This is because we haven't defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call `vec_data(x)`. ```{r} vec_data(x) str(vec_data(x)) ``` It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way: ```{r} format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ``` vctrs uses the `format()` method in `str()`, hiding the underlying implementation details from the user: ```{r} str(x) ``` For `rational`, `vec_ptype2()` and `vec_cast()` follow the same pattern as `percent()`. We allow coercion from integer and to doubles. ```{r} vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ``` ### Decimal2 class The previous implementation of `decimal` was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems. A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a `scale`). The following code is a very quick sketch of how you might start creating such a class: ```{r} new_decimal2 <- function(l, r, scale = 2L) { if (!is_integer(l)) { abort("`l` must be an integer vector.") } if (!is_integer(r)) { abort("`r` must be an integer vector.") } if (!is_integer(scale)) { abort("`scale` must be an integer vector.") } vec_check_size(scale, size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ``` ## Equality and comparison vctrs provides four "proxy" generics. Two of these let you control how your class determines equality and comparison: - `vec_proxy_equal()` returns a data vector suitable for comparison. It underpins `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()`. - `vec_proxy_compare()` specifies how to compare the elements of your vector. This proxy is used in `<`, `<=`, `>=`, `>`, `min()`, `max()`, `median()`, and `quantile()`. Two other proxy generic are used for sorting for unordered data types and for accessing the raw data for exotic storage formats: - `vec_proxy_order()` specifies how to sort the elements of your vector. It is used in `xtfrm()`, which in turn is called by the `order()` and `sort()` functions. This proxy was added to implement the behaviour of lists, which are sortable (their order proxy sorts by first occurrence) but not comparable (comparison operators cause an error). Its default implementation for other classes calls `vec_proxy_compare()` and you normally don't need to implement this proxy. - `vec_proxy()` returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn't need to implement `vec_proxy()`. The default behavior is as follows: - `vec_proxy_equal()` calls `vec_proxy()` - `vec_proxy_compare()` calls `vec_proxy_equal()` - `vec_proxy_order()` calls `vec_proxy_compare()` You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work. These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C. ### Rational class Let's explore these ideas by with the rational class we started on above. By default, `vec_proxy()` converts a record to a data frame, and the default comparison works column by column: ```{r} x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ``` This makes sense as a default but isn't correct here because `rational(1, 1)` represents the same number as `rational(2, 2)`, so they should be equal. We can fix that by implementing a `vec_proxy_equal()` method that divides `n` and `d` by their greatest common divisor: ```{r} # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ``` `vec_proxy_equal()` is also used by `unique()`: ```{r} unique(x) ``` We now need to fix the comparison operations similarly, since comparison currently happens lexicographically by `n`, then by `d`: ```{r} rational(1, 2) < rational(2, 3) rational(2, 4) < rational(2, 3) ``` The easiest fix is to convert the fraction to a floating point number and use this as a proxy: ```{r} vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational(2, 4) < rational(2, 3) ``` This also fixes `sort()`, because the default implementation of `vec_proxy_order()` calls `vec_proxy_compare()`. ```{r} sort(x) ``` (We could have used the same approach in `vec_proxy_equal()`, but when working with floating point numbers it's not necessarily true that `x == y` implies that `d * x == d * y`.) ### Polynomial class A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like `1 + 3x - 2x^2`) using a list of integer vectors (like `c(1, 3, -2)`). Note the use of `new_list_of()` in the constructor. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) new_poly(x) } new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly_list") } vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly" format.vctrs_poly_list <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly_list <- function(x, ...) { if (length(x) != 0) { print(format(x), quote = FALSE) } } p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` The resulting objects will inherit from the `vctrs_list_of` class, which provides tailored methods for `$`, `[[`, the corresponding assignment operators, and other methods. ```{r} class(p) p[2] p[[2]] ``` The class implements the list interface: ```{r} obj_is_list(p) ``` This is fine for the internal implementation of this class but it would be more appropriate if it behaved like an atomic vector rather than a list. #### Make an atomic polynomial vector An atomic vector is a vector like integer or character for which `[[` returns the same type. Unlike lists, you can't reach inside an atomic vector. To make the polynomial class an atomic vector, we'll wrap the internal `list_of()` class within a record vector. Usually records are used because they can store several fields of data for each observation. Here we have only one, but we use the class anyway to inherit its atomicity. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) x <- new_poly(x) new_rcrd(list(data = x), class = "vctrs_poly") } format.vctrs_poly <- function(x, ...) { format(field(x, "data")) } ``` The new `format()` method delegates to the one we wrote for the internal list. The vector looks just like before: ```{r} p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` Making the class atomic means that `obj_is_list()` now returns `FALSE`. This prevents recursive algorithms that traverse lists from reaching too far inside the polynomial internals. ```{r} obj_is_list(p) ``` Most importantly, it prevents users from reaching into the internals with `[[`: ```{r} p[[2]] ``` #### Implementing equality and comparison Equality works out of the box because we can tell if two integer vectors are equal: ```{r} p == poly(c(1, 0, 1)) ``` We can't compare individual elements, because the data is stored in a list and by default lists are not comparable: ```{r, error = TRUE} p < p[2] ``` To enable comparison, we implement a `vec_proxy_compare()` method: ```{r} vec_proxy_compare.vctrs_poly <- function(x, ...) { # Get the list inside the record vector x_raw <- vec_data(field(x, "data")) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } p < p[2] ``` Often, this is sufficient to also implement `sort()`. However, for lists, there is already a default `vec_proxy_order()` method that sorts by first occurrence: ```{r} sort(p) sort(p[c(1:3, 1:2)]) ``` To ensure consistency between ordering and comparison, we forward `vec_proxy_order()` to `vec_proxy_compare()`: ```{r} vec_proxy_order.vctrs_poly <- function(x, ...) { vec_proxy_compare(x, ...) } sort(p) ``` ## Arithmetic vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once: - `vec_math(fn, x, ...)` specifies the behaviour of mathematical functions like `abs()`, `sum()`, and `mean()`. (Note that `var()` and `sd()` can't be overridden, see `?vec_math()` for the complete list supported by `vec_math()`.) - `vec_arith(op, x, y)` specifies the behaviour of the arithmetic operations like `+`, `-`, and `%%`. (See `?vec_arith()` for the complete list.) Both generics define the behaviour for multiple functions because `sum.vctrs_vctr(x)` calls `vec_math.vctrs_vctr("sum", x)`, and `x + y` calls `vec_math.x_class.y_class("+", x, y)`. They're accompanied by `vec_math_base()` and `vec_arith_base()` which make it easy to call the underlying base R functions. `vec_arith()` uses double dispatch and needs the following standard boilerplate: ```{r} vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Correctly exporting `vec_arith()` methods from a package is currently a little awkward. See the instructions in the Arithmetic section of the "Implementing a vctrs S3 class in a package" section below. ### Cached sum class I showed an example of `vec_math()` to define `sum()` and `mean()` methods for `cached_sum`. Now let's talk about exactly how it works. Most `vec_math()` functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to `vec_math_base()` for those that you don't care about. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ``` ### Meter class To explore the infix arithmetic operators exposed by `vec_arith()` I'll create a new class that represents a measurement in `meter`s: ```{r} new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ``` Because `meter` is built on top of a double vector, basic mathematic operations work: ```{r} sum(x) mean(x) ``` But we can't do arithmetic: ```{r, error = TRUE} x + 1 meter(10) + meter(1) meter(10) * 3 ``` To allow these infix functions to work, we'll need to provide `vec_arith()` generic. But before we do that, let's think about what combinations of inputs we should support: - It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can't multiply meters (because that would yield an area). - For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don't make much sense as we, strictly speaking, are dealing with objects of different nature. `vec_arith()` is another function that uses double dispatch, so as usual we start with a template. ```{r} vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Then write the method for two meter objects. We use a switch statement to cover the cases we care about and `stop_incompatible_op()` to throw an informative error message for everything else. ```{r, error = TRUE} vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ``` Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while `meter(10) / 2` makes sense, `2 / meter(10)` does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over `numeric` here instead of `double`. ```{r, error = TRUE} vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ``` For completeness, we also need `vec_arith.vctrs_meter.MISSING` for the unary `+` and `-` operators: ```{r} vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ``` ## Implementing a vctrs S3 class in a package Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things: - Register the S3 methods by listing them in the `NAMESPACE` file. - Create documentation around your methods, for the sake of your user and to satisfy `R CMD check`. Let's assume that the `percent` class is implemented in the pizza package in the file `R/percent.R`. Here we walk through the major sections of this hypothetical file. You've seen all of this code before, but now it's augmented by the roxygen2 directives that produce the correct `NAMESPACE` entries and help topics. ### Getting started First, the pizza package needs to include vctrs in the `Imports` section of its `DESCRIPTION` (perhaps by calling `usethis::use_package("vctrs")`. While vctrs is under very active development, it probably makes sense to state a minimum version. Imports: a_package, another_package, ... vctrs (>= x.y.z), ... Then we make all vctrs functions available within the pizza package by including the directive `#' @import vctrs` somewhere. Usually, it's not good practice to `@import` the entire namespace of a package, but vctrs is deliberately designed with this use case in mind. Where should we put `#' @import vctrs`? There are two natural locations: - With package-level docs in `R/pizza-doc.R`. You can use `usethis::use_package_doc()` to initiate this package-level documentation. - In `R/percent.R`. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package. We also must use one of these locations to dump some internal documentation that's needed to avoid `R CMD check` complaints. We don't expect any human to ever read this documentation. Here's how this dummy documentation should look, combined with the `#' @import vctrs` directive described above. ```{r eval = FALSE} #' Internal vctrs methods #' #' @import vctrs #' @keywords internal #' @name pizza-vctrs NULL ``` This should appear in `R/pizza-doc.R` (package-level docs) or in `R/percent.R` (class-focused file). Remember to call `devtools::document()` regularly, as you develop, to regenerate `NAMESPACE` and the `.Rd` files. From this point on, the code shown is expected to appear in `R/percent.R`. ### Low-level and user-friendly constructors Next we add our constructor: ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "pizza_percent") } ``` Note that the name of the package must be included in the class name (`pizza_percent`), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class. We can also add a call to `setOldClass()` for compatibility with S4: ```{r} # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ``` Because we've used a function from the methods package, you'll also need to add methods to `Imports`, with (e.g.) `usethis::use_package("methods")`. This is a "free" dependency because methods is bundled with every R install. Next we implement, export, and document a user-friendly helper: `percent()`. ```{r} #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` (Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do `pizza::percent()`; it would be redundant to have `pizza::pizza_percent()`.) ### Other helpers It's a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor `percent()`: ```{r} #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ``` You'll also need to update the `percent()` documentation to reflect that `x` now means two different things: ```{r} #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ``` Next we provide the key methods to make printing work. These are S3 methods, so they don't need to be documented, but they do need to be exported. ```{r eval = FALSE} #' @export format.pizza_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } #' @export vec_ptype_abbr.pizza_percent <- function(x, ...) { "prcnt" } ``` Finally, we implement methods for `vec_ptype2()` and `vec_cast()`. ```{r, eval = FALSE} #' @export vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() #' @export vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() #' @export vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x #' @export vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) #' @export vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ``` ### Arithmetic Writing double dispatch methods for `vec_arith()` is currently more awkward than writing them for `vec_ptype2()` or `vec_cast()`. We plan to improve this in the future. For now, you can use the following instructions. If you define a new type and want to write `vec_arith()` methods for it, you'll need to provide a new single dispatch S3 generic for it of the following form: ```{r, eval=FALSE} #' @export #' @method vec_arith my_type vec_arith.my_type <- function(op, x, y, ...) { UseMethod("vec_arith.my_type", y) } ``` Note that this actually functions as both an S3 method for `vec_arith()` and an S3 generic called `vec_arith.my_type()` that dispatches off `y`. roxygen2 only recognizes it as an S3 generic, so you have to register the S3 method part of this with an explicit `@method` call. After that, you can define double dispatch methods, but you still need an explicit `@method` tag to ensure it is registered with the correct generic: ```{r, eval=FALSE} #' @export #' @method vec_arith.my_type my_type vec_arith.my_type.my_type <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.my_type integer vec_arith.my_type.integer <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.integer my_type vec_arith.integer.my_type <- function(op, x, y, ...) { # implementation here } ``` vctrs provides the hybrid S3 generics/methods for most of the base R types, like `vec_arith.integer()`. If you don't fully import vctrs with `@import vctrs`, then you will need to explicitly import the generic you are registering double dispatch methods for with `@importFrom vctrs vec_arith.integer`. ### Testing It's good practice to test your new class. Specific recommendations: - `R/percent.R` is the type of file where you really do want 100% test coverage. You can use `devtools::test_coverage_file()` to check this. - Make sure to test behaviour with zero-length inputs and missing values. - Use `testthat::verify_output()` to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about `verify_output()` in the [testthat v2.3.0 blog post](https://www.tidyverse.org/blog/2019/11/testthat-2-3-0/); it's an example of a so-called [golden test](https://ro-che.info/articles/2017-12-04-golden-tests). - Check for method symmetry; use `expect_s3_class()`, probably with `exact = TRUE`, to ensure that `vec_c(x, y)` and `vec_c(y, x)` return the same type of output for the important `x`s and `y`s in your domain. - Use `testthat::expect_error()` to check that inputs that can't be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include `vctrs_error_assert_ptype`, `vctrs_error_assert_size`, and `vctrs_error_incompatible_type`. ```{r, eval = FALSE} expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") ``` If your tests pass when run by `devtools::test()`, but fail when run in `R CMD check`, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated `NAMESPACE`. ### Existing classes Before you build your own class, you might want to consider using, or subclassing existing classes. You can check [awesome-vctrs](https://github.com/krlmlr/awesome-vctrs) for a curated list of R vector classes, some of which are built with vctrs. If you've built or extended a class, consider adding it to that list so other people can use it. vctrs/inst/doc/stability.Rmd0000644000176200001440000003142614376223322015617 0ustar liggesusers--- title: "Type and size stability" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Type and size stability} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the "shape" of the output you only need to know the "shape"s of the inputs. This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can't predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` ## Definitions We say a function is __type-stable__ iff: 1. You can predict the output type knowing only the input types. 1. The order of arguments in ... does not affect the output type. Similarly, a function is __size-stable__ iff: 1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size. Very few base R functions are size-stable, so I'll also define a slightly weaker condition. I'll call a function __length-stable__ iff: 1. You can predict the output _length_ knowing only the input _lengths_, or there is a single numeric input that specifies the output _length_. (But note that length-stable is not a particularly robust definition because `length()` returns a value for things that are not vectors.) We'll call functions that don't obey these principles __type-unstable__ and __size-unstable__ respectively. On top of type- and size-stability it's also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions. The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again. ### Examples To make these ideas concrete, let's apply them to a few base functions: 1. `mean()` is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error). 1. Surprisingly, `median()` is type-unstable: ```{r} vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ``` It is, however, size-stable, since it always returns a vector of length 1. 1. `sapply()` is type-unstable because you can't predict the output type only knowing the input types: ```{r} vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ``` It's not quite size-stable; `vec_size(sapply(x, f))` is `vec_size(x)` for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns). 1. `vapply()` is a type-stable version of `sapply()` because `vec_ptype_show(vapply(x, fn, template))` is always `vec_ptype_show(template)`. It is size-unstable for the same reasons as `sapply()`. 1. `c()` is type-unstable because `c(x, y)` doesn't always output the same type as `c(y, x)`. ```{r} vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ``` `c()` is *almost always* length-stable because `length(c(x, y))` *almost always* equals `length(x) + length(y)`. One common source of instability here is dealing with non-vectors (see the later section "Non-vectors"): ```{r} env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ``` 1. `paste(x1, x2)` is length-stable because `length(paste(x1, x2))` equals `max(length(x1), length(x2))`. However, it doesn't follow the usual arithmetic recycling rules because `paste(1:2, 1:3)` doesn't generate a warning. 1. `ifelse()` is length-stable because `length(ifelse(cond, true, false))` is always `length(cond)`. `ifelse()` is type-unstable because the output type depends on the value of `cond`: ```{r} vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ``` 1. `read.csv(file)` is type-unstable and size-unstable because, while you know it will return a data frame, you don't know what columns it will return or how many rows it will have. Similarly, `df[[i]]` is not type-stable because the result depends on the _value_ of `i`. There are many important functions that can not be made type-stable or size-stable! With this understanding of type- and size-stability in hand, we'll use them to analyse some base R functions in greater depth and then propose alternatives with better properties. ## `c()` and `vctrs::vec_c()` In this section we'll compare and contrast `c()` and `vec_c()`. `vec_c()` is both type- and size-stable because it possesses the following invariants: * `vec_ptype(vec_c(x, y))` equals `vec_ptype_common(x, y)`. * `vec_size(vec_c(x, y))` equals `vec_size(x) + vec_size(y)`. `c()` has another undesirable property in that it's not consistent with `unlist()`; i.e., `unlist(list(x, y))` does not always equal `c(x, y)`; i.e., base R has multiple sets of type-coercion rules. I won't consider this problem further here. I have two goals here: * To fully document the quirks of `c()`, hence motivating the development of an alternative. * To discuss non-obvious consequences of the type- and size-stability above. ### Atomic vectors If we only consider atomic vectors, `c()` is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical. ```{r} c(FALSE, 1L, 2.5) ``` `vec_c()` obeys similar rules: ```{r} vec_c(FALSE, 1L, 2.5) ``` But it does not automatically coerce to character vectors or lists: ```{r, error = TRUE} c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ``` ### Incompatible vectors and non-vectors In general, most base methods do not throw an error: ```{r} c(10.5, factor("x")) ``` If the inputs aren't vectors, `c()` automatically puts them in a list: ```{r} c(mean, globalenv()) ``` For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list: ```{r, error = TRUE} c(getRversion(), "x") c("x", getRversion()) ``` `vec_c()` throws an error if the inputs are not vectors or not automatically coercible: ```{r, error = TRUE} vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ``` ### Factors Combining two factors returns an integer vector: ```{r} fa <- factor("a") fb <- factor("b") c(fa, fb) ``` (This is documented in `c()` but is still undesirable.) `vec_c()` returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with `dplyr::bind_rows()`, which is stricter and is a common source of user difficulty.) ```{r} vec_c(fa, fb) vec_c(fb, fa) ``` ### Date-times `c()` strips the time zone associated with date-times: ```{r} datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ``` This behaviour is documented in `?DateTimeClasses` but is the source of considerable user pain. `vec_c()` preserves time zones: ```{r} vec_c(datetime_nz) ``` What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there's no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone: ```{r} datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ``` ### Dates and date-times Combining dates and date-times with `c()` gives silently incorrect results: ```{r} date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ``` This behaviour arises because neither `c.Date()` nor `c.POSIXct()` check that all inputs are of the same type. `vec_c()` uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time). ```{r} vec_c(date, datetime) vec_c(date, datetime_nz) ``` ### Missing values If a missing value comes at the beginning of the inputs, `c()` falls back to the internal behaviour, which strips all attributes: ```{r} c(NA, fa) c(NA, date) c(NA, datetime) ``` `vec_c()` takes a different approach treating a logical vector consisting only of `NA` as the `unspecified()` class which can be converted to any other 1d type: ```{r} vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ``` ### Data frames Because it is *almost always* length-stable, `c()` combines data frames column wise (into a list): ```{r} df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ``` `vec_c()` is size-stable, which implies it will row-bind data frames: ```{r} vec_c(df1, df2) ``` ### Matrices and arrays The same reasoning applies to matrices: ```{r} m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ``` One difference is that `vec_c()` will "broadcast" a vector to match the dimensions of a matrix: ```{r} c(m, 1) vec_c(m, 1) ``` ### Implementation The basic implementation of `vec_c()` is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with `vec_init()`, and then insert each input into the correct place in the output. ```{r, eval = FALSE} vec_c <- function(...) { args <- compact(list2(...)) ptype <- vec_ptype_common(!!!args) if (is.null(ptype)) return(NULL) ns <- map_int(args, vec_size) out <- vec_init(ptype, sum(ns)) pos <- 1 for (i in seq_along(ns)) { n <- ns[[i]] x <- vec_cast(args[[i]], to = ptype) vec_slice(out, pos:(pos + n - 1)) <- x pos <- pos + n } out } ``` (The real `vec_c()` is a bit more complicated in order to handle inner and outer names). ## `ifelse()` One of the functions that motivate the development of vctrs is `ifelse()`. It has the surprising property that the result value is "A vector of the same length and attributes (including dimensions and class) as `test`". To me, it seems more reasonable for the type of the output to be controlled by the type of the `yes` and `no` arguments. In `dplyr::if_else()` I swung too far towards strictness: it throws an error if `yes` and `no` are not the same type. This is annoying in practice because it requires typed missing values (`NA_character_` etc), and because the checks are only on the class (not the full prototype), it's easy to create invalid output. I found it much easier to understand what `ifelse()` _should_ do once I internalised the ideas of type- and size-stability: * The first argument must be logical. * `vec_ptype(if_else(test, yes, no))` equals `vec_ptype_common(yes, no)`. Unlike `ifelse()` this implies that `if_else()` must always evaluate both `yes` and `no` in order to figure out the correct type. I think this is consistent with `&&` (scalar operation, short circuits) and `&` (vectorised, evaluates both sides). * `vec_size(if_else(test, yes, no))` equals `vec_size_common(test, yes, no)`. I think the output could have the same size as `test` (i.e., the same behaviour as `ifelse`), but I _think_ as a general rule that your inputs should either be mutually recycling or not. This leads to the following implementation: ```{r} if_else <- function(test, yes, no) { if (!is_logical(test)) { abort("`test` must be a logical vector.") } c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ``` By using `vec_size()` and `vec_slice()`, this definition of `if_else()` automatically works with data.frames and matrices: ```{r} if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) ``` vctrs/inst/doc/type-size.R0000644000176200001440000001254314532404537015225 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- library(vctrs) ## ----------------------------------------------------------------------------- vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ## ----------------------------------------------------------------------------- vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ## ----------------------------------------------------------------------------- vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ## ----------------------------------------------------------------------------- vec_ptype(factor("a")) ## ----------------------------------------------------------------------------- vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ## ----------------------------------------------------------------------------- vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ## ----------------------------------------------------------------------------- df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ## ----error = TRUE------------------------------------------------------------- vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ## ----------------------------------------------------------------------------- vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ## ----error = TRUE------------------------------------------------------------- vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ## ----------------------------------------------------------------------------- fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ## ----------------------------------------------------------------------------- vec_ptype_show(new_date(), new_datetime()) ## ----------------------------------------------------------------------------- vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ## ----------------------------------------------------------------------------- vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ## ----------------------------------------------------------------------------- vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ## ----------------------------------------------------------------------------- vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ## ----------------------------------------------------------------------------- str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ## ----error = TRUE------------------------------------------------------------- # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ## ----error = TRUE------------------------------------------------------------- vec_cast(c(1.5, 2), integer()) ## ----------------------------------------------------------------------------- allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ## ----------------------------------------------------------------------------- allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ## ----------------------------------------------------------------------------- x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ## ----------------------------------------------------------------------------- vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ## ----echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates an error"---- knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ## ----------------------------------------------------------------------------- vec_recycle(1:3, 3) vec_recycle(1, 10) ## ----------------------------------------------------------------------------- vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ## ----------------------------------------------------------------------------- rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ## ----------------------------------------------------------------------------- invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ## ----------------------------------------------------------------------------- length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ## ----error = TRUE------------------------------------------------------------- data.frame(1:2, 1:3) ## ----error = TRUE------------------------------------------------------------- # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) vctrs/inst/doc/type-size.html0000644000176200001440000021415514532404537015773 0ustar liggesusers Prototypes and sizes

Prototypes and sizes

Rather than using class() and length(), vctrs has notions of prototype (vec_ptype_show()) and size (vec_size()). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules.

Size and prototype are motivated by thinking about the optimal behaviour for c() and rbind(), particularly inspired by data frames with columns that are matrices or data frames.

library(vctrs)

Prototype

The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the class() of an object is inadequate for this purpose:

  • The class() doesn’t include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a POSIXct. You cannot combine two factors or two POSIXcts without thinking about the attributes.

  • The class() of a matrix is “matrix” and doesn’t include the type of the underlying vector or the dimensionality.

Instead, vctrs takes advantage of R’s vectorised nature and uses a prototype, a 0-observation slice of the vector (this is basically x[0] but with some subtleties we’ll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data.

Conveniently, you can create many prototypes using existing base functions (e.g, double() and factor(levels = c("a", "b"))). vctrs provides a few helpers (e.g. new_date(), new_datetime(), and new_duration()) where the equivalents in base R are missing.

Base prototypes

vec_ptype() creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides vec_ptype_show(), which prints the prototype in a friendly way (and returns nothing).

Using vec_ptype_show() allows us to see the prototypes base R classes:

  • Atomic vectors have no attributes and just display the underlying typeof():

    vec_ptype_show(FALSE)
    #> Prototype: logical
    vec_ptype_show(1L)
    #> Prototype: integer
    vec_ptype_show(2.5)
    #> Prototype: double
    vec_ptype_show("three")
    #> Prototype: character
    vec_ptype_show(list(1, 2, 3))
    #> Prototype: list
  • The prototype of matrices and arrays include the base type and the dimensions after the first:

    vec_ptype_show(array(logical(), c(2, 3)))
    #> Prototype: logical[,3]
    vec_ptype_show(array(integer(), c(2, 3, 4)))
    #> Prototype: integer[,3,4]
    vec_ptype_show(array(character(), c(2, 3, 4, 5)))
    #> Prototype: character[,3,4,5]
  • The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it’s highly likely that their levels are also equal.

    vec_ptype_show(factor("a"))
    #> Prototype: factor<4d52a>
    vec_ptype_show(ordered("b"))
    #> Prototype: ordered<9b7e3>

    While vec_ptype_show() prints only the hash, the prototype object itself does contain all levels:

    vec_ptype(factor("a"))
    #> factor()
    #> Levels: a
  • Base R has three key date time classes: dates, date-times (POSIXct), and durations (difftime). Date-times have a timezone, and durations have a unit.

    vec_ptype_show(Sys.Date())
    #> Prototype: date
    vec_ptype_show(Sys.time())
    #> Prototype: datetime<local>
    vec_ptype_show(as.difftime(10, units = "mins"))
    #> Prototype: duration<mins>
  • Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column:

    vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x"))
    #> Prototype: data.frame<
    #>   a: logical
    #>   b: integer
    #>   c: double
    #>   d: character
    #> >

    Data frames can have columns that are themselves data frames, making this a “recursive” type:

    df <- data.frame(x = FALSE)
    df$y <- data.frame(a = 1L, b = 2.5)
    vec_ptype_show(df)
    #> Prototype: data.frame<
    #>   x: logical
    #>   y: 
    #>     data.frame<
    #>       a: integer
    #>       b: double
    #>     >
    #> >

Coercing to common type

It’s often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via vec_ptype_common(). vec_ptype_common() possesses the following invariants:

  • class(vec_ptype_common(x, y)) equals class(vec_ptype_common(y, x)).

  • class(vec_ptype_common(x, vec_ptype_common(y, z)) equals class(vec_ptype_common(vec_ptype_common(x, y), z)).

  • vec_ptype_common(x, NULL) == vec_ptype(x).

i.e., vec_ptype_common() is both commutative and associative (with respect to class) and has an identity element, NULL; i.e., it’s a commutative monoid. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects.

Like with vec_ptype(), the easiest way to explore vec_ptype_common() is with vec_ptype_show(): when given multiple inputs, it will print their common prototype. (In other words: program with vec_ptype_common() but play with vec_ptype_show().)

  • The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically:

    vec_ptype_show(logical(), integer(), double())
    #> Prototype: <double>
    #> 0. (           , <logical> ) = <logical>
    #> 1. ( <logical> , <integer> ) = <integer>
    #> 2. ( <integer> , <double>  ) = <double>
    
    vec_ptype_show(logical(), character())
    #> Error in `vec_ptype_show()`:
    #> ! Can't combine `out_types[[i - 1]]` <logical> and `in_types[[i]]` <character>.
  • Matrices and arrays are automatically broadcast to higher dimensions:

    vec_ptype_show(
      array(1, c(0, 1)), 
      array(1, c(0, 2))
    )
    #> Prototype: <double[,2]>
    #> 0. (              , <double[,1]> ) = <double[,1]>
    #> 1. ( <double[,1]> , <double[,2]> ) = <double[,2]>
    
    vec_ptype_show(
      array(1, c(0, 1)), 
      array(1, c(0, 3)),
      array(1, c(0, 3, 4)),
      array(1, c(0, 3, 4, 5))
    )
    #> Prototype: <double[,3,4,5]>
    #> 0. (                , <double[,1]>     ) = <double[,1]>    
    #> 1. ( <double[,1]>   , <double[,3]>     ) = <double[,3]>    
    #> 2. ( <double[,3]>   , <double[,3,4]>   ) = <double[,3,4]>  
    #> 3. ( <double[,3,4]> , <double[,3,4,5]> ) = <double[,3,4,5]>

    Provided that the dimensions follow the vctrs recycling rules:

    vec_ptype_show(
      array(1, c(0, 2)), 
      array(1, c(0, 3))
    )
    #> Error:
    #> ! Can't combine `out_types[[i - 1]]` <double[,2]> and `in_types[[i]]` <double[,3]>.
    #> ✖ Incompatible sizes 2 and 3 along axis 2.
  • Factors combine levels in the order in which they appear.

    fa <- factor("a")
    fb <- factor("b")
    
    levels(vec_ptype_common(fa, fb))
    #> [1] "a" "b"
    levels(vec_ptype_common(fb, fa))
    #> [1] "b" "a"
  • Combining a date and date-time yields a date-time:

    vec_ptype_show(new_date(), new_datetime())
    #> Prototype: <datetime<local>>
    #> 0. (        , <date>            ) = <date>           
    #> 1. ( <date> , <datetime<local>> ) = <datetime<local>>

    When combining two date times, the timezone is taken from the first input:

    vec_ptype_show(
      new_datetime(tzone = "US/Central"), 
      new_datetime(tzone = "Pacific/Auckland")
    )
    #> Prototype: <datetime<US/Central>>
    #> 0. (                        , <datetime<US/Central>>       ) = <datetime<US/Central>>
    #> 1. ( <datetime<US/Central>> , <datetime<Pacific/Auckland>> ) = <datetime<US/Central>>

    Unless it’s the local timezone, in which case any explicit time zone will win:

    vec_ptype_show(
      new_datetime(tzone = ""), 
      new_datetime(tzone = ""), 
      new_datetime(tzone = "Pacific/Auckland")
    )
    #> Prototype: <datetime<Pacific/Auckland>>
    #> 0. (                   , <datetime<local>>            ) = <datetime<local>>           
    #> 1. ( <datetime<local>> , <datetime<local>>            ) = <datetime<local>>           
    #> 2. ( <datetime<local>> , <datetime<Pacific/Auckland>> ) = <datetime<Pacific/Auckland>>
  • The common type of two data frames is the common type of each column that occurs in both data frames:

    vec_ptype_show(
      data.frame(x = FALSE), 
      data.frame(x = 1L),
      data.frame(x = 2.5)
    )
    #> Prototype: <data.frame<x:double>>
    #> 0. (                         , <data.frame<x:logical>> ) = <data.frame<x:logical>>
    #> 1. ( <data.frame<x:logical>> , <data.frame<x:integer>> ) = <data.frame<x:integer>>
    #> 2. ( <data.frame<x:integer>> , <data.frame<x:double>>  ) = <data.frame<x:double>>

    And the union of the columns that only occur in one:

    vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1))
    #> Prototype: <data.frame<
    #>   x: double
    #>   y: double
    #>   z: double
    #> >>
    #> 0. ┌              , <data.frame< ┐ = <data.frame<
    #>    │                  x: double  │     x: double 
    #>    │                  y: double  │     y: double 
    #>    └                >>           ┘   >>          
    #> 1. ┌ <data.frame< , <data.frame< ┐ = <data.frame<
    #>    │   x: double      y: double  │     x: double 
    #>    │   y: double      z: double  │     y: double 
    #>    │ >>             >>           │     z: double 
    #>    └                             ┘   >>

    Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled.

Casting to specified type

vec_ptype_common() finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That’s the job of vec_cast_common():

str(vec_cast_common(
  FALSE, 
  1:5, 
  2.5
))
#> List of 3
#>  $ : num 0
#>  $ : num [1:5] 1 2 3 4 5
#>  $ : num 2.5

str(vec_cast_common(
  factor("x"), 
  factor("y")
))
#> List of 2
#>  $ : Factor w/ 2 levels "x","y": 1
#>  $ : Factor w/ 2 levels "x","y": 2

str(vec_cast_common(
  data.frame(x = 1),
  data.frame(y = 1:2)
))
#> List of 2
#>  $ :'data.frame':    1 obs. of  2 variables:
#>   ..$ x: num 1
#>   ..$ y: int NA
#>  $ :'data.frame':    2 obs. of  2 variables:
#>   ..$ x: num [1:2] NA NA
#>   ..$ y: int [1:2] 1 2

Alternatively, you can cast to a specific prototype using vec_cast():

# Cast succeeds
vec_cast(c(1, 2), integer())
#> [1] 1 2

# Cast fails
vec_cast(c(1.5, 2.5), factor("a"))
#> Error:
#> ! Can't convert `c(1.5, 2.5)` <double> to <factor<4d52a>>.

If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error.

vec_cast(c(1.5, 2), integer())
#> Error:
#> ! Can't convert from `c(1.5, 2)` <double> to <integer> due to loss of precision.
#> • Locations: 1

You can suppress the lossy cast errors with allow_lossy_cast():

allow_lossy_cast(
  vec_cast(c(1.5, 2), integer())
)
#> [1] 1 2

This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed:

allow_lossy_cast(
  vec_cast(c(1.5, 2), integer()),
  x_ptype = double(),
  to_ptype = integer()
)
#> [1] 1 2

The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound.

Size

vec_size() was motivated by the need to have an invariant that describes the number of “observations” in a data structure. This is particularly important for data frames, as it’s useful to have some function such that f(data.frame(x)) equals f(x). No base function has this property:

  • length(data.frame(x)) equals 1 because the length of a data frame is the number of columns.

  • nrow(data.frame(x)) does not equal nrow(x) because nrow() of a vector is NULL.

  • NROW(data.frame(x)) equals NROW(x) for vector x, so is almost what we want. But because NROW() is defined in terms of length(), it returns a value for every object, even types that can’t go in a data frame, e.g. data.frame(mean) errors even though NROW(mean) is 1.

We define vec_size() as follows:

  • It is the length of 1d vectors.
  • It is the number of rows of data frames, matrices, and arrays.
  • It throws error for non vectors.

Given vec_size(), we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns.

Slicing

vec_slice() is to vec_size() as [ is to length(); i.e., it allows you to select observations regardless of the dimensionality of the underlying object. vec_slice(x, i) is equivalent to:

  • x[i] when x is a vector.
  • x[i, , drop = FALSE] when x is a data frame.
  • x[i, , , drop = FALSE] when x is a 3d array.
x <- sample(1:10)
df <- data.frame(x = x)

vec_slice(x, 5:6)
#> [1] 8 2
vec_slice(df, 5:6)
#>   x
#> 1 8
#> 2 2

vec_slice(data.frame(x), i) equals data.frame(vec_slice(x, i)) (modulo variable and row names).

Prototypes are generated with vec_slice(x, 0L); given a prototype, you can initialize a vector of given size (filled with NAs) with vec_init().

Common sizes: recycling rules

Closely related to the definition of size are the recycling rules. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in vec_size_common(), which gives the common size of a set of vectors:

vec_size_common(1:3, 1:3, 1:3)
#> [1] 3
vec_size_common(1:10, 1)
#> [1] 10
vec_size_common(integer(), 1)
#> [1] 0

vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like dest == c("IAH", "HOU")), at the cost of occasionally requiring an explicit calls to rep().

Summary of vctrs recycling rules. X indicates an error

Summary of vctrs recycling rules. X indicates an error

You can apply the recycling rules in two ways:

  • If you have a vector and desired size, use vec_recycle():

    vec_recycle(1:3, 3)
    #> [1] 1 2 3
    vec_recycle(1, 10)
    #>  [1] 1 1 1 1 1 1 1 1 1 1
  • If you have multiple vectors and you want to recycle them to the same size, use vec_recycle_common():

    vec_recycle_common(1:3, 1:3)
    #> [[1]]
    #> [1] 1 2 3
    #> 
    #> [[2]]
    #> [1] 1 2 3
    vec_recycle_common(1:10, 1)
    #> [[1]]
    #>  [1]  1  2  3  4  5  6  7  8  9 10
    #> 
    #> [[2]]
    #>  [1] 1 1 1 1 1 1 1 1 1 1

Appendix: recycling in base R

The recycling rules in base R are described in The R Language Definition but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions.

Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer:

rep(1, 6) + 1
#> [1] 2 2 2 2 2 2
rep(1, 6) + 1:2
#> [1] 2 3 2 3 2 3
rep(1, 6) + 1:3
#> [1] 2 3 4 2 3 4

If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning:

invisible(pmax(1:2, 1:3))
#> Warning in pmax(1:2, 1:3): an argument will be fractionally recycled
invisible(1:2 + 1:3)
#> Warning in 1:2 + 1:3: longer object length is not a multiple of shorter object
#> length
invisible(cbind(1:2, 1:3))
#> Warning in cbind(1:2, 1:3): number of rows of result is not a multiple of
#> vector length (arg 1)

But some functions recycle silently:

length(atan2(1:3, 1:2))
#> [1] 3
length(paste(1:3, 1:2))
#> [1] 3
length(ifelse(1:3, 1:2, 1:2))
#> [1] 3

And data.frame() throws an error:

data.frame(1:2, 1:3)
#> Error in data.frame(1:2, 1:3): arguments imply differing number of rows: 2, 3

The R language definition states that “any arithmetic operation involving a zero-length vector has a zero-length result”. But outside of arithmetic, this rule is not consistently followed:

# length-0 output
1:2 + integer()
#> integer(0)
atan2(1:2, integer())
#> numeric(0)
pmax(1:2, integer())
#> integer(0)

# dropped
cbind(1:2, integer())
#>      [,1]
#> [1,]    1
#> [2,]    2

# recycled to length of first
ifelse(rep(TRUE, 4), integer(), character())
#> [1] NA NA NA NA

# preserved-ish
paste(1:2, integer())
#> [1] "1 " "2 "

# Errors
data.frame(1:2, integer())
#> Error in data.frame(1:2, integer()): arguments imply differing number of rows: 2, 0
vctrs/inst/doc/pillar.html0000644000176200001440000012432714532404536015325 0ustar liggesusers Printing vectors nicely in tibbles

Printing vectors nicely in tibbles

Kirill Müller, Hadley Wickham

You can get basic control over how a vector is printed in a tibble by providing a format() method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics:

  • vctrs::vec_ptype_abbr() determines what goes into the column header.
  • pillar::pillar_shaft() determines what goes into the body, or the shaft, of the column.

Technically a pillar is composed of a shaft (decorated with an ornament), with a capital above and a base below. Multiple pillars form a colonnade, which can be stacked in multiple tiers. This is the motivation behind the names in our API.

This short vignette shows the basics of column styling using a "latlon" vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you’ll need to create documentation and the NAMESPACE file. In this vignette, we’ll attach pillar and vctrs:

library(vctrs)
library(pillar)

You don’t need to do this in a package. Instead, you’ll need to import the packages by then to the Imports: section of your DESCRIPTION. The following helper does this for you:

usethis::use_package("vctrs")
usethis::use_package("pillar")

Prerequisites

To illustrate the basic ideas we’re going to create a "latlon" class that encodes geographic coordinates in a record. We’ll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using vctrs_rcrd(), we already get the infrastructure to make this class fully compatible with data frames for free. See vignette("s3-vector", package = "vctrs") for details on the record data type.

#' @export
latlon <- function(lat, lon) {
  new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon")
}

#' @export
format.earth_latlon <- function(x, ..., formatter = deg_min) {
  x_valid <- which(!is.na(x))

  lat <- field(x, "lat")[x_valid]
  lon <- field(x, "lon")[x_valid]

  ret <- rep(NA_character_, vec_size(x))
  ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon"))
  # It's important to keep NA in the vector!
  ret
}

deg_min <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- trunc(x)
  x <- x - deg
  min <- round(x * 60)

  # Ensure the columns are always the same width so they line up nicely
  ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]]))
  format(ret, justify = "right")
}

latlon(c(32.71, 2.95), c(-117.17, 1.67))
#> <earth_latlon[2]>
#> [1] 32°43'N 117°10'W  2°57'N   1°40'E

Using in a tibble

Columns of this class can be used in a tibble right away because we’ve made a class using the vctrs infrastructure and have provided a format() method:

library(tibble)
#> 
#> Attaching package: 'tibble'
#> The following object is masked from 'package:vctrs':
#> 
#>     data_frame

loc <- latlon(
  c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA),
  c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA)
)

data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc)

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>       <erth_ltl>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

This output is ok, but we could improve it by:

  1. Using a more description type abbreviation than <erth_ltl>.

  2. Using a dash of colour to highlight the most important parts of the value.

  3. Providing a narrower view when horizontal space is at a premium.

The following sections show how to enhance the rendering.

Fixing the data type

Instead of <erth_ltl> we’d prefer to use <latlon>. We can do that by implementing the vec_ptype_abbr() method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that’s under 6 characters.

#' @export
vec_ptype_abbr.earth_latlon <- function(x) {
  "latlon"
}

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

Custom rendering

The format() method is used by default for rendering. For custom formatting you need to implement the pillar_shaft() method. This function should always return a pillar shaft object, created by new_pillar_shaft_simple() or similar. new_pillar_shaft_simple() accepts ANSI escape codes for colouring, and pillar includes some built in styles like style_subtle(). We can use subtle style for the degree and minute separators to make the data more obvious.

First we define a degree formatter that makes use of style_subtle():

deg_min_color <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- trunc(x)
  x <- x - deg
  rad <- round(x * 60)
  ret <- sprintf(
    "%d%s%.2d%s%s",
    deg,
    pillar::style_subtle("°"),
    rad,
    pillar::style_subtle("'"),
    pm[ifelse(sign >= 0, 1, 2)]
  )
  format(ret, justify = "right")
}

And then we pass that to our format() method:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  out <- format(x, formatter = deg_min_color)
  pillar::new_pillar_shaft_simple(out, align = "right")
}

Currently, ANSI escapes are not rendered in vignettes, so this result doesn’t look any different, but if you run the code yourself you’ll see an improved display.

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

As well as the functions in pillar, the cli package provides a variety of tools for styling text.

Truncation

Tibbles can automatically compacts columns when there’s no enough horizontal space to display everything:

print(data, width = 30)
#> # A tibble: 6 × 3
#>   venue  year              loc
#>   <chr> <int>         <latlon>
#> 1 rstu…  2017 28°20'N  81°33'W
#> 2 rstu…  2018 32°43'N 117°10'W
#> 3 rstu…  2019 30°16'N  97°44'W
#> 4 rstu…  2020 37°47'N 122°25'W
#> 5 rstu…  2021 28°30'N  81°24'W
#> 6 rstu…  2022               NA

Currently the latlon class isn’t ever compacted because we haven’t specified a minimum width when constructing the shaft. Let’s fix that and re-print the data:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  out <- format(x)
  pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10)
}

print(data, width = 30)
#> # A tibble: 6 × 3
#>   venue        year        loc
#>   <chr>       <int>   <latlon>
#> 1 rstudio::c…  2017 28°20'N  …
#> 2 rstudio::c…  2018 32°43'N 1…
#> 3 rstudio::c…  2019 30°16'N  …
#> 4 rstudio::c…  2020 37°47'N 1…
#> 5 rstudio::c…  2021 28°30'N  …
#> 6 rstudio::c…  2022         NA

Adaptive rendering

Truncation may be useful for character data, but for lat-lon data it’d be nicer to show full degrees and remove the minutes. We’ll first write a function that does this:

deg <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- round(x)

  ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)])
  format(ret, justify = "right")
}

Then use it as part of more sophisticated implementation of the pillar_shaft() method:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  deg <- format(x, formatter = deg)
  deg_min <- format(x)

  pillar::new_pillar_shaft(
    list(deg = deg, deg_min = deg_min),
    width = pillar::get_max_extent(deg_min),
    min_width = pillar::get_max_extent(deg),
    class = "pillar_shaft_latlon"
  )
}

Now the pillar_shaft() method returns an object of class "pillar_shaft_latlon" created by new_pillar_shaft(). This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (get_max_extent() is a helper that computes the maximum display width occupied by the values in a character vector.)

All that’s left to do is to implement a format() method for our new "pillar_shaft_latlon" class. This method will be called with a width argument, which then determines which of the formats to choose. The formatting of our choice is passed to the new_ornament() function:

#' @export
format.pillar_shaft_latlon <- function(x, width, ...) {
  if (get_max_extent(x$deg_min) <= width) {
    ornament <- x$deg_min
  } else {
    ornament <- x$deg
  }

  pillar::new_ornament(ornament, align = "right")
}

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA
print(data, width = 30)
#> # A tibble: 6 × 3
#>   venue        year        loc
#>   <chr>       <int>   <latlon>
#> 1 rstudio::c…  2017 28°N  82°W
#> 2 rstudio::c…  2018 33°N 117°W
#> 3 rstudio::c…  2019 30°N  98°W
#> 4 rstudio::c…  2020 38°N 122°W
#> 5 rstudio::c…  2021 28°N  81°W
#> 6 rstudio::c…  2022         NA

Testing

If you want to test the output of your code, you can compare it with a known state recorded in a text file. The testthat::expect_snapshot() function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won’t make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN.

Use this testthat expectation in one of your test files to create a snapshot test:

expect_snapshot(pillar_shaft(data$loc))

See https://testthat.r-lib.org/articles/snapshotting.html for more information.

vctrs/inst/doc/stability.html0000644000176200001440000014410114532404537016037 0ustar liggesusers Type and size stability

Type and size stability

This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the “shape” of the output you only need to know the “shape”s of the inputs.

This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can’t predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases.

library(vctrs)
library(rlang)
library(zeallot)

Definitions

We say a function is type-stable iff:

  1. You can predict the output type knowing only the input types.
  2. The order of arguments in … does not affect the output type.

Similarly, a function is size-stable iff:

  1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size.

Very few base R functions are size-stable, so I’ll also define a slightly weaker condition. I’ll call a function length-stable iff:

  1. You can predict the output length knowing only the input lengths, or there is a single numeric input that specifies the output length.

(But note that length-stable is not a particularly robust definition because length() returns a value for things that are not vectors.)

We’ll call functions that don’t obey these principles type-unstable and size-unstable respectively.

On top of type- and size-stability it’s also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions.

The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again.

Examples

To make these ideas concrete, let’s apply them to a few base functions:

  1. mean() is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error).

  2. Surprisingly, median() is type-unstable:

    vec_ptype_show(median(c(1L, 1L)))
    #> Prototype: double
    vec_ptype_show(median(c(1L, 1L, 1L)))
    #> Prototype: integer

    It is, however, size-stable, since it always returns a vector of length 1.

  3. sapply() is type-unstable because you can’t predict the output type only knowing the input types:

    vec_ptype_show(sapply(1L, function(x) c(x, x)))
    #> Prototype: integer[,1]
    vec_ptype_show(sapply(integer(), function(x) c(x, x)))
    #> Prototype: list

    It’s not quite size-stable; vec_size(sapply(x, f)) is vec_size(x) for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns).

  4. vapply() is a type-stable version of sapply() because vec_ptype_show(vapply(x, fn, template)) is always vec_ptype_show(template).
    It is size-unstable for the same reasons as sapply().

  5. c() is type-unstable because c(x, y) doesn’t always output the same type as c(y, x).

    vec_ptype_show(c(NA, Sys.Date()))
    #> Prototype: double
    vec_ptype_show(c(Sys.Date(), NA))
    #> Prototype: date

    c() is almost always length-stable because length(c(x, y)) almost always equals length(x) + length(y). One common source of instability here is dealing with non-vectors (see the later section “Non-vectors”):

    env <- new.env(parent = emptyenv())
    length(env)
    #> [1] 0
    length(mean)
    #> [1] 1
    length(c(env, mean))
    #> [1] 2
  6. paste(x1, x2) is length-stable because length(paste(x1, x2)) equals max(length(x1), length(x2)). However, it doesn’t follow the usual arithmetic recycling rules because paste(1:2, 1:3) doesn’t generate a warning.

  7. ifelse() is length-stable because length(ifelse(cond, true, false)) is always length(cond). ifelse() is type-unstable because the output type depends on the value of cond:

    vec_ptype_show(ifelse(NA, 1L, 1L))
    #> Prototype: logical
    vec_ptype_show(ifelse(FALSE, 1L, 1L))
    #> Prototype: integer
  8. read.csv(file) is type-unstable and size-unstable because, while you know it will return a data frame, you don’t know what columns it will return or how many rows it will have. Similarly, df[[i]] is not type-stable because the result depends on the value of i. There are many important functions that can not be made type-stable or size-stable!

With this understanding of type- and size-stability in hand, we’ll use them to analyse some base R functions in greater depth and then propose alternatives with better properties.

c() and vctrs::vec_c()

In this section we’ll compare and contrast c() and vec_c(). vec_c() is both type- and size-stable because it possesses the following invariants:

  • vec_ptype(vec_c(x, y)) equals vec_ptype_common(x, y).
  • vec_size(vec_c(x, y)) equals vec_size(x) + vec_size(y).

c() has another undesirable property in that it’s not consistent with unlist(); i.e., unlist(list(x, y)) does not always equal c(x, y); i.e., base R has multiple sets of type-coercion rules. I won’t consider this problem further here.

I have two goals here:

  • To fully document the quirks of c(), hence motivating the development of an alternative.

  • To discuss non-obvious consequences of the type- and size-stability above.

Atomic vectors

If we only consider atomic vectors, c() is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical.

c(FALSE, 1L, 2.5)
#> [1] 0.0 1.0 2.5

vec_c() obeys similar rules:

vec_c(FALSE, 1L, 2.5)
#> [1] 0.0 1.0 2.5

But it does not automatically coerce to character vectors or lists:

c(FALSE, "x")
#> [1] "FALSE" "x"
vec_c(FALSE, "x")
#> Error in `vec_c()`:
#> ! Can't combine `..1` <logical> and `..2` <character>.

c(FALSE, list(1))
#> [[1]]
#> [1] FALSE
#> 
#> [[2]]
#> [1] 1
vec_c(FALSE, list(1))
#> Error in `vec_c()`:
#> ! Can't combine `..1` <logical> and `..2` <list>.

Incompatible vectors and non-vectors

In general, most base methods do not throw an error:

c(10.5, factor("x"))
#> [1] 10.5  1.0

If the inputs aren’t vectors, c() automatically puts them in a list:

c(mean, globalenv())
#> [[1]]
#> function (x, ...) 
#> UseMethod("mean")
#> <bytecode: 0x103a05448>
#> <environment: namespace:base>
#> 
#> [[2]]
#> <environment: R_GlobalEnv>

For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list:

c(getRversion(), "x")
#> Error: invalid version specification 'x'

c("x", getRversion())
#> [[1]]
#> [1] "x"
#> 
#> [[2]]
#> [1] 4 3 1

vec_c() throws an error if the inputs are not vectors or not automatically coercible:

vec_c(mean, globalenv())
#> Error in `vec_c()`:
#> ! `..1` must be a vector, not a function.

vec_c(Sys.Date(), factor("x"), "x")
#> Error in `vec_c()`:
#> ! Can't combine `..1` <date> and `..2` <factor<bf275>>.

Factors

Combining two factors returns an integer vector:

fa <- factor("a")
fb <- factor("b")

c(fa, fb)
#> [1] a b
#> Levels: a b

(This is documented in c() but is still undesirable.)

vec_c() returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with dplyr::bind_rows(), which is stricter and is a common source of user difficulty.)

vec_c(fa, fb)
#> [1] a b
#> Levels: a b
vec_c(fb, fa)
#> [1] b a
#> Levels: b a

Date-times

c() strips the time zone associated with date-times:

datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland")
c(datetime_nz)
#> [1] "2020-01-01 09:00:00 NZDT"

This behaviour is documented in ?DateTimeClasses but is the source of considerable user pain.

vec_c() preserves time zones:

vec_c(datetime_nz)
#> [1] "2020-01-01 09:00:00 NZDT"

What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there’s no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone:

datetime_local <- as.POSIXct("2020-01-01 09:00")
datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central")

vec_c(datetime_local, datetime_houston, datetime_nz)
#> [1] "2020-01-01 08:00:00 CST" "2020-01-01 09:00:00 CST"
#> [3] "2019-12-31 14:00:00 CST"
vec_c(datetime_houston, datetime_nz)
#> [1] "2020-01-01 09:00:00 CST" "2019-12-31 14:00:00 CST"
vec_c(datetime_nz, datetime_houston)
#> [1] "2020-01-01 09:00:00 NZDT" "2020-01-02 04:00:00 NZDT"

Dates and date-times

Combining dates and date-times with c() gives silently incorrect results:

date <- as.Date("2020-01-01")
datetime <- as.POSIXct("2020-01-01 09:00")

c(date, datetime)
#> [1] "2020-01-01" "2020-01-01"
c(datetime, date)
#> [1] "2020-01-01 09:00:00 EST" "2019-12-31 19:00:00 EST"

This behaviour arises because neither c.Date() nor c.POSIXct() check that all inputs are of the same type.

vec_c() uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time).

vec_c(date, datetime)
#> [1] "2020-01-01 00:00:00 EST" "2020-01-01 09:00:00 EST"
vec_c(date, datetime_nz)
#> [1] "2020-01-01 00:00:00 NZDT" "2020-01-01 09:00:00 NZDT"

Missing values

If a missing value comes at the beginning of the inputs, c() falls back to the internal behaviour, which strips all attributes:

c(NA, fa)
#> [1] NA  1
c(NA, date)
#> [1]    NA 18262
c(NA, datetime)
#> [1]         NA 1577887200

vec_c() takes a different approach treating a logical vector consisting only of NA as the unspecified() class which can be converted to any other 1d type:

vec_c(NA, fa)
#> [1] <NA> a   
#> Levels: a
vec_c(NA, date)
#> [1] NA           "2020-01-01"
vec_c(NA, datetime)
#> [1] NA                        "2020-01-01 09:00:00 EST"

Data frames

Because it is almost always length-stable, c() combines data frames column wise (into a list):

df1 <- data.frame(x = 1)
df2 <- data.frame(x = 2)
str(c(df1, df1))
#> List of 2
#>  $ x: num 1
#>  $ x: num 1

vec_c() is size-stable, which implies it will row-bind data frames:

vec_c(df1, df2)
#>   x
#> 1 1
#> 2 2

Matrices and arrays

The same reasoning applies to matrices:

m <- matrix(1:4, nrow = 2)
c(m, m)
#> [1] 1 2 3 4 1 2 3 4
vec_c(m, m)
#>      [,1] [,2]
#> [1,]    1    3
#> [2,]    2    4
#> [3,]    1    3
#> [4,]    2    4

One difference is that vec_c() will “broadcast” a vector to match the dimensions of a matrix:

c(m, 1)
#> [1] 1 2 3 4 1

vec_c(m, 1)
#>      [,1] [,2]
#> [1,]    1    3
#> [2,]    2    4
#> [3,]    1    1

Implementation

The basic implementation of vec_c() is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with vec_init(), and then insert each input into the correct place in the output.

vec_c <- function(...) {
  args <- compact(list2(...))

  ptype <- vec_ptype_common(!!!args)
  if (is.null(ptype))
    return(NULL)

  ns <- map_int(args, vec_size)
  out <- vec_init(ptype, sum(ns))

  pos <- 1
  for (i in seq_along(ns)) {
    n <- ns[[i]]
    
    x <- vec_cast(args[[i]], to = ptype)
    vec_slice(out, pos:(pos + n - 1)) <- x
    pos <- pos + n
  }

  out
}

(The real vec_c() is a bit more complicated in order to handle inner and outer names).

ifelse()

One of the functions that motivate the development of vctrs is ifelse(). It has the surprising property that the result value is “A vector of the same length and attributes (including dimensions and class) as test”. To me, it seems more reasonable for the type of the output to be controlled by the type of the yes and no arguments.

In dplyr::if_else() I swung too far towards strictness: it throws an error if yes and no are not the same type. This is annoying in practice because it requires typed missing values (NA_character_ etc), and because the checks are only on the class (not the full prototype), it’s easy to create invalid output.

I found it much easier to understand what ifelse() should do once I internalised the ideas of type- and size-stability:

  • The first argument must be logical.

  • vec_ptype(if_else(test, yes, no)) equals vec_ptype_common(yes, no). Unlike ifelse() this implies that if_else() must always evaluate both yes and no in order to figure out the correct type. I think this is consistent with && (scalar operation, short circuits) and & (vectorised, evaluates both sides).

  • vec_size(if_else(test, yes, no)) equals vec_size_common(test, yes, no). I think the output could have the same size as test (i.e., the same behaviour as ifelse), but I think as a general rule that your inputs should either be mutually recycling or not.

This leads to the following implementation:

if_else <- function(test, yes, no) {
  if (!is_logical(test)) {
    abort("`test` must be a logical vector.")
  }
  
  c(yes, no) %<-% vec_cast_common(yes, no)
  c(test, yes, no) %<-% vec_recycle_common(test, yes, no)

  out <- vec_init(yes, vec_size(yes))
  vec_slice(out, test) <- vec_slice(yes, test)
  vec_slice(out, !test) <- vec_slice(no, !test)

  out
}

x <- c(NA, 1:4)
if_else(x > 2, "small", "big")
#> [1] NA      "big"   "big"   "small" "small"
if_else(x > 2, factor("small"), factor("big"))
#> [1] <NA>  big   big   small small
#> Levels: small big
if_else(x > 2, Sys.Date(), Sys.Date() + 7)
#> [1] NA           "2023-12-08" "2023-12-08" "2023-12-01" "2023-12-01"

By using vec_size() and vec_slice(), this definition of if_else() automatically works with data.frames and matrices:

if_else(x > 2, data.frame(x = 1), data.frame(y = 2))
#>    x  y
#> 1 NA NA
#> 2 NA  2
#> 3 NA  2
#> 4  1 NA
#> 5  1 NA

if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30))
#>      [,1] [,2]
#> [1,]   NA   NA
#> [2,]   30   30
#> [3,]   30   30
#> [4,]    4    9
#> [5,]    5   10
vctrs/inst/include/0000755000176200001440000000000014373202700014011 5ustar liggesusersvctrs/inst/include/vctrs.c0000644000176200001440000000107214373202700015316 0ustar liggesusers#include "vctrs.h" // Maturing bool (*obj_is_vector)(SEXP) = NULL; R_len_t (*short_vec_size)(SEXP) = NULL; SEXP (*short_vec_recycle)(SEXP, R_len_t) = NULL; // Deprecated bool (*vec_is_vector)(SEXP) = NULL; void vctrs_init_api(void) { obj_is_vector = (bool (*)(SEXP)) R_GetCCallable("vctrs", "obj_is_vector"); short_vec_size = (R_len_t (*)(SEXP)) R_GetCCallable("vctrs", "short_vec_size"); short_vec_recycle = (SEXP (*)(SEXP, R_len_t)) R_GetCCallable("vctrs", "short_vec_recycle"); vec_is_vector = (bool (*)(SEXP)) R_GetCCallable("vctrs", "vec_is_vector"); } vctrs/inst/include/vctrs.h0000644000176200001440000000061714373202700015327 0ustar liggesusers#ifndef VCTRS_H #define VCTRS_H #include #include #include // Maturing extern bool (*obj_is_vector)(SEXP); extern R_len_t (*short_vec_size)(SEXP); extern SEXP (*short_vec_recycle)(SEXP, R_len_t); // Deprecated in favor of `obj_is_vector()` // version: 0.5.3 // date: 2023-02-15 extern bool (*vec_is_vector)(SEXP); void vctrs_init_api(void); #endif vctrs/inst/WORDLIST0000644000176200001440000000001313347722504013563 0ustar liggesusersvectorised